commit 70227efa08d77f65b1eea069ae93bd0ba0bf1993
Author: Damian Johnson <atagar(a)torproject.org>
Date: Fri Oct 19 09:41:34 2012 -0700
Utils for human readable time and size metrics
Snagging the utilities from arm's src/util/uiTools.py for making human-readable
labels of time and size metrics. They'll be handy not only for stem's users,
but also for our exception messages, and I want to use them in our tutorials.
I've simplified these functions a bit and added unit tests for the pydoc
examples. More tests are needed, though...
---
run_tests.py | 2 +
stem/util/enum.py | 24 +----
stem/util/str_tools.py | 244 +++++++++++++++++++++++++++++++++++++++++++
stem/util/term.py | 3 +-
test/unit/util/__init__.py | 1 +
test/unit/util/enum.py | 16 ---
test/unit/util/str_tools.py | 53 +++++++++
7 files changed, 304 insertions(+), 39 deletions(-)
diff --git a/run_tests.py b/run_tests.py
index c891bf6..8cd393c 100755
--- a/run_tests.py
+++ b/run_tests.py
@@ -37,6 +37,7 @@ import test.unit.util.conf
import test.unit.util.connection
import test.unit.util.enum
import test.unit.util.proc
+import test.unit.util.str_tools
import test.unit.util.system
import test.unit.util.tor_tools
import test.unit.exit_policy.policy
@@ -114,6 +115,7 @@ UNIT_TESTS = (
test.unit.util.connection.TestConnection,
test.unit.util.conf.TestConf,
test.unit.util.proc.TestProc,
+ test.unit.util.str_tools.TestStrTools,
test.unit.util.system.TestSystem,
test.unit.util.tor_tools.TestTorTools,
test.unit.descriptor.export.TestExport,
diff --git a/stem/util/enum.py b/stem/util/enum.py
index 50f6728..72c4443 100644
--- a/stem/util/enum.py
+++ b/stem/util/enum.py
@@ -24,7 +24,6 @@ with overwritten string counterparts:
::
- to_camel_case - converts a string to camel case
UppercaseEnum - Provides an enum instance with capitalized values.
Enum - Provides a basic, ordered enumeration.
|- keys - string representation of our enum keys
@@ -35,26 +34,7 @@ with overwritten string counterparts:
+- __iter__ - iterator over our enum keys
"""
-def to_camel_case(label, word_divider = " "):
- """
- Converts the given string to camel case, ie:
-
- ::
-
- >>> to_camel_case("I_LIKE_PEPPERJACK!")
- 'I Like Pepperjack!'
-
- :param str label: input string to be converted
- :param str word_divider: string used to replace underscores
- """
-
- words = []
- for entry in label.split("_"):
- if len(entry) == 0: words.append("")
- elif len(entry) == 1: words.append(entry.upper())
- else: words.append(entry[0].upper() + entry[1:].lower())
-
- return word_divider.join(words)
+import stem.util.str_tools
def UppercaseEnum(*args):
"""
@@ -86,7 +66,7 @@ class Enum(object):
for entry in args:
if isinstance(entry, str):
- key, val = entry, to_camel_case(entry)
+ key, val = entry, stem.util.str_tools.to_camel_case(entry)
elif isinstance(entry, tuple) and len(entry) == 2:
key, val = entry
else: raise ValueError("Unrecognized input: %s" % args)
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
new file mode 100644
index 0000000..b874425
--- /dev/null
+++ b/stem/util/str_tools.py
@@ -0,0 +1,244 @@
+"""
+Toolkit for various string activity.
+
+**Module Overview:**
+
+::
+
+ to_camel_case - converts a string to camel case
+ get_size_label - human readable label for a number of bytes
+ get_time_label - human readable label for a number of seconds
+ get_time_labels - human readable labels for each time unit
+ get_short_time_label - condensed time label output
+ parse_short_time_label - seconds represented by a short time label
+"""
+
+# Label conversion tuples of the form...
+#   (count per unit, short label, long label)
+#
+# The count is how many base units (bytes for both size tables, seconds for
+# TIME_UNITS) make up one of that unit. Each table is ordered from its largest
+# unit to its smallest: _get_label() uses the first entry the count reaches,
+# and falls back to the last entry for counts below one.
+
+# Bit units, expressed in bytes. A bit is 0.125 bytes and each larger unit is
+# 1024 times the one below it.
+SIZE_UNITS_BITS = (
+ (140737488355328.0, " Pb", " Petabit"),
+ (137438953472.0, " Tb", " Terabit"),
+ (134217728.0, " Gb", " Gigabit"),
+ (131072.0, " Mb", " Megabit"),
+ (128.0, " Kb", " Kilobit"),
+ (0.125, " b", " Bit"),
+)
+
+# Byte units, each 1024 times the one below it.
+SIZE_UNITS_BYTES = (
+ (1125899906842624.0, " PB", " Petabyte"),
+ (1099511627776.0, " TB", " Terabyte"),
+ (1073741824.0, " GB", " Gigabyte"),
+ (1048576.0, " MB", " Megabyte"),
+ (1024.0, " KB", " Kilobyte"),
+ (1.0, " B", " Byte"),
+)
+
+# Time units in seconds. The short labels have no leading space ("2h") while
+# the long ones do ("2 hours").
+TIME_UNITS = (
+ (86400.0, "d", " day"),
+ (3600.0, "h", " hour"),
+ (60.0, "m", " minute"),
+ (1.0, "s", " second"),
+)
+
+def to_camel_case(label, word_divider = " "):
+  """
+  Capitalizes each underscore separated word of a string and joins the words
+  back together with the given divider, ie:
+
+  ::
+
+    >>> to_camel_case("I_LIKE_PEPPERJACK!")
+    'I Like Pepperjack!'
+
+  :param str label: input string to be converted
+  :param str word_divider: string used to replace underscores
+
+  :returns: **str** with the first character of every word uppercased and the
+    remainder of the word lowercased
+  """
+
+  def _capitalize(word):
+    # Slicing rather than indexing so empty words (from leading, trailing or
+    # doubled underscores) pass through as empty strings.
+    return word[:1].upper() + word[1:].lower()
+
+  return word_divider.join([_capitalize(word) for word in label.split("_")])
+
+def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
+  """
+  Converts a number of bytes into a human readable label in its most
+  significant units. For instance, 7500 bytes would return "7 KB". If the
+  is_long option is used this expands unit labels to be the properly pluralized
+  full word (for instance 'Kilobytes' rather than 'KB'). Units go up through
+  petabytes.
+
+  The input is always a count of bytes. The is_bytes flag only picks whether
+  the label is expressed in byte or bit units.
+
+  ::
+
+    >>> get_size_label(2000000)
+    '1 MB'
+
+    >>> get_size_label(1050, 2)
+    '1.02 KB'
+
+    >>> get_size_label(1050, 3, True)
+    '1.025 Kilobytes'
+
+  :param int byte_count: number of bytes to be converted
+  :param int decimal: number of decimal digits to be included
+  :param bool is_long: expands units label
+  :param bool is_bytes: provides units in bytes if true, bits otherwise
+
+  :returns: **str** with the size label, rounded down to the visible digits
+  """
+
+  size_units = SIZE_UNITS_BYTES if is_bytes else SIZE_UNITS_BITS
+  return _get_label(size_units, byte_count, decimal, is_long)
+
+def get_time_label(seconds, decimal = 0, is_long = False):
+  """
+  Converts seconds into a time label truncated to its most significant units.
+  For instance, 7500 seconds would return "2h". Units go up through days.
+
+  By default labels use a single character for the unit. With the is_long
+  option the unit is instead the full word, separated from the count by a
+  space and properly pluralized. For instance, "4h" would be "4 hours" and
+  "1m" would become "1 minute".
+
+  ::
+
+    >>> get_time_label(10000)
+    '2h'
+
+    >>> get_time_label(61, 1, True)
+    '1.0 minute'
+
+    >>> get_time_label(61, 2, True)
+    '1.01 minutes'
+
+  :param int seconds: number of seconds to be converted
+  :param int decimal: number of decimal digits to be included
+  :param bool is_long: expands units label
+
+  :returns: **str** with the time label, rounded down to the visible digits
+  """
+
+  return _get_label(
+    units = TIME_UNITS,
+    count = seconds,
+    decimal = decimal,
+    is_long = is_long,
+  )
+
+def get_time_labels(seconds, is_long = False):
+  """
+  Breaks a duration down into one label per time unit, ordered from the most
+  significant unit to the least. Units with a zero count are left out. For
+  example...
+
+  ::
+
+    >>> get_time_labels(400)
+    ['6m', '40s']
+
+    >>> get_time_labels(3640, True)
+    ['1 hour', '40 seconds']
+
+  :param int seconds: number of seconds to be converted
+  :param bool is_long: expands units label
+
+  :returns: **list** of str labels, empty if the input is under a second
+  """
+
+  labels, remaining = [], seconds
+
+  for unit in TIME_UNITS:
+    unit_size = unit[0]
+
+    if remaining >= unit_size:
+      # _get_label() reports the most significant unit of what's left. That is
+      # this unit, since the larger ones have already been taken out.
+      labels.append(_get_label(TIME_UNITS, remaining, 0, is_long))
+      remaining %= unit_size
+
+  return labels
+
+def get_short_time_label(seconds):
+  """
+  Provides a time in the following format:
+  [[dd-]hh:]mm:ss
+
+  Minutes and seconds are always present. Hours are included when non-zero or
+  when there are days, and days are only included when non-zero.
+
+  ::
+
+    >>> get_short_time_label(111)
+    '01:51'
+
+    >>> get_short_time_label(544100)
+    '6-07:08:20'
+
+  :param int seconds: number of seconds to be converted
+
+  :returns: **str** with the short time label
+
+  :raises: **ValueError** if the input is negative
+  """
+
+  # The modulo below would wrap a negative duration into a plausible looking
+  # but wrong label (for instance -5 becoming '23:59:55'), so reject it.
+  if seconds < 0:
+    raise ValueError("Input needs to be a non-negative number of seconds, got '%s'" % seconds)
+
+  time_comp = {}
+
+  for amount, _, label in TIME_UNITS:
+    # whole count of this unit, leaving the remainder for the smaller units
+    time_comp[label.strip()] = int(seconds / amount)
+    seconds %= amount
+
+  label = "%02i:%02i" % (time_comp["minute"], time_comp["second"])
+
+  if time_comp["day"]:
+    label = "%i-%02i:%s" % (time_comp["day"], time_comp["hour"], label)
+  elif time_comp["hour"]:
+    label = "%02i:%s" % (time_comp["hour"], label)
+
+  return label
+
+def parse_short_time_label(label):
+  """
+  Provides the number of seconds corresponding to the formatting used for the
+  cputime and etime fields of ps:
+  [[dd-]hh:]mm:ss or mm:ss.ss
+
+  Fractional seconds are truncated.
+
+  ::
+
+    >>> parse_short_time_label('01:51')
+    111
+
+    >>> parse_short_time_label('6-07:08:20')
+    544100
+
+  :param str label: time entry to be parsed
+
+  :returns: **int** with the number of seconds represented by the label
+
+  :raises: **ValueError** if input is malformed
+  """
+
+  days, hours, minutes, seconds = '0', '0', '0', '0'
+
+  # Parsing a copy so error messages can quote the caller's full input, day
+  # prefix included, rather than whatever remains after the split.
+  time_entry = label
+
+  if '-' in time_entry:
+    days, time_entry = time_entry.split('-', 1)
+
+  time_comp = time_entry.split(":")
+
+  if len(time_comp) == 3:
+    hours, minutes, seconds = time_comp
+  elif len(time_comp) == 2:
+    minutes, seconds = time_comp
+  else:
+    raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)
+
+  try:
+    # seconds go through float() since ps can report them as 'ss.ss'
+    time_sum = int(float(seconds))
+    time_sum += int(minutes) * 60
+    time_sum += int(hours) * 3600
+    time_sum += int(days) * 86400
+    return time_sum
+  except ValueError:
+    raise ValueError("Non-numeric value in time entry: %s" % label)
+
+def _get_label(units, count, decimal, is_long):
+  """
+  Provides label corresponding to units of the highest significance in the
+  provided set. This rounds down (ie, integer truncation after visible units).
+
+  :param tuple units: type of units to be used for conversion, containing
+    (count_per_unit, short_label, long_label) ordered from the largest unit to
+    the smallest
+  :param int count: number of base units being converted
+  :param int decimal: decimal precision of label
+  :param bool is_long: uses the long label if true, short label otherwise
+
+  :returns: **str** with the count in its most significant unit followed by
+    the unit's label
+  """
+
+  # format string for the requested number of digits, for instance '%.2f'
+  number_format = "%%.%if" % decimal
+
+  # Counts under one (zero and negatives included) are reported as-is in the
+  # smallest unit, with the long label always pluralized.
+  if count < 1:
+    smallest_unit = units[-1]
+    suffix = (smallest_unit[2] + "s") if is_long else smallest_unit[1]
+    return "%s%s" % (number_format % count, suffix)
+
+  for count_per_unit, short_label, long_label in units:
+    if count >= count_per_unit:
+      # '%f' rounds to nearest rather than down, so first drop whatever falls
+      # below the last visible digit. The count then divides evenly into the
+      # rounded down value.
+      count -= count % (count_per_unit / (10 ** decimal))
+      count_label = number_format % (count / count_per_unit)
+
+      if not is_long:
+        return count_label + short_label
+
+      # Pluralize only if the visible digits come to more than one of the
+      # unit. For instance 1.0003 shown with three decimals is singular. With
+      # no decimals the label is plural from two units upward.
+      if decimal > 0:
+        is_plural = count > count_per_unit
+      else:
+        is_plural = count >= count_per_unit * 2
+
+      return count_label + long_label + ("s" if is_plural else "")
+
diff --git a/stem/util/term.py b/stem/util/term.py
index fd4c22e..556b481 100644
--- a/stem/util/term.py
+++ b/stem/util/term.py
@@ -3,6 +3,7 @@ Utilities for working with the terminal.
"""
import stem.util.enum
+import stem.util.str_tools
TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE")
@@ -41,7 +42,7 @@ def format(msg, *attr):
encodings = []
for text_attr in attr:
- text_attr, encoding = stem.util.enum.to_camel_case(text_attr), None
+ text_attr, encoding = stem.util.str_tools.to_camel_case(text_attr), None
encoding = FG_ENCODING.get(text_attr, encoding)
encoding = BG_ENCODING.get(text_attr, encoding)
encoding = ATTR_ENCODING.get(text_attr, encoding)
diff --git a/test/unit/util/__init__.py b/test/unit/util/__init__.py
index d0eeda3..33b81d3 100644
--- a/test/unit/util/__init__.py
+++ b/test/unit/util/__init__.py
@@ -7,6 +7,7 @@ __all__ = [
"connection",
"enum",
"proc",
+ "str_tools",
"system",
"tor_tools",
]
diff --git a/test/unit/util/enum.py b/test/unit/util/enum.py
index 5a28f62..88469dd 100644
--- a/test/unit/util/enum.py
+++ b/test/unit/util/enum.py
@@ -6,22 +6,6 @@ import unittest
import stem.util.enum
class TestEnum(unittest.TestCase):
- def test_to_camel_case(self):
- """
- Checks the stem.util.enum.to_camel_case function.
- """
-
- # test the pydoc example
- self.assertEquals("I Like Pepperjack!", stem.util.enum.to_camel_case("I_LIKE_PEPPERJACK!"))
-
- # check a few edge cases
- self.assertEquals("", stem.util.enum.to_camel_case(""))
- self.assertEquals("Hello", stem.util.enum.to_camel_case("hello"))
- self.assertEquals("Hello", stem.util.enum.to_camel_case("HELLO"))
- self.assertEquals("Hello World", stem.util.enum.to_camel_case("hello__world"))
- self.assertEquals("Hello\tworld", stem.util.enum.to_camel_case("hello\tWORLD"))
- self.assertEquals("Hello\t\tWorld", stem.util.enum.to_camel_case("hello__world", "\t"))
-
def test_enum_examples(self):
"""
Checks that the pydoc examples are accurate.
diff --git a/test/unit/util/str_tools.py b/test/unit/util/str_tools.py
new file mode 100644
index 0000000..ce5539f
--- /dev/null
+++ b/test/unit/util/str_tools.py
@@ -0,0 +1,53 @@
+"""
+Unit tests for the stem.util.str_tools functions.
+"""
+
+import unittest
+from stem.util import str_tools
+
+class TestStrTools(unittest.TestCase):
+  """
+  Exercises the stem.util.str_tools helpers, primarily through their pydoc
+  examples.
+  """
+
+  def test_to_camel_case(self):
+    """
+    Checks the to_camel_case() function.
+    """
+
+    # test the pydoc example
+    self.assertEqual("I Like Pepperjack!", str_tools.to_camel_case("I_LIKE_PEPPERJACK!"))
+
+    # check a few edge cases
+    self.assertEqual("", str_tools.to_camel_case(""))
+    self.assertEqual("Hello", str_tools.to_camel_case("hello"))
+    self.assertEqual("Hello", str_tools.to_camel_case("HELLO"))
+
+    # a doubled underscore is an empty word, so we get two dividers in a row
+    self.assertEqual("Hello  World", str_tools.to_camel_case("hello__world"))
+
+    # only underscores split words, other whitespace is left alone
+    self.assertEqual("Hello\tworld", str_tools.to_camel_case("hello\tWORLD"))
+    self.assertEqual("Hello\t\tWorld", str_tools.to_camel_case("hello__world", "\t"))
+
+  def test_get_size_label(self):
+    """
+    Checks the get_size_label() function.
+    """
+
+    # test the pydoc examples
+    self.assertEqual('1 MB', str_tools.get_size_label(2000000))
+    self.assertEqual('1.02 KB', str_tools.get_size_label(1050, 2))
+    self.assertEqual('1.025 Kilobytes', str_tools.get_size_label(1050, 3, True))
+
+  def test_get_time_label(self):
+    """
+    Checks the get_time_label() function.
+    """
+
+    # test the pydoc examples
+    self.assertEqual('2h', str_tools.get_time_label(10000))
+    self.assertEqual('1.0 minute', str_tools.get_time_label(61, 1, True))
+    self.assertEqual('1.01 minutes', str_tools.get_time_label(61, 2, True))
+
+  def test_get_time_labels(self):
+    """
+    Checks the get_time_labels() function.
+    """
+
+    # test the pydoc examples
+    self.assertEqual(['6m', '40s'], str_tools.get_time_labels(400))
+    self.assertEqual(['1 hour', '40 seconds'], str_tools.get_time_labels(3640, True))
+