commit 70227efa08d77f65b1eea069ae93bd0ba0bf1993 Author: Damian Johnson atagar@torproject.org Date: Fri Oct 19 09:41:34 2012 -0700
Utils for human-readable time and size metrics
Snagging the utilities from arm's src/util/uiTools.py for making human-readable labels of time and size metrics. They'll be handy not only for stem's users, but also for our exception messages, and they're something I want to use in our tutorials.
I've simplified these functions a bit and added unit tests for the pydoc examples. More tests are needed, though... --- run_tests.py | 2 + stem/util/enum.py | 24 +---- stem/util/str_tools.py | 244 +++++++++++++++++++++++++++++++++++++++++++ stem/util/term.py | 3 +- test/unit/util/__init__.py | 1 + test/unit/util/enum.py | 16 --- test/unit/util/str_tools.py | 53 +++++++++ 7 files changed, 304 insertions(+), 39 deletions(-)
diff --git a/run_tests.py b/run_tests.py index c891bf6..8cd393c 100755 --- a/run_tests.py +++ b/run_tests.py @@ -37,6 +37,7 @@ import test.unit.util.conf import test.unit.util.connection import test.unit.util.enum import test.unit.util.proc +import test.unit.util.str_tools import test.unit.util.system import test.unit.util.tor_tools import test.unit.exit_policy.policy @@ -114,6 +115,7 @@ UNIT_TESTS = ( test.unit.util.connection.TestConnection, test.unit.util.conf.TestConf, test.unit.util.proc.TestProc, + test.unit.util.str_tools.TestStrTools, test.unit.util.system.TestSystem, test.unit.util.tor_tools.TestTorTools, test.unit.descriptor.export.TestExport, diff --git a/stem/util/enum.py b/stem/util/enum.py index 50f6728..72c4443 100644 --- a/stem/util/enum.py +++ b/stem/util/enum.py @@ -24,7 +24,6 @@ with overwritten string counterparts:
::
- to_camel_case - converts a string to camel case UppercaseEnum - Provides an enum instance with capitalized values. Enum - Provides a basic, ordered enumeration. |- keys - string representation of our enum keys @@ -35,26 +34,7 @@ with overwritten string counterparts: +- __iter__ - iterator over our enum keys """
-def to_camel_case(label, word_divider = " "): - """ - Converts the given string to camel case, ie: - - :: - - >>> to_camel_case("I_LIKE_PEPPERJACK!") - 'I Like Pepperjack!' - - :param str label: input string to be converted - :param str word_divider: string used to replace underscores - """ - - words = [] - for entry in label.split("_"): - if len(entry) == 0: words.append("") - elif len(entry) == 1: words.append(entry.upper()) - else: words.append(entry[0].upper() + entry[1:].lower()) - - return word_divider.join(words) +import stem.util.str_tools
def UppercaseEnum(*args): """ @@ -86,7 +66,7 @@ class Enum(object):
for entry in args: if isinstance(entry, str): - key, val = entry, to_camel_case(entry) + key, val = entry, stem.util.str_tools.to_camel_case(entry) elif isinstance(entry, tuple) and len(entry) == 2: key, val = entry else: raise ValueError("Unrecognized input: %s" % args) diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py new file mode 100644 index 0000000..b874425 --- /dev/null +++ b/stem/util/str_tools.py @@ -0,0 +1,244 @@ +""" +Toolkit for various string activity. + +**Module Overview:** + +:: + + to_camel_case - converts a string to camel case + get_size_label - human readable label for a number of bytes + get_time_label - human readable label for a number of seconds + get_time_labels - human readable labels for each time unit + get_short_time_label - condensed time label output + parse_short_time_label - seconds represented by a short time label +""" + +# label conversion tuples of the form... +# (bits / bytes / seconds, short label, long label) +SIZE_UNITS_BITS = ( + (140737488355328.0, " Pb", " Petabit"), + (137438953472.0, " Tb", " Terabit"), + (134217728.0, " Gb", " Gigabit"), + (131072.0, " Mb", " Megabit"), + (128.0, " Kb", " Kilobit"), + (0.125, " b", " Bit"), +) + +SIZE_UNITS_BYTES = ( + (1125899906842624.0, " PB", " Petabyte"), + (1099511627776.0, " TB", " Terabyte"), + (1073741824.0, " GB", " Gigabyte"), + (1048576.0, " MB", " Megabyte"), + (1024.0, " KB", " Kilobyte"), + (1.0, " B", " Byte"), +) + +TIME_UNITS = ( + (86400.0, "d", " day"), + (3600.0, "h", " hour"), + (60.0, "m", " minute"), + (1.0, "s", " second"), +) + +def to_camel_case(label, word_divider = " "): + """ + Converts the given string to camel case, ie: + + :: + + >>> to_camel_case("I_LIKE_PEPPERJACK!") + 'I Like Pepperjack!' 
+ + :param str label: input string to be converted + :param str word_divider: string used to replace underscores + """ + + words = [] + for entry in label.split("_"): + if len(entry) == 0: words.append("") + elif len(entry) == 1: words.append(entry.upper()) + else: words.append(entry[0].upper() + entry[1:].lower()) + + return word_divider.join(words) + +def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True): + """ + Converts a number of bytes into a human readable label in its most + significant units. For instance, 7500 bytes would return "7 KB". If the + is_long option is used this expands unit labels to be the properly pluralized + full word (for instance 'Kilobytes' rather than 'KB'). Units go up through + petabytes. + + :: + + >>> get_size_label(2000000) + '1 MB' + + >>> get_size_label(1050, 2) + '1.02 KB' + + >>> get_size_label(1050, 3, True) + '1.025 Kilobytes' + + :param int byte_count: number of bytes to be converted + :param int decimal: number of decimal digits to be included + :param bool is_long: expands units label + :param bool is_bytes: provides units in bytes if true, bits otherwise + """ + + if is_bytes: return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long) + else: return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long) + +def get_time_label(seconds, decimal = 0, is_long = False): + """ + Converts seconds into a time label truncated to its most significant units. + For instance, 7500 seconds would return "2h". Units go up through days. + + This defaults to presenting single character labels, but if the is_long + option is used this expands labels to be the full word (space included and + properly pluralized). For instance, "4h" would be "4 hours" and "1m" would + become "1 minute". 
+ + :: + + >>> get_time_label(10000) + '2h' + + >>> get_time_label(61, 1, True) + '1.0 minute' + + >>> get_time_label(61, 2, True) + '1.01 minutes' + + :param int seconds: number of seconds to be converted + :param int decimal: number of decimal digits to be included + :param bool is_long: expands units label + """ + + return _get_label(TIME_UNITS, seconds, decimal, is_long) + +def get_time_labels(seconds, is_long = False): + """ + Provides a list of label conversions for each time unit, starting with its + most significant units on down. Any counts that evaluate to zero are omitted. + For example... + + :: + + >>> get_time_labels(400) + ['6m', '40s'] + + >>> get_time_labels(3640, True) + ['1 hour', '40 seconds'] + + :param int seconds: number of seconds to be converted + :param bool is_long: expands units label + """ + + time_labels = [] + + for count_per_unit, _, _ in TIME_UNITS: + if seconds >= count_per_unit: + time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long)) + seconds %= count_per_unit + + return time_labels + +def get_short_time_label(seconds): + """ + Provides a time in the following format: + [[dd-]hh:]mm:ss + + :param int seconds: number of seconds to be converted + """ + + time_comp = {} + + for amount, _, label in TIME_UNITS: + count = int(seconds / amount) + seconds %= amount + time_comp[label.strip()] = count + + label = "%02i:%02i" % (time_comp["minute"], time_comp["second"]) + + if time_comp["day"]: + label = "%i-%02i:%s" % (time_comp["day"], time_comp["hour"], label) + elif time_comp["hour"]: + label = "%02i:%s" % (time_comp["hour"], label) + + return label + +def parse_short_time_label(label): + """ + Provides the number of seconds corresponding to the formatting used for the + cputime and etime fields of ps: + [[dd-]hh:]mm:ss or mm:ss.ss + + :param str label: time entry to be parsed + + :raises: ValueError if input is malformed + """ + + days, hours, minutes, seconds = '0', '0', '0', '0' + + if '-' in label: + days, label = 
label.split('-', 1) + + time_comp = label.split(":") + + if len(time_comp) == 3: + hours, minutes, seconds = time_comp + elif len(time_comp) == 2: + minutes, seconds = time_comp + else: + raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label) + + try: + time_sum = int(float(seconds)) + time_sum += int(minutes) * 60 + time_sum += int(hours) * 3600 + time_sum += int(days) * 86400 + return time_sum + except ValueError: + raise ValueError("Non-numeric value in time entry: %s" % label) + +def _get_label(units, count, decimal, is_long): + """ + Provides label corresponding to units of the highest significance in the + provided set. This rounds down (ie, integer truncation after visible units). + + :param tuple units: type of units to be used for conversion, containing (count_per_unit, short_label, long_label) + :param int count: number of base units being converted + :param int decimal: decimal precision of label + :param bool is_long: uses the long label if true, short label otherwise + """ + + # formatted string for the requested number of digits + label_format = "%%.%if" % decimal + + # for zero or negative values use the smallest units + if count < 1: + units_label = units[-1][2] + "s" if is_long else units[-1][1] + return "%s%s" % (label_format % count, units_label) + + for count_per_unit, short_label, long_label in units: + if count >= count_per_unit: + # Rounding down with a '%f' is a little clunky. Reducing the count so + # it'll divide evenly as the rounded down value. + + count -= count % (count_per_unit / (10 ** decimal)) + count_label = label_format % (count / count_per_unit) + + if is_long: + # Pluralize if any of the visible units make it greater than one. For + # instance 1.0003 is plural but 1.000 isn't. 
+ + if decimal > 0: + is_plural = count > count_per_unit + else: + is_plural = count >= count_per_unit * 2 + + return count_label + long_label + ("s" if is_plural else "") + else: + return count_label + short_label + diff --git a/stem/util/term.py b/stem/util/term.py index fd4c22e..556b481 100644 --- a/stem/util/term.py +++ b/stem/util/term.py @@ -3,6 +3,7 @@ Utilities for working with the terminal. """
import stem.util.enum +import stem.util.str_tools
TERM_COLORS = ("BLACK", "RED", "GREEN", "YELLOW", "BLUE", "MAGENTA", "CYAN", "WHITE")
@@ -41,7 +42,7 @@ def format(msg, *attr):
encodings = [] for text_attr in attr: - text_attr, encoding = stem.util.enum.to_camel_case(text_attr), None + text_attr, encoding = stem.util.str_tools.to_camel_case(text_attr), None encoding = FG_ENCODING.get(text_attr, encoding) encoding = BG_ENCODING.get(text_attr, encoding) encoding = ATTR_ENCODING.get(text_attr, encoding) diff --git a/test/unit/util/__init__.py b/test/unit/util/__init__.py index d0eeda3..33b81d3 100644 --- a/test/unit/util/__init__.py +++ b/test/unit/util/__init__.py @@ -7,6 +7,7 @@ __all__ = [ "connection", "enum", "proc", + "str_tools", "system", "tor_tools", ] diff --git a/test/unit/util/enum.py b/test/unit/util/enum.py index 5a28f62..88469dd 100644 --- a/test/unit/util/enum.py +++ b/test/unit/util/enum.py @@ -6,22 +6,6 @@ import unittest import stem.util.enum
class TestEnum(unittest.TestCase): - def test_to_camel_case(self): - """ - Checks the stem.util.enum.to_camel_case function. - """ - - # test the pydoc example - self.assertEquals("I Like Pepperjack!", stem.util.enum.to_camel_case("I_LIKE_PEPPERJACK!")) - - # check a few edge cases - self.assertEquals("", stem.util.enum.to_camel_case("")) - self.assertEquals("Hello", stem.util.enum.to_camel_case("hello")) - self.assertEquals("Hello", stem.util.enum.to_camel_case("HELLO")) - self.assertEquals("Hello World", stem.util.enum.to_camel_case("hello__world")) - self.assertEquals("Hello\tworld", stem.util.enum.to_camel_case("hello\tWORLD")) - self.assertEquals("Hello\t\tWorld", stem.util.enum.to_camel_case("hello__world", "\t")) - def test_enum_examples(self): """ Checks that the pydoc examples are accurate. diff --git a/test/unit/util/str_tools.py b/test/unit/util/str_tools.py new file mode 100644 index 0000000..ce5539f --- /dev/null +++ b/test/unit/util/str_tools.py @@ -0,0 +1,53 @@ +""" +Unit tests for the stem.util.str_tools functions. +""" + +import unittest +from stem.util import str_tools + +class TestStrTools(unittest.TestCase): + def test_to_camel_case(self): + """ + Checks the to_camel_case() function. + """ + + # test the pydoc example + self.assertEquals("I Like Pepperjack!", str_tools.to_camel_case("I_LIKE_PEPPERJACK!")) + + # check a few edge cases + self.assertEquals("", str_tools.to_camel_case("")) + self.assertEquals("Hello", str_tools.to_camel_case("hello")) + self.assertEquals("Hello", str_tools.to_camel_case("HELLO")) + self.assertEquals("Hello World", str_tools.to_camel_case("hello__world")) + self.assertEquals("Hello\tworld", str_tools.to_camel_case("hello\tWORLD")) + self.assertEquals("Hello\t\tWorld", str_tools.to_camel_case("hello__world", "\t")) + + def test_get_size_label(self): + """ + Checks the get_size_label() function. 
+ """ + + # test the pydoc examples + self.assertEquals('1 MB', str_tools.get_size_label(2000000)) + self.assertEquals('1.02 KB', str_tools.get_size_label(1050, 2)) + self.assertEquals('1.025 Kilobytes', str_tools.get_size_label(1050, 3, True)) + + def test_get_time_label(self): + """ + Checks the get_time_label() function. + """ + + # test the pydoc examples + self.assertEquals('2h', str_tools.get_time_label(10000)) + self.assertEquals('1.0 minute', str_tools.get_time_label(61, 1, True)) + self.assertEquals('1.01 minutes', str_tools.get_time_label(61, 2, True)) + + def test_get_time_labels(self): + """ + Checks the get_time_labels() function. + """ + + # test the pydoc examples + self.assertEquals(['6m', '40s'], str_tools.get_time_labels(400)) + self.assertEquals(['1 hour', '40 seconds'], str_tools.get_time_labels(3640, True)) +
tor-commits@lists.torproject.org