[tor-commits] [stem/master] Adding crop() method to our str_tools

Mon Sep 1 00:11:08 UTC 2014

commit 28f4f4bc987ad098e3237906df0a55e433fa1d14
Author: Damian Johnson <atagar at torproject.org>
Date:   Sun Aug 31 17:02:43 2014 -0700

    Adding crop() method to our str_tools
    
    Method from arm's ui_tools with a little cleanup. This is a pretty delicate
    method and I'm a tad worried about some of the changes, so we might see some
    regressions from what arm had. That said, regression tests can easily be added
    to our unit tests (it'll be soooo nice to finally have those for this helper....).
---
 docs/change_log.rst         |    1 +
 stem/util/enum.py           |    6 +--
 stem/util/str_tools.py      |  120 +++++++++++++++++++++++++++++++++++++++++++
 test/unit/doctest.py        |    1 +
 test/unit/util/str_tools.py |    6 +++
 5 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 4ec16a0..0e605ca 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -49,6 +49,7 @@ The following are only available within Stem's `git repository
 
   * Added support for directories to :func:`stem.util.conf.Config.load`.
   * Changed :func:`stem.util.conf.uses_settings` to only provide a 'config' keyword arument if the decorated function would accept it.
+  * Added :func:`stem.util.str_tools.crop`
 
  * **Interpreter**
 
diff --git a/stem/util/enum.py b/stem/util/enum.py
index 4a86fc5..473f0de 100644
--- a/stem/util/enum.py
+++ b/stem/util/enum.py
@@ -40,8 +40,6 @@ constructed as simple type listings...
     +- __iter__ - iterator over our enum keys
 """
 
-import stem.util.str_tools
-
 
 def UppercaseEnum(*args):
   """
@@ -70,12 +68,14 @@ class Enum(object):
   """
 
   def __init__(self, *args):
+    from stem.util.str_tools import _to_camel_case
+
     # ordered listings of our keys and values
     keys, values = [], []
 
     for entry in args:
       if isinstance(entry, (bytes, unicode)):
-        key, val = entry, stem.util.str_tools._to_camel_case(entry)
+        key, val = entry, _to_camel_case(entry)
       elif isinstance(entry, tuple) and len(entry) == 2:
         key, val = entry
       else:
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
index 4b5ccca..e166a4c 100644
--- a/stem/util/str_tools.py
+++ b/stem/util/str_tools.py
@@ -8,6 +8,8 @@ Toolkit for various string activity.
 
 ::
 
+  crop - shortens string to a given length
+
   get_size_label - human readable label for a number of bytes
   get_time_label - human readable label for a number of seconds
   get_time_labels - human readable labels for each time unit
@@ -17,8 +19,11 @@ Toolkit for various string activity.
 
 import codecs
 import datetime
+import sys
 
 import stem.prereq
+import stem.util.enum
+
 
 # label conversion tuples of the form...
 # (bits / bytes / seconds, short label, long label)
@@ -131,6 +136,121 @@ def _to_camel_case(label, divider = '_', joiner = ' '):
   return joiner.join(words)
 
 
+# This needs to be defined after _to_camel_case() to avoid a circular
+# dependency with the enum module.
+
+Ending = stem.util.enum.Enum('ELLIPSE', 'HYPHEN')
+
+
+def crop(msg, size, min_word_length = 4, min_crop = 0, ending = Ending.ELLIPSE, get_remainder = False):
+  """
+  Shortens a string to a given length.
+
+  If we crop content then a given ending is included (counting itself toward
+  the size limitation). This crops on word breaks so we only include a word if
+  we can display at least **min_word_length** characters of it.
+
+  If there isn't room for even a truncated single word (or one word plus the
+  ellipse if including those) then this provides an empty string.
+
+  If a cropped string ends with a comma or period then it's stripped (unless
+  we're providing the remainder back). For example...
+
+    >>> crop('This is a looooong message', 17)
+    'This is a looo...'
+
+    >>> crop('This is a looooong message', 12)
+    'This is a...'
+
+    >>> crop('This is a looooong message', 3)
+    ''
+
+  The whole point of this method is to provide human friendly croppings, and as
+  such details of how this works might change in the future. Callers should not
+  rely on the details of how this crops.
+
+  :param str msg: text to be processed
+  :param int size: space available for text
+  :param int min_word_length: minimum characters of a word we must be able to
+    show for it to be included in part, requires whole word if **None**
+  :param int min_crop: minimum characters that must be dropped if a word is
+    cropped
+  :param Ending ending: type of ending used when truncating, no special
+    truncation is used if **None**
+  :param bool get_remainder: returns a tuple with the second part being the
+    cropped portion of the message
+
+  :returns: **str** of the text truncated to the given length, or a (truncated text, remainder) **tuple** if **get_remainder** is set
+  """
+
+  # checks if there's room for the whole message
+
+  if len(msg) <= size:
+    return (msg, '') if get_remainder else msg
+
+  if size < 0:
+    raise ValueError("Crop size can't be negative (received %i)" % size)
+  elif min_word_length and min_word_length < 0:
+    raise ValueError("Crop's min_word_length can't be negative (received %i)" % min_word_length)
+  elif min_crop < 0:
+    raise ValueError("Crop's min_crop can't be negative (received %i)" % min_crop)
+
+  # since we're cropping, the effective space available is less with an
+  # ellipse, and cropping words requires an extra space for hyphens
+
+  if ending == Ending.ELLIPSE:
+    size -= 3
+  elif min_word_length and ending == Ending.HYPHEN:
+    min_word_length += 1
+
+  if min_word_length is None:
+    min_word_length = sys.maxint
+
+  # checks if there isn't the minimum space needed to include anything
+
+  last_wordbreak = msg.rfind(' ', 0, size + 1)
+
+  if last_wordbreak == -1:
+    # we're splitting the first word
+
+    if size < min_word_length:
+      return ('', msg) if get_remainder else ''
+
+    include_crop = True
+  else:
+    last_wordbreak = len(msg[:last_wordbreak].rstrip())  # drops extra ending whitespaces
+    include_crop = size - last_wordbreak - 1 >= min_word_length
+
+  # if there's a min crop size then make sure we're cropping at least that many characters
+
+  if include_crop and min_crop:
+    next_wordbreak = msg.find(' ', size)
+
+    if next_wordbreak == -1:
+      next_wordbreak = len(msg)
+
+    include_crop = next_wordbreak - size + 1 >= min_crop
+
+  if include_crop:
+    return_msg, remainder = msg[:size], msg[size:]
+
+    if ending == Ending.HYPHEN:
+      remainder = return_msg[-1] + remainder
+      return_msg = return_msg[:-1].rstrip() + '-'
+  else:
+    return_msg, remainder = msg[:last_wordbreak], msg[last_wordbreak:]
+
+  # if this is ending with a comma or period then strip it off
+
+  if not get_remainder and return_msg and return_msg[-1] in (',', '.'):
+    return_msg = return_msg[:-1]
+
+  if ending == Ending.ELLIPSE:
+    return_msg = return_msg.rstrip() + '...'
+
+  return (return_msg, remainder) if get_remainder else return_msg
+
+
 def get_size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
   """
   Converts a number of bytes into a human readable label in its most
diff --git a/test/unit/doctest.py b/test/unit/doctest.py
index 4692dd6..1c7c415 100644
--- a/test/unit/doctest.py
+++ b/test/unit/doctest.py
@@ -59,6 +59,7 @@ class TestDocumentation(unittest.TestCase):
       elif path.endswith('/stem/util/str_tools.py'):
         args['globs'] = {
           '_to_camel_case': stem.util.str_tools._to_camel_case,
+          'crop': stem.util.str_tools.crop,
           'get_size_label': stem.util.str_tools.get_size_label,
           'get_time_label': stem.util.str_tools.get_time_label,
           'get_time_labels': stem.util.str_tools.get_time_labels,
diff --git a/test/unit/util/str_tools.py b/test/unit/util/str_tools.py
index 82d5a97..7abdedf 100644
--- a/test/unit/util/str_tools.py
+++ b/test/unit/util/str_tools.py
@@ -25,6 +25,12 @@ class TestStrTools(unittest.TestCase):
     self.assertEquals('Hello\tworld', str_tools._to_camel_case('hello\tWORLD'))
     self.assertEquals('Hello\t\tWorld', str_tools._to_camel_case('hello__world', '_', '\t'))
 
+  def test_crop(self):
+    # test the pydoc examples
+    self.assertEquals('This is a looo...', str_tools.crop('This is a looooong message', 17))
+    self.assertEquals('This is a...', str_tools.crop('This is a looooong message', 12))
+    self.assertEquals('', str_tools.crop('This is a looooong message', 3))
+
   def test_get_size_label(self):
     """
     Checks the get_size_label() function.