[tor-commits] [bridgedb/develop] Add Levenshtein Distance function to bridgedb.util module.

isis at torproject.org isis at torproject.org
Thu Jul 24 04:37:11 UTC 2014


commit 55d05ac4bf876af85078de491de858ed8550dc29
Author: Isis Lovecruft <isis at torproject.org>
Date:   Sun Jul 6 19:43:09 2014 +0000

    Add Levenshtein Distance function to bridgedb.util module.
    
     * ADD new function, `bridgedb.util.levenshteinDistance()`.
---
 lib/bridgedb/util.py |   40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/lib/bridgedb/util.py b/lib/bridgedb/util.py
index 6d95129..63fd5e6 100644
--- a/lib/bridgedb/util.py
+++ b/lib/bridgedb/util.py
@@ -142,6 +142,46 @@ def configureLogging(cfg):
     logging.info("Level: %s", logLevel)
     logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
 
+def levenshteinDistance(s1, s2, len1=None, len2=None,
+                        offset1=0, offset2=0, memo=None):
+    """Compute the Levenstein Distance between two strings.
+
+    The `Levenshtein String Distance Algorithm`_ efficiently computes the
+    number of characters which must be changed in **s1** to make it
+    identical to **s2**.
+
+    .. `Levenshtein String Distance Algorithm`:
+        https://en.wikipedia.org/wiki/Levenshtein_distance
+
+    >>> levenshteinDistance('cat', 'cat')
+    0
+    >>> levenshteinDistance('cat', 'hat')
+    1
+    >>> levenshteinDistance('arma', 'armadillo')
+    5
+
+    :param str s1: The string which should be changed.
+    :param str s2: The string which **stringOne** should be compared to.
+    """
+    len1 = len(s1) if len1 is None else len1
+    len2 = len(s2) if len2 is None else len2
+    memo = {} if memo is None else memo
+
+    key = ','.join([str(offset1), str(len1), str(offset2), str(len2)])
+    if memo.get(key) is not None: return memo[key]
+
+    if len1 == 0: return len2
+    elif len2 == 0: return len1
+
+    cost = 0 if (s1[offset1] == s2[offset2]) else 1
+    distance = min(
+        levenshteinDistance(s1, s2, len1-1, len2,   offset1+1, offset2,   memo) + 1,
+        levenshteinDistance(s1, s2, len1,   len2-1, offset1,   offset2+1, memo) + 1,
+        levenshteinDistance(s1, s2, len1-1, len2-1, offset1+1, offset2+1, memo) + cost,
+    )
+    memo[key] = distance
+    return distance
+
 
 class JustifiedLogFormatter(logging.Formatter):
     """A logging formatter which pretty prints thread and calling function





More information about the tor-commits mailing list