commit 55d05ac4bf876af85078de491de858ed8550dc29 Author: Isis Lovecruft isis@torproject.org Date: Sun Jul 6 19:43:09 2014 +0000
Add Levenshtein Distance function to bridgedb.util module.
* ADD new function, `bridgedb.util.levenshteinDistance()`. --- lib/bridgedb/util.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/lib/bridgedb/util.py b/lib/bridgedb/util.py index 6d95129..63fd5e6 100644 --- a/lib/bridgedb/util.py +++ b/lib/bridgedb/util.py @@ -142,6 +142,46 @@ def configureLogging(cfg): logging.info("Level: %s", logLevel) logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
+def levenshteinDistance(s1, s2, len1=None, len2=None, + offset1=0, offset2=0, memo=None): + """Compute the Levenstein Distance between two strings. + + The `Levenshtein String Distance Algorithm`_ efficiently computes the + number of characters which must be changed in **s1** to make it + identical to **s2**. + + .. `Levenshtein String Distance Algorithm`: + https://en.wikipedia.org/wiki/Levenshtein_distance + + >>> levenshteinDistance('cat', 'cat') + 0 + >>> levenshteinDistance('cat', 'hat') + 1 + >>> levenshteinDistance('arma', 'armadillo') + 5 + + :param str s1: The string which should be changed. + :param str s2: The string which **stringOne** should be compared to. + """ + len1 = len(s1) if len1 is None else len1 + len2 = len(s2) if len2 is None else len2 + memo = {} if memo is None else memo + + key = ','.join([str(offset1), str(len1), str(offset2), str(len2)]) + if memo.get(key) is not None: return memo[key] + + if len1 == 0: return len2 + elif len2 == 0: return len1 + + cost = 0 if (s1[offset1] == s2[offset2]) else 1 + distance = min( + levenshteinDistance(s1, s2, len1-1, len2, offset1+1, offset2, memo) + 1, + levenshteinDistance(s1, s2, len1, len2-1, offset1, offset2+1, memo) + 1, + levenshteinDistance(s1, s2, len1-1, len2-1, offset1+1, offset2+1, memo) + cost, + ) + memo[key] = distance + return distance +
class JustifiedLogFormatter(logging.Formatter): """A logging formatter which pretty prints thread and calling function
tor-commits@lists.torproject.org