commit c1dc0959feed16b9e0264c4b795fb7b5d0f16ef8 Author: Ossi Herrala oherrala@gmail.com Date: Thu Nov 20 22:08:43 2014 +0200
stem/descriptor/extrainfo_descriptor.py: Optimize away calls to datetime.datetime.strptime()
datetime.datetime.strptime() seems to be slow. It is faster to match the timestamp with a precompiled regular expression and build the datetime.datetime object from the captured groups.
before (lines sorted): 6003241 function calls (5996319 primitive calls) in 12.508 seconds 6003241 function calls (5996319 primitive calls) in 12.564 seconds 6003241 function calls (5996319 primitive calls) in 12.677 seconds 6003241 function calls (5996319 primitive calls) in 12.720 seconds 6003241 function calls (5996319 primitive calls) in 12.871 seconds 6003241 function calls (5996319 primitive calls) in 12.871 seconds 6003241 function calls (5996319 primitive calls) in 12.935 seconds 6003241 function calls (5996319 primitive calls) in 12.982 seconds 6003241 function calls (5996319 primitive calls) in 13.230 seconds 6003241 function calls (5996319 primitive calls) in 13.663 seconds
after (lines sorted): 5444306 function calls (5437532 primitive calls) in 11.417 seconds 5444306 function calls (5437532 primitive calls) in 11.419 seconds 5444306 function calls (5437532 primitive calls) in 11.494 seconds 5444306 function calls (5437532 primitive calls) in 11.568 seconds 5444306 function calls (5437532 primitive calls) in 11.593 seconds 5444306 function calls (5437532 primitive calls) in 11.629 seconds 5444306 function calls (5437532 primitive calls) in 11.630 seconds 5444306 function calls (5437532 primitive calls) in 11.649 seconds 5444306 function calls (5437532 primitive calls) in 11.751 seconds 5444306 function calls (5437532 primitive calls) in 12.247 seconds --- stem/descriptor/extrainfo_descriptor.py | 5 ++--- stem/util/str_tools.py | 36 ++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py index 31f0e20..b677888 100644 --- a/stem/descriptor/extrainfo_descriptor.py +++ b/stem/descriptor/extrainfo_descriptor.py @@ -69,7 +69,6 @@ Extra-info descriptors are available from a few sources... ===================== =========== """
-import datetime import hashlib import re
@@ -220,7 +219,7 @@ def _parse_timestamp_and_interval(keyword, content): raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line))
try: - timestamp = datetime.datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S') + timestamp = stem.util.str_tools._parse_timestamp(timestamp_str) return timestamp, int(interval), remainder except ValueError: raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line)) @@ -652,7 +651,7 @@ class ExtraInfoDescriptor(Descriptor): # "<keyword>" YYYY-MM-DD HH:MM:SS
try: - timestamp = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + timestamp = stem.util.str_tools._parse_timestamp(value)
if keyword == 'published': self.published = timestamp diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py index 16681d5..039447e 100644 --- a/stem/util/str_tools.py +++ b/stem/util/str_tools.py @@ -23,6 +23,7 @@ Toolkit for various string activity.
import codecs import datetime +import re import sys
import stem.prereq @@ -56,6 +57,8 @@ TIME_UNITS = ( (1.0, 's', ' second'), )
+_timestamp_re = re.compile(r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})') + if stem.prereq.is_python_3(): def _to_bytes_impl(msg): if isinstance(msg, str): @@ -436,6 +439,32 @@ def parse_short_time_label(label): raise ValueError('Non-numeric value in time entry: %s' % label)
+def _parse_timestamp(entry): + """ + Parses the date and time that are in a format like... + + :: + + 2012-11-08 16:48:41 + + :param str entry: timestamp to be parsed + + :returns: datetime for the time represented by the timestamp + + :raises: ValueError if the timestamp is malformed + """ + + if not isinstance(entry, (str, unicode)): + raise IOError('parse_iso_timestamp() input must be a str, got a %s' % type(entry)) + + try: + time = [int(x) for x in _timestamp_re.match(entry).groups()] + except AttributeError: + raise ValueError("Expected timestamp in format YYYY-MM-DD HH:MM:ss but got " + entry) + + return datetime.datetime(time[0], time[1], time[2], time[3], time[4], time[5]) + + def _parse_iso_timestamp(entry): """ Parses the ISO 8601 standard that provides for timestamps like... @@ -465,7 +494,12 @@ def _parse_iso_timestamp(entry): if len(microseconds) != 6 or not microseconds.isdigit(): raise ValueError("timestamp's microseconds should be six digits")
- timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S") + if timestamp_str[10] == 'T': + timestamp_str = timestamp_str[:10] + ' ' + timestamp_str[11:] + else: + raise ValueError("timestamp didn't contain delimeter 'T' between date and time") + + timestamp = _parse_timestamp(timestamp_str) return timestamp + datetime.timedelta(microseconds = int(microseconds))