[tor-commits] [stem/master] stem/descriptor/extrainfo_descriptor.py: Optimize away calls to datetime.datetime.strptime()

atagar at torproject.org atagar at torproject.org
Sun Nov 23 00:13:20 UTC 2014


commit c1dc0959feed16b9e0264c4b795fb7b5d0f16ef8
Author: Ossi Herrala <oherrala at gmail.com>
Date:   Thu Nov 20 22:08:43 2014 +0200

    stem/descriptor/extrainfo_descriptor.py: Optimize away calls to datetime.datetime.strptime()
    
    datetime.datetime.strptime() seems to be slow. We can go faster by matching
    with a regexp and building the datetime.datetime object from the results.
    
    before (lines sorted):
    6003241 function calls (5996319 primitive calls) in 12.508 seconds
    6003241 function calls (5996319 primitive calls) in 12.564 seconds
    6003241 function calls (5996319 primitive calls) in 12.677 seconds
    6003241 function calls (5996319 primitive calls) in 12.720 seconds
    6003241 function calls (5996319 primitive calls) in 12.871 seconds
    6003241 function calls (5996319 primitive calls) in 12.871 seconds
    6003241 function calls (5996319 primitive calls) in 12.935 seconds
    6003241 function calls (5996319 primitive calls) in 12.982 seconds
    6003241 function calls (5996319 primitive calls) in 13.230 seconds
    6003241 function calls (5996319 primitive calls) in 13.663 seconds
    
    after (lines sorted):
    5444306 function calls (5437532 primitive calls) in 11.417 seconds
    5444306 function calls (5437532 primitive calls) in 11.419 seconds
    5444306 function calls (5437532 primitive calls) in 11.494 seconds
    5444306 function calls (5437532 primitive calls) in 11.568 seconds
    5444306 function calls (5437532 primitive calls) in 11.593 seconds
    5444306 function calls (5437532 primitive calls) in 11.629 seconds
    5444306 function calls (5437532 primitive calls) in 11.630 seconds
    5444306 function calls (5437532 primitive calls) in 11.649 seconds
    5444306 function calls (5437532 primitive calls) in 11.751 seconds
    5444306 function calls (5437532 primitive calls) in 12.247 seconds
---
 stem/descriptor/extrainfo_descriptor.py |    5 ++---
 stem/util/str_tools.py                  |   36 ++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/stem/descriptor/extrainfo_descriptor.py b/stem/descriptor/extrainfo_descriptor.py
index 31f0e20..b677888 100644
--- a/stem/descriptor/extrainfo_descriptor.py
+++ b/stem/descriptor/extrainfo_descriptor.py
@@ -69,7 +69,6 @@ Extra-info descriptors are available from a few sources...
   ===================== ===========
 """
 
-import datetime
 import hashlib
 import re
 
@@ -220,7 +219,7 @@ def _parse_timestamp_and_interval(keyword, content):
     raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line))
 
   try:
-    timestamp = datetime.datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
+    timestamp = stem.util.str_tools._parse_timestamp(timestamp_str)
     return timestamp, int(interval), remainder
   except ValueError:
     raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
@@ -652,7 +651,7 @@ class ExtraInfoDescriptor(Descriptor):
         # "<keyword>" YYYY-MM-DD HH:MM:SS
 
         try:
-          timestamp = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
+          timestamp = stem.util.str_tools._parse_timestamp(value)
 
           if keyword == 'published':
             self.published = timestamp
diff --git a/stem/util/str_tools.py b/stem/util/str_tools.py
index 16681d5..039447e 100644
--- a/stem/util/str_tools.py
+++ b/stem/util/str_tools.py
@@ -23,6 +23,7 @@ Toolkit for various string activity.
 
 import codecs
 import datetime
+import re
 import sys
 
 import stem.prereq
@@ -56,6 +57,8 @@ TIME_UNITS = (
   (1.0, 's', ' second'),
 )
 
+_timestamp_re = re.compile(r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})')
+
 if stem.prereq.is_python_3():
   def _to_bytes_impl(msg):
     if isinstance(msg, str):
@@ -436,6 +439,32 @@ def parse_short_time_label(label):
     raise ValueError('Non-numeric value in time entry: %s' % label)
 
 
+def _parse_timestamp(entry):
+  """
+  Parses a date and time given in a format like...
+
+  ::
+
+    2012-11-08 16:48:41
+
+  :param str entry: timestamp to be parsed
+
+  :returns: datetime for the time represented by the timestamp
+
+  :raises: ValueError if the timestamp is malformed
+  """
+
+  if not isinstance(entry, (str, unicode)):
+    raise IOError('parse_iso_timestamp() input must be a str, got a %s' % type(entry))
+
+  try:
+    time = [int(x) for x in _timestamp_re.match(entry).groups()]
+  except AttributeError:
+    raise ValueError("Expected timestamp in format YYYY-MM-DD HH:MM:ss but got " + entry)
+
+  return datetime.datetime(time[0], time[1], time[2], time[3], time[4], time[5])
+
+
 def _parse_iso_timestamp(entry):
   """
   Parses the ISO 8601 standard that provides for timestamps like...
@@ -465,7 +494,12 @@ def _parse_iso_timestamp(entry):
   if len(microseconds) != 6 or not microseconds.isdigit():
     raise ValueError("timestamp's microseconds should be six digits")
 
-  timestamp = datetime.datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S")
+  if timestamp_str[10] == 'T':
+    timestamp_str = timestamp_str[:10] + ' ' + timestamp_str[11:]
+  else:
+    raise ValueError("timestamp didn't contain delimeter 'T' between date and time")
+
+  timestamp = _parse_timestamp(timestamp_str)
   return timestamp + datetime.timedelta(microseconds = int(microseconds))
 
 





More information about the tor-commits mailing list