[tor-commits] [stem/master] Normalizing CRLF line endings when reading descriptors on Windows

atagar at torproject.org atagar at torproject.org
Mon Sep 21 17:09:13 UTC 2015


commit 3ba84bb3d296a431c9262c7b51c2960ece26d6d9
Author: Damian Johnson <atagar at torproject.org>
Date:   Mon Sep 21 10:10:48 2015 -0700

    Normalizing CRLF line endings when reading descriptors on Windows
    
    Nice catch from trodun. On Windows cached descriptor files use the platform's
    newlines. For Windows this means CRLF line endings. Providing an option to
    normalize these, and making it the default when reading a data directory on
    Windows.
    
      https://trac.torproject.org/projects/tor/ticket/17051
---
 docs/change_log.rst                                |    1 +
 stem/descriptor/__init__.py                        |   34 ++++-
 ...ached-microdesc-consensus_with_carriage_returns |  140 ++++++++++++++++++++
 test/unit/descriptor/reader.py                     |    2 +-
 test/unit/descriptor/router_status_entry.py        |   19 +++
 5 files changed, 194 insertions(+), 2 deletions(-)

diff --git a/docs/change_log.rst b/docs/change_log.rst
index 273dc26..b36e143 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -57,6 +57,7 @@ The following are only available within Stem's `git repository
   * Server descriptor validation fails with 'extra-info-digest line had an invalid value' from additions in proposal 228 (:trac:`16227`)
   * :class:`~stem.descriptor.server_descriptor.BridgeDescriptor` now has 'ntor_onion_key' like its unsanitized counterparts
   * Replaced the :class:`~stem.descriptor.microdescriptor.Microdescriptor` identifier and identifier_type attributes with an identifiers hash since it can now appear multiple times (:spec:`09ff9e2`)
+  * Unable to read descriptors from data directories on Windows due to their CRLF newlines (:trac:`17051`)
   * TypeError under python3 when using 'use_mirrors = True' (:trac:`17083`)
 
  * **Website**
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 719ee84..f829239 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -85,7 +85,7 @@ DocumentHandler = stem.util.enum.UppercaseEnum(
 )
 
 
-def parse_file(descriptor_file, descriptor_type = None, validate = False, document_handler = DocumentHandler.ENTRIES, **kwargs):
+def parse_file(descriptor_file, descriptor_type = None, validate = False, document_handler = DocumentHandler.ENTRIES, normalize_newlines = False, **kwargs):
   """
   Simple function to read the descriptor contents from a file, providing an
   iterator for its :class:`~stem.descriptor.__init__.Descriptor` contents.
@@ -143,6 +143,8 @@ def parse_file(descriptor_file, descriptor_type = None, validate = False, docume
     **True**, skips these checks otherwise
   :param stem.descriptor.__init__.DocumentHandler document_handler: method in
     which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
+  :param bool normalize_newlines: converts windows newlines (CRLF) to unix
+    newlines (LF); always done when reading cached descriptor files on windows
   :param dict kwargs: additional arguments for the descriptor constructor
 
   :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
@@ -204,6 +206,8 @@ def parse_file(descriptor_file, descriptor_type = None, validate = False, docume
   else:
     # Cached descriptor handling. These contain multiple descriptors per file.
 
+    normalize_newlines |= stem.util.system.is_windows()
+
     if filename == 'cached-descriptors' or filename == 'cached-descriptors.new':
       file_parser = lambda f: stem.descriptor.server_descriptor._parse_file(f, validate = validate, **kwargs)
     elif filename == 'cached-extrainfo' or filename == 'cached-extrainfo.new':
@@ -215,6 +219,9 @@ def parse_file(descriptor_file, descriptor_type = None, validate = False, docume
     elif filename == 'cached-microdesc-consensus':
       file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs)
 
+  if normalize_newlines:
+    descriptor_file = NewlineNormalizer(descriptor_file)
+
   if file_parser:
     for desc in file_parser(descriptor_file):
       if descriptor_path is not None:
@@ -623,6 +630,50 @@ class Descriptor(object):
       return self._raw_contents
 
 
+class NewlineNormalizer(object):
+  """
+  File wrapper that normalizes CRLF line endings to LF in the content read
+  from it. This works with both binary and text files.
+
+  Each call is normalized on its own, so a sized read that ends between a CR
+  and its LF leaves that pair as it is.
+
+  :param file wrapped_file: file object we read from
+  """
+
+  def __init__(self, wrapped_file):
+    self._wrapped_file = wrapped_file
+    self.name = getattr(wrapped_file, 'name', None)
+
+  def read(self, *args):
+    return self._normalize(self._wrapped_file.read(*args))
+
+  def readline(self, *args):
+    return self._normalize(self._wrapped_file.readline(*args))
+
+  def readlines(self, *args):
+    # lines keep their trailing '\n', so stripping a trailing '\r' would never
+    # match a CRLF ending
+
+    return [self._normalize(line) for line in self._wrapped_file.readlines(*args)]
+
+  def seek(self, *args):
+    return self._wrapped_file.seek(*args)
+
+  def tell(self, *args):
+    return self._wrapped_file.tell(*args)
+
+  @staticmethod
+  def _normalize(content):
+    # bytes.replace() rejects str arguments under python3, so the replacement
+    # needs to match the type of the content we read
+
+    if isinstance(content, bytes):
+      return content.replace(b'\r\n', b'\n')
+    else:
+      return content.replace('\r\n', '\n')
+
+
 def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
   """
   Reads from the descriptor file until we get to one of the given keywords or reach the
diff --git a/test/unit/descriptor/data/cached-microdesc-consensus_with_carriage_returns b/test/unit/descriptor/data/cached-microdesc-consensus_with_carriage_returns
new file mode 100644
index 0000000..7fd7d3f
--- /dev/null
+++ b/test/unit/descriptor/data/cached-microdesc-consensus_with_carriage_returns
@@ -0,0 +1,140 @@
+network-status-version 3 microdesc
+vote-status consensus
+consensus-method 20
+valid-after 2015-09-15 18:00:00
+fresh-until 2015-09-15 19:00:00
+valid-until 2015-09-15 21:00:00
+voting-delay 300 300
+client-versions 0.2.4.23,0.2.4.24,0.2.4.25,0.2.4.26,0.2.4.27,0.2.5.8-rc,0.2.5.9-rc,0.2.5.10,0.2.5.11,0.2.5.12,0.2.6.5-rc,0.2.6.6,0.2.6.7,0.2.6.8,0.2.6.9,0.2.6.10,0.2.7.1-alpha,0.2.7.2-alpha
+server-versions 0.2.4.23,0.2.4.24,0.2.4.25,0.2.4.26,0.2.4.27,0.2.5.8-rc,0.2.5.9-rc,0.2.5.10,0.2.5.11,0.2.5.12,0.2.6.5-rc,0.2.6.6,0.2.6.7,0.2.6.8,0.2.6.9,0.2.6.10,0.2.7.1-alpha,0.2.7.2-alpha
+known-flags Authority BadExit Exit Fast Guard HSDir Running Stable V2Dir Valid
+params CircuitPriorityHalflifeMsec=30000 NumDirectoryGuards=3 NumEntryGuards=1 NumNTorsPerTAP=100 Support022HiddenServices=0 UseNTorHandshake=1 UseOptimisticData=1 bwauthpid=1 cbttestfreq=1000 pb_disablepct=0 usecreatefast=0
+dir-source tor26 14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4 86.59.21.38 86.59.21.38 80 443
+contact Peter Palfrader
+vote-digest 7DD2661BBBA3E600CB9D641CFFF27D14D77EDCCE
+dir-source longclaw 23D15D965BC35114467363C165C4F724B64B4F66 longclaw.riseup.net 199.254.238.52 80 443
+contact Riseup Networks <collective at riseup dot net> - 1nNzekuHGGzBYRzyjfjFEfeisNvxkn4RT
+vote-digest 2EBB0C5EFEFE51ECE8C630A45F607201491D7E09
+dir-source maatuska 49015F787433103580E3B66A1707A00E60F2D15B 171.25.193.9 171.25.193.9 443 80
+contact 4096R/23291265 Linus Nordberg <linus at nordberg.se>
+vote-digest 7FC1487C8D7C810C6FA8037AE6EC81ACECC408B8
+dir-source dannenberg 585769C78764D58426B8B52B6651A5A71137189A dannenberg.torauth.de 193.23.244.244 80 443
+contact Andreas Lehner <ops at torauth.de>
+vote-digest 32F82FE133ADAAED686A7360297C56B327994799
+dir-source urras 80550987E1D626E3EBA5E5E75A458DE0626D088C 208.83.223.34 208.83.223.34 443 80
+contact 4096R/D255D3F5C868227F Jacob Appelbaum <jacob at appelbaum.net>
+vote-digest 86348CCEC68457D5C1777978D1043B211FA44010
+dir-source moria1 D586D18309DED4CD6D57C18FDB97EFA96D330566 128.31.0.34 128.31.0.34 9131 9101
+contact 1024D/28988BF5 arma mit edu
+vote-digest 9D18B15C2C039CB0B18B51496F8ACBDA2836DE19
+dir-source dizum E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58 194.109.206.212 194.109.206.212 80 443
+contact 1024R/8D56913D Alex de Joode <adejoode at sabotage.org>
+vote-digest 5473986DC071B8840F9E8C72AED3251B71A8244F
+dir-source gabelmoo ED03BB616EB2F60BEC80151114BB25CEF515B226 131.188.40.189 131.188.40.189 80 443
+contact 4096R/261C5FBE77285F88FB0C343266C8C2D7C5AA446D Sebastian Hahn <tor at sebastianhahn.net> - 12NbRAjAG5U3LLWETSF7fSTcdaz32Mu5CN
+vote-digest 6EF25161284DB1AD19411EBF52E96927DFD66F0C
+dir-source Faravahar EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97 154.35.175.225 154.35.175.225 80 443
+contact 0x0B47D56D Sina Rabbani (inf0) <sina redteam net>
+vote-digest 76466BEF6525FCEE0B9FDDB205B527CBB89FAB17
+r PDrelay1 AAFJ5u9xAqrKlpDW6N0pMhJLlKs 2015-09-15 04:53:47 95.215.44.189 8080 0
+m iH2s4cVPTmThmmx1phnjDd6oGpHAEP7FDK38AFYRV74
+s Fast Running Stable Valid
+v Tor 0.2.7.2-alpha-dev
+w Bandwidth=456
+r seele AAoQ1DAR6kkoo19hBAX5K0QztNw 2015-09-15 04:43:45 73.15.150.172 9001 0
+m LqTE9J4jxtMVm2IPDxmUKYyrzajcjqwYqLkX6JNOEpY
+s Fast Running Stable Valid
+v Tor 0.2.6.10
+w Bandwidth=15
+r TorNinurtaName AA8YrCza5McQugiY3J4h5y4BF9g 2015-09-15 17:06:16 151.236.6.198 443 80
+m YxghdOEPO/uPv0N1GZV+6gCuYR5NZPaDseU45R/Yyns
+s Fast HSDir Running Stable V2Dir Valid
+v Tor 0.2.6.10
+w Bandwidth=1750
+r CalyxInstitute14 ABG9JIWtRdmE7EFZyI/AZuXjMA4 2015-09-15 16:12:03 162.247.72.201 443 80
+m 6PnpSoAVS1oaJCqs4uk8y706JSZ6rdN8uUM78Oohgo0
+s Exit Fast Guard HSDir Running Stable V2Dir Valid
+v Tor 0.2.6.10
+w Bandwidth=11400
+directory-footer
+bandwidth-weights Wbd=0 Wbe=0 Wbg=3944 Wbm=10000 Wdb=10000 Web=10000 Wed=10000 Wee=10000 Weg=10000 Wem=10000 Wgb=10000 Wgd=0 Wgg=6056 Wgm=6056 Wmb=10000 Wmd=0 Wme=0 Wmg=3944 Wmm=10000
+directory-signature sha256 14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4 0B49FD2F55D02B630E7A500E34FD5B6F98FDA9D2
+-----BEGIN SIGNATURE-----
+OYThM95E2Pcj4RWZeND9+llOUlyHLwGtHqF+n92xKTsZdc7ogigEosdqVEB1GaeJ
+D/GWSESN9VLEiiw5VKBG4UVvZUaFVRUnZSJVS4gNgckH5LD0VUJb0C/tIeBHJ7ea
+cTOwsb0vHau/W8hkAciEWLfjmAtcWutdT6lljSJ65NWyZynDPHTmtKnsB65kKT+2
+p5W9GBxnMUH+Pnt2JkFqyKAsE8GgppMuk4O+AX+BtQgzK4K+pTcKFko6e/NmrBh3
+W6AV/fR9/PdSUNYb6+uH/LsUhVePFVbke44u6bBN39LkvxWErQdd3yA91lUdqwz8
+zR/uAngvj05Bt1zmH9eG1Kd02dgL4PqNmTI0VKZjWamf4k/xMmecU0xfrHLv52Rw
+/hync6FsEIrfI+B942sjIEGfv+T6UuIMNoMvkwrby6nw6WL7lAoJJq5pyBYLzlFj
+xLW6e567FFjqf4siHpGCzWCHoXUU+NmygDSakh0orgGoHdVW6JcKbs0QieUyUu+X
+-----END SIGNATURE-----
+directory-signature sha256 23D15D965BC35114467363C165C4F724B64B4F66 3C12B8EE0B3DC3AEDD8CD27CCB02C564281BE765
+-----BEGIN SIGNATURE-----
+doESbeqw4jQagxP8o0Yr6VVUdB2Hnmct4F5sH1pBntPYOqIeT3GbYuFT1TdYr9SG
+xSpI5+aRcbSQrKryvF+qSgOJS4131YuCmL7zlwhQrQcFtltyRS8UTopubrLbvAJ1
+njJt05lM8NWg6NN3uZ4Zy4L/qzn9qZB9l8WbxKZJnxott6+Rp1H19tsl77ENB4GJ
+Bfhc9C46LOKUfmV5IB0f+0nFOO3PDhbRFEcDw5EEGPx6xLk+EH9YHvP98VIY/YPi
+vtYN9G5G2PplvqG4hyIhGAYy/hm3ZTHf9eIsKvsBgHadBeFCupA/l6tf36nK94Ts
+d68QhqPo8QhrjM5FTddGOA==
+-----END SIGNATURE-----
+directory-signature sha256 49015F787433103580E3B66A1707A00E60F2D15B 2467B90267EEEDDD614694F16A523B0E6EBF6FAA
+-----BEGIN SIGNATURE-----
+NVXfCo512TGvfHr+9XB7nVTDMRurSlwrz/vHfXjSzmJ/6dbIK6PA98lBgvX3i83N
+2AZoUcIc5wasYjUleRrP4k2jn5sBArCr3g64FsZ4/RJMLg2pWxEw4xDY2DLjKpyZ
+BfndyOG5KSEtIszXw0szS0kkTyz7oI76L1vu016Mh35YjaeqK0Vu+P2gxJtKxNyt
+pLPZwTWVQfjqLAebtXD2Bi5Csty7TSAhsNe9KE5pUB7+DyzvbamuJzyhUFbuOgii
+Y1nguoknHVgHnrlVGNaF1T3x2QFb3SR5Q4v8p2u1j0zqktkYTZ693gHYzkjWuyGk
+W/MyhtG1cE7knxu/no+HBw==
+-----END SIGNATURE-----
+directory-signature sha256 585769C78764D58426B8B52B6651A5A71137189A 02958C9BE542EEA43130E54FE6634B6E874B196D
+-----BEGIN SIGNATURE-----
+XAZPIaIQSTj+8QWZm5dAtiQz2CnEeuM7WMcJikol7CL53n1ewG6k6vYgtFPCJ5uh
+EDGTM4eojUB0VXNpc+jeOv207Y5z2r6tjAknr1eMxPX65z2BT4WkeBZLnc68RRrM
+LUAe2IbOVeaqTnQmPAtSejO8xxIkJhfZAssrxdLtvagCpkcBYwAbHiVtYWv/G+r0
+pyqVzvxIaEoEFShe77bJqfjOnlLOgcTsxdmU3qcDad/5k8ewXfLVcqR8YsjoNF9D
+9xbHyqkBZs0EfcNoDhqWsbuq184zMjlSH0Q3QElK/0ZoQQ1y6vlSicKM9rl8vxmf
+hRx2hnKyx+yhBT4fpL22ew==
+-----END SIGNATURE-----
+directory-signature sha256 80550987E1D626E3EBA5E5E75A458DE0626D088C 7C5D0700D9C266B7D3F93E7C904A62FEC6B30A60
+-----BEGIN SIGNATURE-----
+WwrdpjcjuzxmV+E1ULuuFo98nMiN0iMw6MLAs7+kuOdjbW+ayKzizZvnDPjlMv1p
+2cJM7qAwnD6aSxzn1wpc0ecdaVae1cMYqEnP3BxFFY9Xh4dxu8JdlNhB7W3rlIva
+lptekvZQnRBb2J4ULws4455AP6oNTCO9x3UpGNd5tbc=
+-----END SIGNATURE-----
+directory-signature sha256 D586D18309DED4CD6D57C18FDB97EFA96D330566 F1E67374A96F51A0BB4C8C24748200885EFF8645
+-----BEGIN SIGNATURE-----
+fgn1RZSa2xtYrOiG9YUtQdPl9RMZ8YQsxImg7D53AoXrctC8Ul48CNnIz/JWdIts
+qcczwUtSSixuEDD5gqBcFn0Fi7S1E2F8VcmCUAh7AMBtVSn5vUKkmaQcSy+zG0N+
+ospb0wQYbJ2DcsVTqL9z4TLpNB09BLN0kOzS64MJAqH9thE/1GHGNeXRFHupYgTO
+PERdyFvFEAmS9XnSxSRitgf6ptG5sAlFbzQrueKmnlrVe51gJBTEZNgVb1wFTjEw
+7nHTvTz9c0AQfgW5if1nIh2RdQ9AeOo3FxCbFMeT53QzHiEZWSDsCZebE/EHf/lz
+od6P497tWLK6TIxctbWAXw==
+-----END SIGNATURE-----
+directory-signature sha256 E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58 292252D59020B903943C025D96134575E1245ADF
+-----BEGIN SIGNATURE-----
+LzZynAMcx4EWSAqaXRaL2iHCET9T+T8e15kDiPyefxTc5U00Ko6ASKJPpmaHHHAI
+SsYQm54TauFX7AOb7Q+9Fx8ujxA0AR85DglLXftAicM27L48E4d9E3qFtzi+0ZHs
+At0Wp9odW7ZeFcBNHWFGB/KfqOwHYkukRvqR3vBlKG8nvQ7cC+9o9aXhl4CLoA8X
+ViigiEINXCC1BFibg/XA/XZqDB7uxoeazririKp0qZTTZPp8sLSQBnrSvA3OL1I5
+c0m4xogooiG+YhGBC7N+MR+hDA9nyV+njVABpy/89/6Zy+y4iydqqO0qSRWXio9W
+3T36ECUvEQ9l+FONnfk0cQ==
+-----END SIGNATURE-----
+directory-signature sha256 ED03BB616EB2F60BEC80151114BB25CEF515B226 5E04DFE1E5E0852F56E524B7C81D1E5FAFDBA650
+-----BEGIN SIGNATURE-----
+UtcSi7Qb7CijqBhpMsFeqlhRxMTOGqldmdiDCp8ZLJFOCQGIHA9m9c62mlTIleDH
+iz/LdCn7RdGm7VoX7+jd3FfU86dajnyREDcwvvTHZ8eIUx6Bz+Q7zmv6yRBctmNL
+hqYcK0WHxmELYxwICljLPeu6QOaRE+4Ku1YbNoEFL/YJ4GmgAPODxxy35N+t3uRr
+MzWK7N+Ssdo0cRvz47cMBZqRQOKzn3a5/KMnRcBmZbhmJbhi5O9C3tZVA+XFa3fF
+jRGbtUir/OE23WmykdXKGmsymXX3b/io+w6UQjBUVzGHQtJLszMTKNE24eUnVOFD
+9Yc8aj5LgGkmJbNxyiF0mQ==
+-----END SIGNATURE-----
+directory-signature sha256 EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97 F22B7C6FB4E6DA30A3C63D265A1D4AB374CB34E3
+-----BEGIN SIGNATURE-----
+uh/7AVwA+T8EWm9GgQxYzPPBibvb2/FP5Ny6p9x95oJRWjWeKEM77hnojJ540N6u
+IyyIJTUB5JgUrsN2BXEnyUqZu0+OWB9hYxFIzVjwUxtuL2uaPeBfXqv5iJNEqG9w
+Gl1ubnFUbVZXM83e61ClgwaswhvVPzfgirqpmD+L9m/DaZj+EmzZtoquqaBNtv1g
+qU/yZgFG4AFApjvSaP6kkttZ8EYTVtGMq6muIxLeBDlVRP2dmC+R5FwGmsbj92is
+uxaCO4raz5BM6yyDS44vzJwFePOyQhiYNV52g8dod/xtrhZOlWQtCbX3jXxv4ofV
+nfX9MJA9VJ5WbLSpwU1p/w==
+-----END SIGNATURE-----
diff --git a/test/unit/descriptor/reader.py b/test/unit/descriptor/reader.py
index eaea7ea..8f617f9 100644
--- a/test/unit/descriptor/reader.py
+++ b/test/unit/descriptor/reader.py
@@ -466,7 +466,7 @@ class TestDescriptorReader(unittest.TestCase):
     reader = stem.descriptor.reader.DescriptorReader(DESCRIPTOR_TEST_DATA)
     reader.register_skip_listener(skip_listener.listener)
 
-    expected_skip_files = ('riddle', 'tiny.png', 'vote', 'new_metrics_type')
+    expected_skip_files = ('riddle', 'tiny.png', 'vote', 'new_metrics_type', 'cached-microdesc-consensus_with_carriage_returns')
 
     with reader:
       list(reader)  # iterates over all of the descriptors
diff --git a/test/unit/descriptor/router_status_entry.py b/test/unit/descriptor/router_status_entry.py
index 774a3e4..73cda28 100644
--- a/test/unit/descriptor/router_status_entry.py
+++ b/test/unit/descriptor/router_status_entry.py
@@ -5,11 +5,15 @@ Unit tests for stem.descriptor.router_status_entry.
 import datetime
 import unittest
 
+import stem.descriptor
+
 from stem import Flag
 from stem.descriptor.router_status_entry import RouterStatusEntryV3, _base64_to_hex
 from stem.exit_policy import MicroExitPolicy
 from stem.version import Version
 
+from test.unit.descriptor import get_resource
+
 from test.mocking import (
   get_router_status_entry_v2,
   get_router_status_entry_v3,
@@ -613,6 +617,21 @@ class TestRouterStatusEntry(unittest.TestCase):
       content = get_router_status_entry_v3({'m': m_line}, content = True)
       self.assertRaises(ValueError, RouterStatusEntryV3, content, True, vote_document())
 
+  def test_with_carriage_returns(self):
+    """
+    Parses a consensus file with windows newlines (CRLF) using normalize_newlines.
+    """
+
+    descriptor_path = get_resource('cached-microdesc-consensus_with_carriage_returns')
+
+    with open(descriptor_path, 'rb') as descriptor_file:
+      descriptors = stem.descriptor.parse_file(descriptor_file, 'network-status-microdesc-consensus-3 1.0', normalize_newlines = True)
+
+      # without normalization the trailing '\r' would be part of the line's last flag
+
+      router = next(descriptors)
+      self.assertEqual([Flag.FAST, Flag.RUNNING, Flag.STABLE, Flag.VALID], router.flags)
+
   def _expect_invalid_attr(self, content, attr = None, expected_value = None):
     """
     Asserts that construction will fail due to content having a malformed



More information about the tor-commits mailing list