[tor-commits] [bridgedb/master] Explicitly specify parser for BeautifulSoup.

phw at torproject.org phw at torproject.org
Wed Feb 19 18:26:38 UTC 2020


commit 93783bba150e4714a60825974a91aec83e2cdb59
Author: Philipp Winter <phw at nymity.ch>
Date:   Wed Jan 29 21:16:16 2020 -0800

    Explicitly specify parser for BeautifulSoup.
    
    This should fix CI warnings like the following:
    
      /home/travis/build/NullHypothesis/bridgedb/bridgedb/test/test_https_server.py:533: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("html5lib"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.
    
      The code that caused this warning is on line 533 of the file /home/travis/build/NullHypothesis/bridgedb/bridgedb/test/test_https_server.py. To get rid of this warning, pass the additional argument 'features="html5lib"' to the BeautifulSoup constructor.
---
 bridgedb/test/test_https.py        |  2 +-
 bridgedb/test/test_https_server.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/bridgedb/test/test_https.py b/bridgedb/test/test_https.py
index 2349e28..8e3de1b 100644
--- a/bridgedb/test/test_https.py
+++ b/bridgedb/test/test_https.py
@@ -137,7 +137,7 @@ class HTTPTests(unittest.TestCase):
         # ------------- Results
         # URL should be the same as last time
         self.assertEquals(self.br.response().geturl(), EXPECTED_URL)
-        soup = BeautifulSoup(captcha_response.read())
+        soup = BeautifulSoup(captcha_response.read(), features="html5lib")
         return soup
 
     def getBridgeLinesFromSoup(self, soup, fieldsPerBridge):
diff --git a/bridgedb/test/test_https_server.py b/bridgedb/test/test_https_server.py
index a4d6873..ff7a1e3 100644
--- a/bridgedb/test/test_https_server.py
+++ b/bridgedb/test/test_https_server.py
@@ -337,7 +337,7 @@ class CaptchaProtectedResourceTests(unittest.TestCase):
         request = DummyRequest([self.pagename])
         request.method = b'POST'
         page = self.captchaResource.render_POST(request)
-        self.assertEqual(BeautifulSoup(page).find('meta')['http-equiv'],
+        self.assertEqual(BeautifulSoup(page, features="html5lib").find('meta')['http-equiv'],
                          'refresh')
 
 
@@ -462,7 +462,7 @@ class GimpCaptchaProtectedResourceTests(unittest.TestCase):
         self.request.addArg('captcha_response_field', '')
 
         page = self.captchaResource.render_POST(self.request)
-        self.assertEqual(BeautifulSoup(page).find('meta')['http-equiv'],
+        self.assertEqual(BeautifulSoup(page, features="html5lib").find('meta')['http-equiv'],
                          'refresh')
 
     def test_render_POST_wrongSolution(self):
@@ -477,7 +477,7 @@ class GimpCaptchaProtectedResourceTests(unittest.TestCase):
         self.request.addArg('captcha_response_field', expectedResponse)
 
         page = self.captchaResource.render_POST(self.request)
-        self.assertEqual(BeautifulSoup(page).find('meta')['http-equiv'],
+        self.assertEqual(BeautifulSoup(page, features="html5lib").find('meta')['http-equiv'],
                          'refresh')
 
 
@@ -530,7 +530,7 @@ class ReCaptchaProtectedResourceTests(unittest.TestCase):
         def testCB(request):
             """Check the ``Request`` returned from ``_renderDeferred``."""
             self.assertIsInstance(request, DummyRequest)
-            soup = BeautifulSoup(b''.join(request.written)).find(b'meta')['http-equiv']
+            soup = BeautifulSoup(b''.join(request.written), features="html5lib").find(b'meta')['http-equiv']
             self.assertEqual(soup, 'refresh')
 
         d = task.deferLater(reactor, 0, lambda x: x, (False, self.request))
@@ -674,7 +674,7 @@ class BridgesResourceTests(unittest.TestCase):
         :returns: A list of the bridge lines contained on the **page**.
         """
         # The bridge lines are contained in a <div class='bridges'> tag:
-        soup = BeautifulSoup(page)
+        soup = BeautifulSoup(page, features="html5lib")
         well = soup.find('div', {'class': 'bridge-lines'})
         content = well.renderContents().decode('utf-8').strip()
         lines = content.splitlines()





More information about the tor-commits mailing list