commit 3a367f6edbf0f832c786e767f49e71b2e2a18517
Author: hiro <hiro(a)torproject.org>
Date: Fri Oct 4 18:47:29 2019 +0200
Add validate_email to utils
---
gettor/parse/email.py | 2 +-
gettor/utils/validate_email.py | 212 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 213 insertions(+), 1 deletion(-)
diff --git a/gettor/parse/email.py b/gettor/parse/email.py
index 99d90c6..3980763 100644
--- a/gettor/parse/email.py
+++ b/gettor/parse/email.py
@@ -15,7 +15,6 @@ from __future__ import absolute_import
import re
import dkim
import hashlib
-import validate_email
from datetime import datetime
import configparser
@@ -29,6 +28,7 @@ from twisted.enterprise import adbapi
from ..utils.db import SQLite3
from ..utils import strings
+from ..utils import validate_email
class AddressError(Exception):
"""
diff --git a/gettor/utils/validate_email.py b/gettor/utils/validate_email.py
new file mode 100644
index 0000000..0f18e3e
--- /dev/null
+++ b/gettor/utils/validate_email.py
@@ -0,0 +1,212 @@
+# RFC 2822 - style email validation for Python
+# (c) 2012 Syrus Akbary <me(a)syrusakbary.com>
+# Extended from (c) 2011 Noel Bush <noel(a)aitools.org>
+# for support of mx and user check
+# This code is made available to you under the GNU LGPL v3.
+#
+# This module provides one main function, validate_email(),
+# which returns True or False to indicate whether a given address
+# is valid according to the 'addr-spec' part of the specification
+# given in RFC 2822 (or None when an optional MX/SMTP check could
+# not reach a verdict). Ideally, we would like to find this
+# in some other library, already thoroughly tested and well-
+# maintained. The standard Python library email.utils
+# contains a parseaddr() function, but it is not sufficient
+# to detect many malformed addresses.
+#
+# This implementation aims to be faithful to the RFC, with the
+# exception of a circular definition (see comments below), and
+# with the omission of the pattern components marked as "obsolete".
+
+import re
+import smtplib
+import logging
+import socket
+
+try:
+ raw_input  # probe only: the name exists on Python 2, so nothing to do there
+except NameError:  # name is missing (Python 3): provide it on top of input()
+ def raw_input(prompt=''):  # used by the interactive __main__ loop at the bottom of this module
+ return input(prompt)
+
+try:
+ import DNS  # optional dependency; validate_email()'s error message calls it "pyDNS"
+ ServerError = DNS.ServerError  # re-export so the except clauses below can name it unqualified
+ DNS.DiscoverNameServers()  # import-time side effect; presumably loads the system resolver list -- confirm against pyDNS docs
+except (ImportError, AttributeError):
+ DNS = None  # sentinel: validate_email() tests `if not DNS` before attempting any MX lookup
+
+ class ServerError(Exception):  # stand-in so `except ServerError` still resolves without the DNS package
+ pass
+
+# All we are really doing is comparing the input string to one
+# gigantic regular expression. But building that regexp, and
+# ensuring its correctness, is made much easier by assembling it
+# from the "tokens" defined by the RFC. Each of these tokens is
+# tested in the accompanying unit test file.
+#
+# The section of RFC 2822 from which each pattern component is
+# derived is given in an accompanying comment.
+#
+# (To make things simple, every string below is given as 'raw',
+# even when it's not strictly necessary. This way we don't forget
+# when it is necessary.)
+#
+# RFC 2822 defines WSP as SPACE / HTAB only. Using \s here would also admit
+# bare CR, LF, VT, FF and Unicode spaces wherever folding white space is
+# allowed, which lets line breaks slip through validation.
+WSP = r'[ \t]'                                       # see 2.2.2. Structured Header Field Bodies
+CRLF = r'(?:\r\n)'                                   # see 2.2.3. Long Header Fields
+# NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127 (so the range starts at \x0e).
+NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0e-\x1f\x7f'        # see 3.2.1. Primitive Tokens
+QUOTED_PAIR = r'(?:\\.)'                             # see 3.2.2. Quoted characters
+FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + \
+    WSP + r'+)'                                      # see 3.2.3. Folding white space and comments
+CTEXT = r'[' + NO_WS_CTL + \
+    r'\x21-\x27\x2a-\x5b\x5d-\x7e]'                  # see 3.2.3
+CCONTENT = r'(?:' + CTEXT + r'|' + \
+    QUOTED_PAIR + r')'                               # see 3.2.3 (NB: The RFC includes COMMENT here
+# as well, but that would be circular.)
+COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + \
+    r')*' + FWS + r'?\)'                             # see 3.2.3
+CFWS = r'(?:' + FWS + r'?' + COMMENT + r')*(?:' + \
+    FWS + r'?' + COMMENT + r'|' + FWS + r')'         # see 3.2.3
+# \w is broader than the RFC's ALPHA / DIGIT (it also admits non-ASCII
+# word characters on str patterns); left as-is so such addresses keep passing.
+ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]'           # see 3.2.4. Atom
+ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?'      # see 3.2.4
+DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*'   # see 3.2.4
+DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?' # see 3.2.4
+QTEXT = r'[' + NO_WS_CTL + \
+    r'\x21\x23-\x5b\x5d-\x7e]'                       # see 3.2.5. Quoted strings
+QCONTENT = r'(?:' + QTEXT + r'|' + \
+    QUOTED_PAIR + r')'                               # see 3.2.5
+QUOTED_STRING = CFWS + r'?' + r'"(?:' + FWS + \
+    r'?' + QCONTENT + r')*' + FWS + \
+    r'?' + r'"' + CFWS + r'?'                        # see 3.2.5
+LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + \
+    QUOTED_STRING + r')'                             # see 3.4.1. Addr-spec specification
+DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]'    # see 3.4.1
+DCONTENT = r'(?:' + DTEXT + r'|' + \
+    QUOTED_PAIR + r')'                               # see 3.4.1
+DOMAIN_LITERAL = CFWS + r'?' + r'\[' + \
+    r'(?:' + FWS + r'?' + DCONTENT + \
+    r')*' + FWS + r'?\]' + CFWS + r'?'               # see 3.4.1
+DOMAIN = r'(?:' + DOT_ATOM + r'|' + \
+    DOMAIN_LITERAL + r')'                            # see 3.4.1
+ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN               # see 3.4.1
+
+# A valid address will match exactly the 3.4.1 addr-spec.
+# Anchor the end with \Z rather than $: in Python, $ also matches just
+# before a trailing newline, so 'user@example.com\n' would be accepted.
+VALID_ADDRESS_REGEXP = r'^' + ADDR_SPEC + r'\Z'
+
+MX_DNS_CACHE = {}  # hostname -> DNS.mxlookup() result, or None for rcode 2/3; filled by get_mx_ip(), never expired
+MX_CHECK_CACHE = {}  # MX host -> True once validate_email() has connected to it over SMTP; never expired
+
+
+def get_mx_ip(hostname):
+    """Return the MX lookup result for *hostname*, memoised in MX_DNS_CACHE.
+
+    The value is whatever ``DNS.mxlookup(hostname)`` returns, or ``None``
+    when the lookup fails with rcode 3 (NXDOMAIN) or rcode 2 (SERVFAIL).
+    Both outcomes are cached for the lifetime of the process.
+
+    Raises:
+        ServerError: for any other DNS failure; nothing is cached then.
+    """
+    try:
+        return MX_DNS_CACHE[hostname]
+    except KeyError:
+        pass  # first time we see this hostname: ask the resolver
+
+    try:
+        records = DNS.mxlookup(hostname)
+    except ServerError as err:
+        # 3 = NXDOMAIN (Non-Existent Domain), 2 = SERVFAIL
+        if err.rcode not in (2, 3):
+            raise
+        records = None
+
+    MX_DNS_CACHE[hostname] = records
+    return records
+
+
+def _close_smtp(smtp):
+    """Best-effort shutdown of *smtp*; never raises for a dead connection."""
+    try:
+        smtp.quit()
+    except (smtplib.SMTPException, socket.error):
+        # Server already hung up (or we never connected): just drop the socket.
+        smtp.close()
+
+
+def validate_email(email, check_mx=False, verify=False, debug=False, smtp_timeout=10):
+    """Indicate whether the given string is a valid email address
+    according to the 'addr-spec' portion of RFC 2822 (see section
+    3.4.1). Parts of the spec that are marked obsolete are *not*
+    included in this test, and certain arcane constructions that
+    depend on circular definitions in the spec may not pass, but in
+    general this should correctly identify any email address likely
+    to be in use as of 2011.
+
+    Args:
+        email: the address to check.
+        check_mx: also look up the domain's MX records and require that
+            one of the listed hosts accepts an SMTP connection.
+        verify: additionally ask the MX host (HELO / MAIL / RCPT) whether
+            it accepts the address. Implies ``check_mx``.
+        debug: log SMTP/DNS details to the 'validate_email' logger, whose
+            level is set to DEBUG as a side effect.
+        smtp_timeout: timeout in seconds passed to ``smtplib.SMTP``.
+
+    Returns:
+        True if the address passed every requested check; False if it is
+        syntactically invalid or the MX lookup reported no such domain;
+        None if the MX/SMTP checks could not reach a verdict.
+
+    Raises:
+        Exception: if an MX check is requested but the DNS package is
+            not available.
+    """
+    if debug:
+        logger = logging.getLogger('validate_email')
+        logger.setLevel(logging.DEBUG)
+    else:
+        logger = None
+
+    # Explicit test instead of `assert`: assertions are stripped under
+    # `python -O`, which would make every string count as valid.
+    if re.match(VALID_ADDRESS_REGEXP, email) is None:
+        return False
+
+    check_mx |= verify
+    if not check_mx:
+        return True
+
+    if not DNS:
+        raise Exception('For check the mx records or check if the email exists you must '
+                        'have installed pyDNS python package')
+
+    try:
+        # Split on the LAST '@': a quoted local part may itself contain '@'.
+        hostname = email.rpartition('@')[2]
+        mx_hosts = get_mx_ip(hostname)
+        if mx_hosts is None:
+            return False
+
+        for mx in mx_hosts:
+            host = mx[1]
+            if not verify and host in MX_CHECK_CACHE:
+                return MX_CHECK_CACHE[host]
+
+            smtp = smtplib.SMTP(timeout=smtp_timeout)
+            try:
+                smtp.connect(host)
+                MX_CHECK_CACHE[host] = True
+                if not verify:
+                    return True
+
+                status, reply = smtp.helo()
+                if status != 250:
+                    if debug:
+                        logger.debug(u'%s answer: %s - %s', host, status, reply)
+                    continue
+
+                smtp.mail('')
+                status, reply = smtp.rcpt(email)
+                if status == 250:
+                    return True
+                if debug:
+                    logger.debug(u'%s answer: %s - %s', host, status, reply)
+            except smtplib.SMTPServerDisconnected:
+                # The server does not allow user verification.
+                if debug:
+                    logger.debug(u'%s disconected.', host)
+            except smtplib.SMTPConnectError:
+                if debug:
+                    logger.debug(u'Unable to connect to %s.', host)
+            finally:
+                # Runs on return/continue/exception alike, so the socket is
+                # always released and a hang-up during QUIT can no longer
+                # discard a verdict that was already reached.
+                _close_smtp(smtp)
+
+        # Every MX host was tried without a definite answer.
+        return None
+    except (ServerError, socket.error) as e:
+        if debug:
+            logger.debug('ServerError or socket.error exception raised (%s).', e)
+        return None
+
+if __name__ == "__main__":
+    # Interactive loop: prompt for an address and two y/N options, run
+    # validate_email() with debug logging, print the verdict, repeat.
+    import time
+
+    while True:
+        address = raw_input('Enter email for validation: ')
+
+        # Only an answer of "y" (any case, surrounding blanks ignored) enables a check.
+        want_mx = raw_input('Validate MX record? [yN] ').strip().lower() == 'y'
+        want_verify = raw_input(
+            'Try to contact server for address validation? [yN] ').strip().lower() == 'y'
+
+        logging.basicConfig()
+
+        outcome = validate_email(address, want_mx, want_verify, debug=True, smtp_timeout=1)
+        if outcome is None:
+            verdict = "I'm not sure."
+        elif outcome:
+            verdict = "Valid!"
+        else:
+            verdict = "Invalid!"
+        print(verdict)
+
+        time.sleep(1)
+
+
+# import sys
+
+# sys.modules[__name__],sys.modules['validate_email_module'] = validate_email,sys.modules[__name__]
+# from validate_email_module import *