commit 3a367f6edbf0f832c786e767f49e71b2e2a18517 Author: hiro hiro@torproject.org Date: Fri Oct 4 18:47:29 2019 +0200
Add validate_email to utils --- gettor/parse/email.py | 2 +- gettor/utils/validate_email.py | 212 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 1 deletion(-)
diff --git a/gettor/parse/email.py b/gettor/parse/email.py index 99d90c6..3980763 100644 --- a/gettor/parse/email.py +++ b/gettor/parse/email.py @@ -15,7 +15,6 @@ from __future__ import absolute_import import re import dkim import hashlib -import validate_email
from datetime import datetime import configparser @@ -29,6 +28,7 @@ from twisted.enterprise import adbapi
from ..utils.db import SQLite3 from ..utils import strings +from ..utils import validate_email
class AddressError(Exception): """ diff --git a/gettor/utils/validate_email.py b/gettor/utils/validate_email.py new file mode 100644 index 0000000..0f18e3e --- /dev/null +++ b/gettor/utils/validate_email.py @@ -0,0 +1,212 @@ +# RFC 2822 - style email validation for Python +# (c) 2012 Syrus Akbary me@syrusakbary.com +# Extended from (c) 2011 Noel Bush noel@aitools.org +# for support of mx and user check +# This code is made available to you under the GNU LGPL v3. +# +# This module provides a single function, validate_email(), +# which returns True or False to indicate whether a given address +# is valid according to the 'addr-spec' part of the specification +# given in RFC 2822. Ideally, we would like to find this +# in some other library, already thoroughly tested and well- +# maintained. The standard Python library email.utils +# contains a parseaddr() function, but it is not sufficient +# to detect many malformed addresses. +# +# This implementation aims to be faithful to the RFC, with the +# exception of a circular definition (see comments below), and +# with the omission of the pattern components marked as "obsolete". + +import re +import smtplib +import logging +import socket + +try: + raw_input +except NameError: + def raw_input(prompt=''): + return input(prompt) + +try: + import DNS + ServerError = DNS.ServerError + DNS.DiscoverNameServers() +except (ImportError, AttributeError): + DNS = None + + class ServerError(Exception): + pass + +# All we are really doing is comparing the input string to one +# gigantic regular expression. But building that regexp, and +# ensuring its correctness, is made much easier by assembling it +# from the "tokens" defined by the RFC. Each of these tokens is +# tested in the accompanying unit test file. +# +# The section of RFC 2822 from which each pattern component is +# derived is given in an accompanying comment. +# +# (To make things simple, every string below is given as 'raw', +# even when it's not strictly necessary. 
# This way we don't forget when it is necessary.)
#
# NOTE: the backslash escapes inside these patterns are significant.
# Without them '.' matches any character instead of a literal dot, '(' / ')'
# and '[' / ']' become regex grouping / character-class syntax instead of
# the literal delimiters the RFC requires, and the apostrophe in ATEXT
# terminates the string literal early.
WSP = r'[\s]'                                        # see 2.2.2. Structured Header Field Bodies
CRLF = r'(?:\r\n)'                                   # see 2.2.3. Long Header Fields
NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0f-\x1f\x7f'        # see 3.2.1. Primitive Tokens
QUOTED_PAIR = r'(?:\\.)'                             # see 3.2.2. Quoted characters (backslash + any char)
FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + \
      WSP + r'+)'                                    # see 3.2.3. Folding white space and comments
CTEXT = r'[' + NO_WS_CTL + \
        r'\x21-\x27\x2a-\x5b\x5d-\x7e]'              # see 3.2.3
CCONTENT = r'(?:' + CTEXT + r'|' + \
           QUOTED_PAIR + r')'                        # see 3.2.3 (NB: The RFC includes COMMENT here
# as well, but that would be circular.)
COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + \
          r')*' + FWS + r'?\)'                       # see 3.2.3 (literal parentheses delimit a comment)
CFWS = r'(?:' + FWS + r'?' + COMMENT + ')*(?:' + \
       FWS + '?' + COMMENT + '|' + FWS + ')'         # see 3.2.3
ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]'           # see 3.2.4. Atom
ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?'      # see 3.2.4
DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*'   # see 3.2.4 (atoms separated by single literal dots)
DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?' # see 3.2.4
QTEXT = r'[' + NO_WS_CTL + \
        r'\x21\x23-\x5b\x5d-\x7e]'                   # see 3.2.5. Quoted strings
QCONTENT = r'(?:' + QTEXT + r'|' + \
           QUOTED_PAIR + r')'                        # see 3.2.5
QUOTED_STRING = CFWS + r'?' + r'"(?:' + FWS + \
                r'?' + QCONTENT + r')*' + FWS + \
                r'?' + r'"' + CFWS + r'?'
LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + \
             QUOTED_STRING + r')'                    # see 3.4.1. Addr-spec specification
DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]'    # see 3.4.1
DCONTENT = r'(?:' + DTEXT + r'|' + \
           QUOTED_PAIR + r')'                        # see 3.4.1
DOMAIN_LITERAL = CFWS + r'?' + r'\[' + \
                 r'(?:' + FWS + r'?' + DCONTENT + \
                 r')*' + FWS + r'?\]' + CFWS + r'?'  # see 3.4.1 (literal square brackets)
DOMAIN = r'(?:' + DOT_ATOM + r'|' + \
         DOMAIN_LITERAL + r')'                       # see 3.4.1
ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN               # see 3.4.1

# A valid address will match exactly the 3.4.1 addr-spec.
VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'

# Module-level caches shared by every call in this process:
#   MX_DNS_CACHE   hostname -> result of DNS.mxlookup(), or None when the
#                  lookup failed with NXDOMAIN / SERVFAIL
#   MX_CHECK_CACHE MX host  -> True once a connection to it has succeeded
MX_DNS_CACHE = {}
MX_CHECK_CACHE = {}


def get_mx_ip(hostname):
    """Return the MX lookup result for *hostname*, caching it per hostname.

    The result of ``DNS.mxlookup(hostname)`` is stored in ``MX_DNS_CACHE``.
    A ``ServerError`` whose ``rcode`` is 3 (NXDOMAIN) or 2 (SERVFAIL) is
    cached as ``None``; any other ``ServerError`` is re-raised and nothing
    is cached.  The caller must make sure the ``DNS`` module is available.
    """
    if hostname not in MX_DNS_CACHE:
        try:
            MX_DNS_CACHE[hostname] = DNS.mxlookup(hostname)
        except ServerError as e:
            if e.rcode in (2, 3):  # SERVFAIL or NXDOMAIN (Non-Existent Domain)
                MX_DNS_CACHE[hostname] = None
            else:
                raise

    return MX_DNS_CACHE[hostname]


def validate_email(email, check_mx=False, verify=False, debug=False, smtp_timeout=10):
    """Indicate whether the given string is a valid email address
    according to the 'addr-spec' portion of RFC 2822 (see section
    3.4.1).  Parts of the spec that are marked obsolete are *not*
    included in this test, and certain arcane constructions that
    depend on circular definitions in the spec may not pass, but in
    general this should correctly identify any email address likely
    to be in use as of 2011.

    Parameters:
        email: the address to check.
        check_mx: also require that one of the domain's MX hosts accepts
            a connection.
        verify: additionally ask the MX host (HELO / MAIL / RCPT) whether
            it accepts the address; implies ``check_mx``.
        debug: log details of the exchange to the 'validate_email' logger,
            whose level is set to DEBUG.
        smtp_timeout: timeout in seconds passed to ``smtplib.SMTP``.

    Returns:
        ``False`` if the address is syntactically invalid or the MX lookup
        was cached as ``None``; ``True`` if the syntax is valid and, when
        requested, an MX host accepted the connection / recipient; ``None``
        when the MX or SMTP check was inconclusive (no host gave a positive
        answer, or a ``ServerError`` / ``socket.error`` occurred).

    Raises:
        Exception: if an MX check is requested but the DNS module is not
            available.
    """
    if debug:
        logger = logging.getLogger('validate_email')
        logger.setLevel(logging.DEBUG)
    else:
        logger = None

    # Explicit test instead of `assert`: assert statements are removed when
    # Python runs with -O, which would make every address look valid.
    if re.match(VALID_ADDRESS_REGEXP, email) is None:
        return False

    check_mx |= verify
    if not check_mx:
        return True

    if not DNS:
        raise Exception('For check the mx records or check if the email exists you must '
                        'have installed pyDNS python package')

    try:
        # Take the text after the LAST '@': a quoted local part may itself
        # contain '@' (e.g. "a@b"@example.com).
        hostname = email.rpartition('@')[2]
        mx_hosts = get_mx_ip(hostname)
        if mx_hosts is None:
            return False
        for mx in mx_hosts:
            host = mx[1]  # second element of the MX record is the host to contact
            if not verify and host in MX_CHECK_CACHE:
                return MX_CHECK_CACHE[host]
            smtp = smtplib.SMTP(timeout=smtp_timeout)
            try:
                smtp.connect(host)
                MX_CHECK_CACHE[host] = True
                if not verify:
                    # Reachability is all that was asked for.
                    try:
                        smtp.quit()
                    except smtplib.SMTPServerDisconnected:
                        pass
                    return True
                status, reply = smtp.helo()
                if status != 250:
                    smtp.quit()
                    if debug:
                        logger.debug(u'%s answer: %s - %s', host, status, reply)
                    continue
                smtp.mail('')
                status, reply = smtp.rcpt(email)
                if status == 250:
                    smtp.quit()
                    return True
                if debug:
                    logger.debug(u'%s answer: %s - %s', host, status, reply)
                smtp.quit()
            except smtplib.SMTPServerDisconnected:  # Server does not permit verifying the user
                if debug:
                    logger.debug(u'%s disconected.', host)
            except smtplib.SMTPConnectError:
                if debug:
                    logger.debug(u'Unable to connect to %s.', host)
            finally:
                # Always release the socket, including when an exception
                # escapes mid-dialogue; close() is harmless after quit().
                smtp.close()
        return None
    except (ServerError, socket.error) as e:
        if debug:
            logger.debug('ServerError or socket.error exception raised (%s).', e)
        return None


if __name__ == "__main__":
    # Small interactive driver: keep prompting for addresses and report the
    # verdict of validate_email() for each one.
    import time
    while True:
        email = raw_input('Enter email for validation: ')

        mx = raw_input('Validate MX record? [yN] ')
        mx = mx.strip().lower() == 'y'

        validate = raw_input('Try to contact server for address validation? [yN] ')
        validate = validate.strip().lower() == 'y'

        logging.basicConfig()

        result = validate_email(email, mx, validate, debug=True, smtp_timeout=1)
        if result:
            print("Valid!")
        elif result is None:
            print("I'm not sure.")
        else:
            print("Invalid!")

        time.sleep(1)


# import sys

# sys.modules[__name__],sys.modules['validate_email_module'] = validate_email,sys.modules[__name__]
# from validate_email_module import *