# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
5
6"""A set of Python input field validators."""
7from __future__ import print_function
8from __future__ import division
9from __future__ import absolute_import
10
11import re
12
# RFC 2821-compliant email address regex
#
# Please see sections "4.1.2 Command Argument Syntax" and
# "4.1.3 Address Literals" of: http://www.faqs.org/rfcs/rfc2821.html
#
# The following implementation is still a subset of RFC 2821.  Fully
# double-quoted <user> parts are not supported (since the RFC discourages
# their use anyway), and using the backslash to escape other characters
# that are normally invalid, such as commas, is not supported.
#
# The groups in this regular expression are:
#
# <user>: all of the valid non-quoted portion of the email address before
#   the @ sign (not including the @ sign)
#
# <domain>: all of the domain name between the @ sign (but not including it)
#   and the dot before the TLD (but not including that final dot)
#
# <tld>: the top-level domain after the last dot (but not including that
#   final dot)
#
# The pattern body uses verbose syntax and carries no inline flag, so it can
# be embedded in a larger expression (e.g. between anchors) and compiled
# with re.VERBOSE.  An inline "(?x)" is only legal at the very start of an
# expression; Python 3.11+ rejects it anywhere else.
#
# Each domain label is written as runs of non-dash characters separated by
# mandatory runs of dashes.  That accepts exactly the same labels as the
# looser "optional dashes, then non-dashes, repeated" form, but gives the
# regex engine only one way to match a label, so a long host name that
# ultimately fails to match cannot trigger exponential backtracking.
_RFC_2821_EMAIL_BODY = r"""
  (?P<user>
    # Part of the username that comes before any dots that may occur in it.
    # At least one of the listed non-dot characters is required before the
    # first dot.
    [-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+

    # Remaining part of the username that starts with the dot and which may
    # have other dots, if such a part exists. Only one dot is permitted
    # between each "Atom", and a trailing dot is not permitted.
    (?:[.][-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+)*
  )

  # Domain name, where subdomains are allowed. Also, dashes are allowed
  # given that they are preceded and followed by at least one character.
  @(?P<domain>
    # One required domain part, which may be a sub-domain: runs of non-dash
    # characters separated by runs of dashes.
    [0-9a-zA-Z]+(?:-+[0-9a-zA-Z]+)*

    # Zero or more further sub-domain parts, each introduced by a dot.
    (?:\.[0-9a-zA-Z]+(?:-+[0-9a-zA-Z]+)*)*
  )
  \.  # dot separator before TLD

  # TLD, the part after 'usernames@domain.' which can consist of 2-9 letters.
  (?P<tld>[a-zA-Z]{2,9})
  """

# Stand-alone form of the pattern with the verbose flag embedded at the very
# start, so it can be handed directly to re.compile() without extra flags.
_RFC_2821_EMAIL_REGEX = '(?x)' + _RFC_2821_EMAIL_BODY

# object used with <re>.search() or <re>.sub() to find email addresses
# within a string (or with <re>.match() to find email addresses at the
# beginning of a string that may be followed by trailing characters,
# since <re>.match() implicitly anchors at the beginning of the string)
RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX)

# object used with <re>.match to find strings that contain *only* a single
# email address.  The end anchor is \Z rather than $ because $ would also
# match just before a trailing newline and so accept "user@example.com\n".
RE_EMAIL_ONLY = re.compile(r'^%s\Z' % _RFC_2821_EMAIL_BODY, re.VERBOSE)
79
# Only web and ftp schemes are accepted.
_SCHEME_PATTERN = r'(?:https?|ftp)://'

# A dot-less host name (e.g. "localhost"): starts with a letter, ends with a
# letter or digit, optionally followed by a port.
_SHORT_HOST_PATTERN = (
    r'(?=[a-zA-Z])[-a-zA-Z0-9]*[a-zA-Z0-9](:[0-9]+)?'
    r'/'  # Slash is mandatory for short host names.
    r'[^\s]*'
    )

# A dotted host name ending in a 2-9 letter TLD, optionally followed by a
# port and by a path that contains no whitespace.
_DOTTED_HOST_PATTERN = (
    r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}(:[0-9]+)?'
    r'(/[^\s]*)?'
    )
_URL_REGEX = r'%s(%s|%s)' % (
    _SCHEME_PATTERN, _SHORT_HOST_PATTERN, _DOTTED_HOST_PATTERN)

# A more complete URL regular expression based on a combination of the
# existing _URL_REGEX and the pattern found for URI regular expressions
# found in the URL RFC document.  It's detailed here:
# http://www.ietf.org/rfc/rfc2396.txt
#
# The end anchor is \Z rather than $ because $ would also match just before
# a trailing newline and so accept "http://example.com/\n" as a valid URL.
RE_COMPLEX_URL = re.compile(r'^%s(\?([^# ]*))?(#(.*))?\Z' % _URL_REGEX)
98
99
def IsValidEmail(s):
  """Check whether the whole string is one well-formed email address.

  Args:
    s: the string to check.

  Returns:
    The result of RE_EMAIL_ONLY.match(s): a (truthy) match object when the
    string matches, otherwise None.
  """
  email_match = RE_EMAIL_ONLY.match(s)
  return email_match
103
104
def IsValidMailTo(s):
  """Check whether the string is a well-formed mailto: link.

  Args:
    s: the string to check.

  Returns:
    False when the string does not start with 'mailto:'.  Otherwise the
    result of matching the remainder against RE_EMAIL_ONLY: a (truthy)
    match object when it matches, or None.
  """
  if not s.startswith('mailto:'):
    return False
  # Validate everything after the 7-character 'mailto:' prefix.
  address = s[7:]
  return RE_EMAIL_ONLY.match(address)
108
109
def IsValidURL(s):
  """Check whether the whole string is a well-formed web or ftp URL.

  Args:
    s: the string to check.

  Returns:
    The result of RE_COMPLEX_URL.match(s): a (truthy) match object when the
    string matches, otherwise None.
  """
  url_match = RE_COMPLEX_URL.match(s)
  return url_match