blob: baf6d6e570bcf46789112d7226250abe0485aa91 [file] [log] [blame]
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +01001# Copyright 2016 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Copybara854996b2021-09-07 19:36:02 +00004
5"""A set of Python input field validators."""
6from __future__ import print_function
7from __future__ import division
8from __future__ import absolute_import
9
10import re
11
# Email address regex modelled on the RFC 5322 address grammar, taken from
# https://stackoverflow.com/a/201378
#
# The pattern is written for re.X (verbose) mode, so the line breaks and
# indentation inside the string are ignored by the regex engine.  Only
# lowercase ASCII letters are listed for unquoted local parts and for domain
# names, and no case-insensitive flag is passed below, so those parts do not
# match uppercase letters.
#
# The constant keeps its older "RFC 2821" name so that any existing
# references to it keep working.
#
# NOTE(review): in the address-literal branch the character class
# [... \x21-\x5a \x53-\x7f] has overlapping ranges and therefore admits
# '[', '\' and ']'.  This looks like a typo carried over from the upstream
# pattern -- confirm before changing it.
_RFC_2821_EMAIL_REGEX = r"""
  (?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|
  "(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|
  \\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@
  (?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|
  \[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|
  [0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:
  (?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|
  \\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])
  """

# Compiled pattern for use with .search() or .sub() to find email addresses
# anywhere within a string, or with .match() to find an email address at the
# beginning of a string that may be followed by trailing characters
# (.match() implicitly anchors at the beginning of the string).
RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX, re.X)

# Compiled pattern for use with .match() to accept strings that contain
# *only* a single email address.  The end anchor is \Z rather than $ because
# $ also matches just before a trailing newline, which would let a value such
# as 'user@example.com\n' pass validation.
RE_EMAIL_ONLY = re.compile(r'^%s\Z' % _RFC_2821_EMAIL_REGEX, re.X)
Copybara854996b2021-09-07 19:36:02 +000034
# Building blocks for URL validation.  Only the http, https and ftp schemes
# are accepted.
_SCHEME_PATTERN = r'(?:https?|ftp)://'
# A dot-less host name that starts with a letter, with an optional port.
_SHORT_HOST_PATTERN = (
    r'(?=[a-zA-Z])[-a-zA-Z0-9]*[a-zA-Z0-9](:[0-9]+)?'
    r'/'  # Slash is mandatory for short host names.
    r'[^\s]*'
    )
# A host name ending in a dot plus 2-9 letters, with an optional port and an
# optional whitespace-free path.
_DOTTED_HOST_PATTERN = (
    r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}(:[0-9]+)?'
    r'(/[^\s]*)?'
    )
_URL_REGEX = r'%s(%s|%s)' % (
    _SCHEME_PATTERN, _SHORT_HOST_PATTERN, _DOTTED_HOST_PATTERN)

# A more complete URL regular expression that combines _URL_REGEX with the
# query and fragment parts of the URI pattern given in the URI RFC:
# http://www.ietf.org/rfc/rfc2396.txt
#
# The query excludes all whitespace (not just the space character), matching
# what the path part already does, and the end anchor is \Z rather than $ so
# that a trailing newline is not silently accepted.
RE_COMPLEX_URL = re.compile(r'^%s(\?([^#\s]*))?(#(.*))?\Z' % _URL_REGEX)
53
54
def IsValidEmail(s):
  """Check whether the whole string is a single email address.

  Args:
    s: the string to check.

  Returns:
    The match object produced by RE_EMAIL_ONLY (truthy) when s matches,
    otherwise None.
  """
  email_match = RE_EMAIL_ONLY.match(s)
  return email_match
58
59
def IsValidMailTo(s):
  """Check whether the string is 'mailto:' followed by one email address.

  Args:
    s: the string to check.

  Returns:
    False when s does not start with 'mailto:'.  Otherwise the result of
    matching the remainder against RE_EMAIL_ONLY: a match object (truthy)
    or None.
  """
  if not s.startswith('mailto:'):
    return False
  # 7 == len('mailto:'); validate only the address after the prefix.
  address = s[7:]
  return RE_EMAIL_ONLY.match(address)
63
64
def IsValidURL(s):
  """Check whether the string is a well-formed web or ftp URL.

  Args:
    s: the string to check.

  Returns:
    The match object produced by RE_COMPLEX_URL (truthy) when s matches,
    otherwise None.
  """
  url_match = RE_COMPLEX_URL.match(s)
  return url_match