Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame^] | 1 | # Copyright 2016 The Chromium Authors |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 4 | |
| 5 | """A set of Python input field validators.""" |
| 6 | from __future__ import print_function |
| 7 | from __future__ import division |
| 8 | from __future__ import absolute_import |
| 9 | |
| 10 | import re |
| 11 | |
# Email address regex adapted from https://stackoverflow.com/a/201378, which
# presents it as an RFC 5322-compliant pattern.  (The constant's name still
# says RFC 2821; it is left unchanged so existing references keep working.)
#
# The pattern is laid out over several lines and therefore must be compiled
# with re.X (verbose mode: whitespace outside character classes is ignored).
# Letter classes list both cases explicitly (a-zA-Z) so that mixed-case
# addresses such as "User@Example.COM" are accepted without relying on
# re.I, which on Unicode patterns also lets [a-z] match a few non-ASCII
# letters.
_RFC_2821_EMAIL_REGEX = r"""
(?:[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+
(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*|
"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|
\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@
(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+
[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?|
\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|
[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-zA-Z0-9-]*[a-zA-Z0-9]:
(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|
\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])
"""

# Compiled pattern for use with .search() or .sub() to find email addresses
# anywhere within a string, or with .match() to find an address at the very
# beginning of a string that may be followed by trailing characters
# (.match() implicitly anchors at the beginning of the string).
RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX, re.X)

# Compiled pattern for use with .match() to accept strings that contain
# *only* a single email address.  The end is anchored with \Z rather than $
# because $ also matches just before a trailing newline, which would let
# "user@example.com\n" through.  The non-capturing group keeps both anchors
# applied to the whole address pattern.
RE_EMAIL_ONLY = re.compile(r'^(?:%s)\Z' % _RFC_2821_EMAIL_REGEX, re.X)
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 34 | |
# Building blocks for the URL pattern.

# Only http, https and ftp URLs are recognized.
_SCHEME_PATTERN = r'(?:https?|ftp)://'

# Host name without dots: must begin with a letter, may carry a port, and
# must be followed by a slash and an optional whitespace-free path.
_SHORT_HOST_PATTERN = (
    r'(?=[a-zA-Z])[-a-zA-Z0-9]*[a-zA-Z0-9](:[0-9]+)?'
    r'/'  # Slash is mandatory for short host names.
    r'[^\s]*'
)

# Dotted host name ending in a 2-9 letter alphabetic label, with an optional
# port and an optional whitespace-free path.
_DOTTED_HOST_PATTERN = (
    r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}(:[0-9]+)?'
    r'(/[^\s]*)?'
)

_URL_REGEX = r'%s(%s|%s)' % (
    _SCHEME_PATTERN, _SHORT_HOST_PATTERN, _DOTTED_HOST_PATTERN)

# A more complete URL regular expression that combines _URL_REGEX with the
# optional query and fragment parts from the URI regular expression given in
# the URI RFC: http://www.ietf.org/rfc/rfc2396.txt
#
# The end is anchored with \Z rather than $ because $ also matches just
# before a trailing newline, which would accept "http://example.com\n".
# Capture-group numbering is unchanged.
# NOTE(review): the query class [^# ] still admits tabs/newlines inside a
# query string; confirm whether that is intended.
RE_COMPLEX_URL = re.compile(r'^%s(\?([^# ]*))?(#(.*))?\Z' % _URL_REGEX)
| 53 | |
| 54 | |
def IsValidEmail(s):
  """Check whether the whole string is one properly formatted email address.

  Args:
    s: the string to check.

  Returns:
    The (truthy) match object produced by RE_EMAIL_ONLY when s is accepted,
    otherwise None.
  """
  email_match = RE_EMAIL_ONLY.match(s)
  return email_match
| 58 | |
| 59 | |
def IsValidMailTo(s):
  """Check whether the string is a properly formatted mailto: link.

  Args:
    s: the string to check.

  Returns:
    False when s does not start with 'mailto:'.  Otherwise the result of
    matching the remainder against RE_EMAIL_ONLY: a (truthy) match object
    when the remainder is accepted as an email address, or None.
  """
  prefix = 'mailto:'
  if not s.startswith(prefix):
    return False
  # Everything after the scheme prefix must be exactly one email address.
  return RE_EMAIL_ONLY.match(s[len(prefix):])
| 63 | |
| 64 | |
def IsValidURL(s):
  """Check whether the string is a properly formatted web or ftp URL.

  Args:
    s: the string to check.

  Returns:
    The (truthy) match object produced by RE_COMPLEX_URL when s is accepted,
    otherwise None.
  """
  url_match = RE_COMPLEX_URL.match(s)
  return url_match