Merge branch 'main' into avm99963-monorail

Merged commit 34d8229ae2b51fb1a15bd208e6fe6185c94f6266

GitOrigin-RevId: 7ee0917f93a577e475f8e09526dd144d245593f4
diff --git a/framework/validate.py b/framework/validate.py
index ee26396..baf6d6e 100644
--- a/framework/validate.py
+++ b/framework/validate.py
@@ -1,7 +1,6 @@
-# Copyright 2016 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file or at
-# https://developers.google.com/open-source/licenses/bsd
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
 
 """A set of Python input field validators."""
 from __future__ import print_function
@@ -10,72 +9,28 @@
 
 import re
 
-# RFC 2821-compliant email address regex
-#
-# Please see sections "4.1.2 Command Argument Syntax" and
-# "4.1.3 Address Literals" of:  http://www.faqs.org/rfcs/rfc2821.html
-#
-# The following implementation is still a subset of RFC 2821.  Fully
-# double-quoted <user> parts are not supported (since the RFC discourages
-# their use anyway), and using the backslash to escape other characters
-# that are normally invalid, such as commas, is not supported.
-#
-# The groups in this regular expression are:
-#
-# <user>: all of the valid non-quoted portion of the email address before
-#   the @ sign (not including the @ sign)
-#
-# <domain>: all of the domain name between the @ sign (but not including it)
-#   and the dot before the TLD (but not including that final dot)
-#
-# <tld>: the top-level domain after the last dot (but not including that
-#   final dot)
-#
-_RFC_2821_EMAIL_REGEX = r"""(?x)
-  (?P<user>
-    # Part of the username that comes before any dots that may occur in it.
-    # At least one of the listed non-dot characters is required before the
-    # first dot.
-    [-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+
-
-    # Remaining part of the username that starts with the dot and
-    # which may have other dots, if such a part exists.  Only one dot
-    # is permitted between each "Atom", and a trailing dot is not permitted.
-    (?:[.][-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+)*
-  )
-
-  # Domain name, where subdomains are allowed.  Also, dashes are allowed
-  # given that they are preceded and followed by at least one character.
-  @(?P<domain>
-    (?:[0-9a-zA-Z]       # at least one non-dash
-       (?:[-]*           # plus zero or more dashes
-          [0-9a-zA-Z]+   # plus at least one non-dash
-       )*                # zero or more of dashes followed by non-dashes
-    )                    # one required domain part (may be a sub-domain)
-
-    (?:\.                # dot separator before additional sub-domain part
-       [0-9a-zA-Z]       # at least one non-dash
-       (?:[-]*           # plus zero or more dashes
-          [0-9a-zA-Z]+   # plus at least one non-dash
-       )*                # zero or more of dashes followed by non-dashes
-    )*                   # at least one sub-domain part and a dot
-   )
-  \.                     # dot separator before TLD
-
-  # TLD, the part after 'usernames@domain.' which can consist of 2-9
-  # letters.
-  (?P<tld>[a-zA-Z]{2,9})
+# RFC 5322-compliant email address regex
+# https://stackoverflow.com/a/201378
+_RFC_2821_EMAIL_REGEX = r"""
+  (?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|
+  "(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|
+  \\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@
+  (?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|
+  \[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|
+  [0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:
+  (?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|
+  \\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])
   """
 
 # object used with <re>.search() or <re>.sub() to find email addresses
 # within a string (or with <re>.match() to find email addresses at the
 # beginning of a string that may be followed by trailing characters,
 # since <re>.match() implicitly anchors at the beginning of the string)
-RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX)
+RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX, re.X)
 
 # object used with <re>.match to find strings that contain *only* a single
 # email address (by adding the end-of-string anchor $)
-RE_EMAIL_ONLY = re.compile('^%s$' % _RFC_2821_EMAIL_REGEX)
+RE_EMAIL_ONLY = re.compile('^%s$' % _RFC_2821_EMAIL_REGEX, re.X)
 
 _SCHEME_PATTERN = r'(?:https?|ftp)://'
 _SHORT_HOST_PATTERN = (