Blame - framework/validate.py - monorail-avm99963

blob: ee26396f24e7c7adea540cb373d41977c5e02669 [file] [log] [blame]

Copybara	854996b	2021-09-07 19:36:02 +0000	[diff] [blame]	1	# Copyright 2016 The Chromium Authors. All rights reserved.
				2	# Use of this source code is governed by a BSD-style
				3	# license that can be found in the LICENSE file or at
				4	# https://developers.google.com/open-source/licenses/bsd
				5
				6	"""A set of Python input field validators."""
				7	from __future__ import print_function
				8	from __future__ import division
				9	from __future__ import absolute_import
				10
				11	import re
				12
				13	# RFC 2821-compliant email address regex
				14	#
				15	# Please see sections "4.1.2 Command Argument Syntax" and
				16	# "4.1.3 Address Literals" of: http://www.faqs.org/rfcs/rfc2821.html
				17	#
				18	# The following implementation is still a subset of RFC 2821. Fully
				19	# double-quoted <user> parts are not supported (since the RFC discourages
				20	# their use anyway), and using the backslash to escape other characters
				21	# that are normally invalid, such as commas, is not supported.
				22	#
				23	# The groups in this regular expression are:
				24	#
				25	# <user>: all of the valid non-quoted portion of the email address before
				26	# the @ sign (not including the @ sign)
				27	#
				28	# <domain>: all of the domain name between the @ sign (but not including it)
				29	# and the dot before the TLD (but not including that final dot)
				30	#
				31	# <tld>: the top-level domain after the last dot (but not including that
				32	# final dot)
				33	#
				34	_RFC_2821_EMAIL_REGEX = r"""(?x)
				35	(?P<user>
				36	# Part of the username that comes before any dots that may occur in it.
				37	# At least one of the listed non-dot characters is required before the
				38	# first dot.
				39	[-a-zA-Z0-9!#$%&'*+/=?^_`{\|}~]+
				40
				41	# Remaining part of the username that starts with the dot and
				42	# which may have other dots, if such a part exists. Only one dot
				43	# is permitted between each "Atom", and a trailing dot is not permitted.
				44	(?:[.][-a-zA-Z0-9!#$%&'+/=?^_`{\|}~]+)
				45	)
				46
				47	# Domain name, where subdomains are allowed. Also, dashes are allowed
				48	# given that they are preceded and followed by at least one character.
				49	@(?P<domain>
				50	(?:[0-9a-zA-Z] # at least one non-dash
				51	(?:[-]* # plus zero or more dashes
				52	[0-9a-zA-Z]+ # plus at least one non-dash
				53	)* # zero or more of dashes followed by non-dashes
				54	) # one required domain part (may be a sub-domain)
				55
				56	(?:\. # dot separator before additional sub-domain part
				57	[0-9a-zA-Z] # at least one non-dash
				58	(?:[-]* # plus zero or more dashes
				59	[0-9a-zA-Z]+ # plus at least one non-dash
				60	)* # zero or more of dashes followed by non-dashes
				61	)* # at least one sub-domain part and a dot
				62	)
				63	\. # dot separator before TLD
				64
				65	# TLD, the part after 'usernames@domain.' which can consist of 2-9
				66	# letters.
				67	(?P<tld>[a-zA-Z]{2,9})
				68	"""
				69
				70	# object used with <re>.search() or <re>.sub() to find email addresses
				71	# within a string (or with <re>.match() to find email addresses at the
				72	# beginning of a string that may be followed by trailing characters,
				73	# since <re>.match() implicitly anchors at the beginning of the string)
				74	RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX)
				75
				76	# object used with <re>.match to find strings that contain only a single
				77	# email address (by adding the end-of-string anchor $)
				78	RE_EMAIL_ONLY = re.compile('^%s$' % _RFC_2821_EMAIL_REGEX)
				79
				80	_SCHEME_PATTERN = r'(?:https?\|ftp)://'
				81	_SHORT_HOST_PATTERN = (
				82	r'(?=[a-zA-Z])[-a-zA-Z0-9]*[a-zA-Z0-9](:[0-9]+)?'
				83	r'/' # Slash is manditory for short host names.
				84	r'[^\s]*'
				85	)
				86	_DOTTED_HOST_PATTERN = (
				87	r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}(:[0-9]+)?'
				88	r'(/[^\s]*)?'
				89	)
				90	_URL_REGEX = r'%s(%s\|%s)' % (
				91	_SCHEME_PATTERN, _SHORT_HOST_PATTERN, _DOTTED_HOST_PATTERN)
				92
				93	# A more complete URL regular expression based on a combination of the
				94	# existing _URL_REGEX and the pattern found for URI regular expressions
				95	# found in the URL RFC document. It's detailed here:
				96	# http://www.ietf.org/rfc/rfc2396.txt
				97	RE_COMPLEX_URL = re.compile(r'^%s(\?([^# ]))?(#(.))?$' % _URL_REGEX)
				98
				99
				100	def IsValidEmail(s):
				101	"""Return true iff the string is a properly formatted email address."""
				102	return RE_EMAIL_ONLY.match(s)
				103
				104
				105	def IsValidMailTo(s):
				106	"""Return true iff the string is a properly formatted mailto:."""
				107	return s.startswith('mailto:') and RE_EMAIL_ONLY.match(s[7:])
				108
				109
				110	def IsValidURL(s):
				111	"""Return true iff the string is a properly formatted web or ftp URL."""
				112	return RE_COMPLEX_URL.match(s)