Blame - features/autolink_constants.py - monorail-avm99963

blob: ddb9bb3217d5b9a4583ecd957fcf0574df4687c2 [file] [log] [blame]

Copybara	854996b	2021-09-07 19:36:02 +0000	[diff] [blame^]	1	# Copyright 2017 The Chromium Authors. All rights reserved.
				2	# Use of this source code is governed by a BSD-style
				3	# license that can be found in the LICENSE file or at
				4	# https://developers.google.com/open-source/licenses/bsd
				5
				6	"""Some constants of regexes used in Monorail to validate urls and emails."""
				7	from __future__ import print_function
				8	from __future__ import division
				9	from __future__ import absolute_import
				10
				11	import re
				12	import settings
				13
				14	# We linkify http, https, ftp, and mailto schemes only.
				15	LINKIFY_SCHEMES = r'https?://\|ftp://\|mailto:'
				16
				17	# This regex matches shorthand URLs that we know are valid.
				18	# Example: go/monorail
				19	# The scheme is optional, and if it is missing we add it to the link.
				20	IS_A_SHORT_LINK_RE = re.compile(
				21	r'(?<![-/._])\b(%s)?' # Scheme is optional for short links.
				22	r'(%s)' # The list of know shorthand links from settings.py
				23	r'/([^\s<]+)' # Allow anything, checked with validation code.
				24	% (LINKIFY_SCHEMES, '\|'.join(settings.autolink_shorthand_hosts)),
				25	re.UNICODE)
				26	IS_A_NUMERIC_SHORT_LINK_RE = re.compile(
				27	r'(?<![-/._])\b(%s)?' # Scheme is optional for short links.
				28	r'(%s)' # The list of know shorthand links from settings.py
				29	r'/([0-9]+)' # Allow digits only for these domains.
				30	% (LINKIFY_SCHEMES, '\|'.join(settings.autolink_numeric_shorthand_hosts)),
				31	re.UNICODE)
				32
				33	# This regex matches fully-formed URLs, starting with a scheme.
				34	# Example: http://chromium.org or mailto:user@example.com
				35	# We link to the specified URL without adding anything.
				36	# Also count a start-tag '<' as a url delimeter, since the autolinker
				37	# is sometimes run against html fragments.
				38	IS_A_LINK_RE = re.compile(
				39	r'\b(%s)' # Scheme must be a whole word.
				40	r'([^\s<]+)' # Allow anything, checked with validation code.
				41	% LINKIFY_SCHEMES, re.UNICODE)
				42
				43	# This regex matches text that looks like a URL despite lacking a scheme.
				44	# Example: crrev.com
				45	# Since the scheme is not specified, we prepend "http://".
				46	IS_IMPLIED_LINK_RE = re.compile(
				47	r'(?<![-/._])\b[a-z]((-\|\.)?[a-z0-9])+\.(com\|net\|org\|edu)\b' # Domain.
				48	r'(/[^\s<]*)?', # Allow anything, check with validation code.
				49	re.UNICODE)
				50
				51	# This regex matches text that looks like an email address.
				52	# Example: user@example.com
				53	# These get linked to the user profile page if it exists, otherwise
				54	# they become a mailto:.
				55	IS_IMPLIED_EMAIL_RE = re.compile(
				56	r'\b[a-z]((-\|\.)?[a-z0-9])+@' # Username@
				57	r'[a-z]((-\|\.)?[a-z0-9])+\.(com\|net\|org\|edu)\b', # Domain
				58	re.UNICODE)