blob: ddb9bb3217d5b9a4583ecd957fcf0574df4687c2 [file] [log] [blame]
Copybara854996b2021-09-07 19:36:02 +00001# Copyright 2017 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style
3# license that can be found in the LICENSE file or at
4# https://developers.google.com/open-source/licenses/bsd
5
6"""Some constants of regexes used in Monorail to validate urls and emails."""
7from __future__ import print_function
8from __future__ import division
9from __future__ import absolute_import
10
11import re
12import settings
13
14# We linkify http, https, ftp, and mailto schemes only.
# Regex alternation of the URL scheme prefixes that get linkified; it is
# interpolated into the capture groups of the link patterns below.
LINKIFY_SCHEMES = '|'.join((
    r'https?://',  # Both http and https.
    r'ftp://',
    r'mailto:',
))
16
17# This regex matches shorthand URLs that we know are valid.
18# Example: go/monorail
19# The scheme is optional, and if it is missing we add it to the link.
# Groups: 1 = optional scheme, 2 = shorthand host, 3 = everything after the
# slash up to the next whitespace or '<'.
# The match must start at a word boundary and must not directly follow
# '-', '/', '.', or '_'.
IS_A_SHORT_LINK_RE = re.compile(
    r'(?<![-/._])\b(%s)?'  # Scheme is optional for short links.
    r'(%s)'  # The list of known shorthand hosts from settings.py.
    r'/([^\s<]+)'  # Allow anything, checked with validation code.
    % (LINKIFY_SCHEMES,
       # Escape each host so that it is matched literally; an unescaped
       # metacharacter such as '.' in a host would otherwise over-match.
       '|'.join(re.escape(host)
                for host in settings.autolink_shorthand_hosts)),
    re.UNICODE)
# Groups: 1 = optional scheme, 2 = shorthand host, 3 = the run of digits
# after the slash.
# The match must start at a word boundary and must not directly follow
# '-', '/', '.', or '_'.
IS_A_NUMERIC_SHORT_LINK_RE = re.compile(
    r'(?<![-/._])\b(%s)?'  # Scheme is optional for short links.
    r'(%s)'  # The list of known numeric shorthand hosts from settings.py.
    r'/([0-9]+)'  # Allow digits only for these domains.
    % (LINKIFY_SCHEMES,
       # Escape each host so that it is matched literally; an unescaped
       # metacharacter such as '.' in a host would otherwise over-match.
       '|'.join(re.escape(host)
                for host in settings.autolink_numeric_shorthand_hosts)),
    re.UNICODE)
32
33# This regex matches fully-formed URLs, starting with a scheme.
34# Example: http://chromium.org or mailto:user@example.com
35# We link to the specified URL without adding anything.
# Also count a start-tag '<' as a URL delimiter, since the autolinker
37# is sometimes run against html fragments.
# Group 1 is the scheme, which must begin at a word boundary.
# Group 2 is everything after the scheme up to the next whitespace or '<';
# it is deliberately permissive and is checked by validation code.
IS_A_LINK_RE = re.compile(
    r'\b(%s)([^\s<]+)' % LINKIFY_SCHEMES, flags=re.UNICODE)
42
43# This regex matches text that looks like a URL despite lacking a scheme.
44# Example: crrev.com
45# Since the scheme is not specified, we prepend "http://".
# Groups 1 and 2 are internal to the host-name repetition; group 3 is the
# top-level domain and group 4 is the optional path.
IS_IMPLIED_LINK_RE = re.compile(
    ''.join([
        # Start at a word boundary that does not follow '-', '/', '.' or '_'.
        r'(?<![-/._])\b',
        # A lowercase letter, then one or more lowercase letters or digits,
        # each optionally preceded by a single '-' or '.'.
        r'[a-z]((-|\.)?[a-z0-9])+',
        # Only these top-level domains are treated as implied links.
        r'\.(com|net|org|edu)\b',
        # Optional path: allow anything, check with validation code.
        r'(/[^\s<]*)?',
    ]),
    flags=re.UNICODE)
50
51# This regex matches text that looks like an email address.
52# Example: user@example.com
53# These get linked to the user profile page if it exists, otherwise
54# they become a mailto:.
# Both the user name and the host name are a lowercase letter followed by one
# or more lowercase letters or digits, each optionally preceded by a single
# '-' or '.'.  Group 5 is the top-level domain.
IS_IMPLIED_EMAIL_RE = re.compile(
    r'\b[a-z]((-|\.)?[a-z0-9])+'  # User name, starting at a word boundary.
    r'@'
    r'[a-z]((-|\.)?[a-z0-9])+'  # Host name.
    r'\.(com|net|org|edu)\b',  # Only these top-level domains are accepted.
    flags=re.UNICODE)
58 re.UNICODE)