# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Constants for the regexes Monorail uses to validate URLs and emails."""
6from __future__ import print_function
7from __future__ import division
8from __future__ import absolute_import
9
10import re
11import settings
12
# We linkify http, https, ftp, and mailto schemes only.
# This is a regex alternation fragment, not a complete pattern: it is meant
# to be interpolated inside a group, e.g. r'(%s)' % LINKIFY_SCHEMES.
LINKIFY_SCHEMES = r'https?://|ftp://|mailto:'
15
# This regex matches shorthand URLs that we know are valid.
# Example: go/monorail
# The scheme is optional, and if it is missing we add it to the link.
# The negative lookbehind keeps the match from starting right after one of
# the characters '-', '/', '.', or '_'.
# Host names from settings are treated as literal text, so each one is
# escaped before being joined into the alternation; otherwise a regex
# metacharacter in a host (e.g. '.') would change what the pattern matches.
# Groups: 1 = optional scheme, 2 = shorthand host, 3 = path after the '/'.
IS_A_SHORT_LINK_RE = re.compile(
    r'(?<![-/._])\b(%s)?'  # Scheme is optional for short links.
    r'(%s)'  # The list of known shorthand links from settings.py
    r'/([^\s<]+)'  # Allow anything, checked with validation code.
    % (LINKIFY_SCHEMES,
       '|'.join(
           re.escape(h) for h in settings.autolink_shorthand_hosts)),
    re.UNICODE)
# This regex matches shorthand URLs whose path must consist of digits only.
# It has the same shape as IS_A_SHORT_LINK_RE, but draws its hosts from
# settings.autolink_numeric_shorthand_hosts and accepts only [0-9]+ after
# the '/'.
# Host names from settings are treated as literal text, so each one is
# escaped before being joined into the alternation.
# Groups: 1 = optional scheme, 2 = shorthand host, 3 = the digits.
IS_A_NUMERIC_SHORT_LINK_RE = re.compile(
    r'(?<![-/._])\b(%s)?'  # Scheme is optional for short links.
    r'(%s)'  # The list of known shorthand links from settings.py
    r'/([0-9]+)'  # Allow digits only for these domains.
    % (LINKIFY_SCHEMES,
       '|'.join(
           re.escape(h) for h in settings.autolink_numeric_shorthand_hosts)),
    re.UNICODE)
31
# This regex matches fully-formed URLs, starting with a scheme.
# Example: http://chromium.org or mailto:user@example.com
# We link to the specified URL without adding anything.
# Also count a start-tag '<' as a url delimiter, since the autolinker
# is sometimes run against html fragments.
# Groups: 1 = scheme, 2 = everything up to the next whitespace or '<'.
IS_A_LINK_RE = re.compile(
    r'\b(%s)'  # Scheme must start at a word boundary.
    r'([^\s<]+)'  # Allow anything, checked with validation code.
    % LINKIFY_SCHEMES, re.UNICODE)
41
# This regex matches text that looks like a URL despite lacking a scheme.
# Example: crrev.com
# Since the scheme is not specified, we prepend "http://".
# Only lowercase domains ending in .com, .net, .org, or .edu are matched
# (no IGNORECASE flag is set), and the match may not start right after one
# of the characters '-', '/', '.', or '_'.
# Groups: 3 = top-level domain, 4 = optional path starting with '/'.
# Groups 1 and 2 come from the repeated label sub-pattern and only hold
# its final iteration.
IS_IMPLIED_LINK_RE = re.compile(
    r'(?<![-/._])\b[a-z]((-|\.)?[a-z0-9])+\.(com|net|org|edu)\b'  # Domain.
    r'(/[^\s<]*)?',  # Allow anything, check with validation code.
    re.UNICODE)
49
# This regex matches text that looks like an email address.
# Example: user@example.com
# These get linked to the user profile page if it exists, otherwise
# they become a mailto:.
# Only lowercase addresses whose domain ends in .com, .net, .org, or .edu
# are matched (no IGNORECASE flag is set).
# Groups: 5 = top-level domain. Groups 1-4 come from the repeated
# username/domain sub-patterns and only hold their final iteration.
IS_IMPLIED_EMAIL_RE = re.compile(
    r'\b[a-z]((-|\.)?[a-z0-9])+@'  # Username@
    r'[a-z]((-|\.)?[a-z0-9])+\.(com|net|org|edu)\b',  # Domain
    re.UNICODE)
57 re.UNICODE)