Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame^] | 1 | # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style |
| 3 | # license that can be found in the LICENSE file or at |
| 4 | # https://developers.google.com/open-source/licenses/bsd |
| 5 | |
| 6 | """Some constants of regexes used in Monorail to validate urls and emails.""" |
| 7 | from __future__ import print_function |
| 8 | from __future__ import division |
| 9 | from __future__ import absolute_import |
| 10 | |
| 11 | import re |
| 12 | import settings |
| 13 | |
# Only the http, https, ftp, and mailto schemes are ever linkified.
# The per-scheme fragments are joined into one regex alternation.
LINKIFY_SCHEMES = '|'.join((r'https?://', r'ftp://', r'mailto:'))
| 16 | |
# This regex matches shorthand URLs that we know are valid.
# Example: go/monorail
# The scheme is optional, and if it is missing we add it to the link.
# The match may not start right after '-', '/', '.', or '_'.
# Each host from settings is passed through re.escape() so that it is
# matched as literal text and never interpreted as regex syntax.
# NOTE(review): assumes settings.autolink_shorthand_hosts is an iterable
# of plain host-name strings -- confirm against settings.py.
IS_A_SHORT_LINK_RE = re.compile(
    r'(?<![-/._])\b(%s)?'  # Scheme is optional for short links.
    r'(%s)'  # The list of known shorthand hosts from settings.py
    r'/([^\s<]+)'  # Allow anything, checked with validation code.
    % (LINKIFY_SCHEMES,
       '|'.join(
           re.escape(host) for host in settings.autolink_shorthand_hosts)),
    re.UNICODE)
# Same shape as a shorthand link, but only digits may follow the host.
# The scheme is optional, and the match may not start right after
# '-', '/', '.', or '_'.
# Each host from settings is passed through re.escape() so that it is
# matched as literal text and never interpreted as regex syntax.
# NOTE(review): assumes settings.autolink_numeric_shorthand_hosts is an
# iterable of plain host-name strings -- confirm against settings.py.
IS_A_NUMERIC_SHORT_LINK_RE = re.compile(
    r'(?<![-/._])\b(%s)?'  # Scheme is optional for short links.
    r'(%s)'  # The list of known shorthand hosts from settings.py
    r'/([0-9]+)'  # Allow digits only for these domains.
    % (LINKIFY_SCHEMES,
       '|'.join(
           re.escape(host)
           for host in settings.autolink_numeric_shorthand_hosts)),
    re.UNICODE)
| 32 | |
# Fully-formed URLs: text that begins with one of the linkified schemes.
# Example: http://chromium.org or mailto:user@example.com
# The URL is linked exactly as written; nothing is prepended.
# A start-tag '<' ends the URL just like whitespace does, because the
# autolinker is sometimes run against html fragments.
IS_A_LINK_RE = re.compile(
    r'\b(' + LINKIFY_SCHEMES + r')'  # Scheme starts at a word boundary.
    r'([^\s<]+)',  # Allow anything, checked with validation code.
    flags=re.UNICODE)
| 42 | |
# Text that looks like a URL even though no scheme was written.
# Example: crrev.com
# Because the scheme is missing, "http://" gets prepended to the link.
IS_IMPLIED_LINK_RE = re.compile(
    r'(?<![-/._])\b'  # Must not directly follow '-', '/', '.', or '_'.
    r'[a-z]((-|\.)?[a-z0-9])+'  # Lowercase labels joined by '-' or '.'.
    r'\.(com|net|org|edu)\b'  # One of the recognized top-level domains.
    r'(/[^\s<]*)?',  # Optional path; checked with validation code.
    flags=re.UNICODE)
| 50 | |
# Text that looks like an email address.
# Example: user@example.com
# A match is linked to the user's profile page when one exists;
# otherwise it becomes a mailto: link.
IS_IMPLIED_EMAIL_RE = re.compile(
    r'\b[a-z]((-|\.)?[a-z0-9])+'  # Username.
    r'@'
    r'[a-z]((-|\.)?[a-z0-9])+'  # Domain labels.
    r'\.(com|net|org|edu)\b',  # One of the recognized top-level domains.
    flags=re.UNICODE)