Project import generated by Copybara.
GitOrigin-RevId: d9e9e3fb4e31372ec1fb43b178994ca78fa8fe70
diff --git a/features/autolink.py b/features/autolink.py
new file mode 100644
index 0000000..2787b9c
--- /dev/null
+++ b/features/autolink.py
@@ -0,0 +1,624 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Autolink helps auto-link references to artifacts in text.
+
+This class maintains a registry of artifact autolink syntax specs and
+callbacks. The structure of that registry is:
+ { component_name: (lookup_callback, match_to_reference_function,
+ { regex: substitution_callback, ...}),
+ ...
+ }
+
+For example:
+ { 'tracker':
+ (GetReferencedIssues,
+ ExtractProjectAndIssueIds,
+ {_ISSUE_REF_RE: ReplaceIssueRef}),
+ 'versioncontrol':
+ (GetReferencedRevisions,
+ ExtractProjectAndRevNum,
+ {_GIT_HASH_RE: ReplaceRevisionRef}),
+ }
+
+The dictionary of regexes is used here because, in the future, we
+might add more regexes for each component rather than have one complex
+regex per component.
+"""
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+
+import logging
+import re
+import urllib
+import urlparse
+
+import settings
+from features import autolink_constants
+from framework import template_helpers
+from framework import validate
+from proto import project_pb2
+from tracker import tracker_helpers
+
+
+# Default cap on the combined length of all comment text. It is the
+# default max_total_length of Autolink.GetAllReferencedArtifacts, which
+# skips artifact lookups when the total length is larger than this.
+_MAX_TOTAL_LENGTH = 150 * 1024 # 150KB
+# Special all_referenced_artifacts value used to indicate that the
+# text content is too big to look up all referenced artifacts quickly.
+# When MarkupAutolinks sees this value it passes None as the artifacts
+# argument of every substitution function.
+SKIP_LOOKUPS = 'skip lookups'
+
+# Matches an HTML closing tag such as "</b>" at the very end of a string.
+# Linkify uses it to move such a tag out of the link text.
+_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', re.IGNORECASE)
+
+# (opening, closing) delimiter pairs checked by Linkify, in this order.
+# These characters are allowed in links, but if a closing delimiter
+# appears at the end of the link, and the opening one is not part of the
+# link, then the closing delimiter is trimmed off. Entries whose opening
+# delimiter is None are always trimmed from the end of the link.
+_LINK_TRAILING_CHARS = [
+ (None, ':'),
+ (None, '.'),
+ (None, ','),
+ ('(', ')'),
+ ('[', ']'),
+ ('{', '}'),
+ ('<', '>'),
+ ("'", "'"),
+ ('"', '"'),
+ ]
+
+
+def LinkifyEmail(_mr, autolink_regex_match, component_ref_artifacts):
+  """Turn a matched email address into a profile link or a mailto: link.
+
+  This is a substitution callback for the autolink feature, so it takes
+  the standard parameters for that kind of callback.
+
+  Args:
+    _mr: unused information parsed from the HTTP request.
+    autolink_regex_match: regex match whose full text is the address.
+    component_ref_artifacts: result of the call to GetReferencedUsers;
+        may be a false value, in which case no address is treated as a
+        known user.
+
+  Returns:
+    A one-element list of TextRuns. An address that fails validation is
+    returned as plain text. Otherwise the run has tag=a and links to
+    /u/<email> when the address is in component_ref_artifacts, or to
+    mailto:<email> when it is not.
+  """
+  address = autolink_regex_match.group(0)
+  if not validate.IsValidEmail(address):
+    return [template_helpers.TextRun(address)]
+
+  is_known_user = bool(
+      component_ref_artifacts and address in component_ref_artifacts)
+  href = ('/u/%s' % address) if is_known_user else ('mailto:' + address)
+  return [template_helpers.TextRun(address, tag='a', href=href)]
+
+
+def CurryGetReferencedUsers(services):
+  """Bind services into a user lookup callback for the autolink registry.
+
+  Currying is a convenient way to give the callback access to the
+  services objects without requiring that all possible services objects
+  be passed through the autolink registry and functions.
+
+  Args:
+    services: connection to the user persistence layer.
+
+  Returns:
+    A ready-to-use function that accepts the arguments that autolink
+    expects to pass to it.
+  """
+
+  def GetReferencedUsers(mr, emails):
+    """Return a dict of the existing users among the given addresses.
+
+    Args:
+      mr: commonly used info parsed from the request.
+      emails: email address strings for each user that is mentioned in
+          the comment text.
+
+    Returns:
+      A dictionary {email: user_pb} including all existing users.
+    """
+    ids_by_email = services.user.LookupExistingUserIDs(mr.cnxn, emails)
+    user_ids = list(ids_by_email.values())
+    users_by_id = services.user.GetUsersByIDs(mr.cnxn, user_ids)
+
+    users_by_email = {}
+    for email, user_id in ids_by_email.items():
+      users_by_email[email] = users_by_id[user_id]
+    return users_by_email
+
+  return GetReferencedUsers
+
+
+def Linkify(_mr, autolink_regex_match, _component_ref_artifacts):
+  """Turn a matched URL-like string into a hyperlink, or leave it as text.
+
+  This is a substitution callback for the autolink feature, so it takes
+  the standard parameters for that kind of callback.
+
+  Args:
+    _mr: unused information parsed from the HTTP request.
+    autolink_regex_match: regex match for the textual reference.
+    _component_ref_artifacts: unused artifact lookup result.
+
+  Returns:
+    A list of TextRuns. When the link is valid, the first run has tag=a
+    and is followed by a plain run holding any trailing delimiters or
+    closing tag that were trimmed off the link. When it is not valid,
+    the whole matched text is returned as a single plain run.
+  """
+  matched_text = autolink_regex_match.group(0)
+  link_text = matched_text
+  suffix = ''
+
+  # Make one pass over the delimiter table, peeling off closers that end
+  # the link and have no matching opener inside the rest of the link.
+  for opener, closer in _LINK_TRAILING_CHARS:
+    if not link_text.endswith(closer):
+      continue
+    without_closer = link_text[:-len(closer)]
+    if opener and opener in without_closer:
+      continue
+    suffix = closer + suffix
+    link_text = without_closer
+
+  # A closing HTML tag at the end belongs to the markup, not the link.
+  closing_tag = _CLOSING_TAG_RE.search(link_text)
+  if closing_tag:
+    tag_start = closing_tag.start(0)
+    suffix = link_text[tag_start:] + suffix
+    link_text = link_text[:tag_start]
+
+  if link_text.lower().startswith(('http', 'ftp', 'mailto')):
+    href = link_text
+  else:
+    # We use http because redirects for https are not all set up.
+    href = 'http://' + link_text
+
+  # href[7:] drops the leading "mailto:" before validating the address.
+  if not (validate.IsValidURL(href) or
+          (href.startswith('mailto') and validate.IsValidEmail(href[7:]))):
+    return [template_helpers.TextRun(matched_text)]
+
+  runs = [template_helpers.TextRun(link_text, tag='a', href=href)]
+  if suffix:
+    runs.append(template_helpers.TextRun(suffix))
+  return runs
+
+
+# Regular expression to detect git hashes.
+# Used to auto-link revision references when displaying issue details;
+# ReplaceRevisionRef builds the link from the revision URL format.
+# Matches a hexadecimal string of 40 chars on word boundaries, optionally
+# preceded by "r" or by "revision" plus whitespace and an optional "#",
+# e.g., "rN", "revision N", and "revision #N". Matching ignores case.
+_GIT_HASH_RE = re.compile(
+ r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
+ re.IGNORECASE | re.MULTILINE)
+
+# This is for SVN revisions and Git commit positions.
+# Unlike _GIT_HASH_RE, the "r" or "revision" prefix is required here, and
+# the revision number is 4 to 7 decimal digits.
+_SVN_REF_RE = re.compile(
+ r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{4,7}))\b',
+ re.IGNORECASE | re.MULTILINE)
+
+
+def GetReferencedRevisions(_mr, _refs):
+  """Artifact lookup callback for revisions; it currently loads nothing.
+
+  Args:
+    _mr: unused information parsed from the HTTP request.
+    _refs: unused revision references found in the comment text.
+
+  Returns:
+    Always None, because revisions are autolinked without checking that
+    they exist.
+  """
+  # TODO(jrobbins): Hit crrev.com and check that the revision exists
+  # and show a rollover with revision info.
+  referenced_revisions = None
+  return referenced_revisions
+
+
+def ExtractRevNums(_mr, autolink_regex_match):
+  """Return the internal representation of a revision reference.
+
+  Args:
+    _mr: unused information parsed from the HTTP request.
+    autolink_regex_match: regex match that has a 'revnum' named group.
+
+  Returns:
+    A one-element list holding the text of the 'revnum' group.
+  """
+  revnum = autolink_regex_match.group('revnum')
+  logging.debug('revision ref = %s', revnum)
+  return [revnum]
+
+
+def ReplaceRevisionRef(
+    mr, autolink_regex_match, _component_ref_artifacts):
+  """Return a link TextRun for a matched revision reference.
+
+  Args:
+    mr: information parsed from the HTTP request; mr.project supplies
+        the revision URL format.
+    autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
+    _component_ref_artifacts: unused artifact lookup result.
+
+  Returns:
+    A one-element list with a TextRun that has tag=a. Its text is the
+    prefix (if any) followed by the revision, and its href is the
+    revision URL format with {revnum} filled in.
+  """
+  prefix = autolink_regex_match.group('prefix')
+  revnum = autolink_regex_match.group('revnum')
+  url_format = _GetRevisionURLFormat(mr.project)
+  link_text = ('%s%s' % (prefix, revnum)) if prefix else revnum
+  return [template_helpers.TextRun(
+      link_text, tag='a', href=url_format.format(revnum=revnum))]
+
+
+def _GetRevisionURLFormat(project):
+  """Return the project's revision URL format, or the site-wide default.
+
+  Args:
+    project: object with a revision_url_format attribute.
+
+  Returns:
+    project.revision_url_format when it is set to a true value,
+    otherwise settings.revision_url_format.
+  """
+  # TODO(jrobbins): Expose a UI to customize it to point to whatever site
+  # hosts the source code. Also, site-wide default.
+  project_format = project.revision_url_format
+  if project_format:
+    return project_format
+  return settings.revision_url_format
+
+
+# Regular expression to detect issue references.
+# Used to auto-link to other issues when displaying issue details.
+# Matches the word "issue", "issues", "bug", or "bugs" when it is not
+# part of a larger word, optionally followed by ":" or "=", and then one
+# or more references. Each reference is an optional "project:" or
+# "project#" qualifier, an optional "#", and a number. References may be
+# separated by a comma and/or the words "and" or "or".
+_ISSUE_REF_RE = re.compile(r"""
+ (?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
+ ([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
+ (?P<number_sign>\#?)
+ (?P<local_id>\d+)\b
+ (,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)
+
+# This is for chromium.org's crbug.com shorthand domain.
+# Matches an optional "http://" or "https://", the host "crbug.com/", an
+# optional "project/" path segment, the issue number, and an optional
+# "#cN" comment anchor. The dot in the host name is escaped so that only
+# a literal "." matches; unescaped, it would accept any character there
+# (e.g., "crbugxcom/123").
+_CRBUG_REF_RE = re.compile(r"""
+  (?P<prefix>\b(https?://)?crbug\.com/)
+  ((?P<project_name>\b[-a-z0-9]+)(?P<separator>/))?
+  (?P<local_id>\d+)\b
+  (?P<anchor>\#c[0-9]+)?""", re.IGNORECASE | re.VERBOSE)
+
+# Once the overall issue reference has been detected, pick out the specific
+# issue project:id items within it. Often there is just one, but the "and|or"
+# syntax can allow multiple issues.
+# Every part except the number is optional: an "issue" or "bug" prefix,
+# a "project:" or "project#" qualifier, and a "#" before the number.
+_SINGLE_ISSUE_REF_RE = re.compile(r"""
+ (?P<prefix>\b(issue|bug)[ \t]*)?
+ (?P<project_name>\b[-a-z0-9]+[:\#])?
+ (?P<number_sign>\#?)
+ (?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)
+
+
+def CurryGetReferencedIssues(services):
+  """Bind services into an issue lookup callback for the autolink registry.
+
+  Currying is a convenient way to give the callback access to the
+  services objects without requiring that all possible services objects
+  be passed through the autolink registry and functions.
+
+  Args:
+    services: connection to issue, config, and project persistence layers.
+
+  Returns:
+    A ready-to-use function that accepts the arguments that autolink
+    expects to pass to it.
+  """
+
+  def GetReferencedIssues(mr, ref_tuples):
+    """Return dicts of open and closed issues referenced by these comments.
+
+    Args:
+      mr: commonly used info parsed from the request.
+      ref_tuples: (project_name, local_id) tuples for each issue that is
+          mentioned in the comment text. The project_name may be None,
+          in which case the issue is assumed to be in the current project.
+
+    Returns:
+      A pair (open_dict, closed_dict). Each dict maps the key made by
+      _IssueProjectKey from an issue's project name and local ID to
+      that issue.
+    """
+    project_names = [
+        ref_project_name or mr.project_name
+        for ref_project_name, _local_id in ref_tuples]
+    ref_projects = services.project.GetProjectsByName(
+        mr.cnxn, project_names)
+    issue_ids, _misses = services.issue.ResolveIssueRefs(
+        mr.cnxn, ref_projects, mr.project_name, ref_tuples)
+    open_issues, closed_issues = (
+        tracker_helpers.GetAllowedOpenedAndClosedIssues(
+            mr, issue_ids, services))
+
+    open_dict = {
+        _IssueProjectKey(issue.project_name, issue.local_id): issue
+        for issue in open_issues}
+    closed_dict = {
+        _IssueProjectKey(issue.project_name, issue.local_id): issue
+        for issue in closed_issues}
+
+    logging.info('autolinking dicts %r and %r', open_dict, closed_dict)
+    return open_dict, closed_dict
+
+  return GetReferencedIssues
+
+
+def _ParseProjectNameMatch(project_name):
+  """Reduce a regex-matched project name to its minimal representation.
+
+  Args:
+    project_name: a string with the project name matched in a regex,
+        possibly None or empty.
+
+  Returns:
+    The name with leading whitespace and any trailing '#', ':', space,
+    tab, or newline characters removed, or None if project_name is
+    empty or None.
+  """
+  if project_name:
+    return project_name.lstrip().rstrip('#: \t\n')
+  return None
+
+
+def _ExtractProjectAndIssueIds(
+    autolink_regex_match, subregex, default_project_name=None):
+  """Convert a regex match for a textual reference into our internal form.
+
+  Args:
+    autolink_regex_match: regex match for the whole textual reference.
+    subregex: compiled regex, with 'project_name' and 'local_id' groups,
+        used to find each individual issue reference in the matched text.
+    default_project_name: project name to use for references that do
+        not name a project.
+
+  Returns:
+    A list of (project_name, local_id) tuples, one per individual
+    reference, where local_id is an int.
+  """
+  refs = []
+  for submatch in subregex.finditer(autolink_regex_match.group(0)):
+    parsed_name = _ParseProjectNameMatch(submatch.group('project_name'))
+    local_id = int(submatch.group('local_id'))
+    ref = (parsed_name or default_project_name, local_id)
+    refs.append(ref)
+    logging.info('issue ref = %s', ref)
+  return refs
+
+
+def ExtractProjectAndIssueIdsNormal(_mr, autolink_regex_match):
+  """Extract issue refs from an "issue 123"-style regex match.
+
+  Args:
+    _mr: unused information parsed from the HTTP request.
+    autolink_regex_match: regex match for the textual reference.
+
+  Returns:
+    A list of (project_name, local_id) tuples; project_name is None for
+    references that do not name a project.
+  """
+  return _ExtractProjectAndIssueIds(
+      autolink_regex_match, subregex=_SINGLE_ISSUE_REF_RE)
+
+
+def ExtractProjectAndIssueIdsCrBug(_mr, autolink_regex_match):
+  """Extract issue refs from a "crbug.com/123"-style regex match.
+
+  Args:
+    _mr: unused information parsed from the HTTP request.
+    autolink_regex_match: regex match for the textual reference.
+
+  Returns:
+    A list of (project_name, local_id) tuples; project_name is
+    'chromium' for references that do not name a project.
+  """
+  return _ExtractProjectAndIssueIds(
+      autolink_regex_match, _CRBUG_REF_RE, 'chromium')
+
+
+# The key is built from the project name, rather than the project ID, to
+# avoid a project lookup in a function that has no services object.
+def _IssueProjectKey(project_name, local_id):
+  """Make a dictionary key to identify a referenced issue.
+
+  Args:
+    project_name: name of the project that the issue belongs to.
+    local_id: int ID of the issue within that project.
+
+  Returns:
+    A string of the form "project_name:local_id".
+  """
+  key_parts = (project_name, local_id)
+  return '%s:%d' % key_parts
+
+
+class IssueRefRun(object):
+  """A text run that links to a referenced issue."""
+
+  def __init__(self, issue, is_closed, project_name, content, anchor):
+    """Build the link attributes for one referenced issue.
+
+    Args:
+      issue: the referenced issue; its summary and local_id are used.
+      is_closed: True if the issue is closed.
+      project_name: project name to use in the issue URL.
+      content: the text of the reference as the user wrote it.
+      anchor: string appended to the URL, e.g., '#c3', or ''.
+    """
+    self.tag = 'a'
+    self.title = issue.summary
+    self.href = '/p/%s/issues/detail?id=%d%s' % (
+        project_name, issue.local_id, anchor)
+
+    if is_closed:
+      # Closed references get their own CSS class and are padded with
+      # one space on each side.
+      self.css_class = 'closed_ref'
+      self.content = ' %s ' % content
+    else:
+      self.css_class = None
+      self.content = content
+
+
+def _ReplaceIssueRef(
+    autolink_regex_match, component_ref_artifacts, single_issue_regex,
+    default_project_name):
+  """Examine a textual reference and replace it with an autolink or not.
+
+  Args:
+    autolink_regex_match: regex match for the textual reference.
+    component_ref_artifacts: result of earlier call to GetReferencedIssues,
+        i.e., an (open_dict, closed_dict) pair, or a false value when no
+        lookup results are available.
+    single_issue_regex: regular expression to parse individual issue
+        references out of a multi-issue-reference phrase. E.g.,
+        "issues 12 and 34".
+    default_project_name: project name to use when not specified.
+
+  Returns:
+    A list of IssueRefRuns and TextRuns to replace the textual
+    reference. Each individual reference to a known issue becomes a
+    hyperlink run; everything else is kept as the original plain text.
+  """
+  if component_ref_artifacts:
+    open_dict, closed_dict = component_ref_artifacts
+  else:
+    open_dict, closed_dict = {}, {}
+
+  original = autolink_regex_match.group(0)
+  logging.info('called ReplaceIssueRef on %r', original)
+
+  result_runs = []
+  pos = 0  # End of the text that has already been emitted.
+  for submatch in single_issue_regex.finditer(original):
+    if submatch.start() < pos:
+      continue
+    text_before = original[pos:submatch.start()]
+    if text_before:
+      result_runs.append(template_helpers.TextRun(text_before))
+    result_runs.append(_ReplaceSingleIssueRef(
+        submatch, open_dict, closed_dict, default_project_name))
+    pos = submatch.end()
+
+  # Keep any text that came after the last individual reference.
+  text_after = original[pos:]
+  if text_after:
+    result_runs.append(template_helpers.TextRun(text_after))
+
+  return result_runs
+
+
+def ReplaceIssueRefNormal(mr, autolink_regex_match, component_ref_artifacts):
+  """Replace occurrences of 'issue 123' with link TextRuns as needed.
+
+  References that do not name a project are resolved against
+  mr.project_name.
+  """
+  return _ReplaceIssueRef(
+      autolink_regex_match, component_ref_artifacts,
+      single_issue_regex=_SINGLE_ISSUE_REF_RE,
+      default_project_name=mr.project_name)
+
+
+def ReplaceIssueRefCrBug(_mr, autolink_regex_match, component_ref_artifacts):
+  """Replace occurrences of 'crbug.com/123' with link TextRuns as needed.
+
+  References that do not name a project are resolved against the
+  'chromium' project.
+  """
+  return _ReplaceIssueRef(
+      autolink_regex_match, component_ref_artifacts,
+      single_issue_regex=_CRBUG_REF_RE,
+      default_project_name='chromium')
+
+
+def _ReplaceSingleIssueRef(
+    submatch, open_dict, closed_dict, default_project_name):
+  """Replace one issue reference with a link, or the original text.
+
+  Args:
+    submatch: regex match for one individual issue reference, with
+        'project_name' and 'local_id' groups and optionally 'anchor'.
+    open_dict: dict of referenced open issues, keyed by _IssueProjectKey.
+    closed_dict: dict of referenced closed issues, keyed the same way.
+    default_project_name: project name to use when not specified.
+
+  Returns:
+    An IssueRefRun if the issue is in open_dict or closed_dict,
+    otherwise a plain TextRun with the original text.
+  """
+  content = submatch.group(0)
+  matched_project_name = submatch.group('project_name')
+  anchor = submatch.groupdict().get('anchor') or ''
+
+  # We need project_name for the URL, even if it is not in the text.
+  if matched_project_name:
+    project_name = matched_project_name.lstrip().rstrip(':#')
+  else:
+    project_name = default_project_name
+
+  issue_key = _IssueProjectKey(project_name, int(submatch.group('local_id')))
+  for issues_by_key, is_closed in ((open_dict, False), (closed_dict, True)):
+    if issue_key in issues_by_key:
+      return IssueRefRun(
+          issues_by_key[issue_key], is_closed, project_name, content, anchor)
+
+  # Don't link to non-existent issues.
+  return template_helpers.TextRun(content)
+
+
+class Autolink(object):
+  """Maintains a registry of autolink syntax and can apply it to comments."""
+
+  def __init__(self):
+    # {component_name: (artifact_lookup_function,
+    #                   match_to_reference_function,
+    #                   {regex: substitution_function, ...})}
+    self.registry = {}
+
+  def RegisterComponent(self, component_name, artifact_lookup_function,
+                        match_to_reference_function, autolink_re_subst_dict):
+    """Register all the autolink info for a software component.
+
+    Registering a component_name again replaces its earlier entry.
+
+    Args:
+      component_name: string name of software component, must be unique.
+      artifact_lookup_function: function to batch lookup all artifacts that
+          might have been referenced in a set of comments:
+            function(mr, refs) -> referenced_artifacts
+          the referenced_artifacts will be passed to each subst function.
+      match_to_reference_function: convert a regex match object to a
+          collection of internal representations of artifact references:
+            function(mr, match) -> refs, or a false value for none
+      autolink_re_subst_dict: dictionary of regular expressions and
+          the substitution function that should be called for each match:
+            function(mr, match, referenced_artifacts) -> list of text runs
+    """
+    component_info = (
+        artifact_lookup_function,
+        match_to_reference_function,
+        autolink_re_subst_dict)
+    self.registry[component_name] = component_info
+
+  def GetAllReferencedArtifacts(
+      self, mr, comment_text_list, max_total_length=_MAX_TOTAL_LENGTH):
+    """Call callbacks to lookup all artifacts possibly referenced.
+
+    Args:
+      mr: information parsed out of the user HTTP request.
+      comment_text_list: list of comment content strings.
+      max_total_length: int max number of characters to accept:
+          if more than this, then skip autolinking entirely.
+
+    Returns:
+      Opaque object that can be passed to MarkupAutolinks. Its
+      structure happens to be {component_name: artifact_list, ...},
+      or the special value SKIP_LOOKUPS.
+    """
+    total_len = 0
+    for comment_text in comment_text_list:
+      total_len += len(comment_text)
+    if total_len > max_total_length:
+      return SKIP_LOOKUPS
+
+    all_referenced_artifacts = {}
+    for comp, (lookup, match_to_refs, re_dict) in self.registry.items():
+      # Gather every reference in every comment, then do one batch lookup
+      # per component.
+      refs = set()
+      for comment_text in comment_text_list:
+        for regex in re_dict:
+          for match in regex.finditer(comment_text):
+            refs.update(match_to_refs(mr, match) or ())
+
+      all_referenced_artifacts[comp] = lookup(mr, refs)
+
+    return all_referenced_artifacts
+
+  def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
+    """Loop over components and regexes, applying all substitutions.
+
+    Args:
+      mr: info parsed from the user's HTTP request.
+      text_runs: List of text runs for the user's comment.
+      all_referenced_artifacts: result of previous call to
+          GetAllReferencedArtifacts.
+
+    Returns:
+      List of text runs for the entire user comment, some of which may have
+      attributes that cause them to render as links in render-rich-text.ezt.
+    """
+    skip_lookups = all_referenced_artifacts == SKIP_LOOKUPS
+
+    # Process components in determinate alphabetical order.
+    for component, component_info in sorted(self.registry.items()):
+      _lookup, _match_ref, re_subst_dict = component_info
+      if skip_lookups:
+        component_ref_artifacts = None
+      else:
+        component_ref_artifacts = all_referenced_artifacts[component]
+
+      for regex, subst_fun in re_subst_dict.items():
+        text_runs = self._ApplySubstFunctionToRuns(
+            text_runs, regex, subst_fun, mr, component_ref_artifacts)
+
+    return text_runs
+
+  def _ApplySubstFunctionToRuns(
+      self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
+    """Apply autolink regex and substitution function to each text run.
+
+    Args:
+      text_runs: list of TextRun objects with parts of the original comment.
+      regex: Regular expression for detecting textual references to artifacts.
+      subst_fun: function to return autolink markup, or original text.
+      mr: common info parsed from the user HTTP request.
+      component_ref_artifacts: already-looked-up destination artifacts to use
+          when computing substitution text.
+
+    Returns:
+      A new list with more and smaller runs, some of which may have tag
+      and link attributes set.
+    """
+    result_runs = []
+    for run in text_runs:
+      content = run.content
+      if run.tag:
+        # This chunk has already been substituted, don't allow nested
+        # autolinking to mess up our output.
+        result_runs.append(run)
+        continue
+
+      pos = 0  # End of the part of content that has been emitted so far.
+      for match in regex.finditer(content):
+        if match.start() > pos:
+          # Plain text between the previous match and this one.
+          result_runs.append(
+              template_helpers.TextRun(content[pos:match.start()]))
+        result_runs.extend(subst_fun(mr, match, component_ref_artifacts))
+        pos = match.end()
+
+      # Keep any text that came after the last match.
+      remainder = content[pos:]
+      if remainder:
+        result_runs.append(template_helpers.TextRun(remainder))
+
+    # TODO(jrobbins): ideally we would merge consecutive plain text runs
+    # so that regexes can match across those run boundaries.
+
+    return result_runs
+
+
+def RegisterAutolink(services):
+  """Register all the autolink hooks on services.autolink.
+
+  Args:
+    services: connections to the persistence layers; services.autolink
+        is the registry that the components are registered with.
+  """
+  # The order of the RegisterComponent() calls does not matter so that we
+  # could do this registration from separate modules in the future if
+  # needed. Priority order of application is determined by the names of
+  # the registered handlers, which are sorted in MarkupAutolinks().
+
+  # Plain URL components have no artifacts to look up: the lookup
+  # callback is called with (mr, refs) and the match-to-reference
+  # callback with (mr, match), and both simply return None.
+  no_artifact_lookup = lambda _mr, _refs: None
+  no_refs_from_match = lambda _mr, _match: None
+
+  services.autolink.RegisterComponent(
+      '01-tracker-crbug',
+      CurryGetReferencedIssues(services),
+      ExtractProjectAndIssueIdsCrBug,
+      {_CRBUG_REF_RE: ReplaceIssueRefCrBug})
+
+  services.autolink.RegisterComponent(
+      '02-linkify-full-urls',
+      no_artifact_lookup,
+      no_refs_from_match,
+      {autolink_constants.IS_A_LINK_RE: Linkify})
+
+  services.autolink.RegisterComponent(
+      '03-linkify-user-profiles-or-mailto',
+      CurryGetReferencedUsers(services),
+      lambda _mr, match: [match.group(0)],
+      {autolink_constants.IS_IMPLIED_EMAIL_RE: LinkifyEmail})
+
+  services.autolink.RegisterComponent(
+      '04-tracker-regular',
+      CurryGetReferencedIssues(services),
+      ExtractProjectAndIssueIdsNormal,
+      {_ISSUE_REF_RE: ReplaceIssueRefNormal})
+
+  services.autolink.RegisterComponent(
+      '05-linkify-shorthand',
+      no_artifact_lookup,
+      no_refs_from_match,
+      {
+          autolink_constants.IS_A_SHORT_LINK_RE: Linkify,
+          autolink_constants.IS_A_NUMERIC_SHORT_LINK_RE: Linkify,
+          autolink_constants.IS_IMPLIED_LINK_RE: Linkify,
+      })
+
+  services.autolink.RegisterComponent(
+      '06-versioncontrol',
+      GetReferencedRevisions,
+      ExtractRevNums,
+      {
+          _GIT_HASH_RE: ReplaceRevisionRef,
+          _SVN_REF_RE: ReplaceRevisionRef,
+      })