features/autolink.py - monorail-avm99963 - Gitiles

 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file or at
 # https://developers.google.com/open-source/licenses/bsd

 """Autolink helps auto-link references to artifacts in text.

 This class maintains a registry of artifact autolink syntax specs and
 callbacks. The structure of that registry is:
   { component_name: (lookup_callback, match_to_reference_function,
                      { regex: substitution_callback, ...}),
     ...
   }

 For example:
   { 'tracker':
      (GetReferencedIssues,
       ExtractProjectAndIssueIds,
       {_ISSUE_REF_RE: ReplaceIssueRef}),
     'versioncontrol':
      (GetReferencedRevisions,
       ExtractProjectAndRevNum,
       {_GIT_HASH_RE: ReplaceRevisionRef}),
   }

 The dictionary of regexes is used here because, in the future, we
 might add more regexes for each component rather than have one complex
 regex per component.
 """
 from __future__ import print_function
 from __future__ import division
 from __future__ import absolute_import

 import logging
 import re
 import urllib
 import urlparse

 import settings
 from features import autolink_constants
 from framework import template_helpers
 from framework import validate
 from proto import project_pb2
 from tracker import tracker_helpers


 # Autolinking is skipped once the combined comment text exceeds this size.
 _MAX_TOTAL_LENGTH = 1024 * 150  # 150KB

 # Sentinel used in place of the all_referenced_artifacts dict when the text
 # content was too big to look up all referenced artifacts quickly.
 SKIP_LOOKUPS = 'skip lookups'

 # Matches an HTML closing tag that sits at the very end of a string.
 _CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', flags=re.IGNORECASE)

 # (opening, closing) delimiter pairs.  Every closing character is legal
 # inside a link, but when one ends the link and its opening partner does
 # not appear in the link, the closing character is trimmed off.  An opening
 # value of None means the closing character is always trimmed.
 _LINK_TRAILING_CHARS = [
     (None, ':'), (None, '.'), (None, ','),
     ('(', ')'), ('[', ']'), ('{', '}'), ('<', '>'),
     ("'", "'"), ('"', '"'),
 ]


 def LinkifyEmail(_mr, autolink_regex_match, component_ref_artifacts):
   """Turn a matched email address into a profile link or a mailto: link.

   This is a substitution callback for the autolink feature; its parameters
   are the standard ones for that kind of callback.

   Args:
     _mr: unused information parsed from the HTTP request.
     autolink_regex_match: regex match whose full text is the email address.
     component_ref_artifacts: result of the earlier call to
         GetReferencedUsers; may be None or empty.

   Returns:
     A one-element list of TextRuns.  An address that fails validation is
     returned as plain text.  Otherwise the run has tag=a and links to
     /u/<email> when the address is in component_ref_artifacts, or to
     mailto:<email> when it is not.
   """
   address = autolink_regex_match.group(0)
   if not validate.IsValidEmail(address):
     return [template_helpers.TextRun(address)]

   is_known_user = bool(
       component_ref_artifacts and address in component_ref_artifacts)
   href = '/u/%s' % address if is_known_user else 'mailto:' + address
   return [template_helpers.TextRun(address, tag='a', href=href)]


 def CurryGetReferencedUsers(services):
   """Bind services into a user-lookup callback for the autolink registry.

   Currying is a convenient way to give the callback access to the services
   objects without requiring that every possible services object be passed
   through the autolink registry and functions.

   Args:
     services: connection to the user persistence layer.

   Returns:
     A ready-to-use function that accepts the arguments that autolink
     expects to pass to it.
   """

   def GetReferencedUsers(mr, emails):
     """Return the existing users among the given email addresses.

     Args:
       mr: commonly used info parsed from the request.
       emails: email address strings for each user that is mentioned in
           the comment text.

     Returns:
       A dictionary {email: user_pb} including all existing users.
     """
     ids_by_email = services.user.LookupExistingUserIDs(mr.cnxn, emails)
     found_ids = list(ids_by_email.values())
     users_by_id = services.user.GetUsersByIDs(mr.cnxn, found_ids)

     users_by_email = {}
     for email, user_id in ids_by_email.items():
       users_by_email[email] = users_by_id[user_id]
     return users_by_email

   return GetReferencedUsers


 def Linkify(_mr, autolink_regex_match, _component_ref_artifacts):
   """Turn a matched URL-like string into a hyperlink, or leave it as text.

   This is a substitution callback for the autolink feature; its parameters
   are the standard ones for that kind of callback.

   Args:
     _mr: unused information parsed from the HTTP request.
     autolink_regex_match: regex match for the textual reference.
     _component_ref_artifacts: unused result of the artifact lookup.

   Returns:
     A list of TextRuns.  When the link validates, the first run has tag=a
     and is followed by a plain run holding any trimmed trailing text.
     When it does not validate, the whole match is returned as one plain
     run.
   """
   matched_text = autolink_regex_match.group(0)
   link_text = matched_text

   # Peel closing punctuation off the end of the link.  This is one pass in
   # table order, and a paired closer is kept when its opener occurs in the
   # link.
   suffix = ''
   for opener, closer in _LINK_TRAILING_CHARS:
     if not link_text.endswith(closer):
       continue
     shortened = link_text[:-len(closer)]
     if opener and opener in shortened:
       continue  # The delimiter is balanced inside the link; keep it.
     suffix = closer + suffix
     link_text = shortened

   # A closing HTML tag glued to the end of the link is not part of it.
   closing_tag = _CLOSING_TAG_RE.search(link_text)
   if closing_tag:
     tag_start = closing_tag.start(0)
     suffix = link_text[tag_start:] + suffix
     link_text = link_text[:tag_start]

   if link_text.lower().startswith(('http', 'ftp', 'mailto')):
     href = link_text
   else:
     # We use http because redirects for https are not all set up.
     href = 'http://' + link_text

   if not validate.IsValidURL(href):
     # Not a URL; the only other acceptable form is mailto:<valid email>.
     is_mailto = href.startswith('mailto')
     if not (is_mailto and validate.IsValidEmail(href[7:])):
       return [template_helpers.TextRun(matched_text)]

   result = [template_helpers.TextRun(link_text, tag='a', href=href)]
   if suffix:
     result.append(template_helpers.TextRun(suffix))
   return result


 # Regular expression to detect git hashes.
 # Used to auto-link to Git hashes when displaying issue details.
 # Matches a 40-character hexadecimal string N that stands as its own word,
 # optionally preceded by "r" (as in "rN") or by "revision" followed by
 # whitespace and an optional "#" (as in "revision N" or "revision #N").
 # The prefix is captured as group "prefix" and N as group "revnum".
 _GIT_HASH_RE = re.compile(
     r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
     re.IGNORECASE | re.MULTILINE)

 # This is for SVN revisions and Git commit positions.
 # Same shape as above, except that the prefix is required and N is a
 # decimal number of 4 to 7 digits.
 _SVN_REF_RE = re.compile(
     r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{4,7}))\b',
     re.IGNORECASE | re.MULTILINE)


 def GetReferencedRevisions(_mr, _refs):
   """Artifact lookup callback for revisions; currently looks up nothing.

   Args:
     _mr: unused information parsed from the HTTP request.
     _refs: unused revision references found in the comment text.

   Returns:
     Always None.
   """
   # Any revision hash is autolinked without checking that such a revision
   # actually exists.
   # TODO(jrobbins): Hit crrev.com and check that the revision exists
   # and show a rollover with revision info.
   return None


 def ExtractRevNums(_mr, autolink_regex_match):
   """Return the internal representation of a revision reference.

   Args:
     _mr: unused information parsed from the HTTP request.
     autolink_regex_match: regex match that has a 'revnum' named group.

   Returns:
     A one-element list holding the text of the 'revnum' group.
   """
   revnum = autolink_regex_match.group('revnum')
   logging.debug('revision ref = %s', revnum)
   return [revnum]


 def ReplaceRevisionRef(
     mr, autolink_regex_match, _component_ref_artifacts):
   """Return the link run that replaces a matched revision reference.

   Args:
     mr: info parsed from the HTTP request; mr.project is used to pick the
         revision URL format.
     autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
     _component_ref_artifacts: unused result of GetReferencedRevisions.

   Returns:
     A one-element list with a TextRun with tag=a whose text is the prefix
     (when one was matched) followed by the revision, and whose href is the
     revision URL format filled in with the revision.
   """
   prefix, revnum = autolink_regex_match.group('prefix', 'revnum')
   url_format = _GetRevisionURLFormat(mr.project)
   href = url_format.format(revnum=revnum)
   link_text = '%s%s' % (prefix, revnum) if prefix else revnum
   return [template_helpers.TextRun(link_text, tag='a', href=href)]


 def _GetRevisionURLFormat(project):
   """Return the revision URL format string to use for the given project.

   Args:
     project: object with a revision_url_format attribute.

   Returns:
     The project's own revision_url_format when it is set (truthy),
     otherwise settings.revision_url_format.
   """
   # TODO(jrobbins): Expose a UI to customize it to point to whatever site
   # hosts the source code. Also, site-wide default.
   project_format = project.revision_url_format
   if project_format:
     return project_format
   return settings.revision_url_format


 # Regular expression to detect issue references.
 # Used to auto-link to other issues when displaying issue details.
 # Matches "issue", "issues", "bug" or "bugs" (when not part of a larger
 # word), optionally followed by ":" or "=", and then one or more issue
 # numbers.  Each number may carry a "project:" or "project#" qualifier
 # and/or a "#", and numbers may be separated by commas, "and" or "or".
 _ISSUE_REF_RE = re.compile(r"""
     (?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
     ([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
      (?P<number_sign>\#?)
      (?P<local_id>\d+)\b
      (,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)

 # This is for chromium.org's crbug.com shorthand domain.
 # The dot is escaped so that only the literal host "crbug.com" matches,
 # not any character in that position (e.g. "crbugxcom/123").
 _CRBUG_REF_RE = re.compile(r"""
     (?P<prefix>\b(https?://)?crbug\.com/)
     ((?P<project_name>\b[-a-z0-9]+)(?P<separator>/))?
     (?P<local_id>\d+)\b
     (?P<anchor>\#c[0-9]+)?""", re.IGNORECASE | re.VERBOSE)

 # Once the overall issue reference has been detected, pick out the specific
 # issue project:id items within it.  Often there is just one, but the "and|or"
 # syntax can allow multiple issues.
 _SINGLE_ISSUE_REF_RE = re.compile(r"""
     (?P<prefix>\b(issue|bug)[ \t]*)?
     (?P<project_name>\b[-a-z0-9]+[:\#])?
     (?P<number_sign>\#?)
     (?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)


 def CurryGetReferencedIssues(services):
   """Bind services into an issue-lookup callback for the autolink registry.

   Currying is a convenient way to give the callback access to the services
   objects without requiring that every possible services object be passed
   through the autolink registry and functions.

   Args:
     services: connection to issue, config, and project persistence layers.

   Returns:
     A ready-to-use function that accepts the arguments that autolink
     expects to pass to it.
   """

   def GetReferencedIssues(mr, ref_tuples):
     """Return dicts of the open and closed issues referenced by comments.

     Args:
       mr: commonly used info parsed from the request.
       ref_tuples: (project_name, local_id) tuples for each issue that is
           mentioned in the comment text. The project_name may be None,
           in which case the issue is assumed to be in the current project.

     Returns:
       A pair (open_dict, closed_dict).  Each maps the key produced by
       _IssueProjectKey to the issue, for the issues returned by
       tracker_helpers.GetAllowedOpenedAndClosedIssues.
     """
     project_names = [
         (ref_project_name or mr.project_name)
         for ref_project_name, _local_id in ref_tuples]
     ref_projects = services.project.GetProjectsByName(
         mr.cnxn, project_names)
     issue_ids, _misses = services.issue.ResolveIssueRefs(
         mr.cnxn, ref_projects, mr.project_name, ref_tuples)
     open_issues, closed_issues = (
         tracker_helpers.GetAllowedOpenedAndClosedIssues(
             mr, issue_ids, services))

     open_dict = {
         _IssueProjectKey(issue.project_name, issue.local_id): issue
         for issue in open_issues}
     closed_dict = {
         _IssueProjectKey(issue.project_name, issue.local_id): issue
         for issue in closed_issues}

     logging.info('autolinking dicts %r and %r', open_dict, closed_dict)

     return open_dict, closed_dict

   return GetReferencedIssues


 def _ParseProjectNameMatch(project_name):
   """Reduce a regex-matched project name to its minimal form.

   Args:
     project_name: a string with the project name matched in a regex, or
         None when the optional group did not match.

   Returns:
     The name with leading whitespace and any trailing '#', ':', space, tab
     or newline characters removed; None when project_name is empty or None.
   """
   if project_name:
     return project_name.lstrip().rstrip('#: \t\n')
   return None


 def _ExtractProjectAndIssueIds(
     autolink_regex_match, subregex, default_project_name=None):
   """Convert a regex match for a textual reference into our internal form.

   Args:
     autolink_regex_match: regex match for the whole textual reference.
     subregex: regex with 'project_name' and 'local_id' groups, used to find
         each individual issue reference within the matched text.
     default_project_name: project name to use for references that do not
         name a project.

   Returns:
     A list of (project_name, local_id) tuples, one per sub-match, where
     local_id is an int.
   """
   refs = []
   for submatch in subregex.finditer(autolink_regex_match.group(0)):
     parsed_name = _ParseProjectNameMatch(submatch.group('project_name'))
     local_id = int(submatch.group('local_id'))
     ref = (parsed_name or default_project_name, local_id)
     refs.append(ref)
     logging.info('issue ref = %s', ref)

   return refs


 def ExtractProjectAndIssueIdsNormal(_mr, autolink_regex_match):
   """Extract (project_name, local_id) refs from an 'issue 123' style match.

   Args:
     _mr: unused information parsed from the HTTP request.
     autolink_regex_match: regex match for the textual reference.

   Returns:
     The list produced by _ExtractProjectAndIssueIds using
     _SINGLE_ISSUE_REF_RE and no default project name.
   """
   single_ref_re = _SINGLE_ISSUE_REF_RE
   return _ExtractProjectAndIssueIds(autolink_regex_match, single_ref_re)


 def ExtractProjectAndIssueIdsCrBug(_mr, autolink_regex_match):
   """Extract (project_name, local_id) refs from a crbug.com style match.

   Args:
     _mr: unused information parsed from the HTTP request.
     autolink_regex_match: regex match for the textual reference.

   Returns:
     The list produced by _ExtractProjectAndIssueIds using _CRBUG_REF_RE,
     with 'chromium' as the project for references that name none.
   """
   crbug_default_project = 'chromium'
   return _ExtractProjectAndIssueIds(
       autolink_regex_match, _CRBUG_REF_RE,
       default_project_name=crbug_default_project)


 # The key is built from the project name so that no lookup on project ID
 # is needed in a function that has no services object.
 def _IssueProjectKey(project_name, local_id):
   """Make a dictionary key to identify a referenced issue.

   Args:
     project_name: name of the project that the issue belongs to.
     local_id: int issue number within that project.

   Returns:
     The string '<project_name>:<local_id>'.
   """
   key_parts = (project_name, local_id)
   return '%s:%d' % key_parts


 class IssueRefRun(object):
   """A text run that links to a referenced issue."""

   def __init__(self, issue, is_closed, project_name, content, anchor):
     """Build the link attributes for one issue reference.

     Args:
       issue: referenced issue; its summary and local_id are read.
       is_closed: True if the issue is closed.
       project_name: project name to put in the link URL.
       content: the text of the reference as it appeared in the comment.
       anchor: string appended to the URL, e.g. '#c3', or ''.
     """
     self.tag = 'a'
     if is_closed:
       self.css_class = 'closed_ref'
       # Closed references get one space of padding on each side.
       self.content = ' %s ' % content
     else:
       self.css_class = None
       self.content = content

     self.title = issue.summary
     self.href = '/p/%s/issues/detail?id=%d%s' % (
         project_name, issue.local_id, anchor)


 def _ReplaceIssueRef(
     autolink_regex_match, component_ref_artifacts, single_issue_regex,
     default_project_name):
   """Examine a textual reference and replace it with an autolink or not.

   Args:
     autolink_regex_match: regex match for the textual reference.
     component_ref_artifacts: result of earlier call to GetReferencedIssues,
         i.e. an (open_dict, closed_dict) pair, or a false value.
     single_issue_regex: regular expression to parse individual issue
         references out of a multi-issue-reference phrase.  E.g.,
         "issues 12 and 34".
     default_project_name: project name to use when not specified.

   Returns:
     A list of IssueRefRuns and TextRuns that together cover the matched
     text.  Each individual reference becomes whatever
     _ReplaceSingleIssueRef returns for it, and the text between and after
     references is kept as plain TextRuns.
   """
   if component_ref_artifacts:
     open_dict, closed_dict = component_ref_artifacts
   else:
     open_dict, closed_dict = {}, {}

   original = autolink_regex_match.group(0)
   logging.info('called ReplaceIssueRef on %r', original)

   result_runs = []
   consumed = 0  # Offset in original up to which runs have been emitted.
   for submatch in single_issue_regex.finditer(original):
     start = submatch.start()
     if start < consumed:
       continue
     gap = original[consumed:start]
     if gap:
       result_runs.append(template_helpers.TextRun(gap))
     result_runs.append(_ReplaceSingleIssueRef(
         submatch, open_dict, closed_dict, default_project_name))
     consumed = submatch.end()

   tail = original[consumed:]
   if tail:
     result_runs.append(template_helpers.TextRun(tail))

   return result_runs


 def ReplaceIssueRefNormal(mr, autolink_regex_match, component_ref_artifacts):
   """Replace occurrences of 'issue 123' with link TextRuns as needed.

   Args:
     mr: info parsed from the HTTP request; mr.project_name is the project
         used for references that do not name one.
     autolink_regex_match: regex match for the textual reference.
     component_ref_artifacts: result of earlier call to GetReferencedIssues.

   Returns:
     The list of runs produced by _ReplaceIssueRef.
   """
   current_project_name = mr.project_name
   return _ReplaceIssueRef(
       autolink_regex_match, component_ref_artifacts,
       _SINGLE_ISSUE_REF_RE, current_project_name)


 def ReplaceIssueRefCrBug(_mr, autolink_regex_match, component_ref_artifacts):
   """Replace occurrences of 'crbug.com/123' with link TextRuns as needed.

   Args:
     _mr: unused information parsed from the HTTP request.
     autolink_regex_match: regex match for the textual reference.
     component_ref_artifacts: result of earlier call to GetReferencedIssues.

   Returns:
     The list of runs produced by _ReplaceIssueRef, with 'chromium' as the
     project for references that do not name one.
   """
   crbug_default_project = 'chromium'
   return _ReplaceIssueRef(
       autolink_regex_match, component_ref_artifacts,
       _CRBUG_REF_RE, crbug_default_project)


 def _ReplaceSingleIssueRef(
     submatch, open_dict, closed_dict, default_project_name):
   """Replace one issue reference with a link, or the original text.

   Args:
     submatch: regex match for a single issue reference, with
         'project_name' and 'local_id' groups and optionally 'anchor'.
     open_dict: dict of referenced open issues keyed by _IssueProjectKey.
     closed_dict: dict of referenced closed issues, keyed the same way.
     default_project_name: project name to use when the text has none.

   Returns:
     An IssueRefRun when the issue is found in open_dict or closed_dict,
     otherwise a plain TextRun holding the matched text.
   """
   content = submatch.group(0)
   raw_project = submatch.group('project_name')
   anchor = submatch.groupdict().get('anchor') or ''

   # We need project_name for the URL, even if it is not in the text.
   project_name = (
       raw_project.lstrip().rstrip(':#') if raw_project
       else default_project_name)

   local_id = int(submatch.group('local_id'))
   issue_key = _IssueProjectKey(project_name, local_id)

   # Open issues are checked before closed ones.
   for issues_by_key, is_closed in ((open_dict, False), (closed_dict, True)):
     if issue_key in issues_by_key:
       return IssueRefRun(
           issues_by_key[issue_key], is_closed, project_name, content, anchor)

   # Don't link to non-existent issues.
   return template_helpers.TextRun(content)


 class Autolink(object):
   """Maintains a registry of autolink syntax and can apply it to comments."""

   def __init__(self):
     # {component_name: (lookup_function, match_to_reference_function,
     #                   {regex: substitution_function})}
     self.registry = {}

   def RegisterComponent(self, component_name, artifact_lookup_function,
                         match_to_reference_function, autolink_re_subst_dict):
     """Register all the autolink info for a software component.

     Args:
       component_name: string name of software component, must be unique.
           Components are applied in sorted order of their names.
       artifact_lookup_function: function to batch lookup all artifacts that
           might have been referenced in a set of comments:
           function(mr, refs) -> referenced_artifacts
           The referenced_artifacts will be passed to each subst function.
       match_to_reference_function: converts a regex match object to an
           iterable of internal representations of artifact references:
           function(mr, match) -> refs, or a false value for no refs.
       autolink_re_subst_dict: dictionary of regular expressions and
           the substitution function that should be called for each match:
           function(mr, match, referenced_artifacts) -> list of runs
     """
     entry = (artifact_lookup_function,
              match_to_reference_function,
              autolink_re_subst_dict)
     self.registry[component_name] = entry

   def GetAllReferencedArtifacts(
       self, mr, comment_text_list, max_total_length=_MAX_TOTAL_LENGTH):
     """Call callbacks to lookup all artifacts possibly referenced.

     Args:
       mr: information parsed out of the user HTTP request.
       comment_text_list: list of comment content strings.
       max_total_length: int max number of characters to accept:
           if more than this, then skip autolinking entirely.

     Returns:
       Opaque object that can be passed to MarkupAutolinks.  Its
       structure happens to be {component_name: artifact_list, ...},
       or the special value SKIP_LOOKUPS.
     """
     total_len = 0
     for comment_text in comment_text_list:
       total_len += len(comment_text)
     if total_len > max_total_length:
       return SKIP_LOOKUPS

     all_referenced_artifacts = {}
     for comp, (lookup, match_to_refs, re_dict) in self.registry.items():
       # Gather the distinct references across all comments, then do one
       # lookup call per component.
       refs = set()
       for comment_text in comment_text_list:
         for regex in re_dict:
           for match in regex.finditer(comment_text):
             refs.update(match_to_refs(mr, match) or ())

       all_referenced_artifacts[comp] = lookup(mr, refs)

     return all_referenced_artifacts

   def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
     """Loop over components and regexes, applying all substitutions.

     Args:
       mr: info parsed from the user's HTTP request.
       text_runs: List of text runs for the user's comment.
       all_referenced_artifacts: result of previous call to
         GetAllReferencedArtifacts.

     Returns:
       List of text runs for the entire user comment, some of which may have
       attributes that cause them to render as links in render-rich-text.ezt.
     """
     skip_lookups = all_referenced_artifacts == SKIP_LOOKUPS
     # Process components in determinate alphabetical order.
     for component in sorted(self.registry):
       _lookup, _match_ref, re_subst_dict = self.registry[component]
       if skip_lookups:
         component_ref_artifacts = None
       else:
         component_ref_artifacts = all_referenced_artifacts[component]
       for regex, subst_fun in re_subst_dict.items():
         text_runs = self._ApplySubstFunctionToRuns(
             text_runs, regex, subst_fun, mr, component_ref_artifacts)

     return text_runs

   def _ApplySubstFunctionToRuns(
       self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
     """Apply autolink regex and substitution function to each text run.

     Args:
       text_runs: list of TextRun objects with parts of the original comment.
       regex: Regular expression for detecting textual references to artifacts.
       subst_fun: function to return autolink markup, or original text.
       mr: common info parsed from the user HTTP request.
       component_ref_artifacts: already-looked-up destination artifacts to use
         when computing substitution text.

     Returns:
       A new list with more and smaller runs, some of which may have tag
       and link attributes set.
     """
     result_runs = []
     for run in text_runs:
       content = run.content
       if run.tag:
         # This chunk has already been substituted, don't allow nested
         # autolinking to mess up our output.
         result_runs.append(run)
         continue

       pos = 0
       for match in regex.finditer(content):
         start = match.start()
         if start > pos:
           # Plain text between the previous match and this one.
           result_runs.append(template_helpers.TextRun(content[pos:start]))
         result_runs.extend(subst_fun(mr, match, component_ref_artifacts))
         pos = match.end()

       remainder = content[pos:]
       if remainder:  # Keep any text that came after the last match
         result_runs.append(template_helpers.TextRun(remainder))

     # TODO(jrobbins): ideally we would merge consecutive plain text runs
     # so that regexes can match across those run boundaries.

     return result_runs


 def RegisterAutolink(services):
   """Register all the autolink hooks.

   Args:
     services: object whose autolink attribute is the registry to fill in;
         it is also bound into the issue and user lookup callbacks.
   """
   # The order of the RegisterComponent() calls does not matter so that we
   # could do this registration from separate modules in the future if needed.
   # Priority order of application is determined by the names of the
   # registered handlers, which are sorted in MarkupAutolinks().
   autolink = services.autolink

   autolink.RegisterComponent(
       '01-tracker-crbug',
       CurryGetReferencedIssues(services),
       ExtractProjectAndIssueIdsCrBug,
       {_CRBUG_REF_RE: ReplaceIssueRefCrBug})

   # Plain links need no artifact lookup, so both callbacks return None.
   autolink.RegisterComponent(
       '02-linkify-full-urls',
       lambda _mr, _refs: None,
       lambda _mr, _match: None,
       {autolink_constants.IS_A_LINK_RE: Linkify})

   autolink.RegisterComponent(
       '03-linkify-user-profiles-or-mailto',
       CurryGetReferencedUsers(services),
       lambda _mr, match: [match.group(0)],
       {autolink_constants.IS_IMPLIED_EMAIL_RE: LinkifyEmail})

   autolink.RegisterComponent(
       '04-tracker-regular',
       CurryGetReferencedIssues(services),
       ExtractProjectAndIssueIdsNormal,
       {_ISSUE_REF_RE: ReplaceIssueRefNormal})

   shorthand_link_substitutions = {
       autolink_constants.IS_A_SHORT_LINK_RE: Linkify,
       autolink_constants.IS_A_NUMERIC_SHORT_LINK_RE: Linkify,
       autolink_constants.IS_IMPLIED_LINK_RE: Linkify,
   }
   autolink.RegisterComponent(
       '05-linkify-shorthand',
       lambda _mr, _refs: None,
       lambda _mr, _match: None,
       shorthand_link_substitutions)

   autolink.RegisterComponent(
       '06-versioncontrol',
       GetReferencedRevisions,
       ExtractRevNums,
       {_GIT_HASH_RE: ReplaceRevisionRef,
        _SVN_REF_RE: ReplaceRevisionRef})
	# Copyright 2016 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style
	# license that can be found in the LICENSE file or at
	# https://developers.google.com/open-source/licenses/bsd

	"""Autolink helps auto-link references to artifacts in text.

	This class maintains a registry of artifact autolink syntax specs and
	callbacks. The structure of that registry is:
	{ component_name: (lookup_callback, match_to_reference_function,
	{ regex: substitution_callback, ...}),
	...
	}

	For example:
	{ 'tracker':
	(GetReferencedIssues,
	ExtractProjectAndIssueIds,
	{_ISSUE_REF_RE: ReplaceIssueRef}),
	'versioncontrol':
	(GetReferencedRevisions,
	ExtractProjectAndRevNum,
	{_GIT_HASH_RE: ReplaceRevisionRef}),
	}

	The dictionary of regexes is used here because, in the future, we
	might add more regexes for each component rather than have one complex
	regex per component.
	"""
	from __future__ import print_function
	from __future__ import division
	from __future__ import absolute_import

	import logging
	import re
	import urllib
	import urlparse

	import settings
	from features import autolink_constants
	from framework import template_helpers
	from framework import validate
	from proto import project_pb2
	from tracker import tracker_helpers


	# Autolinking is skipped once the combined comment text exceeds this size.
	_MAX_TOTAL_LENGTH = 1024 * 150  # 150KB

	# Sentinel used in place of the all_referenced_artifacts dict when the text
	# content was too big to look up all referenced artifacts quickly.
	SKIP_LOOKUPS = 'skip lookups'

	# Matches an HTML closing tag that sits at the very end of a string.
	_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', flags=re.IGNORECASE)

	# (opening, closing) delimiter pairs.  Every closing character is legal
	# inside a link, but when one ends the link and its opening partner does
	# not appear in the link, the closing character is trimmed off.  An opening
	# value of None means the closing character is always trimmed.
	_LINK_TRAILING_CHARS = [
	    (None, ':'), (None, '.'), (None, ','),
	    ('(', ')'), ('[', ']'), ('{', '}'), ('<', '>'),
	    ("'", "'"), ('"', '"'),
	]


	def LinkifyEmail(_mr, autolink_regex_match, component_ref_artifacts):
	  """Turn a matched email address into a profile link or a mailto: link.

	  This is a substitution callback for the autolink feature; its parameters
	  are the standard ones for that kind of callback.

	  Args:
	    _mr: unused information parsed from the HTTP request.
	    autolink_regex_match: regex match whose full text is the email address.
	    component_ref_artifacts: result of the earlier call to
	        GetReferencedUsers; may be None or empty.

	  Returns:
	    A one-element list of TextRuns.  An address that fails validation is
	    returned as plain text.  Otherwise the run has tag=a and links to
	    /u/<email> when the address is in component_ref_artifacts, or to
	    mailto:<email> when it is not.
	  """
	  email = autolink_regex_match.group(0)

	  if not validate.IsValidEmail(email):
	    return [template_helpers.TextRun(email)]

	  if component_ref_artifacts and email in component_ref_artifacts:
	    href = '/u/%s' % email
	  else:
	    href = 'mailto:' + email

	  return [template_helpers.TextRun(email, tag='a', href=href)]


	def CurryGetReferencedUsers(services):
	  """Bind services into a user-lookup callback for the autolink registry.

	  Currying is a convenient way to give the callback access to the services
	  objects without requiring that every possible services object be passed
	  through the autolink registry and functions.

	  Args:
	    services: connection to the user persistence layer.

	  Returns:
	    A ready-to-use function that accepts the arguments that autolink
	    expects to pass to it.
	  """

	  def GetReferencedUsers(mr, emails):
	    """Return the existing users among the given email addresses.

	    Args:
	      mr: commonly used info parsed from the request.
	      emails: email address strings for each user that is mentioned in
	          the comment text.

	    Returns:
	      A dictionary {email: user_pb} including all existing users.
	    """
	    user_id_dict = services.user.LookupExistingUserIDs(mr.cnxn, emails)
	    users_by_id = services.user.GetUsersByIDs(
	        mr.cnxn, list(user_id_dict.values()))
	    return {
	        email: users_by_id[user_id]
	        for email, user_id in user_id_dict.items()}

	  return GetReferencedUsers


	def Linkify(_mr, autolink_regex_match, _component_ref_artifacts):
	  """Turn a matched URL-like string into a hyperlink, or leave it as text.

	  This is a substitution callback for the autolink feature; its parameters
	  are the standard ones for that kind of callback.

	  Args:
	    _mr: unused information parsed from the HTTP request.
	    autolink_regex_match: regex match for the textual reference.
	    _component_ref_artifacts: unused result of the artifact lookup.

	  Returns:
	    A list of TextRuns.  When the link validates, the first run has tag=a
	    and is followed by a plain run holding any trimmed trailing text.
	    When it does not validate, the whole match is returned as one plain
	    run.
	  """
	  hyperlink = autolink_regex_match.group(0)

	  # Trim closing punctuation from the end of the link.  This is one pass
	  # in table order, and a paired closer is kept when its opener occurs in
	  # the link.
	  trailing = ''
	  for begin, end in _LINK_TRAILING_CHARS:
	    if hyperlink.endswith(end):
	      if not begin or hyperlink[:-len(end)].find(begin) == -1:
	        trailing = end + trailing
	        hyperlink = hyperlink[:-len(end)]

	  # A closing HTML tag glued to the end of the link is not part of it.
	  tag_match = _CLOSING_TAG_RE.search(hyperlink)
	  if tag_match:
	    trailing = hyperlink[tag_match.start(0):] + trailing
	    hyperlink = hyperlink[:tag_match.start(0)]

	  href = hyperlink
	  if not href.lower().startswith(('http', 'ftp', 'mailto')):
	    # We use http because redirects for https are not all set up.
	    href = 'http://' + href

	  if (not validate.IsValidURL(href) and
	      not (href.startswith('mailto') and validate.IsValidEmail(href[7:]))):
	    return [template_helpers.TextRun(autolink_regex_match.group(0))]

	  result = [template_helpers.TextRun(hyperlink, tag='a', href=href)]
	  if trailing:
	    result.append(template_helpers.TextRun(trailing))

	  return result


	# Regular expression to detect git hashes.
	# Used to auto-link to Git hashes when displaying issue details.
	# Matches a 40-character hexadecimal string N that stands as its own word,
	# optionally preceded by "r" (as in "rN") or by "revision" followed by
	# whitespace and an optional "#" (as in "revision N" or "revision #N").
	# The prefix is captured as group "prefix" and N as group "revnum".
	_GIT_HASH_RE = re.compile(
	    r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
	    re.IGNORECASE | re.MULTILINE)

	# This is for SVN revisions and Git commit positions.
	# Same shape as above, except that the prefix is required and N is a
	# decimal number of 4 to 7 digits.
	_SVN_REF_RE = re.compile(
	    r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{4,7}))\b',
	    re.IGNORECASE | re.MULTILINE)


	def GetReferencedRevisions(_mr, _refs):
	  """Artifact lookup callback for revisions; currently looks up nothing.

	  Args:
	    _mr: unused information parsed from the HTTP request.
	    _refs: unused revision references found in the comment text.

	  Returns:
	    Always None.
	  """
	  # For now we just autolink any revision hash without actually
	  # checking that such a revision exists,
	  # TODO(jrobbins): Hit crrev.com and check that the revision exists
	  # and show a rollover with revision info.
	  return None


	def ExtractRevNums(_mr, autolink_regex_match):
	  """Return the internal representation of a revision reference.

	  Args:
	    _mr: unused information parsed from the HTTP request.
	    autolink_regex_match: regex match that has a 'revnum' named group.

	  Returns:
	    A one-element list holding the text of the 'revnum' group.
	  """
	  ref = autolink_regex_match.group('revnum')
	  logging.debug('revision ref = %s', ref)
	  return [ref]


	def ReplaceRevisionRef(
	    mr, autolink_regex_match, _component_ref_artifacts):
	  """Return the link run that replaces a matched revision reference.

	  Args:
	    mr: info parsed from the HTTP request; mr.project is used to pick the
	        revision URL format.
	    autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
	    _component_ref_artifacts: unused result of GetReferencedRevisions.

	  Returns:
	    A one-element list with a TextRun with tag=a whose text is the prefix
	    (when one was matched) followed by the revision, and whose href is the
	    revision URL format filled in with the revision.
	  """
	  prefix = autolink_regex_match.group('prefix')
	  revnum = autolink_regex_match.group('revnum')
	  url = _GetRevisionURLFormat(mr.project).format(revnum=revnum)
	  content = revnum
	  if prefix:
	    content = '%s%s' % (prefix, revnum)
	  return [template_helpers.TextRun(content, tag='a', href=url)]


	def _GetRevisionURLFormat(project):
	  """Return the revision URL format string to use for the given project.

	  Args:
	    project: object with a revision_url_format attribute.

	  Returns:
	    The project's own revision_url_format when it is set (truthy),
	    otherwise settings.revision_url_format.
	  """
	  # TODO(jrobbins): Expose a UI to customize it to point to whatever site
	  # hosts the source code. Also, site-wide default.
	  return (project.revision_url_format or settings.revision_url_format)


	# Regular expression to detect issue references.
	# Used to auto-link to other issues when displaying issue details.
	# Matches "issue", "issues", "bug" or "bugs" (when not part of a larger
	# word), optionally followed by ":" or "=", and then one or more issue
	# numbers.  Each number may carry a "project:" or "project#" qualifier
	# and/or a "#", and numbers may be separated by commas, "and" or "or".
	_ISSUE_REF_RE = re.compile(r"""
	    (?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
	    ([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
	     (?P<number_sign>\#?)
	     (?P<local_id>\d+)\b
	     (,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)

	# This is for chromium.org's crbug.com shorthand domain.
	# The dot is escaped so that only the literal host "crbug.com" matches,
	# not any character in that position (e.g. "crbugxcom/123").
	_CRBUG_REF_RE = re.compile(r"""
	    (?P<prefix>\b(https?://)?crbug\.com/)
	    ((?P<project_name>\b[-a-z0-9]+)(?P<separator>/))?
	    (?P<local_id>\d+)\b
	    (?P<anchor>\#c[0-9]+)?""", re.IGNORECASE | re.VERBOSE)

	# Once the overall issue reference has been detected, pick out the specific
	# issue project:id items within it. Often there is just one, but the "and|or"
	# syntax can allow multiple issues.
	_SINGLE_ISSUE_REF_RE = re.compile(r"""
	    (?P<prefix>\b(issue|bug)[ \t]*)?
	    (?P<project_name>\b[-a-z0-9]+[:\#])?
	    (?P<number_sign>\#?)
	    (?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)


	def CurryGetReferencedIssues(services):
	  """Bind services into an issue-lookup callback for the autolink registry.

	  Currying is a convenient way to give the callback access to the services
	  objects without requiring that every possible services object be passed
	  through the autolink registry and functions.

	  Args:
	    services: connection to issue, config, and project persistence layers.

	  Returns:
	    A ready-to-use function that accepts the arguments that autolink
	    expects to pass to it.
	  """

	  def GetReferencedIssues(mr, ref_tuples):
	    """Return dicts of the open and closed issues referenced by comments.

	    Args:
	      mr: commonly used info parsed from the request.
	      ref_tuples: (project_name, local_id) tuples for each issue that is
	          mentioned in the comment text. The project_name may be None,
	          in which case the issue is assumed to be in the current project.

	    Returns:
	      A pair (open_dict, closed_dict).  Each maps the key produced by
	      _IssueProjectKey to the issue, for the issues returned by
	      tracker_helpers.GetAllowedOpenedAndClosedIssues.
	    """
	    ref_projects = services.project.GetProjectsByName(
	        mr.cnxn,
	        [(ref_pn or mr.project_name) for ref_pn, _ in ref_tuples])
	    issue_ids, _misses = services.issue.ResolveIssueRefs(
	        mr.cnxn, ref_projects, mr.project_name, ref_tuples)
	    open_issues, closed_issues = (
	        tracker_helpers.GetAllowedOpenedAndClosedIssues(
	            mr, issue_ids, services))

	    open_dict = {}
	    for issue in open_issues:
	      open_dict[_IssueProjectKey(issue.project_name, issue.local_id)] = issue

	    closed_dict = {}
	    for issue in closed_issues:
	      closed_dict[_IssueProjectKey(issue.project_name, issue.local_id)] = issue

	    logging.info('autolinking dicts %r and %r', open_dict, closed_dict)

	    return open_dict, closed_dict

	  return GetReferencedIssues


	def _ParseProjectNameMatch(project_name):
	  """Process the passed project name and determine the best representation.

	  Leading whitespace is removed, as are any trailing '#', ':' or whitespace
	  characters that were captured along with the name.

	  Args:
	    project_name: a string with the project name matched in a regex, or
	        None if nothing was matched.

	  Returns:
	    A minimal representation of the project name, or None if there is no
	    valid content (nothing was passed, or only delimiters were passed).
	  """
	  if not project_name:
	    return None
	  # Normalize a delimiter-only match to None so that callers get the
	  # documented "no valid content" result rather than an empty string.
	  return project_name.lstrip().rstrip('#: \t\n') or None


	def _ExtractProjectAndIssueIds(
	    autolink_regex_match, subregex, default_project_name=None):
	  """Convert a regex match for a textual reference into our internal form.

	  Args:
	    autolink_regex_match: regex match covering the whole textual reference.
	    subregex: compiled regex applied to the matched text to pick out each
	        individual issue reference; it must define the named groups
	        'project_name' and 'local_id'.
	    default_project_name: project name to use for references that do not
	        name a project themselves.

	  Returns:
	    A list of (project_name, local_id) tuples, one per individual reference.
	  """
	  matched_text = autolink_regex_match.group(0)
	  found_refs = []
	  for single_ref in subregex.finditer(matched_text):
	    explicit_project = _ParseProjectNameMatch(
	        single_ref.group('project_name'))
	    local_id = int(single_ref.group('local_id'))
	    ref = (explicit_project or default_project_name, local_id)
	    found_refs.append(ref)
	    logging.info('issue ref = %s', ref)

	  return found_refs


	def ExtractProjectAndIssueIdsNormal(_mr, autolink_regex_match):
	  """Extract (project_name, local_id) refs from an "issue 123" style match.

	  No default project is supplied here, so references that do not name a
	  project are returned with a project_name of None.
	  """
	  return _ExtractProjectAndIssueIds(
	      autolink_regex_match, subregex=_SINGLE_ISSUE_REF_RE)


	def ExtractProjectAndIssueIdsCrBug(_mr, autolink_regex_match):
	  """Extract (project_name, local_id) refs from a "crbug.com/123" style match.

	  References that do not name a project are attributed to 'chromium'.
	  """
	  return _ExtractProjectAndIssueIds(
	      autolink_regex_match,
	      subregex=_CRBUG_REF_RE,
	      default_project_name='chromium')


	# The key is built from the project name, rather than the project ID, so
	# that this function does not need a services object to do a lookup.
	def _IssueProjectKey(project_name, local_id):
	  """Return the 'project_name:local_id' key identifying a referenced issue."""
	  key_parts = (project_name, local_id)
	  return '%s:%d' % key_parts


	class IssueRefRun(object):
	  """A text run that is rendered as a link to a referenced issue."""

	  def __init__(self, issue, is_closed, project_name, content, anchor):
	    """Build the link attributes for one issue reference.

	    Args:
	      issue: the referenced issue; its summary and local_id are used.
	      is_closed: True if the referenced issue is closed.
	      project_name: name of the project used in the link URL.
	      content: the original text of the reference, used as the link text.
	      anchor: string appended to the URL, e.g., '#c3', or ''.
	    """
	    css_class = None
	    link_text = content
	    if is_closed:
	      # Closed issues get their own CSS class and space-padded link text.
	      css_class = 'closed_ref'
	      link_text = ' %s ' % content

	    self.tag = 'a'
	    self.css_class = css_class
	    self.title = issue.summary
	    self.href = '/p/%s/issues/detail?id=%d%s' % (
	        project_name, issue.local_id, anchor)
	    self.content = link_text


	def _ReplaceIssueRef(
	    autolink_regex_match, component_ref_artifacts, single_issue_regex,
	    default_project_name):
	  """Examine a textual reference and replace it with an autolink or not.

	  Args:
	    autolink_regex_match: regex match for the textual reference.
	    component_ref_artifacts: result of earlier call to GetReferencedIssues,
	        i.e., an (open_dict, closed_dict) pair.  A false value means that no
	        lookups were done, so nothing will be linked.
	    single_issue_regex: regular expression to parse individual issue
	        references out of a multi-issue-reference phrase.  E.g.,
	        "issues 12 and 34".
	    default_project_name: project name to use when not specified.

	  Returns:
	    A list of IssueRefRuns and TextRuns to replace the textual reference.
	    Each individual reference that resolves to a known issue becomes a
	    hyperlink run; everything else is kept as the original plain text.
	  """
	  if component_ref_artifacts:
	    open_dict, closed_dict = component_ref_artifacts
	  else:
	    open_dict, closed_dict = {}, {}

	  matched_text = autolink_regex_match.group(0)
	  logging.info('called ReplaceIssueRef on %r', matched_text)

	  runs = []
	  consumed = 0  # Offset in matched_text up to which runs have been emitted.
	  for single_ref in single_issue_regex.finditer(matched_text):
	    ref_start = single_ref.start()
	    if ref_start < consumed:
	      continue

	    # Keep the plain text between the previous reference and this one.
	    leading_text = matched_text[consumed:ref_start]
	    if leading_text:
	      runs.append(template_helpers.TextRun(leading_text))

	    runs.append(_ReplaceSingleIssueRef(
	        single_ref, open_dict, closed_dict, default_project_name))
	    consumed = single_ref.end()

	  # Keep any plain text that came after the last reference.
	  trailing_text = matched_text[consumed:]
	  if trailing_text:
	    runs.append(template_helpers.TextRun(trailing_text))

	  return runs


	def ReplaceIssueRefNormal(mr, autolink_regex_match, component_ref_artifacts):
	  """Replaces occurrences of 'issue 123' with link TextRuns as needed.

	  References that do not name a project are resolved against the project
	  of the current request, mr.project_name.
	  """
	  current_project = mr.project_name
	  return _ReplaceIssueRef(
	      autolink_regex_match,
	      component_ref_artifacts,
	      single_issue_regex=_SINGLE_ISSUE_REF_RE,
	      default_project_name=current_project)


	def ReplaceIssueRefCrBug(_mr, autolink_regex_match, component_ref_artifacts):
	  """Replaces occurrences of 'crbug.com/123' with link TextRuns as needed.

	  References that do not name a project are resolved against 'chromium'.
	  """
	  return _ReplaceIssueRef(
	      autolink_regex_match,
	      component_ref_artifacts,
	      single_issue_regex=_CRBUG_REF_RE,
	      default_project_name='chromium')


	def _ReplaceSingleIssueRef(
	    submatch, open_dict, closed_dict, default_project_name):
	  """Replace one issue reference with a link, or the original text.

	  Args:
	    submatch: regex match for a single issue reference.  It must define the
	        named groups 'project_name' and 'local_id'; an 'anchor' group is
	        used if the regex defines one.
	    open_dict: dict of open issues, keyed as by _IssueProjectKey().
	    closed_dict: dict of closed issues, keyed as by _IssueProjectKey().
	    default_project_name: project name to use when the text has none.

	  Returns:
	    An IssueRefRun if the referenced issue is in either dict, otherwise a
	    TextRun holding the original text.
	  """
	  matched_text = submatch.group(0)
	  anchor = submatch.groupdict().get('anchor') or ''
	  named_project = submatch.group('project_name')
	  # We need a project name for the URL, even if it is not in the text.
	  project_name = (
	      named_project.lstrip().rstrip(':#') if named_project
	      else default_project_name)

	  issue_key = _IssueProjectKey(project_name, int(submatch.group('local_id')))
	  # Open issues take precedence over closed ones, as before.
	  for issues_by_key, is_closed in ((open_dict, False), (closed_dict, True)):
	    if issue_key in issues_by_key:
	      return IssueRefRun(
	          issues_by_key[issue_key], is_closed, project_name, matched_text,
	          anchor)

	  # Don't link to non-existent issues.
	  return template_helpers.TextRun(matched_text)


	class Autolink(object):
	  """Maintains a registry of autolink syntax and can apply it to comments."""

	  def __init__(self):
	    # Maps component_name to a tuple of (artifact_lookup_function,
	    # match_to_reference_function, {regex: substitution_function}).
	    self.registry = {}

	  def RegisterComponent(self, component_name, artifact_lookup_function,
	                        match_to_reference_function, autolink_re_subst_dict):
	    """Register all the autolink info for a software component.

	    Args:
	      component_name: string name of software component, must be unique.
	          Registering the same name again replaces the earlier entry.
	      artifact_lookup_function: function to batch lookup all artifacts that
	          might have been referenced in a set of comments:
	          function(mr, refs) -> referenced_artifacts
	          The referenced_artifacts will be passed to each subst function.
	      match_to_reference_function: convert a regex match object to
	          some internal representation of the artifact references:
	          function(mr, match) -> iterable of refs, or None
	      autolink_re_subst_dict: dictionary of regular expressions and
	          the substitution function that should be called for each match:
	          function(mr, match, referenced_artifacts) -> replacement runs
	    """
	    component_info = (
	        artifact_lookup_function,
	        match_to_reference_function,
	        autolink_re_subst_dict)
	    self.registry[component_name] = component_info

	  def GetAllReferencedArtifacts(
	      self, mr, comment_text_list, max_total_length=_MAX_TOTAL_LENGTH):
	    """Call callbacks to lookup all artifacts possibly referenced.

	    Args:
	      mr: information parsed out of the user HTTP request.
	      comment_text_list: list of comment content strings.
	      max_total_length: int max number of characters to accept:
	          if more than this, then skip autolinking entirely.

	    Returns:
	      Opaque object that can be passed to MarkupAutolinks.  Its
	      structure happens to be {component_name: artifact_list, ...},
	      or the special value SKIP_LOOKUPS.
	    """
	    total_len = sum(len(comment_text) for comment_text in comment_text_list)
	    if total_len > max_total_length:
	      return SKIP_LOOKUPS

	    artifacts_by_component = {}
	    for component_name, component_info in self.registry.items():
	      lookup, match_to_refs, re_dict = component_info
	      # Gather every reference made in any comment, then look them all up
	      # with a single call per component.
	      refs = set()
	      for comment_text in comment_text_list:
	        for regex in re_dict:
	          for match in regex.finditer(comment_text):
	            refs_in_match = match_to_refs(mr, match)
	            if refs_in_match:
	              refs.update(refs_in_match)

	      artifacts_by_component[component_name] = lookup(mr, refs)

	    return artifacts_by_component

	  def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
	    """Loop over components and regexes, applying all substitutions.

	    Args:
	      mr: info parsed from the user's HTTP request.
	      text_runs: List of text runs for the user's comment.
	      all_referenced_artifacts: result of previous call to
	          GetAllReferencedArtifacts.

	    Returns:
	      List of text runs for the entire user comment, some of which may have
	      attributes that cause them to render as links in render-rich-text.ezt.
	    """
	    skip_lookups = all_referenced_artifacts == SKIP_LOOKUPS
	    # Process components in determinate alphabetical order.
	    for component_name, component_info in sorted(self.registry.items()):
	      _lookup, _match_ref, re_subst_dict = component_info
	      component_ref_artifacts = (
	          None if skip_lookups else all_referenced_artifacts[component_name])
	      for regex, subst_fun in re_subst_dict.items():
	        text_runs = self._ApplySubstFunctionToRuns(
	            text_runs, regex, subst_fun, mr, component_ref_artifacts)

	    return text_runs

	  def _ApplySubstFunctionToRuns(
	      self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
	    """Apply autolink regex and substitution function to each text run.

	    Args:
	      text_runs: list of TextRun objects with parts of the original comment.
	      regex: Regular expression for detecting textual references to
	          artifacts.
	      subst_fun: function to return autolink markup, or original text.
	      mr: common info parsed from the user HTTP request.
	      component_ref_artifacts: already-looked-up destination artifacts to
	          use when computing substitution text.

	    Returns:
	      A new list with more and smaller runs, some of which may have tag
	      and link attributes set.
	    """
	    new_runs = []
	    for run in text_runs:
	      text = run.content
	      if run.tag:
	        # This chunk has already been substituted, don't allow nested
	        # autolinking to mess up our output.
	        new_runs.append(run)
	        continue

	      consumed = 0  # Offset in text up to which runs have been emitted.
	      for match in regex.finditer(text):
	        match_start = match.start()
	        if match_start > consumed:
	          # Keep the plain text that came before this match.
	          new_runs.append(
	              template_helpers.TextRun(text[consumed:match_start]))
	        new_runs.extend(subst_fun(mr, match, component_ref_artifacts))
	        consumed = match.end()

	      # Keep any text that came after the last match.
	      remaining_text = run.content[consumed:]
	      if remaining_text:
	        new_runs.append(template_helpers.TextRun(remaining_text))

	    # TODO(jrobbins): ideally we would merge consecutive plain text runs
	    # so that regexes can match across those run boundaries.

	    return new_runs


	def RegisterAutolink(services):
	  """Register all the autolink hooks on services.autolink."""
	  # The order of the RegisterComponent() calls does not matter, so we could
	  # do this registration from separate modules in the future if needed.
	  # Priority order of application is determined by the names of the
	  # registered handlers, which are sorted in MarkupAutolinks().
	  register = services.autolink.RegisterComponent

	  def _NoArtifacts(_mr, _refs):
	    """Lookup callback for components that have nothing to look up."""
	    return None

	  def _NoRefs(_mr, _match):
	    """Match-to-reference callback for components that collect no refs."""
	    return None

	  register(
	      '01-tracker-crbug',
	      CurryGetReferencedIssues(services),
	      ExtractProjectAndIssueIdsCrBug,
	      {_CRBUG_REF_RE: ReplaceIssueRefCrBug})

	  register(
	      '02-linkify-full-urls',
	      _NoArtifacts,
	      _NoRefs,
	      {autolink_constants.IS_A_LINK_RE: Linkify})

	  register(
	      '03-linkify-user-profiles-or-mailto',
	      CurryGetReferencedUsers(services),
	      lambda _mr, match: [match.group(0)],
	      {autolink_constants.IS_IMPLIED_EMAIL_RE: LinkifyEmail})

	  register(
	      '04-tracker-regular',
	      CurryGetReferencedIssues(services),
	      ExtractProjectAndIssueIdsNormal,
	      {_ISSUE_REF_RE: ReplaceIssueRefNormal})

	  register(
	      '05-linkify-shorthand',
	      _NoArtifacts,
	      _NoRefs,
	      {autolink_constants.IS_A_SHORT_LINK_RE: Linkify,
	       autolink_constants.IS_A_NUMERIC_SHORT_LINK_RE: Linkify,
	       autolink_constants.IS_IMPLIED_LINK_RE: Linkify,
	      })

	  register(
	      '06-versioncontrol',
	      GetReferencedRevisions,
	      ExtractRevNums,
	      {_GIT_HASH_RE: ReplaceRevisionRef,
	       _SVN_REF_RE: ReplaceRevisionRef})