Project import generated by Copybara.

GitOrigin-RevId: d9e9e3fb4e31372ec1fb43b178994ca78fa8fe70
diff --git a/search/ast2sort.py b/search/ast2sort.py
new file mode 100644
index 0000000..08ed346
--- /dev/null
+++ b/search/ast2sort.py
@@ -0,0 +1,451 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Convert a user's issue sorting directives into SQL clauses.
+
+Some sort directives translate into simple ORDER BY column specifications.
+Other sort directives require that a LEFT JOIN be done to bring in
+relevant information that is then used in the ORDER BY.
+
+Sorting based on strings can slow down the DB because long sort-keys
+must be loaded into RAM, which means that fewer sort-keys fit into the
+DB's sorting buffers at a time.  Also, Monorail defines the sorting
+order of well-known labels and statuses based on the order in which
+they are defined in the project's config.  So, we determine the sort order of
+labels and status values before executing the query and then use the MySQL
+FIELD() function to sort their IDs in the desired order, without sorting
+strings.
+
+For more info, see the "Sorting in Monorail" and "What makes Monorail Fast?"
+design docs.
+"""
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+
+import logging
+
+from framework import sql
+from proto import tracker_pb2
+from tracker import tracker_constants
+
+
+NATIVE_SORTABLE_FIELDS = [
+    'id', 'stars', 'attachments', 'opened', 'closed', 'modified',
+    'ownermodified', 'statusmodified', 'componentmodified',
+    ]
+
+FIELDS_TO_COLUMNS = {
+    'id': 'local_id',
+    'stars': 'star_count',
+    'attachments': 'attachment_count',
+    'ownermodified': 'owner_modified',
+    'statusmodified': 'status_modified',
+    'componentmodified': 'component_modified',
+    }
+
+APPROVAL_STATUS_SORT_ORDER = [
+    '\'not_set\'', '\'needs_review\'', '\'na\'', '\'review_requested\'',
+    '\'review_started\'', '\'need_info\'', '\'approved\'', '\'not_approved\'']
+
+
+def BuildSortClauses(
+    sort_directives, harmonized_labels, harmonized_statuses,
+    harmonized_fields):
+  """Return LEFT JOIN and ORDER BY clauses needed to sort the results."""
+  if not sort_directives:
+    return [], []
+
+  all_left_joins = []
+  all_order_by = []
+  for i, sd in enumerate(sort_directives):
+    left_join_parts, order_by_parts = _OneSortDirective(
+        i, sd, harmonized_labels, harmonized_statuses, harmonized_fields)
+    all_left_joins.extend(left_join_parts)
+    all_order_by.extend(order_by_parts)
+
+  return all_left_joins, all_order_by
+
+
+def _ProcessProjectSD(fmt):
+  """Convert a 'project' sort directive into SQL."""
+  left_joins = []
+  order_by = [(fmt('Issue.project_id {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessReporterSD(fmt):
+  """Convert a 'reporter' sort directive into SQL."""
+  left_joins = [
+      (fmt('User AS {alias} ON Issue.reporter_id = {alias}.user_id'), [])]
+  order_by = [
+      (fmt('ISNULL({alias}.email) {sort_dir}'), []),
+      (fmt('{alias}.email {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessOwnerSD(fmt):
+  """Convert a 'owner' sort directive into SQL."""
+  left_joins = [
+      (fmt('User AS {alias}_exp ON Issue.owner_id = {alias}_exp.user_id'), []),
+      (fmt('User AS {alias}_der ON '
+           'Issue.derived_owner_id = {alias}_der.user_id'), [])]
+  order_by = [
+      (fmt('(ISNULL({alias}_exp.email) AND ISNULL({alias}_der.email)) '
+           '{sort_dir}'), []),
+      (fmt('CONCAT({alias}_exp.email, {alias}_der.email) {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessCcSD(fmt):
+  """Convert a 'cc' sort directive into SQL."""
+  # Note: derived cc's are included automatically.
+  # Note: This sorts on the best Cc, not all Cc addresses.
+  # Being more exact might require GROUP BY and GROUP_CONCAT().
+  left_joins = [
+      (fmt('Issue2Cc AS {alias} ON Issue.id = {alias}.issue_id '
+           'LEFT JOIN User AS {alias}_user '
+           'ON {alias}.cc_id = {alias}_user.user_id'), [])]
+  order_by = [
+      (fmt('ISNULL({alias}_user.email) {sort_dir}'), []),
+      (fmt('{alias}_user.email {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessComponentSD(fmt):
+  """Convert a 'component' sort directive into SQL."""
+  # Note: derived components are included automatically.
+  # Note: This sorts on the best component, not all of them.
+  # Being more exact might require GROUP BY and GROUP_CONCAT().
+  left_joins = [
+      (fmt('Issue2Component AS {alias} ON Issue.id = {alias}.issue_id '
+           'LEFT JOIN ComponentDef AS {alias}_component '
+           'ON {alias}.component_id = {alias}_component.id'), [])]
+  order_by = [
+      (fmt('ISNULL({alias}_component.path) {sort_dir}'), []),
+      (fmt('{alias}_component.path {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessSummarySD(fmt):
+  """Convert a 'summary' sort directive into SQL."""
+  left_joins = [
+      (fmt('IssueSummary AS {alias} ON Issue.id = {alias}.issue_id'), [])]
+  order_by = [(fmt('{alias}.summary {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessStatusSD(fmt, harmonized_statuses):
+  """Convert a 'status' sort directive into SQL."""
+  left_joins = []
+  # Note: status_def_rows are already ordered by REVERSED rank.
+  wk_status_ids = [
+      stat_id for stat_id, rank, _ in harmonized_statuses
+      if rank is not None]
+  odd_status_ids = [
+      stat_id for stat_id, rank, _ in harmonized_statuses
+      if rank is None]
+  wk_status_ph = sql.PlaceHolders(wk_status_ids)
+  # Even though oddball statuses sort lexographically, use FIELD to determine
+  # the order so that the database sorts ints rather than strings for speed.
+  odd_status_ph = sql.PlaceHolders(odd_status_ids)
+
+  order_by = []  # appended to below: both well-known and oddball can apply
+  sort_col = ('IF(ISNULL(Issue.status_id), Issue.derived_status_id, '
+              'Issue.status_id)')
+  # Reverse sort by using rev_sort_dir because we want NULLs at the end.
+  if wk_status_ids:
+    order_by.append(
+        (fmt('FIELD({sort_col}, {wk_status_ph}) {rev_sort_dir}',
+             sort_col=sort_col, wk_status_ph=wk_status_ph),
+         wk_status_ids))
+  if odd_status_ids:
+    order_by.append(
+        (fmt('FIELD({sort_col}, {odd_status_ph}) {rev_sort_dir}',
+             sort_col=sort_col, odd_status_ph=odd_status_ph),
+         odd_status_ids))
+
+  return left_joins, order_by
+
+
+def _ProcessBlockedSD(fmt):
+  """Convert a 'blocked' sort directive into SQL."""
+  left_joins = [
+      (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.issue_id '
+           'AND {alias}.kind = %s'),
+       ['blockedon'])]
+  order_by = [(fmt('ISNULL({alias}.dst_issue_id) {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessBlockedOnSD(fmt):
+  """Convert a 'blockedon' sort directive into SQL."""
+  left_joins = [
+      (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.issue_id '
+           'AND {alias}.kind = %s'),
+       ['blockedon'])]
+  order_by = [(fmt('ISNULL({alias}.dst_issue_id) {sort_dir}'), []),
+              (fmt('{alias}.dst_issue_id {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessBlockingSD(fmt):
+  """Convert a 'blocking' sort directive into SQL."""
+  left_joins = [
+      (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.dst_issue_id '
+           'AND {alias}.kind = %s'),
+       ['blockedon'])]
+  order_by = [(fmt('ISNULL({alias}.issue_id) {sort_dir}'), []),
+              (fmt('{alias}.issue_id {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessMergedIntoSD(fmt):
+  """Convert a 'mergedinto' sort directive into SQL."""
+  left_joins = [
+      (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.issue_id '
+           'AND {alias}.kind = %s'),
+       ['mergedinto'])]
+  order_by = [(fmt('ISNULL({alias}.dst_issue_id) {sort_dir}'), []),
+              (fmt('{alias}.dst_issue_id {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessOwnerLastVisitSD(fmt):
+  """Convert a 'ownerlastvisit' sort directive into SQL."""
+  left_joins = [
+      (fmt('User AS {alias} ON (Issue.owner_id = {alias}.user_id OR '
+           'Issue.derived_owner_id = {alias}.user_id)'), [])]
+  order_by = [
+      (fmt('ISNULL({alias}.last_visit_timestamp) {sort_dir}'), []),
+      (fmt('{alias}.last_visit_timestamp {sort_dir}'), [])]
+  return left_joins, order_by
+
+
+def _ProcessCustomAndLabelSD(
+    sd, harmonized_labels, harmonized_fields, alias, sort_dir, fmt):
+  """Convert a label or custom field sort directive into SQL."""
+  left_joins = []
+  order_by = []
+  phase_name = None
+  # If a custom field is an approval_type with no suffix, the
+  # approvals should be sorted by status.
+  approval_suffix = '-status'
+  approval_fd_list = []
+
+  # Check for reserved suffixes in col_name sd.
+  # TODO(jojwang): check for other suffixes in
+  # tracker_constants.RESERVED_COL_NAME_SUFFIXES
+  if sd.endswith(tracker_constants.APPROVER_COL_SUFFIX):
+    field_name = sd[:-len(tracker_constants.APPROVER_COL_SUFFIX)]
+    fd_list = []
+    approval_fd_list = [fd for fd in harmonized_fields
+                        if fd.field_name.lower() == field_name]
+    approval_suffix = tracker_constants.APPROVER_COL_SUFFIX
+  else:
+    field_name = sd
+    if '.' in sd:
+      phase_name, field_name = sd.split('.', 1)
+
+    fd_list = [fd for fd in harmonized_fields
+               if fd.field_name.lower() == field_name]
+    if not phase_name:
+      approval_fd_list = [fd for fd in fd_list if
+                          fd.field_type == tracker_pb2.FieldTypes.APPROVAL_TYPE]
+
+  # 'alias' is used for all the CustomField, Approval, and Label sort clauses.
+  # Custom field aliases are alwyas appended by the value_col name.
+  # Approval aliases are always appended with 'approval'.
+  # Label clauses use 'alias' as-is.
+  if fd_list:
+    int_left_joins, int_order_by = _CustomFieldSortClauses(
+        fd_list, tracker_pb2.FieldTypes.INT_TYPE, 'int_value',
+        alias, sort_dir, phase_name=phase_name)
+    str_left_joins, str_order_by = _CustomFieldSortClauses(
+        fd_list, tracker_pb2.FieldTypes.STR_TYPE, 'str_value',
+        alias, sort_dir, phase_name=phase_name)
+    user_left_joins, user_order_by = _CustomFieldSortClauses(
+        fd_list, tracker_pb2.FieldTypes.USER_TYPE, 'user_id',
+        alias, sort_dir, phase_name=phase_name)
+    left_joins.extend(int_left_joins + str_left_joins + user_left_joins)
+    order_by.extend(int_order_by + str_order_by + user_order_by)
+
+  if approval_fd_list:
+    approval_left_joins, approval_order_by = _ApprovalFieldSortClauses(
+        approval_fd_list, approval_suffix, fmt)
+    left_joins.extend(approval_left_joins)
+    order_by.extend(approval_order_by)
+
+  label_left_joinss, label_order_by = _LabelSortClauses(
+      sd, harmonized_labels, fmt)
+  left_joins.extend(label_left_joinss)
+  order_by.extend(label_order_by)
+
+  return left_joins, order_by
+
+
+def _ApprovalFieldSortClauses(
+    approval_fd_list, approval_suffix, fmt):
+  """Give LEFT JOIN and ORDER BY terms for approval sort directives."""
+  approver_left_joins = None
+  if approval_suffix == tracker_constants.APPROVER_COL_SUFFIX:
+    tbl_name = 'IssueApproval2Approver'
+    approver_left_joins = (
+        fmt('User AS {alias}_approval_user '
+            'ON {alias}_approval.approver_id = {alias}_approval_user.user_id'),
+        [])
+    order_by = [
+        (fmt('ISNULL({alias}_approval_user.email) {sort_dir}'), []),
+        (fmt('{alias}_approval_user.email {sort_dir}'), [])]
+  else:
+    tbl_name = 'Issue2ApprovalValue'
+    order_by = [
+        (fmt('FIELD({alias}_approval.status, {approval_status_ph}) '
+             '{rev_sort_dir}',
+             approval_status_ph=sql.PlaceHolders(APPROVAL_STATUS_SORT_ORDER)),
+         APPROVAL_STATUS_SORT_ORDER
+        )]
+
+  left_joins = [(
+      fmt('{tbl_name} AS {alias}_approval '
+          'ON Issue.id = {alias}_approval.issue_id '
+          'AND {alias}_approval.approval_id IN ({approval_ids_ph})',
+          approval_ids_ph=sql.PlaceHolders(approval_fd_list),
+          tbl_name=tbl_name),
+      [fd.field_id for fd in approval_fd_list]
+  )]
+
+  if approver_left_joins:
+    left_joins.append(approver_left_joins)
+
+  return left_joins, order_by
+
+
+def _LabelSortClauses(sd, harmonized_labels, fmt):
+  """Give LEFT JOIN and ORDER BY terms for label sort directives."""
+  # Note: derived labels should work automatically.
+
+  # label_def_rows are already ordered by REVERSED rank.
+  wk_label_ids = [
+      label_id for label_id, rank, label in harmonized_labels
+      if label.lower().startswith('%s-' % sd) and rank is not None]
+  odd_label_ids = [
+      label_id for label_id, rank, label in harmonized_labels
+      if label.lower().startswith('%s-' % sd) and rank is None]
+  all_label_ids = wk_label_ids + odd_label_ids
+
+  if all_label_ids:
+    left_joins = [
+        (fmt('Issue2Label AS {alias} ON Issue.id = {alias}.issue_id '
+             'AND {alias}.label_id IN ({all_label_ph})',
+             all_label_ph=sql.PlaceHolders(all_label_ids)),
+         all_label_ids)]
+  else:
+    left_joins = []
+
+  order_by = []
+  # Reverse sort by using rev_sort_dir because we want NULLs at the end.
+  if wk_label_ids:
+    order_by.append(
+        (fmt('FIELD({alias}.label_id, {wk_label_ph}) {rev_sort_dir}',
+             wk_label_ph=sql.PlaceHolders(wk_label_ids)),
+         wk_label_ids))
+  if odd_label_ids:
+    # Even though oddball labels sort lexographically, use FIELD to determine
+    # the order so that the database sorts ints rather than strings for speed
+    order_by.append(
+        (fmt('FIELD({alias}.label_id, {odd_label_ph}) {rev_sort_dir}',
+             odd_label_ph=sql.PlaceHolders(odd_label_ids)),
+         odd_label_ids))
+
+  return left_joins, order_by
+
+
+def _CustomFieldSortClauses(
+    fd_list, value_type, value_column, alias, sort_dir, phase_name=None):
+  """Give LEFT JOIN and ORDER BY terms for custom fields of the given type."""
+  relevant_fd_list = [fd for fd in fd_list if fd.field_type == value_type]
+  if not relevant_fd_list:
+    return [], []
+
+  field_ids_ph = sql.PlaceHolders(relevant_fd_list)
+  def Fmt(sql_str):
+    return sql_str.format(
+        value_column=value_column, sort_dir=sort_dir,
+        field_ids_ph=field_ids_ph, alias=alias + '_' + value_column,
+        phase_name=phase_name)
+
+  left_joins = [
+      (Fmt('Issue2FieldValue AS {alias} ON Issue.id = {alias}.issue_id '
+           'AND {alias}.field_id IN ({field_ids_ph})'),
+       [fd.field_id for fd in relevant_fd_list])]
+
+  if phase_name:
+    left_joins.append(
+        (Fmt('IssuePhaseDef AS {alias}_phase '
+             'ON {alias}.phase_id = {alias}_phase.id '
+             'AND LOWER({alias}_phase.name) = LOWER(%s)'),
+         [phase_name]))
+
+  if value_type == tracker_pb2.FieldTypes.USER_TYPE:
+    left_joins.append(
+        (Fmt('User AS {alias}_user ON {alias}.user_id = {alias}_user.user_id'),
+         []))
+    order_by = [
+        (Fmt('ISNULL({alias}_user.email) {sort_dir}'), []),
+        (Fmt('{alias}_user.email {sort_dir}'), [])]
+  else:
+    # Unfortunately, this sorts on the best field value, not all of them.
+    order_by = [
+        (Fmt('ISNULL({alias}.{value_column}) {sort_dir}'), []),
+        (Fmt('{alias}.{value_column} {sort_dir}'), [])]
+
+  return left_joins, order_by
+
+
+_PROCESSORS = {
+    'component': _ProcessComponentSD,
+    'project': _ProcessProjectSD,
+    'reporter': _ProcessReporterSD,
+    'owner': _ProcessOwnerSD,
+    'cc': _ProcessCcSD,
+    'summary': _ProcessSummarySD,
+    'blocked': _ProcessBlockedSD,
+    'blockedon': _ProcessBlockedOnSD,
+    'blocking': _ProcessBlockingSD,
+    'mergedinto': _ProcessMergedIntoSD,
+    'ownerlastvisit': _ProcessOwnerLastVisitSD,
+    }
+
+
+def _OneSortDirective(
+    i, sd, harmonized_labels, harmonized_statuses, harmonized_fields):
+  """Return SQL clauses to do the sorting for one sort directive."""
+  alias = 'Sort%d' % i
+  if sd.startswith('-'):
+    sort_dir, rev_sort_dir = 'DESC', 'ASC'
+    sd = sd[1:]
+  else:
+    sort_dir, rev_sort_dir = 'ASC', 'DESC'
+
+  def Fmt(sql_str, **kwargs):
+    return sql_str.format(
+        sort_dir=sort_dir, rev_sort_dir=rev_sort_dir, alias=alias,
+        sd=sd, col=FIELDS_TO_COLUMNS.get(sd, sd), **kwargs)
+
+  if sd in NATIVE_SORTABLE_FIELDS:
+    left_joins = []
+    order_by = [(Fmt('Issue.{col} {sort_dir}'), [])]
+    return left_joins, order_by
+
+  elif sd in _PROCESSORS:
+    proc = _PROCESSORS[sd]
+    return proc(Fmt)
+
+  elif sd == 'status':
+    return _ProcessStatusSD(Fmt, harmonized_statuses)
+  else:  # otherwise, it must be a field or label, or both
+    return _ProcessCustomAndLabelSD(
+        sd, harmonized_labels, harmonized_fields, alias, sort_dir, Fmt)