Project import generated by Copybara.
GitOrigin-RevId: d9e9e3fb4e31372ec1fb43b178994ca78fa8fe70
diff --git a/search/ast2sort.py b/search/ast2sort.py
new file mode 100644
index 0000000..08ed346
--- /dev/null
+++ b/search/ast2sort.py
@@ -0,0 +1,451 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Convert a user's issue sorting directives into SQL clauses.
+
+Some sort directives translate into simple ORDER BY column specifications.
+Other sort directives require that a LEFT JOIN be done to bring in
+relevant information that is then used in the ORDER BY.
+
+Sorting based on strings can slow down the DB because long sort-keys
+must be loaded into RAM, which means that fewer sort-keys fit into the
+DB's sorting buffers at a time. Also, Monorail defines the sorting
+order of well-known labels and statuses based on the order in which
+they are defined in the project's config. So, we determine the sort order of
+labels and status values before executing the query and then use the MySQL
+FIELD() function to sort their IDs in the desired order, without sorting
+strings.
+
+For more info, see the "Sorting in Monorail" and "What makes Monorail Fast?"
+design docs.
+"""
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+
+import logging
+
+from framework import sql
+from proto import tracker_pb2
+from tracker import tracker_constants
+
+
+NATIVE_SORTABLE_FIELDS = [
+ 'id', 'stars', 'attachments', 'opened', 'closed', 'modified',
+ 'ownermodified', 'statusmodified', 'componentmodified',
+ ]
+
+FIELDS_TO_COLUMNS = {
+ 'id': 'local_id',
+ 'stars': 'star_count',
+ 'attachments': 'attachment_count',
+ 'ownermodified': 'owner_modified',
+ 'statusmodified': 'status_modified',
+ 'componentmodified': 'component_modified',
+ }
+
+APPROVAL_STATUS_SORT_ORDER = [
+ '\'not_set\'', '\'needs_review\'', '\'na\'', '\'review_requested\'',
+ '\'review_started\'', '\'need_info\'', '\'approved\'', '\'not_approved\'']
+
+
+def BuildSortClauses(
+ sort_directives, harmonized_labels, harmonized_statuses,
+ harmonized_fields):
+ """Return LEFT JOIN and ORDER BY clauses needed to sort the results."""
+ if not sort_directives:
+ return [], []
+
+ all_left_joins = []
+ all_order_by = []
+ for i, sd in enumerate(sort_directives):
+ left_join_parts, order_by_parts = _OneSortDirective(
+ i, sd, harmonized_labels, harmonized_statuses, harmonized_fields)
+ all_left_joins.extend(left_join_parts)
+ all_order_by.extend(order_by_parts)
+
+ return all_left_joins, all_order_by
+
+
+def _ProcessProjectSD(fmt):
+ """Convert a 'project' sort directive into SQL."""
+ left_joins = []
+ order_by = [(fmt('Issue.project_id {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessReporterSD(fmt):
+ """Convert a 'reporter' sort directive into SQL."""
+ left_joins = [
+ (fmt('User AS {alias} ON Issue.reporter_id = {alias}.user_id'), [])]
+ order_by = [
+ (fmt('ISNULL({alias}.email) {sort_dir}'), []),
+ (fmt('{alias}.email {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessOwnerSD(fmt):
+ """Convert a 'owner' sort directive into SQL."""
+ left_joins = [
+ (fmt('User AS {alias}_exp ON Issue.owner_id = {alias}_exp.user_id'), []),
+ (fmt('User AS {alias}_der ON '
+ 'Issue.derived_owner_id = {alias}_der.user_id'), [])]
+ order_by = [
+ (fmt('(ISNULL({alias}_exp.email) AND ISNULL({alias}_der.email)) '
+ '{sort_dir}'), []),
+ (fmt('CONCAT({alias}_exp.email, {alias}_der.email) {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessCcSD(fmt):
+ """Convert a 'cc' sort directive into SQL."""
+ # Note: derived cc's are included automatically.
+ # Note: This sorts on the best Cc, not all Cc addresses.
+ # Being more exact might require GROUP BY and GROUP_CONCAT().
+ left_joins = [
+ (fmt('Issue2Cc AS {alias} ON Issue.id = {alias}.issue_id '
+ 'LEFT JOIN User AS {alias}_user '
+ 'ON {alias}.cc_id = {alias}_user.user_id'), [])]
+ order_by = [
+ (fmt('ISNULL({alias}_user.email) {sort_dir}'), []),
+ (fmt('{alias}_user.email {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessComponentSD(fmt):
+ """Convert a 'component' sort directive into SQL."""
+ # Note: derived components are included automatically.
+ # Note: This sorts on the best component, not all of them.
+ # Being more exact might require GROUP BY and GROUP_CONCAT().
+ left_joins = [
+ (fmt('Issue2Component AS {alias} ON Issue.id = {alias}.issue_id '
+ 'LEFT JOIN ComponentDef AS {alias}_component '
+ 'ON {alias}.component_id = {alias}_component.id'), [])]
+ order_by = [
+ (fmt('ISNULL({alias}_component.path) {sort_dir}'), []),
+ (fmt('{alias}_component.path {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessSummarySD(fmt):
+ """Convert a 'summary' sort directive into SQL."""
+ left_joins = [
+ (fmt('IssueSummary AS {alias} ON Issue.id = {alias}.issue_id'), [])]
+ order_by = [(fmt('{alias}.summary {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessStatusSD(fmt, harmonized_statuses):
+ """Convert a 'status' sort directive into SQL."""
+ left_joins = []
+ # Note: status_def_rows are already ordered by REVERSED rank.
+ wk_status_ids = [
+ stat_id for stat_id, rank, _ in harmonized_statuses
+ if rank is not None]
+ odd_status_ids = [
+ stat_id for stat_id, rank, _ in harmonized_statuses
+ if rank is None]
+ wk_status_ph = sql.PlaceHolders(wk_status_ids)
+ # Even though oddball statuses sort lexographically, use FIELD to determine
+ # the order so that the database sorts ints rather than strings for speed.
+ odd_status_ph = sql.PlaceHolders(odd_status_ids)
+
+ order_by = [] # appended to below: both well-known and oddball can apply
+ sort_col = ('IF(ISNULL(Issue.status_id), Issue.derived_status_id, '
+ 'Issue.status_id)')
+ # Reverse sort by using rev_sort_dir because we want NULLs at the end.
+ if wk_status_ids:
+ order_by.append(
+ (fmt('FIELD({sort_col}, {wk_status_ph}) {rev_sort_dir}',
+ sort_col=sort_col, wk_status_ph=wk_status_ph),
+ wk_status_ids))
+ if odd_status_ids:
+ order_by.append(
+ (fmt('FIELD({sort_col}, {odd_status_ph}) {rev_sort_dir}',
+ sort_col=sort_col, odd_status_ph=odd_status_ph),
+ odd_status_ids))
+
+ return left_joins, order_by
+
+
+def _ProcessBlockedSD(fmt):
+ """Convert a 'blocked' sort directive into SQL."""
+ left_joins = [
+ (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.issue_id '
+ 'AND {alias}.kind = %s'),
+ ['blockedon'])]
+ order_by = [(fmt('ISNULL({alias}.dst_issue_id) {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessBlockedOnSD(fmt):
+ """Convert a 'blockedon' sort directive into SQL."""
+ left_joins = [
+ (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.issue_id '
+ 'AND {alias}.kind = %s'),
+ ['blockedon'])]
+ order_by = [(fmt('ISNULL({alias}.dst_issue_id) {sort_dir}'), []),
+ (fmt('{alias}.dst_issue_id {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessBlockingSD(fmt):
+ """Convert a 'blocking' sort directive into SQL."""
+ left_joins = [
+ (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.dst_issue_id '
+ 'AND {alias}.kind = %s'),
+ ['blockedon'])]
+ order_by = [(fmt('ISNULL({alias}.issue_id) {sort_dir}'), []),
+ (fmt('{alias}.issue_id {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessMergedIntoSD(fmt):
+ """Convert a 'mergedinto' sort directive into SQL."""
+ left_joins = [
+ (fmt('IssueRelation AS {alias} ON Issue.id = {alias}.issue_id '
+ 'AND {alias}.kind = %s'),
+ ['mergedinto'])]
+ order_by = [(fmt('ISNULL({alias}.dst_issue_id) {sort_dir}'), []),
+ (fmt('{alias}.dst_issue_id {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessOwnerLastVisitSD(fmt):
+ """Convert a 'ownerlastvisit' sort directive into SQL."""
+ left_joins = [
+ (fmt('User AS {alias} ON (Issue.owner_id = {alias}.user_id OR '
+ 'Issue.derived_owner_id = {alias}.user_id)'), [])]
+ order_by = [
+ (fmt('ISNULL({alias}.last_visit_timestamp) {sort_dir}'), []),
+ (fmt('{alias}.last_visit_timestamp {sort_dir}'), [])]
+ return left_joins, order_by
+
+
+def _ProcessCustomAndLabelSD(
+ sd, harmonized_labels, harmonized_fields, alias, sort_dir, fmt):
+ """Convert a label or custom field sort directive into SQL."""
+ left_joins = []
+ order_by = []
+ phase_name = None
+ # If a custom field is an approval_type with no suffix, the
+ # approvals should be sorted by status.
+ approval_suffix = '-status'
+ approval_fd_list = []
+
+ # Check for reserved suffixes in col_name sd.
+ # TODO(jojwang): check for other suffixes in
+ # tracker_constants.RESERVED_COL_NAME_SUFFIXES
+ if sd.endswith(tracker_constants.APPROVER_COL_SUFFIX):
+ field_name = sd[:-len(tracker_constants.APPROVER_COL_SUFFIX)]
+ fd_list = []
+ approval_fd_list = [fd for fd in harmonized_fields
+ if fd.field_name.lower() == field_name]
+ approval_suffix = tracker_constants.APPROVER_COL_SUFFIX
+ else:
+ field_name = sd
+ if '.' in sd:
+ phase_name, field_name = sd.split('.', 1)
+
+ fd_list = [fd for fd in harmonized_fields
+ if fd.field_name.lower() == field_name]
+ if not phase_name:
+ approval_fd_list = [fd for fd in fd_list if
+ fd.field_type == tracker_pb2.FieldTypes.APPROVAL_TYPE]
+
+ # 'alias' is used for all the CustomField, Approval, and Label sort clauses.
+ # Custom field aliases are alwyas appended by the value_col name.
+ # Approval aliases are always appended with 'approval'.
+ # Label clauses use 'alias' as-is.
+ if fd_list:
+ int_left_joins, int_order_by = _CustomFieldSortClauses(
+ fd_list, tracker_pb2.FieldTypes.INT_TYPE, 'int_value',
+ alias, sort_dir, phase_name=phase_name)
+ str_left_joins, str_order_by = _CustomFieldSortClauses(
+ fd_list, tracker_pb2.FieldTypes.STR_TYPE, 'str_value',
+ alias, sort_dir, phase_name=phase_name)
+ user_left_joins, user_order_by = _CustomFieldSortClauses(
+ fd_list, tracker_pb2.FieldTypes.USER_TYPE, 'user_id',
+ alias, sort_dir, phase_name=phase_name)
+ left_joins.extend(int_left_joins + str_left_joins + user_left_joins)
+ order_by.extend(int_order_by + str_order_by + user_order_by)
+
+ if approval_fd_list:
+ approval_left_joins, approval_order_by = _ApprovalFieldSortClauses(
+ approval_fd_list, approval_suffix, fmt)
+ left_joins.extend(approval_left_joins)
+ order_by.extend(approval_order_by)
+
+ label_left_joinss, label_order_by = _LabelSortClauses(
+ sd, harmonized_labels, fmt)
+ left_joins.extend(label_left_joinss)
+ order_by.extend(label_order_by)
+
+ return left_joins, order_by
+
+
+def _ApprovalFieldSortClauses(
+ approval_fd_list, approval_suffix, fmt):
+ """Give LEFT JOIN and ORDER BY terms for approval sort directives."""
+ approver_left_joins = None
+ if approval_suffix == tracker_constants.APPROVER_COL_SUFFIX:
+ tbl_name = 'IssueApproval2Approver'
+ approver_left_joins = (
+ fmt('User AS {alias}_approval_user '
+ 'ON {alias}_approval.approver_id = {alias}_approval_user.user_id'),
+ [])
+ order_by = [
+ (fmt('ISNULL({alias}_approval_user.email) {sort_dir}'), []),
+ (fmt('{alias}_approval_user.email {sort_dir}'), [])]
+ else:
+ tbl_name = 'Issue2ApprovalValue'
+ order_by = [
+ (fmt('FIELD({alias}_approval.status, {approval_status_ph}) '
+ '{rev_sort_dir}',
+ approval_status_ph=sql.PlaceHolders(APPROVAL_STATUS_SORT_ORDER)),
+ APPROVAL_STATUS_SORT_ORDER
+ )]
+
+ left_joins = [(
+ fmt('{tbl_name} AS {alias}_approval '
+ 'ON Issue.id = {alias}_approval.issue_id '
+ 'AND {alias}_approval.approval_id IN ({approval_ids_ph})',
+ approval_ids_ph=sql.PlaceHolders(approval_fd_list),
+ tbl_name=tbl_name),
+ [fd.field_id for fd in approval_fd_list]
+ )]
+
+ if approver_left_joins:
+ left_joins.append(approver_left_joins)
+
+ return left_joins, order_by
+
+
+def _LabelSortClauses(sd, harmonized_labels, fmt):
+ """Give LEFT JOIN and ORDER BY terms for label sort directives."""
+ # Note: derived labels should work automatically.
+
+ # label_def_rows are already ordered by REVERSED rank.
+ wk_label_ids = [
+ label_id for label_id, rank, label in harmonized_labels
+ if label.lower().startswith('%s-' % sd) and rank is not None]
+ odd_label_ids = [
+ label_id for label_id, rank, label in harmonized_labels
+ if label.lower().startswith('%s-' % sd) and rank is None]
+ all_label_ids = wk_label_ids + odd_label_ids
+
+ if all_label_ids:
+ left_joins = [
+ (fmt('Issue2Label AS {alias} ON Issue.id = {alias}.issue_id '
+ 'AND {alias}.label_id IN ({all_label_ph})',
+ all_label_ph=sql.PlaceHolders(all_label_ids)),
+ all_label_ids)]
+ else:
+ left_joins = []
+
+ order_by = []
+ # Reverse sort by using rev_sort_dir because we want NULLs at the end.
+ if wk_label_ids:
+ order_by.append(
+ (fmt('FIELD({alias}.label_id, {wk_label_ph}) {rev_sort_dir}',
+ wk_label_ph=sql.PlaceHolders(wk_label_ids)),
+ wk_label_ids))
+ if odd_label_ids:
+ # Even though oddball labels sort lexographically, use FIELD to determine
+ # the order so that the database sorts ints rather than strings for speed
+ order_by.append(
+ (fmt('FIELD({alias}.label_id, {odd_label_ph}) {rev_sort_dir}',
+ odd_label_ph=sql.PlaceHolders(odd_label_ids)),
+ odd_label_ids))
+
+ return left_joins, order_by
+
+
+def _CustomFieldSortClauses(
+ fd_list, value_type, value_column, alias, sort_dir, phase_name=None):
+ """Give LEFT JOIN and ORDER BY terms for custom fields of the given type."""
+ relevant_fd_list = [fd for fd in fd_list if fd.field_type == value_type]
+ if not relevant_fd_list:
+ return [], []
+
+ field_ids_ph = sql.PlaceHolders(relevant_fd_list)
+ def Fmt(sql_str):
+ return sql_str.format(
+ value_column=value_column, sort_dir=sort_dir,
+ field_ids_ph=field_ids_ph, alias=alias + '_' + value_column,
+ phase_name=phase_name)
+
+ left_joins = [
+ (Fmt('Issue2FieldValue AS {alias} ON Issue.id = {alias}.issue_id '
+ 'AND {alias}.field_id IN ({field_ids_ph})'),
+ [fd.field_id for fd in relevant_fd_list])]
+
+ if phase_name:
+ left_joins.append(
+ (Fmt('IssuePhaseDef AS {alias}_phase '
+ 'ON {alias}.phase_id = {alias}_phase.id '
+ 'AND LOWER({alias}_phase.name) = LOWER(%s)'),
+ [phase_name]))
+
+ if value_type == tracker_pb2.FieldTypes.USER_TYPE:
+ left_joins.append(
+ (Fmt('User AS {alias}_user ON {alias}.user_id = {alias}_user.user_id'),
+ []))
+ order_by = [
+ (Fmt('ISNULL({alias}_user.email) {sort_dir}'), []),
+ (Fmt('{alias}_user.email {sort_dir}'), [])]
+ else:
+ # Unfortunately, this sorts on the best field value, not all of them.
+ order_by = [
+ (Fmt('ISNULL({alias}.{value_column}) {sort_dir}'), []),
+ (Fmt('{alias}.{value_column} {sort_dir}'), [])]
+
+ return left_joins, order_by
+
+
+_PROCESSORS = {
+ 'component': _ProcessComponentSD,
+ 'project': _ProcessProjectSD,
+ 'reporter': _ProcessReporterSD,
+ 'owner': _ProcessOwnerSD,
+ 'cc': _ProcessCcSD,
+ 'summary': _ProcessSummarySD,
+ 'blocked': _ProcessBlockedSD,
+ 'blockedon': _ProcessBlockedOnSD,
+ 'blocking': _ProcessBlockingSD,
+ 'mergedinto': _ProcessMergedIntoSD,
+ 'ownerlastvisit': _ProcessOwnerLastVisitSD,
+ }
+
+
+def _OneSortDirective(
+ i, sd, harmonized_labels, harmonized_statuses, harmonized_fields):
+ """Return SQL clauses to do the sorting for one sort directive."""
+ alias = 'Sort%d' % i
+ if sd.startswith('-'):
+ sort_dir, rev_sort_dir = 'DESC', 'ASC'
+ sd = sd[1:]
+ else:
+ sort_dir, rev_sort_dir = 'ASC', 'DESC'
+
+ def Fmt(sql_str, **kwargs):
+ return sql_str.format(
+ sort_dir=sort_dir, rev_sort_dir=rev_sort_dir, alias=alias,
+ sd=sd, col=FIELDS_TO_COLUMNS.get(sd, sd), **kwargs)
+
+ if sd in NATIVE_SORTABLE_FIELDS:
+ left_joins = []
+ order_by = [(Fmt('Issue.{col} {sort_dir}'), [])]
+ return left_joins, order_by
+
+ elif sd in _PROCESSORS:
+ proc = _PROCESSORS[sd]
+ return proc(Fmt)
+
+ elif sd == 'status':
+ return _ProcessStatusSD(Fmt, harmonized_statuses)
+ else: # otherwise, it must be a field or label, or both
+ return _ProcessCustomAndLabelSD(
+ sd, harmonized_labels, harmonized_fields, alias, sort_dir, Fmt)