Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 1 | # Copyright 2018 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style |
| 3 | # license that can be found in the LICENSE file or at |
| 4 | # https://developers.google.com/open-source/licenses/bsd |
| 5 | |
| 6 | """A service for querying data for charts. |
| 7 | |
| 8 | Functions for querying the IssueSnapshot table and associated join tables. |
| 9 | """ |
| 10 | from __future__ import print_function |
| 11 | from __future__ import division |
| 12 | from __future__ import absolute_import |
| 13 | |
| 14 | import logging |
| 15 | import settings |
| 16 | import time |
| 17 | |
| 18 | from features import hotlist_helpers |
| 19 | from framework import framework_helpers |
| 20 | from framework import sql |
| 21 | from search import search_helpers |
| 22 | from tracker import tracker_bizobj |
| 23 | from tracker import tracker_helpers |
| 24 | from search import query2ast |
| 25 | from search import ast2select |
| 26 | from search import ast2ast |
| 27 | |
| 28 | |
| 29 | ISSUESNAPSHOT_TABLE_NAME = 'IssueSnapshot' |
| 30 | ISSUESNAPSHOT2CC_TABLE_NAME = 'IssueSnapshot2Cc' |
| 31 | ISSUESNAPSHOT2COMPONENT_TABLE_NAME = 'IssueSnapshot2Component' |
| 32 | ISSUESNAPSHOT2LABEL_TABLE_NAME = 'IssueSnapshot2Label' |
| 33 | |
| 34 | ISSUESNAPSHOT_COLS = ['id', 'issue_id', 'shard', 'project_id', 'local_id', |
| 35 | 'reporter_id', 'owner_id', 'status_id', 'period_start', 'period_end', |
| 36 | 'is_open'] |
| 37 | ISSUESNAPSHOT2CC_COLS = ['issuesnapshot_id', 'cc_id'] |
| 38 | ISSUESNAPSHOT2COMPONENT_COLS = ['issuesnapshot_id', 'component_id'] |
| 39 | ISSUESNAPSHOT2LABEL_COLS = ['issuesnapshot_id', 'label_id'] |
| 40 | |
| 41 | |
| 42 | class ChartService(object): |
| 43 | """Class for querying chart data.""" |
| 44 | |
| 45 | def __init__(self, config_service): |
| 46 | """Constructor for ChartService. |
| 47 | |
| 48 | Args: |
| 49 | config_service (ConfigService): An instance of ConfigService. |
| 50 | """ |
| 51 | self.config_service = config_service |
| 52 | |
| 53 | # Set up SQL table objects. |
| 54 | self.issuesnapshot_tbl = sql.SQLTableManager(ISSUESNAPSHOT_TABLE_NAME) |
| 55 | self.issuesnapshot2cc_tbl = sql.SQLTableManager( |
| 56 | ISSUESNAPSHOT2CC_TABLE_NAME) |
| 57 | self.issuesnapshot2component_tbl = sql.SQLTableManager( |
| 58 | ISSUESNAPSHOT2COMPONENT_TABLE_NAME) |
| 59 | self.issuesnapshot2label_tbl = sql.SQLTableManager( |
| 60 | ISSUESNAPSHOT2LABEL_TABLE_NAME) |
| 61 | |
| 62 | def QueryIssueSnapshots(self, cnxn, services, unixtime, effective_ids, |
| 63 | project, perms, group_by=None, label_prefix=None, |
| 64 | query=None, canned_query=None, hotlist=None): |
| 65 | """Queries historical issue counts grouped by label or component. |
| 66 | |
| 67 | Args: |
| 68 | cnxn: A MonorailConnection instance. |
| 69 | services: A Services instance. |
| 70 | unixtime: An integer representing the Unix time in seconds. |
| 71 | effective_ids: The effective User IDs associated with the current user. |
| 72 | project: A project object representing the current project. |
| 73 | perms: A permissions object associated with the current user. |
| 74 | group_by (str, optional): Which dimension to group by. Values can |
| 75 | be 'label', 'component', or None, in which case no grouping will |
| 76 | be applied. |
| 77 | label_prefix: Required when group_by is 'label.' Will limit the query to |
| 78 | only labels with the specified prefix (for example 'Pri'). |
| 79 | query (str, optional): A query string from the request to apply to |
| 80 | the snapshot query. |
| 81 | canned_query (str, optional): Parsed canned query applied to the query |
| 82 | scope. |
| 83 | hotlist (Hotlist, optional): Hotlist to search under (in lieu of project). |
| 84 | |
| 85 | Returns: |
| 86 | 1. A dict of {'2nd dimension or "total"': number of occurences}. |
| 87 | 2. A list of any unsupported query conditions in query. |
| 88 | 3. A boolean that is true if any results were capped. |
| 89 | """ |
| 90 | if hotlist: |
| 91 | # TODO(jeffcarp): Get project_ids in a more efficient manner. We can |
| 92 | # query for "SELECT DISTINCT(project_id)" for all issues in hotlist. |
| 93 | issues_list = services.issue.GetIssues(cnxn, |
| 94 | [hotlist_issue.issue_id for hotlist_issue in hotlist.items]) |
| 95 | hotlist_issues_project_ids = hotlist_helpers.GetAllProjectsOfIssues( |
| 96 | [issue for issue in issues_list]) |
| 97 | config_list = hotlist_helpers.GetAllConfigsOfProjects( |
| 98 | cnxn, hotlist_issues_project_ids, services) |
| 99 | project_config = tracker_bizobj.HarmonizeConfigs(config_list) |
| 100 | else: |
| 101 | project_config = services.config.GetProjectConfig(cnxn, |
| 102 | project.project_id) |
| 103 | |
| 104 | if project: |
| 105 | project_ids = [project.project_id] |
| 106 | else: |
| 107 | project_ids = hotlist_issues_project_ids |
| 108 | |
| 109 | try: |
| 110 | query_left_joins, query_where, unsupported_conds = self._QueryToWhere( |
| 111 | cnxn, services, project_config, query, canned_query, project_ids) |
| 112 | except ast2select.NoPossibleResults: |
| 113 | return {}, ['Invalid query.'], False |
| 114 | |
| 115 | restricted_label_ids = search_helpers.GetPersonalAtRiskLabelIDs( |
| 116 | cnxn, None, self.config_service, effective_ids, project, perms) |
| 117 | |
| 118 | left_joins = [ |
| 119 | ('Issue ON IssueSnapshot.issue_id = Issue.id', []), |
| 120 | ] |
| 121 | |
| 122 | if restricted_label_ids: |
| 123 | left_joins.append( |
| 124 | (('Issue2Label AS Forbidden_label' |
| 125 | ' ON Issue.id = Forbidden_label.issue_id' |
| 126 | ' AND Forbidden_label.label_id IN (%s)' % ( |
| 127 | sql.PlaceHolders(restricted_label_ids) |
| 128 | )), restricted_label_ids)) |
| 129 | |
| 130 | if effective_ids: |
| 131 | left_joins.append( |
| 132 | ('Issue2Cc AS I2cc' |
| 133 | ' ON Issue.id = I2cc.issue_id' |
| 134 | ' AND I2cc.cc_id IN (%s)' % sql.PlaceHolders(effective_ids), |
| 135 | effective_ids)) |
| 136 | |
| 137 | # TODO(jeffcarp): Handle case where there are issues with no labels. |
| 138 | where = [ |
| 139 | ('IssueSnapshot.period_start <= %s', [unixtime]), |
| 140 | ('IssueSnapshot.period_end > %s', [unixtime]), |
| 141 | ('Issue.is_spam = %s', [False]), |
| 142 | ('Issue.deleted = %s', [False]), |
| 143 | ] |
| 144 | if project_ids: |
| 145 | where.append( |
| 146 | ('IssueSnapshot.project_id IN (%s)' % sql.PlaceHolders(project_ids), |
| 147 | project_ids)) |
| 148 | |
| 149 | forbidden_label_clause = 'Forbidden_label.label_id IS NULL' |
| 150 | if effective_ids: |
| 151 | if restricted_label_ids: |
| 152 | forbidden_label_clause = ' OR %s' % forbidden_label_clause |
| 153 | else: |
| 154 | forbidden_label_clause = '' |
| 155 | |
| 156 | where.append( |
| 157 | (( |
| 158 | '(Issue.reporter_id IN (%s)' |
| 159 | ' OR Issue.owner_id IN (%s)' |
| 160 | ' OR I2cc.cc_id IS NOT NULL' |
| 161 | '%s)' |
| 162 | ) % ( |
| 163 | sql.PlaceHolders(effective_ids), sql.PlaceHolders(effective_ids), |
| 164 | forbidden_label_clause |
| 165 | ), |
| 166 | list(effective_ids) + list(effective_ids) |
| 167 | )) |
| 168 | else: |
| 169 | where.append((forbidden_label_clause, [])) |
| 170 | |
| 171 | if group_by == 'component': |
| 172 | cols = ['Comp.path', 'COUNT(IssueSnapshot.issue_id)'] |
| 173 | left_joins.extend([ |
| 174 | (('IssueSnapshot2Component AS Is2c ON' |
| 175 | ' Is2c.issuesnapshot_id = IssueSnapshot.id'), []), |
| 176 | ('ComponentDef AS Comp ON Comp.id = Is2c.component_id', []), |
| 177 | ]) |
| 178 | group_by = ['Comp.path'] |
| 179 | elif group_by == 'label': |
| 180 | cols = ['Lab.label', 'COUNT(IssueSnapshot.issue_id)'] |
| 181 | left_joins.extend([ |
| 182 | (('IssueSnapshot2Label AS Is2l' |
| 183 | ' ON Is2l.issuesnapshot_id = IssueSnapshot.id'), []), |
| 184 | ('LabelDef AS Lab ON Lab.id = Is2l.label_id', []), |
| 185 | ]) |
| 186 | |
| 187 | if not label_prefix: |
| 188 | raise ValueError('`label_prefix` required when grouping by label.') |
| 189 | |
| 190 | # TODO(jeffcarp): If LookupIDsOfLabelsMatching() is called on output, |
| 191 | # ensure regex is case-insensitive. |
| 192 | where.append(('LOWER(Lab.label) LIKE %s', [label_prefix.lower() + '-%'])) |
| 193 | group_by = ['Lab.label'] |
| 194 | elif group_by == 'open': |
| 195 | cols = ['IssueSnapshot.is_open', |
| 196 | 'COUNT(IssueSnapshot.issue_id) AS issue_count'] |
| 197 | group_by = ['IssueSnapshot.is_open'] |
| 198 | elif group_by == 'status': |
| 199 | left_joins.append(('StatusDef AS Stats ON ' \ |
| 200 | 'Stats.id = IssueSnapshot.status_id', [])) |
| 201 | cols = ['Stats.status', 'COUNT(IssueSnapshot.issue_id)'] |
| 202 | group_by = ['Stats.status'] |
| 203 | elif group_by == 'owner': |
| 204 | cols = ['IssueSnapshot.owner_id', 'COUNT(IssueSnapshot.issue_id)'] |
| 205 | group_by = ['IssueSnapshot.owner_id'] |
| 206 | elif not group_by: |
| 207 | cols = ['IssueSnapshot.issue_id'] |
| 208 | else: |
| 209 | raise ValueError('`group_by` must be label, component, ' \ |
| 210 | 'open, status, owner or None.') |
| 211 | |
| 212 | if query_left_joins: |
| 213 | left_joins.extend(query_left_joins) |
| 214 | |
| 215 | if query_where: |
| 216 | where.extend(query_where) |
| 217 | |
| 218 | if hotlist: |
| 219 | left_joins.extend([ |
| 220 | (('IssueSnapshot2Hotlist AS Is2h' |
| 221 | ' ON Is2h.issuesnapshot_id = IssueSnapshot.id' |
| 222 | ' AND Is2h.hotlist_id = %s'), [hotlist.hotlist_id]), |
| 223 | ]) |
| 224 | where.append( |
| 225 | ('Is2h.hotlist_id = %s', [hotlist.hotlist_id])) |
| 226 | |
| 227 | promises = [] |
| 228 | |
| 229 | for shard_id in range(settings.num_logical_shards): |
| 230 | count_stmt, stmt_args = self._BuildSnapshotQuery(cols=cols, |
| 231 | where=where, joins=left_joins, group_by=group_by, |
| 232 | shard_id=shard_id) |
| 233 | promises.append(framework_helpers.Promise(cnxn.Execute, |
| 234 | count_stmt, stmt_args, shard_id=shard_id)) |
| 235 | |
| 236 | shard_values_dict = {} |
| 237 | |
| 238 | search_limit_reached = False |
| 239 | |
| 240 | for promise in promises: |
| 241 | # Wait for each query to complete and add it to the dict. |
| 242 | shard_values = list(promise.WaitAndGetValue()) |
| 243 | |
| 244 | if not shard_values: |
| 245 | continue |
| 246 | if group_by: |
| 247 | for name, count in shard_values: |
| 248 | if count >= settings.chart_query_max_rows: |
| 249 | search_limit_reached = True |
| 250 | |
| 251 | shard_values_dict.setdefault(name, 0) |
| 252 | shard_values_dict[name] += count |
| 253 | else: |
| 254 | if shard_values[0][0] >= settings.chart_query_max_rows: |
| 255 | search_limit_reached = True |
| 256 | |
| 257 | shard_values_dict.setdefault('total', 0) |
| 258 | shard_values_dict['total'] += shard_values[0][0] |
| 259 | |
| 260 | unsupported_field_names = list(set([ |
| 261 | field.field_name |
| 262 | for cond in unsupported_conds |
| 263 | for field in cond.field_defs |
| 264 | ])) |
| 265 | |
| 266 | return shard_values_dict, unsupported_field_names, search_limit_reached |
| 267 | |
| 268 | def StoreIssueSnapshots(self, cnxn, issues, commit=True): |
| 269 | """Adds an IssueSnapshot and updates the previous one for each issue.""" |
| 270 | for issue in issues: |
| 271 | right_now = self._currentTime() |
| 272 | |
| 273 | # Update previous snapshot of current issue's end time to right now. |
| 274 | self.issuesnapshot_tbl.Update(cnxn, |
| 275 | delta={'period_end': right_now}, |
| 276 | where=[('IssueSnapshot.issue_id = %s', [issue.issue_id]), |
| 277 | ('IssueSnapshot.period_end = %s', |
| 278 | [settings.maximum_snapshot_period_end])], |
| 279 | commit=commit) |
| 280 | |
| 281 | config = self.config_service.GetProjectConfig(cnxn, issue.project_id) |
| 282 | period_end = settings.maximum_snapshot_period_end |
| 283 | is_open = tracker_helpers.MeansOpenInProject( |
| 284 | tracker_bizobj.GetStatus(issue), config) |
| 285 | shard = issue.issue_id % settings.num_logical_shards |
| 286 | status = tracker_bizobj.GetStatus(issue) |
| 287 | status_id = self.config_service.LookupStatusID( |
| 288 | cnxn, issue.project_id, status) or None |
| 289 | owner_id = tracker_bizobj.GetOwnerId(issue) or None |
| 290 | |
| 291 | issuesnapshot_rows = [(issue.issue_id, shard, issue.project_id, |
| 292 | issue.local_id, issue.reporter_id, owner_id, status_id, right_now, |
| 293 | period_end, is_open)] |
| 294 | |
| 295 | ids = self.issuesnapshot_tbl.InsertRows( |
| 296 | cnxn, ISSUESNAPSHOT_COLS[1:], |
| 297 | issuesnapshot_rows, |
| 298 | replace=True, commit=commit, |
| 299 | return_generated_ids=True) |
| 300 | issuesnapshot_id = ids[0] |
| 301 | |
| 302 | # Add all labels to IssueSnapshot2Label. |
| 303 | label_rows = [ |
| 304 | (issuesnapshot_id, |
| 305 | self.config_service.LookupLabelID(cnxn, issue.project_id, label)) |
| 306 | for label in tracker_bizobj.GetLabels(issue) |
| 307 | ] |
| 308 | self.issuesnapshot2label_tbl.InsertRows( |
| 309 | cnxn, ISSUESNAPSHOT2LABEL_COLS, |
| 310 | label_rows, replace=True, commit=commit) |
| 311 | |
| 312 | # Add all CCs to IssueSnapshot2Cc. |
| 313 | cc_rows = [ |
| 314 | (issuesnapshot_id, cc_id) |
| 315 | for cc_id in tracker_bizobj.GetCcIds(issue) |
| 316 | ] |
| 317 | self.issuesnapshot2cc_tbl.InsertRows( |
| 318 | cnxn, ISSUESNAPSHOT2CC_COLS, |
| 319 | cc_rows, |
| 320 | replace=True, commit=commit) |
| 321 | |
| 322 | # Add all components to IssueSnapshot2Component. |
| 323 | component_rows = [ |
| 324 | (issuesnapshot_id, component_id) |
| 325 | for component_id in issue.component_ids |
| 326 | ] |
| 327 | self.issuesnapshot2component_tbl.InsertRows( |
| 328 | cnxn, ISSUESNAPSHOT2COMPONENT_COLS, |
| 329 | component_rows, |
| 330 | replace=True, commit=commit) |
| 331 | |
| 332 | # Add all components to IssueSnapshot2Hotlist. |
| 333 | # This is raw SQL to obviate passing FeaturesService down through |
| 334 | # the call stack wherever this function is called. |
| 335 | # TODO(jrobbins): sort out dependencies between service classes. |
| 336 | cnxn.Execute(''' |
| 337 | INSERT INTO IssueSnapshot2Hotlist (issuesnapshot_id, hotlist_id) |
| 338 | SELECT %s, hotlist_id FROM Hotlist2Issue WHERE issue_id = %s |
| 339 | ''', [issuesnapshot_id, issue.issue_id]) |
| 340 | |
| 341 | def ExpungeHotlistsFromIssueSnapshots(self, cnxn, hotlist_ids, commit=True): |
| 342 | """Expunge the existence of hotlists from issue snapshots. |
| 343 | |
| 344 | Args: |
| 345 | cnxn: connection to SQL database. |
| 346 | hotlist_ids: list of hotlist_ids for hotlists we want to delete. |
| 347 | commit: set to False to skip the DB commit and do it in a caller. |
| 348 | """ |
| 349 | vals_ph = sql.PlaceHolders(hotlist_ids) |
| 350 | cnxn.Execute( |
| 351 | 'DELETE FROM IssueSnapshot2Hotlist ' |
| 352 | 'WHERE hotlist_id IN ({vals_ph})'.format(vals_ph=vals_ph), |
| 353 | hotlist_ids, |
| 354 | commit=commit) |
| 355 | |
| 356 | def _currentTime(self): |
| 357 | """This is a separate method so it can be mocked by tests.""" |
| 358 | return time.time() |
| 359 | |
| 360 | def _QueryToWhere(self, cnxn, services, project_config, query, canned_query, |
| 361 | project_ids): |
| 362 | """Parses a query string into LEFT JOIN and WHERE conditions. |
| 363 | |
| 364 | Args: |
| 365 | cnxn: A MonorailConnection instance. |
| 366 | services: A Services instance. |
| 367 | project_config: The configuration for the given project. |
| 368 | query (string): The query to parse. |
| 369 | canned_query (string): The supplied canned query. |
| 370 | project_ids: The current project ID(s). |
| 371 | |
| 372 | Returns: |
| 373 | 1. A list of LEFT JOIN clauses for the SQL query. |
| 374 | 2. A list of WHERE clases for the SQL query. |
| 375 | 3. A list of query conditions that are unsupported with snapshots. |
| 376 | """ |
| 377 | if not (query or canned_query): |
| 378 | return [], [], [] |
| 379 | |
| 380 | query = query or '' |
| 381 | scope = canned_query or '' |
| 382 | |
| 383 | query_ast = query2ast.ParseUserQuery(query, scope, |
| 384 | query2ast.BUILTIN_ISSUE_FIELDS, project_config) |
| 385 | query_ast = ast2ast.PreprocessAST(cnxn, query_ast, project_ids, |
| 386 | services, project_config) |
| 387 | left_joins, where, unsupported = ast2select.BuildSQLQuery(query_ast, |
| 388 | snapshot_mode=True) |
| 389 | |
| 390 | return left_joins, where, unsupported |
| 391 | |
| 392 | def _BuildSnapshotQuery(self, cols, where, joins, group_by, shard_id): |
| 393 | """Given SQL arguments, executes a snapshot COUNT query.""" |
| 394 | stmt = sql.Statement.MakeSelect('IssueSnapshot', cols, distinct=True) |
| 395 | stmt.AddJoinClauses(joins, left=True) |
| 396 | stmt.AddWhereTerms(where + [('IssueSnapshot.shard = %s', [shard_id])]) |
| 397 | if group_by: |
| 398 | stmt.AddGroupByTerms(group_by) |
| 399 | stmt.SetLimitAndOffset(limit=settings.chart_query_max_rows, offset=0) |
| 400 | stmt_str, stmt_args = stmt.Generate() |
| 401 | if group_by: |
| 402 | if group_by[0] == 'IssueSnapshot.is_open': |
| 403 | count_stmt = ('SELECT IF(results.is_open = 1, "Opened", "Closed") ' \ |
| 404 | 'AS bool_open, results.issue_count ' \ |
| 405 | 'FROM (%s) AS results' % stmt_str) |
| 406 | else: |
| 407 | count_stmt = stmt_str |
| 408 | else: |
| 409 | count_stmt = 'SELECT COUNT(results.issue_id) FROM (%s) AS results' % ( |
| 410 | stmt_str) |
| 411 | return count_stmt, stmt_args |