blob: 02ec7d8516ac2392e47a37cc6b97b2eb8987117f [file] [log] [blame]
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +01001# Copyright 2016 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Copybara854996b2021-09-07 19:36:02 +00004
5""" Set of functions for detaling with spam reports.
6"""
7from __future__ import print_function
8from __future__ import division
9from __future__ import absolute_import
10
11import collections
12import logging
13import settings
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +010014import time
Copybara854996b2021-09-07 19:36:02 +000015
16from collections import defaultdict
Copybara854996b2021-09-07 19:36:02 +000017from framework import sql
Copybara854996b2021-09-07 19:36:02 +000018from infra_libs import ts_mon
19from services import ml_helpers
20
21
22SPAMREPORT_TABLE_NAME = 'SpamReport'
23SPAMVERDICT_TABLE_NAME = 'SpamVerdict'
24ISSUE_TABLE = 'Issue'
25
26REASON_MANUAL = 'manual'
27REASON_THRESHOLD = 'threshold'
28REASON_CLASSIFIER = 'classifier'
29REASON_FAIL_OPEN = 'fail_open'
30SPAM_CLASS_LABEL = '1'
31
32SPAMREPORT_ISSUE_COLS = ['issue_id', 'reported_user_id', 'user_id']
33SPAMVERDICT_ISSUE_COL = ['created', 'content_created', 'user_id',
34 'reported_user_id', 'comment_id', 'issue_id']
35MANUALVERDICT_ISSUE_COLS = ['user_id', 'issue_id', 'is_spam', 'reason',
36 'project_id']
37THRESHVERDICT_ISSUE_COLS = ['issue_id', 'is_spam', 'reason', 'project_id']
38
39SPAMREPORT_COMMENT_COLS = ['comment_id', 'reported_user_id', 'user_id']
40MANUALVERDICT_COMMENT_COLS = ['user_id', 'comment_id', 'is_spam', 'reason',
41 'project_id']
42THRESHVERDICT_COMMENT_COLS = ['comment_id', 'is_spam', 'reason', 'project_id']
43
44
45class SpamService(object):
46 """The persistence layer for spam reports."""
47 issue_actions = ts_mon.CounterMetric(
48 'monorail/spam_svc/issue', 'Count of things that happen to issues.', [
49 ts_mon.StringField('type'),
50 ts_mon.StringField('reporter_id'),
51 ts_mon.StringField('issue')
52 ])
53 comment_actions = ts_mon.CounterMetric(
54 'monorail/spam_svc/comment', 'Count of things that happen to comments.', [
55 ts_mon.StringField('type'),
56 ts_mon.StringField('reporter_id'),
57 ts_mon.StringField('issue'),
58 ts_mon.StringField('comment_id')
59 ])
60 ml_engine_failures = ts_mon.CounterMetric(
61 'monorail/spam_svc/ml_engine_failure',
62 'Failures calling the ML Engine API',
63 None)
64
65 def __init__(self):
66 self.report_tbl = sql.SQLTableManager(SPAMREPORT_TABLE_NAME)
67 self.verdict_tbl = sql.SQLTableManager(SPAMVERDICT_TABLE_NAME)
68 self.issue_tbl = sql.SQLTableManager(ISSUE_TABLE)
69
70 # ML Engine library is lazy loaded below.
71 self.ml_engine = None
72
73 def LookupIssuesFlaggers(self, cnxn, issue_ids):
74 """Returns users who've reported the issues or their comments as spam.
75
76 Returns a dictionary {issue_id: (issue_reporters, comment_reporters)}
77 issue_reportes is a list of users who flagged the issue;
78 comment_reporters element is a dictionary {comment_id: [user_ids]} where
79 user_ids are the users who flagged that comment.
80 """
81 rows = self.report_tbl.Select(
82 cnxn, cols=['issue_id', 'user_id', 'comment_id'],
83 issue_id=issue_ids)
84
85 reporters = collections.defaultdict(
86 # Return a tuple of (issue_reporters, comment_reporters) as described
87 # above.
88 lambda: ([], collections.defaultdict(list)))
89
90 for row in rows:
91 issue_id = int(row[0])
92 user_id = row[1]
93 if row[2]:
94 comment_id = row[2]
95 reporters[issue_id][1][comment_id].append(user_id)
96 else:
97 reporters[issue_id][0].append(user_id)
98
99 return reporters
100
101 def LookupIssueFlaggers(self, cnxn, issue_id):
102 """Returns users who've reported the issue or its comments as spam.
103
104 Returns a tuple. First element is a list of users who flagged the issue;
105 second element is a dictionary of comment id to a list of users who flagged
106 that comment.
107 """
108 return self.LookupIssuesFlaggers(cnxn, [issue_id])[issue_id]
109
Adrià Vilanova Martínezde942802022-07-15 14:06:55 +0200110 def _LookupIssueFlagCounts(self, cnxn, issue_ids):
Copybara854996b2021-09-07 19:36:02 +0000111 """Returns a map of issue_id to flag counts"""
112 rows = self.report_tbl.Select(cnxn, cols=['issue_id', 'COUNT(*)'],
113 issue_id=issue_ids, group_by=['issue_id'])
114 counts = {}
115 for row in rows:
116 counts[int(row[0])] = row[1]
117 return counts
118
119 def LookupIssueVerdicts(self, cnxn, issue_ids):
120 """Returns a map of issue_id to most recent spam verdicts"""
121 rows = self.verdict_tbl.Select(cnxn,
122 cols=['issue_id', 'reason', 'MAX(created)'],
123 issue_id=issue_ids, comment_id=None,
124 group_by=['issue_id'])
125 counts = {}
126 for row in rows:
127 counts[int(row[0])] = row[1]
128 return counts
129
130 def LookupIssueVerdictHistory(self, cnxn, issue_ids):
131 """Returns a map of issue_id to most recent spam verdicts"""
132 rows = self.verdict_tbl.Select(cnxn, cols=[
133 'issue_id', 'reason', 'created', 'is_spam', 'classifier_confidence',
134 'user_id', 'overruled'],
135 issue_id=issue_ids, order_by=[('issue_id', []), ('created', [])])
136
137 # TODO: group by issue_id, make class instead of dict for verdict.
138 verdicts = []
139 for row in rows:
140 verdicts.append({
141 'issue_id': row[0],
142 'reason': row[1],
143 'created': row[2],
144 'is_spam': row[3],
145 'classifier_confidence': row[4],
146 'user_id': row[5],
147 'overruled': row[6],
148 })
149
150 return verdicts
151
152 def LookupCommentVerdictHistory(self, cnxn, comment_ids):
153 """Returns a map of issue_id to most recent spam verdicts"""
154 rows = self.verdict_tbl.Select(cnxn, cols=[
155 'comment_id', 'reason', 'created', 'is_spam', 'classifier_confidence',
156 'user_id', 'overruled'],
157 comment_id=comment_ids, order_by=[('comment_id', []), ('created', [])])
158
159 # TODO: group by comment_id, make class instead of dict for verdict.
160 verdicts = []
161 for row in rows:
162 verdicts.append({
163 'comment_id': row[0],
164 'reason': row[1],
165 'created': row[2],
166 'is_spam': row[3],
167 'classifier_confidence': row[4],
168 'user_id': row[5],
169 'overruled': row[6],
170 })
171
172 return verdicts
173
174 def FlagIssues(self, cnxn, issue_service, issues, reporting_user_id,
175 flagged_spam):
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100176 """Creates or deletes a spam report on an issue.
177
178 This function is run when a user flags an issue as spam but does not
179 have 'VerdictSpam' permission."""
Copybara854996b2021-09-07 19:36:02 +0000180 verdict_updates = []
181 if flagged_spam:
182 rows = [(issue.issue_id, issue.reporter_id, reporting_user_id)
183 for issue in issues]
184 self.report_tbl.InsertRows(cnxn, SPAMREPORT_ISSUE_COLS, rows,
185 ignore=True)
186 else:
187 issue_ids = [issue.issue_id for issue in issues]
188 self.report_tbl.Delete(
189 cnxn, issue_id=issue_ids, user_id=reporting_user_id,
190 comment_id=None)
191
192 project_id = issues[0].project_id
193
194 # Now record new verdicts and update issue.is_spam, if they've changed.
195 ids = [issue.issue_id for issue in issues]
Adrià Vilanova Martínezde942802022-07-15 14:06:55 +0200196 counts = self._LookupIssueFlagCounts(cnxn, ids)
Copybara854996b2021-09-07 19:36:02 +0000197 previous_verdicts = self.LookupIssueVerdicts(cnxn, ids)
198
199 for issue_id in counts:
200 # If the flag counts changed enough to toggle the is_spam bit, need to
201 # record a new verdict and update the Issue.
202
203 # No number of user spam flags can overturn an admin's verdict.
204 if previous_verdicts.get(issue_id) == REASON_MANUAL:
205 continue
206
207 # If enough spam flags come in, mark the issue as spam.
208 if (flagged_spam and counts[issue_id] >= settings.spam_flag_thresh):
209 verdict_updates.append(issue_id)
210
211 if len(verdict_updates) == 0:
212 return
213
214 # Some of the issues may have exceed the flag threshold, so issue verdicts
215 # and mark as spam in those cases.
216 rows = [(issue_id, flagged_spam, REASON_THRESHOLD, project_id)
217 for issue_id in verdict_updates]
218 self.verdict_tbl.InsertRows(cnxn, THRESHVERDICT_ISSUE_COLS, rows,
219 ignore=True)
220 update_issues = []
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100221 current_time = int(time.time())
Copybara854996b2021-09-07 19:36:02 +0000222 for issue in issues:
223 if issue.issue_id in verdict_updates:
224 issue.is_spam = flagged_spam
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100225 issue.migration_modified_timestamp = current_time
Copybara854996b2021-09-07 19:36:02 +0000226 update_issues.append(issue)
227
228 if flagged_spam:
229 for issue in update_issues:
230 issue_ref = '%s:%s' % (issue.project_name, issue.local_id)
231 self.issue_actions.increment(
232 {
233 'type': 'flag',
234 'reporter_id': str(reporting_user_id),
235 'issue': issue_ref
236 })
237
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100238 issue_service.UpdateIssues(
239 cnxn, update_issues, update_cols=['is_spam', 'migration_modified'])
Copybara854996b2021-09-07 19:36:02 +0000240
241 def FlagComment(
242 self, cnxn, issue, comment_id, reported_user_id, reporting_user_id,
243 flagged_spam):
244 """Creates or deletes a spam report on a comment."""
245 # TODO(seanmccullough): Bulk comment flagging? There's no UI for that.
246 if flagged_spam:
247 self.report_tbl.InsertRow(
248 cnxn,
249 ignore=True,
250 issue_id=issue.issue_id,
251 comment_id=comment_id,
252 reported_user_id=reported_user_id,
253 user_id=reporting_user_id)
254 issue_ref = '%s:%s' % (issue.project_name, issue.local_id)
255 self.comment_actions.increment(
256 {
257 'type': 'flag',
258 'reporter_id': str(reporting_user_id),
259 'issue': issue_ref,
260 'comment_id': str(comment_id)
261 })
262 else:
263 self.report_tbl.Delete(
264 cnxn,
265 issue_id=issue.issue_id,
266 comment_id=comment_id,
267 user_id=reporting_user_id)
268
269 def RecordClassifierIssueVerdict(self, cnxn, issue, is_spam, confidence,
270 fail_open):
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100271 """Records a judgment call on whether a new issue is spam.
272
273 Only run when an issue is newly filed. If the issue is determined to be
274 likely spam, the code increments a counter."""
Copybara854996b2021-09-07 19:36:02 +0000275 reason = REASON_FAIL_OPEN if fail_open else REASON_CLASSIFIER
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100276 self.verdict_tbl.InsertRow(
277 cnxn,
278 issue_id=issue.issue_id,
279 is_spam=is_spam,
280 reason=reason,
281 classifier_confidence=confidence,
282 project_id=issue.project_id,
283 overruled=False)
Copybara854996b2021-09-07 19:36:02 +0000284 if is_spam:
285 issue_ref = '%s:%s' % (issue.project_name, issue.local_id)
286 self.issue_actions.increment(
287 {
288 'type': 'classifier',
289 'reporter_id': 'classifier',
290 'issue': issue_ref
291 })
292 # This is called at issue creation time, so there's nothing else to do here.
293
294 def RecordManualIssueVerdicts(self, cnxn, issue_service, issues, user_id,
295 is_spam):
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100296 """Bypasses the classifier to manually classify an issue as spam.
297
298 This code can only be run by users with the 'VerdictSpam' permission."""
Copybara854996b2021-09-07 19:36:02 +0000299 rows = [(user_id, issue.issue_id, is_spam, REASON_MANUAL, issue.project_id)
300 for issue in issues]
301 issue_ids = [issue.issue_id for issue in issues]
302
303 # Overrule all previous verdicts.
304 self.verdict_tbl.Update(cnxn, {'overruled': True}, [
305 ('issue_id IN (%s)' % sql.PlaceHolders(issue_ids), issue_ids)
306 ], commit=False)
307
308 self.verdict_tbl.InsertRows(cnxn, MANUALVERDICT_ISSUE_COLS, rows,
309 ignore=True)
310
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100311 current_time = int(time.time())
Copybara854996b2021-09-07 19:36:02 +0000312 for issue in issues:
313 issue.is_spam = is_spam
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100314 issue.migration_modified_timestamp = current_time
Copybara854996b2021-09-07 19:36:02 +0000315
316 if is_spam:
317 for issue in issues:
318 issue_ref = '%s:%s' % (issue.project_name, issue.local_id)
319 self.issue_actions.increment(
320 {
321 'type': 'manual',
322 'reporter_id': str(user_id),
323 'issue': issue_ref
324 })
325 else:
326 issue_service.AllocateNewLocalIDs(cnxn, issues)
327
328 # This will commit the transaction.
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100329 issue_service.UpdateIssues(
330 cnxn, issues, update_cols=['is_spam', 'migration_modified'])
Copybara854996b2021-09-07 19:36:02 +0000331
332 def RecordManualCommentVerdict(self, cnxn, issue_service, user_service,
333 comment_id, user_id, is_spam):
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +0100334 """Bypasses the classifier to manually classify a comment as spam.
335
336 This code can only be run by users with the 'VerdictSpam' permission."""
Copybara854996b2021-09-07 19:36:02 +0000337 # TODO(seanmccullough): Bulk comment verdicts? There's no UI for that.
338 self.verdict_tbl.InsertRow(cnxn, ignore=True,
339 user_id=user_id, comment_id=comment_id, is_spam=is_spam,
340 reason=REASON_MANUAL)
341 comment = issue_service.GetComment(cnxn, comment_id)
342 comment.is_spam = is_spam
343 issue = issue_service.GetIssue(cnxn, comment.issue_id, use_cache=False)
344 issue_service.SoftDeleteComment(
345 cnxn, issue, comment, user_id, user_service, is_spam, True, is_spam)
346 if is_spam:
347 issue_ref = '%s:%s' % (issue.project_name, issue.local_id)
348 self.comment_actions.increment(
349 {
350 'type': 'manual',
351 'reporter_id': str(user_id),
352 'issue': issue_ref,
353 'comment_id': str(comment_id)
354 })
355
356 def RecordClassifierCommentVerdict(
357 self, cnxn, issue_service, comment, is_spam, confidence, fail_open):
358 reason = REASON_FAIL_OPEN if fail_open else REASON_CLASSIFIER
359 self.verdict_tbl.InsertRow(cnxn, comment_id=comment.id, is_spam=is_spam,
360 reason=reason, classifier_confidence=confidence,
361 project_id=comment.project_id)
362 if is_spam:
363 issue = issue_service.GetIssue(cnxn, comment.issue_id, use_cache=False)
364 issue_ref = '%s:%s' % (issue.project_name, issue.local_id)
365 self.comment_actions.increment(
366 {
367 'type': 'classifier',
368 'reporter_id': 'classifier',
369 'issue': issue_ref,
370 'comment_id': str(comment.id)
371 })
372
373 def _predict(self, instance):
374 """Requests a prediction from the ML Engine API.
375
376 Sample API response:
377 {'predictions': [{
378 'classes': ['0', '1'],
379 'scores': [0.4986788034439087, 0.5013211965560913]
380 }]}
381
382 This hits the default model.
383
384 Returns:
385 A floating point number representing the confidence
386 the instance is spam.
387 """
388 model_name = 'projects/%s/models/%s' % (
389 settings.classifier_project_id, settings.spam_model_name)
390 body = {'instances': [{"inputs": instance["word_hashes"]}]}
391
392 if not self.ml_engine:
393 self.ml_engine = ml_helpers.setup_ml_engine()
394
395 request = self.ml_engine.projects().predict(name=model_name, body=body)
396 response = request.execute()
397 logging.info('ML Engine API response: %r' % response)
398 prediction = response['predictions'][0]
399
400 # Ensure the class confidence we return is for the spam, not the ham label.
401 # The spam label, '1', is usually at index 1 but I'm not sure of any
402 # guarantees around label order.
403 if prediction['classes'][1] == SPAM_CLASS_LABEL:
404 return prediction['scores'][1]
405 elif prediction['classes'][0] == SPAM_CLASS_LABEL:
406 return prediction['scores'][0]
407 else:
408 raise Exception('No predicted classes found.')
409
410 def _IsExempt(self, author, is_project_member):
411 """Return True if the user is exempt from spam checking."""
412 if author.email is not None and author.email.endswith(
413 settings.spam_allowlisted_suffixes):
414 logging.info('%s allowlisted from spam filtering', author.email)
415 return True
416
417 if is_project_member:
418 logging.info('%s is a project member, assuming ham', author.email)
419 return True
420
421 return False
422
423 def ClassifyIssue(self, issue, firstComment, reporter, is_project_member):
424 """Classify an issue as either spam or ham.
425
426 Args:
427 issue: the Issue.
428 firstComment: the first Comment on issue.
429 reporter: User PB for the Issue reporter.
430 is_project_member: True if reporter is a member of issue's project.
431
432 Returns a JSON dict of classifier prediction results from
433 the ML Engine API.
434 """
435 instance = ml_helpers.GenerateFeaturesRaw(
436 [issue.summary, firstComment.content],
437 settings.spam_feature_hashes)
438 return self._classify(instance, reporter, is_project_member)
439
440 def ClassifyComment(self, comment_content, commenter, is_project_member=True):
441 """Classify a comment as either spam or ham.
442
443 Args:
444 comment: the comment text.
445 commenter: User PB for the user who authored the comment.
446
447 Returns a JSON dict of classifier prediction results from
448 the ML Engine API.
449 """
450 instance = ml_helpers.GenerateFeaturesRaw(
451 ['', comment_content],
452 settings.spam_feature_hashes)
453 return self._classify(instance, commenter, is_project_member)
454
455
456 def _classify(self, instance, author, is_project_member):
457 # Fail-safe: not spam.
458 result = self.ham_classification()
459
460 if self._IsExempt(author, is_project_member):
461 return result
462
463 if not self.ml_engine:
464 self.ml_engine = ml_helpers.setup_ml_engine()
465
466 # If setup_ml_engine returns None, it failed to init.
467 if not self.ml_engine:
468 logging.error("ML Engine not initialized.")
469 self.ml_engine_failures.increment()
470 result['failed_open'] = True
471 return result
472
473 remaining_retries = 3
474 while remaining_retries > 0:
475 try:
476 result['confidence_is_spam'] = self._predict(instance)
477 result['failed_open'] = False
478 return result
479 except Exception as ex:
480 remaining_retries = remaining_retries - 1
481 self.ml_engine_failures.increment()
482 logging.error('Error calling ML Engine API: %s' % ex)
483
484 result['failed_open'] = True
485 return result
486
487 def ham_classification(self):
488 return {'confidence_is_spam': 0.0,
489 'failed_open': False}
490
Copybara854996b2021-09-07 19:36:02 +0000491 def GetIssueFlagQueue(
492 self, cnxn, _issue_service, project_id, offset=0, limit=10):
493 """Returns list of recent issues that have been flagged by users"""
494 issue_flags = self.report_tbl.Select(
495 cnxn,
496 cols=[
497 "Issue.project_id", "Report.issue_id", "count(*) as count",
498 "max(Report.created) as latest",
499 "count(distinct Report.user_id) as users"
500 ],
501 left_joins=["Issue ON Issue.id = Report.issue_id"],
502 where=[
503 ('Report.issue_id IS NOT NULL', []),
504 ("Issue.project_id == %v", [project_id])
505 ],
506 order_by=[('count DESC', [])],
507 group_by=['Report.issue_id'],
508 offset=offset,
509 limit=limit)
510 ret = []
511 for row in issue_flags:
512 ret.append(
513 ModerationItem(
514 project_id=row[0],
515 issue_id=row[1],
516 count=row[2],
517 latest_report=row[3],
518 num_users=row[4],
519 ))
520
521 count = self.verdict_tbl.SelectValue(
522 cnxn,
523 col='COUNT(DISTINCT Report.issue_id)',
524 where=[('Issue.project_id = %s', [project_id])],
525 left_joins=["Issue ON Issue.id = SpamReport.issue_id"])
526 return ret, count
527
528
529 def GetCommentClassifierQueue(
530 self, cnxn, _issue_service, project_id, offset=0, limit=10):
531 """Returns list of recent comments with spam verdicts,
532 ranked in ascending order of confidence (so uncertain items are first).
533 """
534 # TODO(seanmccullough): Optimize pagination. This query probably gets
535 # slower as the number of SpamVerdicts grows, regardless of offset
536 # and limit values used here. Using offset,limit in general may not
537 # be the best way to do this.
538 comment_results = self.verdict_tbl.Select(
539 cnxn,
540 cols=[
541 'issue_id', 'is_spam', 'reason', 'classifier_confidence', 'created'
542 ],
543 where=[
544 ('project_id = %s', [project_id]),
545 (
546 'classifier_confidence <= %s',
547 [settings.classifier_moderation_thresh]),
548 ('overruled = %s', [False]),
549 ('comment_id IS NOT NULL', []),
550 ],
551 order_by=[
552 ('classifier_confidence ASC', []),
553 ('created ASC', []),
554 ],
555 group_by=['comment_id'],
556 offset=offset,
557 limit=limit,
558 )
559
560 ret = []
561 for row in comment_results:
562 ret.append(
563 ModerationItem(
564 comment_id=int(row[0]),
565 is_spam=row[1] == 1,
566 reason=row[2],
567 classifier_confidence=row[3],
568 verdict_time='%s' % row[4],
569 ))
570
571 count = self.verdict_tbl.SelectValue(
572 cnxn,
573 col='COUNT(*)',
574 where=[
575 ('project_id = %s', [project_id]),
576 (
577 'classifier_confidence <= %s',
578 [settings.classifier_moderation_thresh]),
579 ('overruled = %s', [False]),
580 ('comment_id IS NOT NULL', []),
581 ])
582
583 return ret, count
584
585
586 def GetTrainingIssues(self, cnxn, issue_service, since, offset=0, limit=100):
587 """Returns list of recent issues with human-labeled spam/ham verdicts.
588 """
589
590 # get all of the manual verdicts in the past day.
591 results = self.verdict_tbl.Select(cnxn,
592 cols=['issue_id'],
593 where=[
594 ('overruled = %s', [False]),
595 ('reason = %s', ['manual']),
596 ('issue_id IS NOT NULL', []),
597 ('created > %s', [since.isoformat()]),
598 ],
599 offset=offset,
600 limit=limit,
601 )
602
603 issue_ids = [int(row[0]) for row in results if row[0]]
604 issues = issue_service.GetIssues(cnxn, issue_ids)
605 comments = issue_service.GetCommentsForIssues(cnxn, issue_ids)
606 first_comments = {}
607 for issue in issues:
608 first_comments[issue.issue_id] = (comments[issue.issue_id][0].content
609 if issue.issue_id in comments else "[Empty]")
610
611 count = self.verdict_tbl.SelectValue(cnxn,
612 col='COUNT(*)',
613 where=[
614 ('overruled = %s', [False]),
615 ('reason = %s', ['manual']),
616 ('issue_id IS NOT NULL', []),
617 ('created > %s', [since.isoformat()]),
618 ])
619
620 return issues, first_comments, count
621
622 def GetTrainingComments(self, cnxn, issue_service, since, offset=0,
623 limit=100):
624 """Returns list of recent comments with human-labeled spam/ham verdicts.
625 """
626
627 # get all of the manual verdicts in the past day.
628 results = self.verdict_tbl.Select(
629 cnxn,
630 distinct=True,
631 cols=['comment_id'],
632 where=[
633 ('overruled = %s', [False]),
634 ('reason = %s', ['manual']),
635 ('comment_id IS NOT NULL', []),
636 ('created > %s', [since.isoformat()]),
637 ],
638 offset=offset,
639 limit=limit,
640 )
641
642 comment_ids = [int(row[0]) for row in results if row[0]]
643 # Don't care about sequence numbers in this context yet.
644 comments = issue_service.GetCommentsByID(cnxn, comment_ids,
645 defaultdict(int))
646 return comments
647
648 def ExpungeUsersInSpam(self, cnxn, user_ids):
649 """Removes all references to given users from Spam DB tables.
650
651 This method will not commit the operations. This method will
652 not make changes to in-memory data.
653 """
654 commit = False
655 self.report_tbl.Delete(cnxn, reported_user_id=user_ids, commit=commit)
656 self.report_tbl.Delete(cnxn, user_id=user_ids, commit=commit)
657 self.verdict_tbl.Delete(cnxn, user_id=user_ids, commit=commit)
658
659
660class ModerationItem:
661 def __init__(self, **kwargs):
662 self.__dict__ = kwargs