# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
| 5 | |
| 6 | """Tests for the spam service.""" |
| 7 | from __future__ import print_function |
| 8 | from __future__ import division |
| 9 | from __future__ import absolute_import |
| 10 | |
| 11 | import mock |
| 12 | import unittest |
| 13 | |
| 14 | import mox |
| 15 | |
| 16 | from google.appengine.ext import testbed |
| 17 | |
| 18 | import settings |
| 19 | from framework import sql |
| 20 | from framework import framework_constants |
| 21 | from proto import user_pb2 |
| 22 | from proto import tracker_pb2 |
| 23 | from services import spam_svc |
| 24 | from testing import fake |
| 25 | from mock import Mock |
| 26 | |
| 27 | |
def assert_unreached(*_args, **_kwargs):
  """Fail loudly when a stub that must never run is invoked.

  Any positional or keyword arguments are accepted and ignored so that this
  function can stand in for a callable of any signature.  In this file it
  replaces `_predict`, which the other tests stub with a one-argument
  callable; without the catch-all signature an unexpected call would surface
  as a TypeError about the argument count rather than the message below.

  Raises:
    Exception: always.
  """
  raise Exception('This code should not have been called.')  # pragma: no cover
| 30 | |
| 31 | |
class SpamServiceTest(unittest.TestCase):
  """Exercises spam_svc.SpamService with its SQL tables replaced by mocks."""

  def setUp(self):
    """Activate the testbed and wire mock tables into a fresh SpamService."""
    self.testbed = testbed.Testbed()
    self.testbed.activate()

    self.mox = mox.Mox()
    self.mock_report_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_verdict_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_issue_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.cnxn = self.mox.CreateMock(sql.MonorailConnection)
    self.issue_service = fake.IssueService()

    service = spam_svc.SpamService()
    service.report_tbl = self.mock_report_tbl
    service.verdict_tbl = self.mock_verdict_tbl
    service.issue_tbl = self.mock_issue_tbl
    self.spam_service = service

    # Delete() on these two tables is a mock.Mock rather than a mox method,
    # so tests inspect it with assert_* calls instead of record/replay.
    for table in (service.report_tbl, service.verdict_tbl):
      table.Delete = Mock()

  def tearDown(self):
    """Deactivate the testbed, then clear mox stubs and expectations."""
    self.testbed.deactivate()
    self.mox.UnsetStubs()
    self.mox.ResetAll()

  # ---- Shared fixtures and expectations -----------------------------------

  def _MakeIssue(self, **extra_fields):
    """Return the issue fixture used throughout (issue_id 78901, proj 789)."""
    return fake.MakeTestIssue(
        project_id=789,
        local_id=1,
        reporter_id=111,
        owner_id=456,
        summary='sum',
        status='Live',
        issue_id=78901,
        **extra_fields)

  def _ExpectFlagInsert(self):
    """Record the report-table insert for user 111 flagging issue 78901."""
    self.mock_report_tbl.InsertRows(
        self.cnxn,
        ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)],
        ignore=True)

  def _ExpectFlagCount(self, flag_count):
    """Record the per-issue COUNT(*) query, answering with flag_count."""
    self.mock_report_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'COUNT(*)'],
        group_by=['issue_id'],
        issue_id=[78901]).AndReturn([(78901, flag_count)])

  def _ExpectVerdictLookup(self, verdict_rows):
    """Record the latest-verdict query, answering with verdict_rows."""
    self.mock_verdict_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'],
        issue_id=[78901],
        comment_id=None).AndReturn(verdict_rows)

  def _CallUnflagDelete(self, issue):
    """Call Delete on the report table with the un-flag arguments.

    NOTE(review): setUp replaces Delete with a mock.Mock, so this call is
    only recorded on that Mock and nothing in the un-flag tests asserts on
    it afterwards -- confirm whether an assert_called_with was intended.
    """
    self.mock_report_tbl.Delete(
        self.cnxn, issue_id=[issue.issue_id], comment_id=None, user_id=111)

  def _RunFlagIssues(self, issue, flagged_spam):
    """Replay the expectations, call FlagIssues as user 111, and verify."""
    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, flagged_spam)
    self.mox.VerifyAll()

  def _FlagActionCount(self):
    """Read the 'flag' issue_actions value for user 111 on proj:1."""
    return self.spam_service.issue_actions.get(
        fields={
            'type': 'flag',
            'reporter_id': str(111),
            'issue': 'proj:1'
        })

  def _QueueFilter(self):
    """Return a fresh copy of the where-clause used by the queue queries."""
    return [
        ('project_id = %s', [789]),
        ('classifier_confidence <= %s',
         [settings.classifier_moderation_thresh]),
        ('overruled = %s', [False]),
        ('issue_id IS NOT NULL', []),
    ]

  def _ExpectQueueQueries(self, verdict_rows, total):
    """Record the page query and the COUNT(*) query for the queue."""
    self.mock_verdict_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'is_spam', 'reason', 'classifier_confidence',
              'created'],
        where=self._QueueFilter(),
        order_by=[
            ('classifier_confidence ASC', []),
            ('created ASC', [])
        ],
        group_by=['issue_id'],
        offset=0,
        limit=10,
    ).AndReturn(verdict_rows)

    self.mock_verdict_tbl.SelectValue(
        self.cnxn,
        col='COUNT(*)',
        where=self._QueueFilter()).AndReturn(total)

  def _FetchQueue(self):
    """Replay, call GetIssueClassifierQueue for project 789, and verify."""
    self.mox.ReplayAll()
    queue, total = self.spam_service.GetIssueClassifierQueue(
        self.cnxn, self.issue_service, 789)
    self.mox.VerifyAll()
    return queue, total

  def _StubPrediction(self, predict_fn):
    """Install predict_fn as the service's _predict and fake ml_engine."""
    self.spam_service._predict = predict_fn

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

  # ---- Flagger lookups -----------------------------------------------------

  def testLookupIssuesFlaggers(self):
    """Rows are grouped per issue; issues without rows are left out."""
    report_rows = [
        [234, 111, None],
        [234, 222, 1],
        [567, 333, None]]
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234, 567, 890]).AndReturn(report_rows)
    self.mox.ReplayAll()

    flaggers_by_issue = self.spam_service.LookupIssuesFlaggers(
        self.cnxn, [234, 567, 890])
    self.mox.VerifyAll()

    expected = {
        234: ([111], {1: [222]}),
        567: ([333], {}),
    }
    self.assertEqual(expected, flaggers_by_issue)

  def testLookupIssueFlaggers(self):
    """The single-issue lookup returns (issue flaggers, comment flaggers)."""
    report_rows = [[234, 111, None], [234, 222, 1]]
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234]).AndReturn(report_rows)
    self.mox.ReplayAll()

    flaggers = self.spam_service.LookupIssueFlaggers(self.cnxn, 234)
    issue_reporters, comment_reporters = flaggers
    self.mox.VerifyAll()

    self.assertItemsEqual([111], issue_reporters)
    self.assertEqual({1: [222]}, comment_reporters)

  # ---- FlagIssues ------------------------------------------------------------

  def testFlagIssues_overThresh(self):
    """At the flag threshold a 'threshold' verdict is stored and issue saved."""
    issue = self._MakeIssue(project_name='proj')
    issue.assume_stale = False  # We will store this issue.

    self._ExpectFlagInsert()
    self._ExpectFlagCount(settings.spam_flag_thresh)
    self._ExpectVerdictLookup([])
    self.mock_verdict_tbl.InsertRows(
        self.cnxn,
        ['issue_id', 'is_spam', 'reason', 'project_id'],
        [(78901, True, 'threshold', 789)],
        ignore=True)

    self._RunFlagIssues(issue, True)

    self.assertIn(issue, self.issue_service.updated_issues)
    self.assertEqual(1, self._FlagActionCount())

  def testFlagIssues_underThresh(self):
    """Below the flag threshold no verdict is written and nothing is saved."""
    issue = self._MakeIssue(project_name='proj')

    self._ExpectFlagInsert()
    self._ExpectFlagCount(settings.spam_flag_thresh - 1)
    self._ExpectVerdictLookup([])

    self._RunFlagIssues(issue, True)

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertIsNone(self._FlagActionCount())

  def testUnflagIssue_overThresh(self):
    """Un-flagging with the count still at threshold keeps the issue spam."""
    issue = self._MakeIssue(is_spam=True)
    self._CallUnflagDelete(issue)
    self._ExpectFlagCount(settings.spam_flag_thresh)
    self._ExpectVerdictLookup([])

    self._RunFlagIssues(issue, False)

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(True, issue.is_spam)

  def testUnflagIssue_underThresh(self):
    """A non-member un-flagging an issue must not flip the verdict to ham.

    This is different from previous behavior.
    See https://crbug.com/monorail/2232 for details.
    """
    issue = self._MakeIssue(is_spam=True)
    issue.assume_stale = False  # We will store this issue.
    self._CallUnflagDelete(issue)
    self._ExpectFlagCount(settings.spam_flag_thresh - 1)
    self._ExpectVerdictLookup([])

    self._RunFlagIssues(issue, False)

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(True, issue.is_spam)

  def testUnflagIssue_underThreshNoManualOverride(self):
    """With a 'manual' verdict on record, un-flagging leaves the issue spam."""
    issue = self._MakeIssue(is_spam=True)
    self._CallUnflagDelete(issue)
    self._ExpectFlagCount(settings.spam_flag_thresh - 1)
    self._ExpectVerdictLookup([(78901, 'manual', '')])

    self._RunFlagIssues(issue, False)

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(True, issue.is_spam)

  # ---- GetIssueClassifierQueue -------------------------------------------

  def testGetIssueClassifierQueue_noVerdicts(self):
    """No verdict rows gives an empty queue and a zero count."""
    self._ExpectQueueQueries([], 0)

    queue, total = self._FetchQueue()

    self.assertEqual([], queue)
    self.assertEqual(0, total)

  def testGetIssueClassifierQueue_someVerdicts(self):
    """A verdict row is turned into a queue entry with matching fields."""
    self._ExpectQueueQueries(
        [[78901, 0, "classifier", 0.9, "2015-12-10 11:06:24"]], 10)

    queue, total = self._FetchQueue()

    self.assertEqual(1, len(queue))
    self.assertEqual(10, total)
    entry = queue[0]
    self.assertEqual(78901, entry.issue_id)
    self.assertEqual(False, entry.is_spam)
    self.assertEqual("classifier", entry.reason)
    self.assertEqual(0.9, entry.classifier_confidence)
    self.assertEqual("2015-12-10 11:06:24", entry.verdict_time)

  # ---- _IsExempt ---------------------------------------------------------

  def testIsExempt_RegularUser(self):
    """Non-members are not exempt, even with a look-alike email domain."""
    for email in ('test@example.com', 'test@chromium.org.example.com'):
      author = user_pb2.MakeUser(111, email=email)
      self.assertFalse(self.spam_service._IsExempt(author, False))

  def testIsExempt_ProjectMember(self):
    """Passing True as the second argument makes the author exempt."""
    member = user_pb2.MakeUser(111, email='test@example.com')
    self.assertTrue(self.spam_service._IsExempt(member, True))

  def testIsExempt_AllowlistedDomain(self):
    """A google.com author is exempt without being a project member."""
    googler = user_pb2.MakeUser(111, email='test@google.com')
    self.assertTrue(self.spam_service._IsExempt(googler, False))

  # ---- ClassifyIssue / ClassifyComment -----------------------------------

  def testClassifyIssue_spam(self):
    """Non-exempt reporters get the stubbed prediction as the confidence."""
    issue = self._MakeIssue(is_spam=True)
    self._StubPrediction(lambda body: 1.0)

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    reporter = user_pb2.MakeUser(111, email='test@test.com')

    def Confidence():
      verdict = self.spam_service.ClassifyIssue(
          issue, comment_pb, reporter, False)
      return verdict['confidence_is_spam']

    self.assertEqual(1.0, Confidence())

    # Addresses that merely contain an allowlisted domain name.
    for lookalike in ('test@chromium.org.spam.com',
                      'test.google.com@test.com'):
      reporter.email = lookalike
      self.assertEqual(1.0, Confidence())

  def testClassifyIssue_Allowlisted(self):
    """Allowlisted reporters get 0.0 and _predict is never called."""
    issue = self._MakeIssue(is_spam=True)
    self._StubPrediction(assert_unreached)

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    reporter = user_pb2.MakeUser(111, email='test@google.com')

    def Confidence():
      verdict = self.spam_service.ClassifyIssue(
          issue, comment_pb, reporter, False)
      return verdict['confidence_is_spam']

    self.assertEqual(0.0, Confidence())
    reporter.email = 'test@chromium.org'
    self.assertEqual(0.0, Confidence())

  def testClassifyComment_spam(self):
    """Non-exempt commenters get the stubbed prediction as the confidence."""
    self._StubPrediction(lambda body: 1.0)

    commenter = user_pb2.MakeUser(111, email='test@test.com')

    def Confidence():
      verdict = self.spam_service.ClassifyComment(
          'this is spam', commenter, False)
      return verdict['confidence_is_spam']

    self.assertEqual(1.0, Confidence())

    # Addresses that merely contain an allowlisted domain name.
    for lookalike in ('test@chromium.org.spam.com',
                      'test.google.com@test.com'):
      commenter.email = lookalike
      self.assertEqual(1.0, Confidence())

  def testClassifyComment_Allowlisted(self):
    """Allowlisted commenters get 0.0 and _predict is never called."""
    self._StubPrediction(assert_unreached)

    commenter = user_pb2.MakeUser(111, email='test@google.com')

    def Confidence():
      verdict = self.spam_service.ClassifyComment(
          'this is spam', commenter, False)
      return verdict['confidence_is_spam']

    self.assertEqual(0.0, Confidence())

    commenter.email = 'test@chromium.org'
    self.assertEqual(0.0, Confidence())

  def test_ham_classification(self):
    """ham_classification() reports zero confidence and no fail-open."""
    verdict = self.spam_service.ham_classification()
    self.assertEqual(verdict['confidence_is_spam'], 0.0)
    self.assertEqual(verdict['failed_open'], False)

  # ---- Expunge / verdict lookup ------------------------------------------

  def testExpungeUsersInSpam(self):
    """Expunging deletes report rows (both columns) and verdict rows."""
    doomed_user_ids = [3, 4, 5]
    self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=doomed_user_ids)

    expected_report_deletes = [
        mock.call(self.cnxn, reported_user_id=doomed_user_ids, commit=False),
        mock.call(self.cnxn, user_id=doomed_user_ids, commit=False),
    ]
    report_delete = self.spam_service.report_tbl.Delete
    report_delete.assert_has_calls(expected_report_deletes)

    verdict_delete = self.spam_service.verdict_tbl.Delete
    verdict_delete.assert_called_once_with(
        self.cnxn, user_id=doomed_user_ids, commit=False)

  def testLookupIssueVerdicts(self):
    """Each returned row's first column is mapped to its second column."""
    select_stub = Mock(return_value=[
        [5, 10], [4, 11], [6, 12],
    ])
    self.spam_service.verdict_tbl.Select = select_stub

    verdicts = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6])

    select_stub.assert_called_once_with(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id'])
    self.assertEqual(verdicts, {
        5: 10,
        4: 11,
        6: 12,
    })