Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame^] | 1 | # Copyright 2016 The Chromium Authors |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 4 | |
| 5 | """Tests for the spam service.""" |
| 6 | from __future__ import print_function |
| 7 | from __future__ import division |
| 8 | from __future__ import absolute_import |
| 9 | |
| 10 | import mock |
Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame^] | 11 | import six |
| 12 | import time |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 13 | import unittest |
| 14 | |
Adrià Vilanova Martínez | 9f9ade5 | 2022-10-10 23:20:11 +0200 | [diff] [blame] | 15 | try: |
| 16 | from mox3 import mox |
| 17 | except ImportError: |
| 18 | import mox |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 19 | |
| 20 | from google.appengine.ext import testbed |
| 21 | |
| 22 | import settings |
| 23 | from framework import sql |
| 24 | from framework import framework_constants |
Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame^] | 25 | from infra_libs import ts_mon |
| 26 | from mrproto import user_pb2 |
| 27 | from mrproto import tracker_pb2 |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 28 | from services import spam_svc |
| 29 | from testing import fake |
| 30 | from mock import Mock |
| 31 | |
| 32 | |
def assert_unreached(*args, **kwargs):
  """Raise unconditionally; a stand-in for a callable that must never run.

  Any positional or keyword arguments are accepted and ignored so that this
  function can replace callables of any signature (for example a one-argument
  `_predict` stub). Without that, an unexpected invocation would surface as a
  TypeError about the argument count rather than the message below.

  Raises:
    Exception: always, with a message stating the code was not meant to run.
  """
  raise Exception('This code should not have been called.')  # pragma: no cover
| 35 | |
| 36 | |
class SpamServiceTest(unittest.TestCase):
  """Unit tests for spam_svc.SpamService backed by mocked SQL tables."""

  def setUp(self):
    """Activate the testbed and wire a SpamService to mock table managers."""
    self.testbed = testbed.Testbed()
    self.testbed.activate()

    self.mox = mox.Mox()
    self.mock_report_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_verdict_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_issue_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.cnxn = self.mox.CreateMock(sql.MonorailConnection)
    self.issue_service = fake.IssueService()
    self.spam_service = spam_svc.SpamService()
    self.spam_service.report_tbl = self.mock_report_tbl
    self.spam_service.verdict_tbl = self.mock_verdict_tbl
    self.spam_service.issue_tbl = self.mock_issue_tbl

    # Delete is replaced with mock.Mock on both tables, so Delete calls are
    # recorded by Mock and are NOT part of the mox record/replay/verify cycle.
    self.spam_service.report_tbl.Delete = Mock()
    self.spam_service.verdict_tbl.Delete = Mock()
    # Lower bound for timestamps written while a test runs.
    self.now = int(time.time())

    ts_mon.reset_for_unittest()

  def tearDown(self):
    """Deactivate the testbed and always release the mox state."""
    try:
      self.testbed.deactivate()
    finally:
      # Run the mox cleanup even if deactivating the testbed raised, so that
      # stubs do not leak into later tests.
      self.mox.UnsetStubs()
      self.mox.ResetAll()

  def _MakeIssue(self, **kwargs):
    """Return the fake issue shared by these tests.

    Args:
      **kwargs: extra keyword arguments forwarded to fake.MakeTestIssue, e.g.
          is_spam, project_name or migration_modified_timestamp.

    Returns:
      The issue built by fake.MakeTestIssue with project_id=789, local_id=1,
      reporter_id=111, owner_id=456 and issue_id=78901.
    """
    return fake.MakeTestIssue(
        project_id=789, local_id=1, reporter_id=111, owner_id=456,
        summary='sum', status='Live', issue_id=78901, **kwargs)

  def _ExpectThresholdLookups(self, flag_count, verdict_rows):
    """Record the report-count and verdict Select expectations, in that order.

    Args:
      flag_count: count returned for issue 78901 by the mocked COUNT(*) query
          on the report table.
      verdict_rows: rows returned by the mocked verdict table query.
    """
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn([(78901, flag_count)])
    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], issue_id=[78901],
        comment_id=None).AndReturn(verdict_rows)

  def _AssertIssueUnchanged(self, issue, expected_is_spam):
    """Assert the issue was not stored and kept its timestamp and spam bit.

    Args:
      issue: the issue that was passed to FlagIssues.
      expected_is_spam: the is_spam value the issue was created with.
    """
    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(issue.migration_modified_timestamp, 1234567)
    self.assertEqual(issue.is_spam, expected_is_spam)

  def testLookupIssuesFlaggers(self):
    """Report rows are grouped per issue into issue and comment flaggers."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234, 567, 890]).AndReturn([
            [234, 111, None],
            [234, 222, 1],
            [567, 333, None]])
    self.mox.ReplayAll()

    reporters = (
        self.spam_service.LookupIssuesFlaggers(self.cnxn, [234, 567, 890]))
    self.mox.VerifyAll()
    # Issue 890 has no report rows and is therefore absent from the result.
    self.assertEqual({
        234: ([111], {1: [222]}),
        567: ([333], {}),
    }, reporters)

  def testLookupIssueFlaggers(self):
    """The single-issue lookup returns (issue flaggers, comment flaggers)."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234]).AndReturn(
            [[234, 111, None], [234, 222, 1]])
    self.mox.ReplayAll()

    issue_reporters, comment_reporters = (
        self.spam_service.LookupIssueFlaggers(self.cnxn, 234))
    self.mox.VerifyAll()
    six.assertCountEqual(self, [111], issue_reporters)
    self.assertEqual({1: [222]}, comment_reporters)

  def testFlagIssues_overThresh(self):
    """Reaching the flag threshold stores the issue as spam."""
    issue = self._MakeIssue(
        project_name='proj', migration_modified_timestamp=1234567,
        is_spam=False)
    issue.assume_stale = False  # We will store this issue.

    self.mock_report_tbl.InsertRows(self.cnxn,
        ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)], ignore=True)
    self._ExpectThresholdLookups(settings.spam_flag_thresh, [])
    self.mock_verdict_tbl.InsertRows(
        self.cnxn, ['issue_id', 'is_spam', 'reason', 'project_id'],
        [(78901, True, 'threshold', 789)], ignore=True)

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, True)
    self.mox.VerifyAll()
    self.assertIn(issue, self.issue_service.updated_issues)
    # The timestamp is written while FlagIssues runs, which may fall in a
    # later wall-clock second than setUp. Bound it instead of requiring
    # equality with self.now, which made this test flaky.
    self.assertGreaterEqual(issue.migration_modified_timestamp, self.now)
    self.assertLessEqual(
        issue.migration_modified_timestamp, int(time.time()))
    self.assertEqual(issue.is_spam, True)

    self.assertEqual(
        1,
        self.spam_service.issue_actions.get(
            fields={
                'type': 'flag',
                'reporter_id': str(111),
                'issue': 'proj:1'
            }))

  def testFlagIssues_underThresh(self):
    """Staying below the flag threshold leaves the issue untouched."""
    issue = self._MakeIssue(
        project_name='proj', migration_modified_timestamp=1234567,
        is_spam=False)

    self.mock_report_tbl.InsertRows(self.cnxn,
        ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)], ignore=True)
    self._ExpectThresholdLookups(settings.spam_flag_thresh - 1, [])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, True)
    self.mox.VerifyAll()

    self._AssertIssueUnchanged(issue, False)
    self.assertIsNone(
        self.spam_service.issue_actions.get(
            fields={
                'type': 'flag',
                'reporter_id': str(111),
                'issue': 'proj:1'
            }))

  def testUnflagIssue_overThresh(self):
    """Un-flagging with the count still at the threshold changes nothing."""
    issue = self._MakeIssue(
        migration_modified_timestamp=1234567, is_spam=True)
    # NOTE(review): Delete was replaced by mock.Mock in setUp, so this call is
    # not a mox expectation and nothing verifies it afterwards.
    self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id],
        comment_id=None, user_id=111)
    self._ExpectThresholdLookups(settings.spam_flag_thresh, [])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self._AssertIssueUnchanged(issue, True)

  def testUnflagIssue_underThresh(self):
    """A non-member un-flagging an issue as spam should not be able
    to overturn the verdict to ham. This is different from previous
    behavior. See https://crbug.com/monorail/2232 for details."""
    issue = self._MakeIssue(
        migration_modified_timestamp=1234567, is_spam=True)
    issue.assume_stale = False  # We will store this issue.
    # NOTE(review): Delete was replaced by mock.Mock in setUp, so this call is
    # not a mox expectation and nothing verifies it afterwards.
    self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id],
        comment_id=None, user_id=111)
    self._ExpectThresholdLookups(settings.spam_flag_thresh - 1, [])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self._AssertIssueUnchanged(issue, True)

  def testUnflagIssue_underThreshNoManualOverride(self):
    """An existing 'manual' verdict is left alone when un-flagging."""
    issue = self._MakeIssue(
        migration_modified_timestamp=1234567, is_spam=True)
    # NOTE(review): Delete was replaced by mock.Mock in setUp, so this call is
    # not a mox expectation and nothing verifies it afterwards.
    self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id],
        comment_id=None, user_id=111)
    self._ExpectThresholdLookups(
        settings.spam_flag_thresh - 1, [(78901, 'manual', '')])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self._AssertIssueUnchanged(issue, True)

  def testIsExempt_RegularUser(self):
    """Non-members are not exempt, even with a look-alike email domain."""
    author = user_pb2.MakeUser(111, email='test@example.com')
    self.assertFalse(self.spam_service._IsExempt(author, False))
    author = user_pb2.MakeUser(111, email='test@chromium.org.example.com')
    self.assertFalse(self.spam_service._IsExempt(author, False))

  def testIsExempt_ProjectMember(self):
    """Project members are exempt."""
    author = user_pb2.MakeUser(111, email='test@example.com')
    self.assertTrue(self.spam_service._IsExempt(author, True))

  def testIsExempt_AllowlistedDomain(self):
    """A google.com address is exempt without project membership."""
    author = user_pb2.MakeUser(111, email='test@google.com')
    self.assertTrue(self.spam_service._IsExempt(author, False))

  def testClassifyIssue_spam(self):
    """Non-exempt reporters get the confidence returned by _predict."""
    issue = self._MakeIssue(is_spam=True)
    self.spam_service._predict = lambda body: 1.0

    # Prevent missing service initialization from failing the test.
    self.spam_service.ml_engine = True

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    reporter = user_pb2.MakeUser(111, email='test@test.com')
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    # Addresses that merely contain an allowlisted domain are still scored.
    reporter.email = 'test@chromium.org.spam.com'
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    reporter.email = 'test.google.com@test.com'
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

  def testClassifyIssue_Allowlisted(self):
    """Allowlisted reporters get confidence 0.0 without calling _predict."""
    issue = self._MakeIssue(is_spam=True)
    self.spam_service._predict = assert_unreached

    # Prevent missing service initialization from failing the test.
    self.spam_service.ml_engine = True

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    reporter = user_pb2.MakeUser(111, email='test@google.com')
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])
    reporter.email = 'test@chromium.org'
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])

  def testClassifyComment_spam(self):
    """Non-exempt commenters get the confidence returned by _predict."""
    self.spam_service._predict = lambda body: 1.0

    # Prevent missing service initialization from failing the test.
    self.spam_service.ml_engine = True

    commenter = user_pb2.MakeUser(111, email='test@test.com')
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    # Addresses that merely contain an allowlisted domain are still scored.
    commenter.email = 'test@chromium.org.spam.com'
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    commenter.email = 'test.google.com@test.com'
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

  def testClassifyComment_Allowlisted(self):
    """Allowlisted commenters get confidence 0.0 without calling _predict."""
    self.spam_service._predict = assert_unreached

    # Prevent missing service initialization from failing the test.
    self.spam_service.ml_engine = True

    commenter = user_pb2.MakeUser(111, email='test@google.com')
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])

    commenter.email = 'test@chromium.org'
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])

  def test_ham_classification(self):
    """ham_classification reports zero confidence and failed_open False."""
    actual = self.spam_service.ham_classification()
    self.assertEqual(actual['confidence_is_spam'], 0.0)
    self.assertEqual(actual['failed_open'], False)

  def testExpungeUsersInSpam(self):
    """Expunging users deletes their report and verdict rows uncommitted."""
    user_ids = [3, 4, 5]
    self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=user_ids)

    self.spam_service.report_tbl.Delete.assert_has_calls(
        [
            mock.call(self.cnxn, reported_user_id=user_ids, commit=False),
            mock.call(self.cnxn, user_id=user_ids, commit=False)
        ])
    self.spam_service.verdict_tbl.Delete.assert_called_once_with(
        self.cnxn, user_id=user_ids, commit=False)

  def testLookupIssueVerdicts(self):
    """Verdict rows are returned as a dict keyed by each row's first value."""
    self.spam_service.verdict_tbl.Select = Mock(return_value=[
        [5, 10], [4, 11], [6, 12],
    ])
    actual = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6])

    self.spam_service.verdict_tbl.Select.assert_called_once_with(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id'])
    self.assertEqual(actual, {
        5: 10,
        4: 11,
        6: 12,
    })