Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 1 | # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style |
| 3 | # license that can be found in the LICENSE file or at |
| 4 | # https://developers.google.com/open-source/licenses/bsd |
| 5 | |
| 6 | """Tests for the spam service.""" |
| 7 | from __future__ import print_function |
| 8 | from __future__ import division |
| 9 | from __future__ import absolute_import |
| 10 | |
| 11 | import mock |
| 12 | import unittest |
| 13 | |
| 14 | import mox |
| 15 | |
| 16 | from google.appengine.ext import testbed |
| 17 | |
| 18 | import settings |
| 19 | from framework import sql |
| 20 | from framework import framework_constants |
| 21 | from proto import user_pb2 |
| 22 | from proto import tracker_pb2 |
| 23 | from services import spam_svc |
| 24 | from testing import fake |
| 25 | from mock import Mock |
| 26 | |
| 27 | |
def assert_unreached(*_args, **_kwargs):
  """Fail loudly when a callable that must not run is invoked.

  The allowlisted-user tests install this in place of SpamService._predict.
  Any positional or keyword arguments are accepted and ignored, so that an
  unexpected call raises the explicit message below instead of a TypeError
  about the argument count.

  Raises:
    Exception: always.
  """
  raise Exception('This code should not have been called.')  # pragma: no cover
| 30 | |
| 31 | |
| 32 | class SpamServiceTest(unittest.TestCase): |
| 33 | |
def setUp(self):
  """Build a SpamService whose SQL table managers are mox mocks.

  The App Engine testbed is activated first.  The report, verdict and issue
  tables of the service are replaced with mox mocks, and the Delete
  attribute of the report and verdict tables is then replaced with a
  mock.Mock so Delete calls can be checked with Mock's assert_* helpers.
  """
  self.testbed = testbed.Testbed()
  self.testbed.activate()

  self.mox = mox.Mox()
  report_tbl = self.mox.CreateMock(sql.SQLTableManager)
  verdict_tbl = self.mox.CreateMock(sql.SQLTableManager)
  issue_tbl = self.mox.CreateMock(sql.SQLTableManager)
  self.mock_report_tbl = report_tbl
  self.mock_verdict_tbl = verdict_tbl
  self.mock_issue_tbl = issue_tbl
  self.cnxn = self.mox.CreateMock(sql.MonorailConnection)
  self.issue_service = fake.IssueService()

  # Swap the service's real table managers for the mocks created above.
  service = spam_svc.SpamService()
  self.spam_service = service
  service.report_tbl = report_tbl
  service.verdict_tbl = verdict_tbl
  service.issue_tbl = issue_tbl

  # Delete is tracked by mock.Mock instead of by mox record/replay.
  service.report_tbl.Delete = Mock()
  service.verdict_tbl.Delete = Mock()
| 51 | |
def tearDown(self):
  """Deactivate the testbed and clear all mox state.

  The mox cleanup runs in a finally clause so that stubs and recorded
  expectations are still released when deactivating the testbed raises.
  Any such exception still propagates to the test runner.
  """
  try:
    self.testbed.deactivate()
  finally:
    self.mox.UnsetStubs()
    self.mox.ResetAll()
| 56 | |
def testLookupIssuesFlaggers(self):
  """LookupIssuesFlaggers groups flag rows by issue and by comment."""
  issue_ids = [234, 567, 890]
  # Rows are (issue_id, user_id, comment_id).  Rows with comment_id None are
  # expected in the issue-level list, the others under their comment ID.
  report_rows = [
      [234, 111, None],
      [234, 222, 1],
      [567, 333, None],
  ]
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
      issue_id=list(issue_ids)).AndReturn(report_rows)
  self.mox.ReplayAll()

  actual = self.spam_service.LookupIssuesFlaggers(self.cnxn, issue_ids)
  self.mox.VerifyAll()

  # Issue 890 has no rows and does not appear in the expected result.
  expected = {
      234: ([111], {1: [222]}),
      567: ([333], {}),
  }
  self.assertEqual(expected, actual)
| 73 | |
def testLookupIssueFlaggers(self):
  """LookupIssueFlaggers returns (issue flaggers, flaggers by comment).

  The single issue ID passed in is expected to reach the report table as a
  one-element list.
  """
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
      issue_id=[234]).AndReturn(
          [[234, 111, None], [234, 222, 1]])
  self.mox.ReplayAll()

  issue_reporters, comment_reporters = (
      self.spam_service.LookupIssueFlaggers(self.cnxn, 234))
  self.mox.VerifyAll()

  # Order-insensitive comparison that runs on both Python 2 and Python 3;
  # unittest's assertItemsEqual does not exist on Python 3.
  self.assertEqual([111], sorted(issue_reporters))
  self.assertEqual({1: [222]}, comment_reporters)
| 86 | |
def testFlagIssues_overThresh(self):
  """A flag count of settings.spam_flag_thresh yields a spam verdict.

  Expects the flag row to be inserted, the per-issue flag count and the
  latest verdict to be read, a 'threshold' verdict to be inserted, the
  issue to be updated, and the 'flag' action counter to read 1.
  """
  issue_id = 78901
  flagger_id = 111
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=issue_id, project_name='proj')
  issue.assume_stale = False  # We will store this issue.

  # Record the expected table traffic.
  self.mock_report_tbl.InsertRows(
      self.cnxn, ['issue_id', 'reported_user_id', 'user_id'],
      [(issue_id, 111, flagger_id)], ignore=True)
  flag_counts = [(issue_id, settings.spam_flag_thresh)]
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
      issue_id=[issue_id]).AndReturn(flag_counts)
  no_verdicts = []
  self.mock_verdict_tbl.Select(
      self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
      group_by=['issue_id'], issue_id=[issue_id],
      comment_id=None).AndReturn(no_verdicts)
  self.mock_verdict_tbl.InsertRows(
      self.cnxn, ['issue_id', 'is_spam', 'reason', 'project_id'],
      [(issue_id, True, 'threshold', 789)], ignore=True)

  self.mox.ReplayAll()
  self.spam_service.FlagIssues(
      self.cnxn, self.issue_service, [issue], flagger_id, True)
  self.mox.VerifyAll()

  self.assertIn(issue, self.issue_service.updated_issues)
  action_fields = {
      'type': 'flag',
      'reporter_id': str(flagger_id),
      'issue': 'proj:1',
  }
  flag_actions = self.spam_service.issue_actions.get(fields=action_fields)
  self.assertEqual(1, flag_actions)
| 127 | |
def testFlagIssues_underThresh(self):
  """A flag count below settings.spam_flag_thresh records no verdict.

  The flag row is still inserted and the counts and verdicts are still
  read, but no verdict insert is expected, the issue is not updated, and
  the 'flag' action counter has no value.
  """
  issue_id = 78901
  flagger_id = 111
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=issue_id, project_name='proj')

  # Record the expected table traffic.
  self.mock_report_tbl.InsertRows(
      self.cnxn, ['issue_id', 'reported_user_id', 'user_id'],
      [(issue_id, 111, flagger_id)], ignore=True)
  flag_counts = [(issue_id, settings.spam_flag_thresh - 1)]
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
      issue_id=[issue_id]).AndReturn(flag_counts)
  no_verdicts = []
  self.mock_verdict_tbl.Select(
      self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
      group_by=['issue_id'], issue_id=[issue_id],
      comment_id=None).AndReturn(no_verdicts)

  self.mox.ReplayAll()
  self.spam_service.FlagIssues(
      self.cnxn, self.issue_service, [issue], flagger_id, True)
  self.mox.VerifyAll()

  self.assertNotIn(issue, self.issue_service.updated_issues)
  action_fields = {
      'type': 'flag',
      'reporter_id': str(flagger_id),
      'issue': 'proj:1',
  }
  flag_actions = self.spam_service.issue_actions.get(fields=action_fields)
  self.assertIsNone(flag_actions)
| 164 | |
def testUnflagIssue_overThresh(self):
  """Un-flagging while the count is still at the threshold changes nothing.

  The issue starts as spam; after FlagIssues(..., False) it must not be in
  updated_issues and must still be marked as spam.
  """
  issue_id = 78901
  flagger_id = 111
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=issue_id, is_spam=True)

  # NOTE(review): setUp replaces report_tbl.Delete with a mock.Mock, so this
  # call is presumably only recorded on that Mock and is not checked by
  # mox.VerifyAll() -- confirm.
  self.mock_report_tbl.Delete(
      self.cnxn, issue_id=[issue.issue_id], comment_id=None,
      user_id=flagger_id)
  flag_counts = [(issue_id, settings.spam_flag_thresh)]
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
      issue_id=[issue_id]).AndReturn(flag_counts)
  no_verdicts = []
  self.mock_verdict_tbl.Select(
      self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
      group_by=['issue_id'], issue_id=[issue_id],
      comment_id=None).AndReturn(no_verdicts)

  self.mox.ReplayAll()
  self.spam_service.FlagIssues(
      self.cnxn, self.issue_service, [issue], flagger_id, False)
  self.mox.VerifyAll()

  self.assertNotIn(issue, self.issue_service.updated_issues)
  self.assertEqual(True, issue.is_spam)
| 186 | |
def testUnflagIssue_underThresh(self):
  """Un-flagging below the threshold must not turn a spam issue into ham.

  A non-member removing their spam flag should not be able to overturn the
  verdict.  This differs from earlier behavior; see
  https://crbug.com/monorail/2232 for details.
  """
  issue_id = 78901
  flagger_id = 111
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=issue_id, is_spam=True)
  # NOTE(review): this test asserts below that the issue is NOT stored;
  # confirm whether clearing assume_stale is still needed here.
  issue.assume_stale = False

  self.mock_report_tbl.Delete(
      self.cnxn, issue_id=[issue.issue_id], comment_id=None,
      user_id=flagger_id)
  flag_counts = [(issue_id, settings.spam_flag_thresh - 1)]
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
      issue_id=[issue_id]).AndReturn(flag_counts)
  no_verdicts = []
  self.mock_verdict_tbl.Select(
      self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
      group_by=['issue_id'], issue_id=[issue_id],
      comment_id=None).AndReturn(no_verdicts)

  self.mox.ReplayAll()
  self.spam_service.FlagIssues(
      self.cnxn, self.issue_service, [issue], flagger_id, False)
  self.mox.VerifyAll()

  self.assertNotIn(issue, self.issue_service.updated_issues)
  self.assertEqual(True, issue.is_spam)
| 212 | |
def testUnflagIssue_underThreshNoManualOverride(self):
  """Un-flagging does not override an existing 'manual' verdict.

  The verdict table reports a 'manual' reason for the issue; the issue must
  stay spam and must not be updated.
  """
  issue_id = 78901
  flagger_id = 111
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=issue_id, is_spam=True)

  self.mock_report_tbl.Delete(
      self.cnxn, issue_id=[issue.issue_id], comment_id=None,
      user_id=flagger_id)
  flag_counts = [(issue_id, settings.spam_flag_thresh - 1)]
  self.mock_report_tbl.Select(
      self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
      issue_id=[issue_id]).AndReturn(flag_counts)
  # Row layout follows the selected cols: (issue_id, reason, MAX(created)).
  manual_verdict = [(issue_id, 'manual', '')]
  self.mock_verdict_tbl.Select(
      self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
      group_by=['issue_id'], comment_id=None,
      issue_id=[issue_id]).AndReturn(manual_verdict)

  self.mox.ReplayAll()
  self.spam_service.FlagIssues(
      self.cnxn, self.issue_service, [issue], flagger_id, False)
  self.mox.VerifyAll()

  self.assertNotIn(issue, self.issue_service.updated_issues)
  self.assertEqual(True, issue.is_spam)
| 235 | |
def testIsExempt_RegularUser(self):
  """Non-members with ordinary email addresses are not exempt.

  The second address only embeds chromium.org as a sub-domain of
  example.com and must not be treated as exempt either.
  """
  regular_emails = ('test@example.com', 'test@chromium.org.example.com')
  for email in regular_emails:
    author = user_pb2.MakeUser(111, email=email)
    is_exempt = self.spam_service._IsExempt(author, False)
    self.assertFalse(is_exempt)
| 241 | |
def testIsExempt_ProjectMember(self):
  """_IsExempt is true when the second argument (True here) is set."""
  member = user_pb2.MakeUser(111, email='test@example.com')
  is_exempt = self.spam_service._IsExempt(member, True)
  self.assertTrue(is_exempt)
| 245 | |
def testIsExempt_AllowlistedDomain(self):
  """A google.com author is exempt even with the second argument False."""
  googler = user_pb2.MakeUser(111, email='test@google.com')
  is_exempt = self.spam_service._IsExempt(googler, False)
  self.assertTrue(is_exempt)
| 249 | |
def testClassifyIssue_spam(self):
  """ClassifyIssue reports the stubbed score for non-exempt reporters.

  Two of the addresses merely contain chromium.org or google.com (as a
  sub-domain or in the local part); they must still get the 1.0 score.
  """
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=78901, is_spam=True)

  def always_spam(body):
    return 1.0
  self.spam_service._predict = always_spam

  # Prevent missing service inits to fail the test.
  self.spam_service.ml_engine = True

  comment_pb = tracker_pb2.IssueComment()
  comment_pb.content = "this is spam"

  emails = (
      'test@test.com',
      'test@chromium.org.spam.com',
      'test.google.com@test.com',
  )
  # One user object is reused; only its email changes between calls.
  reporter = user_pb2.MakeUser(111, email=emails[0])
  for email in emails:
    reporter.email = email
    res = self.spam_service.ClassifyIssue(
        issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])
| 272 | |
def testClassifyIssue_Allowlisted(self):
  """ClassifyIssue returns 0.0 for google.com and chromium.org reporters.

  _predict is replaced with assert_unreached, so the test fails if the
  service tries to run a prediction for these users.
  """
  issue = fake.MakeTestIssue(
      project_id=789, local_id=1, reporter_id=111, owner_id=456,
      summary='sum', status='Live', issue_id=78901, is_spam=True)
  self.spam_service._predict = assert_unreached

  # Prevent missing service inits to fail the test.
  self.spam_service.ml_engine = True

  comment_pb = tracker_pb2.IssueComment()
  comment_pb.content = "this is spam"

  emails = ('test@google.com', 'test@chromium.org')
  # One user object is reused; only its email changes between calls.
  reporter = user_pb2.MakeUser(111, email=emails[0])
  for email in emails:
    reporter.email = email
    res = self.spam_service.ClassifyIssue(
        issue, comment_pb, reporter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])
| 290 | |
def testClassifyComment_spam(self):
  """ClassifyComment reports the stubbed score for non-exempt commenters.

  Two of the addresses merely contain chromium.org or google.com (as a
  sub-domain or in the local part); they must still get the 1.0 score.
  """
  def always_spam(body):
    return 1.0
  self.spam_service._predict = always_spam

  # Prevent missing service inits to fail the test.
  self.spam_service.ml_engine = True

  emails = (
      'test@test.com',
      'test@chromium.org.spam.com',
      'test.google.com@test.com',
  )
  # One user object is reused; only its email changes between calls.
  commenter = user_pb2.MakeUser(111, email=emails[0])
  for email in emails:
    commenter.email = email
    res = self.spam_service.ClassifyComment(
        'this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])
| 308 | |
def testClassifyComment_Allowlisted(self):
  """ClassifyComment returns 0.0 for google.com and chromium.org users.

  _predict is replaced with assert_unreached, so the test fails if the
  service tries to run a prediction for these users.
  """
  self.spam_service._predict = assert_unreached

  # Prevent missing service inits to fail the test.
  self.spam_service.ml_engine = True

  emails = ('test@google.com', 'test@chromium.org')
  # One user object is reused; only its email changes between calls.
  commenter = user_pb2.MakeUser(111, email=emails[0])
  for email in emails:
    commenter.email = email
    res = self.spam_service.ClassifyComment(
        'this is spam', commenter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])
| 322 | |
def test_ham_classification(self):
  """ham_classification reports zero spam confidence and no fail-open."""
  verdict = self.spam_service.ham_classification()
  spam_confidence = verdict['confidence_is_spam']
  failed_open = verdict['failed_open']
  self.assertEqual(spam_confidence, 0.0)
  self.assertEqual(failed_open, False)
| 327 | |
def testExpungeUsersInSpam(self):
  """ExpungeUsersInSpam deletes report and verdict rows with commit=False.

  Report rows are deleted both by reported_user_id and by user_id; verdict
  rows are deleted once, by user_id.
  """
  user_ids = [3, 4, 5]
  self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=user_ids)

  report_delete = self.spam_service.report_tbl.Delete
  expected_report_calls = [
      mock.call(self.cnxn, reported_user_id=user_ids, commit=False),
      mock.call(self.cnxn, user_id=user_ids, commit=False),
  ]
  report_delete.assert_has_calls(expected_report_calls)

  verdict_delete = self.spam_service.verdict_tbl.Delete
  verdict_delete.assert_called_once_with(
      self.cnxn, user_id=user_ids, commit=False)
| 339 | |
def testLookupIssueVerdicts(self):
  """LookupIssueVerdicts maps each issue ID to the second column of its row."""
  # NOTE(review): the stubbed rows have two columns although three cols are
  # requested from Select below -- presumably only the first two are read.
  verdict_rows = [
      [5, 10], [4, 11], [6, 12],
  ]
  select_stub = Mock(return_value=verdict_rows)
  self.spam_service.verdict_tbl.Select = select_stub

  actual = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6])

  select_stub.assert_called_once_with(
      self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
      issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id'])
  expected = {
      5: 10,
      4: 11,
      6: 12,
  }
  self.assertEqual(actual, expected)