blob: 156269c8678008d759e4510332ca39a9106f7e64 [file] [log] [blame]
# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Copybara854996b2021-09-07 19:36:02 +00004
5"""Tests for the spam service."""
6from __future__ import print_function
7from __future__ import division
8from __future__ import absolute_import
9
10import mock
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +010011import six
12import time
Copybara854996b2021-09-07 19:36:02 +000013import unittest
14
Adrià Vilanova Martínez9f9ade52022-10-10 23:20:11 +020015try:
16 from mox3 import mox
17except ImportError:
18 import mox
Copybara854996b2021-09-07 19:36:02 +000019
20from google.appengine.ext import testbed
21
22import settings
23from framework import sql
24from framework import framework_constants
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +010025from infra_libs import ts_mon
26from mrproto import user_pb2
27from mrproto import tracker_pb2
Copybara854996b2021-09-07 19:36:02 +000028from services import spam_svc
29from testing import fake
30from mock import Mock
31
32
def assert_unreached(*_args, **_kwargs):
  """Stand-in callable that fails loudly if it is ever invoked.

  Tests install this in place of a collaborator that must not be reached
  (in this file, `SpamService._predict`). It accepts and ignores any
  arguments so that an unexpected call surfaces the message below rather
  than a TypeError about the number of arguments.

  Raises:
    Exception: always.
  """
  raise Exception('This code should not have been called.')  # pragma: no cover
35
36
class SpamServiceTest(unittest.TestCase):
  """Unit tests for spam_svc.SpamService.

  The service's report/verdict/issue table managers are swapped for mox
  mocks so that SQL interactions can be recorded and verified.  The
  `Delete` methods of the report and verdict tables are additionally
  replaced with `mock.Mock` objects; calls to them are therefore checked
  after the fact with `assert_*` helpers, not with mox record/replay.
  """

  def setUp(self):
    """Activate the testbed and wire a SpamService to mocked tables."""
    self.testbed = testbed.Testbed()
    self.testbed.activate()

    self.mox = mox.Mox()
    self.mock_report_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_verdict_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_issue_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.cnxn = self.mox.CreateMock(sql.MonorailConnection)
    self.issue_service = fake.IssueService()
    self.spam_service = spam_svc.SpamService()
    self.spam_service.report_tbl = self.mock_report_tbl
    self.spam_service.verdict_tbl = self.mock_verdict_tbl
    self.spam_service.issue_tbl = self.mock_issue_tbl

    # From here on `Delete` on these two tables is a mock.Mock, not a mox
    # method: calling it in a test records a call instead of an expectation.
    self.spam_service.report_tbl.Delete = Mock()
    self.spam_service.verdict_tbl.Delete = Mock()
    # Lower bound for "timestamp was refreshed" assertions.
    self.now = int(time.time())

    ts_mon.reset_for_unittest()

  def tearDown(self):
    """Deactivate the testbed and drop all mox state."""
    self.testbed.deactivate()
    self.mox.UnsetStubs()
    self.mox.ResetAll()

  def _MakeIssue(self, is_spam, **kwargs):
    """Build the issue fixture (id 78901, project 789) shared by the tests.

    Args:
      is_spam: initial value of the issue's is_spam flag.
      **kwargs: extra keyword arguments forwarded to fake.MakeTestIssue.

    Returns:
      Whatever fake.MakeTestIssue returns for these arguments.
    """
    return fake.MakeTestIssue(
        project_id=789,
        local_id=1,
        reporter_id=111,
        owner_id=456,
        summary='sum',
        status='Live',
        issue_id=78901,
        is_spam=is_spam,
        **kwargs)

  def _ExpectReportCount(self, count):
    """Record the per-issue report count query, answering with `count`."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn([(78901, count)])

  def _ExpectVerdictLookup(self, rows):
    """Record the latest-verdict query for the fixture, answering `rows`."""
    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], issue_id=[78901],
        comment_id=None).AndReturn(rows)

  def _AssertReportDeleted(self, issue):
    """Check that exactly one report row deletion was requested for `issue`.

    `report_tbl.Delete` is a mock.Mock (see setUp), so this has to be a
    post-hoc assertion; a mox-style "expectation" call would only add an
    extra recorded call and verify nothing.
    """
    self.spam_service.report_tbl.Delete.assert_called_once_with(
        self.cnxn, issue_id=[issue.issue_id], comment_id=None, user_id=111)

  def testLookupIssuesFlaggers(self):
    """Report rows are grouped per issue into issue- and comment-flaggers."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234, 567, 890]).AndReturn([
            [234, 111, None],
            [234, 222, 1],
            [567, 333, None]])
    self.mox.ReplayAll()

    reporters = (
        self.spam_service.LookupIssuesFlaggers(self.cnxn, [234, 567, 890]))
    self.mox.VerifyAll()
    # Issue 890 has no rows and is expected to be absent from the result.
    self.assertEqual({
        234: ([111], {1: [222]}),
        567: ([333], {}),
    }, reporters)

  def testLookupIssueFlaggers(self):
    """The single-issue lookup returns the (issue, comment) flagger pair."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234]).AndReturn(
            [[234, 111, None], [234, 222, 1]])
    self.mox.ReplayAll()

    issue_reporters, comment_reporters = (
        self.spam_service.LookupIssueFlaggers(self.cnxn, 234))
    self.mox.VerifyAll()
    six.assertCountEqual(self, [111], issue_reporters)
    self.assertEqual({1: [222]}, comment_reporters)

  def testFlagIssues_overThresh(self):
    """Reaching the flag threshold marks the issue as spam and stores it."""
    issue = self._MakeIssue(
        False, project_name='proj', migration_modified_timestamp=1234567)
    issue.assume_stale = False  # We will store this issue.

    self.mock_report_tbl.InsertRows(
        self.cnxn, ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)], ignore=True)
    self._ExpectReportCount(settings.spam_flag_thresh)
    self._ExpectVerdictLookup([])
    self.mock_verdict_tbl.InsertRows(
        self.cnxn, ['issue_id', 'is_spam', 'reason', 'project_id'],
        [(78901, True, 'threshold', 789)], ignore=True)

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, True)
    finished = int(time.time())
    self.mox.VerifyAll()

    self.assertIn(issue, self.issue_service.updated_issues)
    # Presumably the service stamps the issue with the wall clock at call
    # time; comparing for equality with the value captured in setUp would be
    # flaky whenever a second boundary is crossed in between, so accept any
    # value in the [setUp, after-call] window instead.
    self.assertGreaterEqual(issue.migration_modified_timestamp, self.now)
    self.assertLessEqual(issue.migration_modified_timestamp, finished)
    self.assertEqual(issue.is_spam, True)

    self.assertEqual(
        1,
        self.spam_service.issue_actions.get(
            fields={
                'type': 'flag',
                'reporter_id': str(111),
                'issue': 'proj:1'
            }))

  def testFlagIssues_underThresh(self):
    """Below the flag threshold the issue is left untouched."""
    issue = self._MakeIssue(
        False, project_name='proj', migration_modified_timestamp=1234567)

    self.mock_report_tbl.InsertRows(
        self.cnxn, ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)], ignore=True)
    self._ExpectReportCount(settings.spam_flag_thresh - 1)
    self._ExpectVerdictLookup([])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, True)
    self.mox.VerifyAll()

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(issue.migration_modified_timestamp, 1234567)
    self.assertEqual(issue.is_spam, False)
    self.assertIsNone(
        self.spam_service.issue_actions.get(
            fields={
                'type': 'flag',
                'reporter_id': str(111),
                'issue': 'proj:1'
            }))

  def testUnflagIssue_overThresh(self):
    """Un-flagging while still at the threshold keeps the spam verdict."""
    issue = self._MakeIssue(True, migration_modified_timestamp=1234567)
    self._ExpectReportCount(settings.spam_flag_thresh)
    self._ExpectVerdictLookup([])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self._AssertReportDeleted(issue)
    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(issue.migration_modified_timestamp, 1234567)
    self.assertEqual(issue.is_spam, True)

  def testUnflagIssue_underThresh(self):
    """A non-member un-flagging an issue as spam should not be able
    to overturn the verdict to ham. This is different from previous
    behavior. See https://crbug.com/monorail/2232 for details."""
    issue = self._MakeIssue(True, migration_modified_timestamp=1234567)
    issue.assume_stale = False  # We will store this issue.
    self._ExpectReportCount(settings.spam_flag_thresh - 1)
    self._ExpectVerdictLookup([])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self._AssertReportDeleted(issue)
    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(issue.migration_modified_timestamp, 1234567)
    self.assertEqual(issue.is_spam, True)

  def testUnflagIssue_underThreshNoManualOverride(self):
    """An existing 'manual' verdict is not overturned by un-flagging."""
    issue = self._MakeIssue(True, migration_modified_timestamp=1234567)
    self._ExpectReportCount(settings.spam_flag_thresh - 1)
    self._ExpectVerdictLookup([(78901, 'manual', '')])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self._AssertReportDeleted(issue)
    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(issue.migration_modified_timestamp, 1234567)
    self.assertEqual(issue.is_spam, True)

  def testIsExempt_RegularUser(self):
    """Non-members are not exempt, even with a look-alike domain suffix."""
    author = user_pb2.MakeUser(111, email='test@example.com')
    self.assertFalse(self.spam_service._IsExempt(author, False))
    author = user_pb2.MakeUser(111, email='test@chromium.org.example.com')
    self.assertFalse(self.spam_service._IsExempt(author, False))

  def testIsExempt_ProjectMember(self):
    """Project members are exempt regardless of their e-mail domain."""
    author = user_pb2.MakeUser(111, email='test@example.com')
    self.assertTrue(self.spam_service._IsExempt(author, True))

  def testIsExempt_AllowlistedDomain(self):
    """A google.com address is exempt even for a non-member."""
    author = user_pb2.MakeUser(111, email='test@google.com')
    self.assertTrue(self.spam_service._IsExempt(author, False))

  def testClassifyIssue_spam(self):
    """Issues from non-exempt reporters get the predictor's confidence."""
    issue = self._MakeIssue(True)
    self.spam_service._predict = lambda body: 1.0

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    # The last two addresses merely embed an allowlisted domain name.
    suspicious_emails = (
        'test@test.com',
        'test@chromium.org.spam.com',
        'test.google.com@test.com',
    )
    for email in suspicious_emails:
      reporter = user_pb2.MakeUser(111, email=email)
      res = self.spam_service.ClassifyIssue(
          issue, comment_pb, reporter, False)
      self.assertEqual(1.0, res['confidence_is_spam'])

  def testClassifyIssue_Allowlisted(self):
    """Issues from allowlisted domains are classified with confidence 0."""
    issue = self._MakeIssue(True)
    self.spam_service._predict = assert_unreached

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    for email in ('test@google.com', 'test@chromium.org'):
      reporter = user_pb2.MakeUser(111, email=email)
      res = self.spam_service.ClassifyIssue(
          issue, comment_pb, reporter, False)
      self.assertEqual(0.0, res['confidence_is_spam'])

  def testClassifyComment_spam(self):
    """Comments from non-exempt users get the predictor's confidence."""
    self.spam_service._predict = lambda body: 1.0

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    suspicious_emails = (
        'test@test.com',
        'test@chromium.org.spam.com',
        'test.google.com@test.com',
    )
    for email in suspicious_emails:
      commenter = user_pb2.MakeUser(111, email=email)
      res = self.spam_service.ClassifyComment(
          'this is spam', commenter, False)
      self.assertEqual(1.0, res['confidence_is_spam'])

  def testClassifyComment_Allowlisted(self):
    """Comments from allowlisted domains are classified with confidence 0."""
    self.spam_service._predict = assert_unreached

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    for email in ('test@google.com', 'test@chromium.org'):
      commenter = user_pb2.MakeUser(111, email=email)
      res = self.spam_service.ClassifyComment(
          'this is spam', commenter, False)
      self.assertEqual(0.0, res['confidence_is_spam'])

  def test_ham_classification(self):
    """The default ham result has zero confidence and did not fail open."""
    actual = self.spam_service.ham_classification()
    self.assertEqual(actual['confidence_is_spam'], 0.0)
    self.assertEqual(actual['failed_open'], False)

  def testExpungeUsersInSpam(self):
    """Expunging users deletes their report and verdict rows uncommitted."""
    user_ids = [3, 4, 5]
    self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=user_ids)

    # Reports are removed both where the users were reported and where they
    # were the ones reporting.
    self.spam_service.report_tbl.Delete.assert_has_calls(
        [
            mock.call(self.cnxn, reported_user_id=user_ids, commit=False),
            mock.call(self.cnxn, user_id=user_ids, commit=False)
        ])
    self.spam_service.verdict_tbl.Delete.assert_called_once_with(
        self.cnxn, user_id=user_ids, commit=False)

  def testLookupIssueVerdicts(self):
    """Verdict rows are returned as a dict keyed by their first column."""
    # The canned rows carry two values each; the expected dict below maps
    # each row's first element to its second.
    self.spam_service.verdict_tbl.Select = Mock(return_value=[
        [5, 10], [4, 11], [6, 12],
    ])
    actual = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6])

    self.spam_service.verdict_tbl.Select.assert_called_once_with(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id'])
    self.assertEqual(actual, {
        5: 10,
        4: 11,
        6: 12,
    })