blob: 3aeba13cd570011b9e6c134efd0650e4c1193a07 [file] [log] [blame]
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
5
6"""Tests for the spam service."""
7from __future__ import print_function
8from __future__ import division
9from __future__ import absolute_import
10
11import mock
12import unittest
13
14import mox
15
16from google.appengine.ext import testbed
17
18import settings
19from framework import sql
20from framework import framework_constants
21from proto import user_pb2
22from proto import tracker_pb2
23from services import spam_svc
24from testing import fake
25from mock import Mock
26
27
def assert_unreached(*args, **kwargs):
  """Fail loudly if invoked; used as a stand-in for code that must not run.

  Accepts and ignores any arguments so that it can replace callables of any
  signature (for example a one-argument predictor). Without this, calling it
  with arguments would raise a TypeError about the signature instead of the
  intended message.

  Raises:
    Exception: always.
  """
  del args, kwargs  # Deliberately unused.
  raise Exception('This code should not have been called.')  # pragma: no cover
30
31
class SpamServiceTest(unittest.TestCase):
  """Unit tests for spam_svc.SpamService using mocked SQL table managers."""

  def setUp(self):
    """Activate the testbed and wire a SpamService to mock tables."""
    self.testbed = testbed.Testbed()
    self.testbed.activate()

    self.mox = mox.Mox()
    self.mock_report_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_verdict_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.mock_issue_tbl = self.mox.CreateMock(sql.SQLTableManager)
    self.cnxn = self.mox.CreateMock(sql.MonorailConnection)
    self.issue_service = fake.IssueService()
    self.spam_service = spam_svc.SpamService()
    self.spam_service.report_tbl = self.mock_report_tbl
    self.spam_service.verdict_tbl = self.mock_verdict_tbl
    self.spam_service.issue_tbl = self.mock_issue_tbl

    # Delete is replaced by a plain mock.Mock on both tables. Calls to it are
    # therefore only recorded; they are not mox expectations and are not
    # checked by self.mox.VerifyAll().
    self.spam_service.report_tbl.Delete = Mock()
    self.spam_service.verdict_tbl.Delete = Mock()

  def tearDown(self):
    """Deactivate the testbed and reset all mox state."""
    self.testbed.deactivate()
    self.mox.UnsetStubs()
    self.mox.ResetAll()

  def _ExpectClassifierQueueQueries(self, verdict_rows, total_count):
    """Record the two verdict-table queries made for the classifier queue.

    Args:
      verdict_rows: rows that the mocked verdict_tbl.Select() returns.
      total_count: value that the mocked verdict_tbl.SelectValue() returns.
    """
    where = [
        ('project_id = %s', [789]),
        ('classifier_confidence <= %s',
         [settings.classifier_moderation_thresh]),
        ('overruled = %s', [False]),
        ('issue_id IS NOT NULL', []),
    ]
    self.mock_verdict_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'is_spam', 'reason', 'classifier_confidence',
              'created'],
        where=where,
        order_by=[
            ('classifier_confidence ASC', []),
            ('created ASC', []),
        ],
        group_by=['issue_id'],
        offset=0,
        limit=10,
    ).AndReturn(verdict_rows)
    self.mock_verdict_tbl.SelectValue(
        self.cnxn, col='COUNT(*)', where=where).AndReturn(total_count)

  def testLookupIssuesFlaggers(self):
    """Report rows are grouped per issue into issue and comment flaggers."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234, 567, 890]).AndReturn([
            [234, 111, None],
            [234, 222, 1],
            [567, 333, None]])
    self.mox.ReplayAll()

    reporters = (
        self.spam_service.LookupIssuesFlaggers(self.cnxn, [234, 567, 890]))
    self.mox.VerifyAll()
    self.assertEqual({
        234: ([111], {1: [222]}),
        567: ([333], {}),
    }, reporters)

  def testLookupIssueFlaggers(self):
    """The single-issue lookup returns that issue's flaggers as a pair."""
    self.mock_report_tbl.Select(
        self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
        issue_id=[234]).AndReturn(
            [[234, 111, None], [234, 222, 1]])
    self.mox.ReplayAll()

    issue_reporters, comment_reporters = (
        self.spam_service.LookupIssueFlaggers(self.cnxn, 234))
    self.mox.VerifyAll()
    # assertItemsEqual only exists on Python 2 (it became assertCountEqual on
    # Python 3), so compare sorted contents instead; this works on both.
    self.assertEqual([111], sorted(issue_reporters))
    self.assertEqual({1: [222]}, comment_reporters)

  def testFlagIssues_overThresh(self):
    """Flagging at the report threshold stores a 'threshold' spam verdict."""
    issue = fake.MakeTestIssue(
        project_id=789,
        local_id=1,
        reporter_id=111,
        owner_id=456,
        summary='sum',
        status='Live',
        issue_id=78901,
        project_name='proj')
    issue.assume_stale = False  # We will store this issue.

    self.mock_report_tbl.InsertRows(
        self.cnxn,
        ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)], ignore=True)

    self.mock_report_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh)])
    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], issue_id=[78901],
        comment_id=None).AndReturn([])
    self.mock_verdict_tbl.InsertRows(
        self.cnxn, ['issue_id', 'is_spam', 'reason', 'project_id'],
        [(78901, True, 'threshold', 789)], ignore=True)

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, True)
    self.mox.VerifyAll()
    self.assertIn(issue, self.issue_service.updated_issues)

    # Exactly one 'flag' action should be recorded for this reporter/issue.
    self.assertEqual(
        1,
        self.spam_service.issue_actions.get(
            fields={
                'type': 'flag',
                'reporter_id': str(111),
                'issue': 'proj:1'
            }))

  def testFlagIssues_underThresh(self):
    """Flagging below the report threshold leaves the issue unchanged."""
    issue = fake.MakeTestIssue(
        project_id=789,
        local_id=1,
        reporter_id=111,
        owner_id=456,
        summary='sum',
        status='Live',
        issue_id=78901,
        project_name='proj')

    self.mock_report_tbl.InsertRows(
        self.cnxn,
        ['issue_id', 'reported_user_id', 'user_id'],
        [(78901, 111, 111)], ignore=True)

    self.mock_report_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn(
            [(78901, settings.spam_flag_thresh - 1)])

    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], issue_id=[78901],
        comment_id=None).AndReturn([])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, True)
    self.mox.VerifyAll()

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertIsNone(
        self.spam_service.issue_actions.get(
            fields={
                'type': 'flag',
                'reporter_id': str(111),
                'issue': 'proj:1'
            }))

  def testUnflagIssue_overThresh(self):
    """Un-flagging while still at the threshold keeps the issue as spam."""
    issue = fake.MakeTestIssue(
        project_id=789, local_id=1, reporter_id=111, owner_id=456,
        summary='sum', status='Live', issue_id=78901, is_spam=True)
    # NOTE: Delete is a plain Mock (see setUp), so this call is merely
    # recorded on the mock; it is not a mox expectation.
    self.mock_report_tbl.Delete(
        self.cnxn, issue_id=[issue.issue_id], comment_id=None, user_id=111)
    self.mock_report_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh)])

    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], issue_id=[78901],
        comment_id=None).AndReturn([])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(True, issue.is_spam)

  def testUnflagIssue_underThresh(self):
    """A non-member un-flagging an issue as spam should not be able
    to overturn the verdict to ham. This is different from previous
    behavior. See https://crbug.com/monorail/2232 for details."""
    issue = fake.MakeTestIssue(
        project_id=789, local_id=1, reporter_id=111, owner_id=456,
        summary='sum', status='Live', issue_id=78901, is_spam=True)
    issue.assume_stale = False  # We will store this issue.
    # NOTE: Delete is a plain Mock (see setUp), so this call is merely
    # recorded on the mock; it is not a mox expectation.
    self.mock_report_tbl.Delete(
        self.cnxn, issue_id=[issue.issue_id], comment_id=None, user_id=111)
    self.mock_report_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn(
            [(78901, settings.spam_flag_thresh - 1)])

    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], issue_id=[78901],
        comment_id=None).AndReturn([])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(True, issue.is_spam)

  def testUnflagIssue_underThreshNoManualOverride(self):
    """Un-flagging does not change an issue whose last verdict is 'manual'."""
    issue = fake.MakeTestIssue(
        project_id=789, local_id=1, reporter_id=111, owner_id=456,
        summary='sum', status='Live', issue_id=78901, is_spam=True)
    # NOTE: Delete is a plain Mock (see setUp), so this call is merely
    # recorded on the mock; it is not a mox expectation.
    self.mock_report_tbl.Delete(
        self.cnxn, issue_id=[issue.issue_id], comment_id=None, user_id=111)
    self.mock_report_tbl.Select(
        self.cnxn,
        cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
        issue_id=[78901]).AndReturn(
            [(78901, settings.spam_flag_thresh - 1)])

    self.mock_verdict_tbl.Select(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        group_by=['issue_id'], comment_id=None,
        issue_id=[78901]).AndReturn([(78901, 'manual', '')])

    self.mox.ReplayAll()
    self.spam_service.FlagIssues(
        self.cnxn, self.issue_service, [issue], 111, False)
    self.mox.VerifyAll()

    self.assertNotIn(issue, self.issue_service.updated_issues)
    self.assertEqual(True, issue.is_spam)

  def testGetIssueClassifierQueue_noVerdicts(self):
    """With no verdict rows the queue is empty and the count is zero."""
    self._ExpectClassifierQueueQueries([], 0)

    self.mox.ReplayAll()
    res, count = self.spam_service.GetIssueClassifierQueue(
        self.cnxn, self.issue_service, 789)
    self.mox.VerifyAll()

    self.assertEqual([], res)
    self.assertEqual(0, count)

  def testGetIssueClassifierQueue_someVerdicts(self):
    """A verdict row is surfaced as a queue entry with matching fields."""
    self._ExpectClassifierQueueQueries(
        [[78901, 0, "classifier", 0.9, "2015-12-10 11:06:24"]], 10)

    self.mox.ReplayAll()
    res, count = self.spam_service.GetIssueClassifierQueue(
        self.cnxn, self.issue_service, 789)
    self.mox.VerifyAll()
    self.assertEqual(1, len(res))
    self.assertEqual(10, count)
    self.assertEqual(78901, res[0].issue_id)
    self.assertEqual(False, res[0].is_spam)
    self.assertEqual("classifier", res[0].reason)
    self.assertEqual(0.9, res[0].classifier_confidence)
    self.assertEqual("2015-12-10 11:06:24", res[0].verdict_time)

  def testIsExempt_RegularUser(self):
    """Non-members are not exempt, even with a look-alike email domain."""
    author = user_pb2.MakeUser(111, email='test@example.com')
    self.assertFalse(self.spam_service._IsExempt(author, False))
    author = user_pb2.MakeUser(111, email='test@chromium.org.example.com')
    self.assertFalse(self.spam_service._IsExempt(author, False))

  def testIsExempt_ProjectMember(self):
    """Project members are exempt."""
    author = user_pb2.MakeUser(111, email='test@example.com')
    self.assertTrue(self.spam_service._IsExempt(author, True))

  def testIsExempt_AllowlistedDomain(self):
    """A google.com address is exempt even for a non-member."""
    author = user_pb2.MakeUser(111, email='test@google.com')
    self.assertTrue(self.spam_service._IsExempt(author, False))

  def testClassifyIssue_spam(self):
    """Non-exempt reporters get the predictor's confidence for an issue."""
    issue = fake.MakeTestIssue(
        project_id=789, local_id=1, reporter_id=111, owner_id=456,
        summary='sum', status='Live', issue_id=78901, is_spam=True)
    self.spam_service._predict = lambda body: 1.0

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    reporter = user_pb2.MakeUser(111, email='test@test.com')
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    # Addresses that merely contain an allowlisted domain are not exempt.
    reporter.email = 'test@chromium.org.spam.com'
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    reporter.email = 'test.google.com@test.com'
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

  def testClassifyIssue_Allowlisted(self):
    """Allowlisted reporters get 0.0 without the predictor being called."""
    issue = fake.MakeTestIssue(
        project_id=789, local_id=1, reporter_id=111, owner_id=456,
        summary='sum', status='Live', issue_id=78901, is_spam=True)
    self.spam_service._predict = assert_unreached

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    comment_pb = tracker_pb2.IssueComment()
    comment_pb.content = "this is spam"
    reporter = user_pb2.MakeUser(111, email='test@google.com')
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])
    reporter.email = 'test@chromium.org'
    res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])

  def testClassifyComment_spam(self):
    """Non-exempt commenters get the predictor's confidence for a comment."""
    self.spam_service._predict = lambda body: 1.0

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    commenter = user_pb2.MakeUser(111, email='test@test.com')
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    # Addresses that merely contain an allowlisted domain are not exempt.
    commenter.email = 'test@chromium.org.spam.com'
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

    commenter.email = 'test.google.com@test.com'
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(1.0, res['confidence_is_spam'])

  def testClassifyComment_Allowlisted(self):
    """Allowlisted commenters get 0.0 without the predictor being called."""
    self.spam_service._predict = assert_unreached

    # Prevent missing service inits to fail the test.
    self.spam_service.ml_engine = True

    commenter = user_pb2.MakeUser(111, email='test@google.com')
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])

    commenter.email = 'test@chromium.org'
    res = self.spam_service.ClassifyComment('this is spam', commenter, False)
    self.assertEqual(0.0, res['confidence_is_spam'])

  def test_ham_classification(self):
    """ham_classification() reports zero confidence and no fail-open."""
    actual = self.spam_service.ham_classification()
    self.assertEqual(0.0, actual['confidence_is_spam'])
    self.assertEqual(False, actual['failed_open'])

  def testExpungeUsersInSpam(self):
    """Expunging users deletes their report and verdict rows uncommitted."""
    user_ids = [3, 4, 5]
    self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=user_ids)

    self.spam_service.report_tbl.Delete.assert_has_calls(
        [
            mock.call(self.cnxn, reported_user_id=user_ids, commit=False),
            mock.call(self.cnxn, user_id=user_ids, commit=False)
        ])
    self.spam_service.verdict_tbl.Delete.assert_called_once_with(
        self.cnxn, user_id=user_ids, commit=False)

  def testLookupIssueVerdicts(self):
    """Verdict rows are returned as a dict keyed by the first column."""
    self.spam_service.verdict_tbl.Select = Mock(return_value=[
        [5, 10], [4, 11], [6, 12],
    ])
    actual = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6])

    self.spam_service.verdict_tbl.Select.assert_called_once_with(
        self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
        issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id'])
    self.assertEqual({
        5: 10,
        4: 11,
        6: 12,
    }, actual)