Project import generated by Copybara.
GitOrigin-RevId: d9e9e3fb4e31372ec1fb43b178994ca78fa8fe70
diff --git a/services/test/spam_svc_test.py b/services/test/spam_svc_test.py
new file mode 100644
index 0000000..3aeba13
--- /dev/null
+++ b/services/test/spam_svc_test.py
@@ -0,0 +1,433 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+"""Tests for the spam service."""
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+
+import mock
+import unittest
+
+import mox
+
+from google.appengine.ext import testbed
+
+import settings
+from framework import sql
+from framework import framework_constants
+from proto import user_pb2
+from proto import tracker_pb2
+from services import spam_svc
+from testing import fake
+from mock import Mock
+
+
+def assert_unreached():
+ raise Exception('This code should not have been called.') # pragma: no cover
+
+
+class SpamServiceTest(unittest.TestCase):
+
+ def setUp(self):
+ self.testbed = testbed.Testbed()
+ self.testbed.activate()
+
+ self.mox = mox.Mox()
+ self.mock_report_tbl = self.mox.CreateMock(sql.SQLTableManager)
+ self.mock_verdict_tbl = self.mox.CreateMock(sql.SQLTableManager)
+ self.mock_issue_tbl = self.mox.CreateMock(sql.SQLTableManager)
+ self.cnxn = self.mox.CreateMock(sql.MonorailConnection)
+ self.issue_service = fake.IssueService()
+ self.spam_service = spam_svc.SpamService()
+ self.spam_service.report_tbl = self.mock_report_tbl
+ self.spam_service.verdict_tbl = self.mock_verdict_tbl
+ self.spam_service.issue_tbl = self.mock_issue_tbl
+
+ self.spam_service.report_tbl.Delete = Mock()
+ self.spam_service.verdict_tbl.Delete = Mock()
+
+ def tearDown(self):
+ self.testbed.deactivate()
+ self.mox.UnsetStubs()
+ self.mox.ResetAll()
+
+ def testLookupIssuesFlaggers(self):
+ self.mock_report_tbl.Select(
+ self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
+ issue_id=[234, 567, 890]).AndReturn([
+ [234, 111, None],
+ [234, 222, 1],
+ [567, 333, None]])
+ self.mox.ReplayAll()
+
+ reporters = (
+ self.spam_service.LookupIssuesFlaggers(self.cnxn, [234, 567, 890]))
+ self.mox.VerifyAll()
+ self.assertEqual({
+ 234: ([111], {1: [222]}),
+ 567: ([333], {}),
+ }, reporters)
+
+ def testLookupIssueFlaggers(self):
+ self.mock_report_tbl.Select(
+ self.cnxn, cols=['issue_id', 'user_id', 'comment_id'],
+ issue_id=[234]).AndReturn(
+ [[234, 111, None], [234, 222, 1]])
+ self.mox.ReplayAll()
+
+ issue_reporters, comment_reporters = (
+ self.spam_service.LookupIssueFlaggers(self.cnxn, 234))
+ self.mox.VerifyAll()
+ self.assertItemsEqual([111], issue_reporters)
+ self.assertEqual({1: [222]}, comment_reporters)
+
+ def testFlagIssues_overThresh(self):
+ issue = fake.MakeTestIssue(
+ project_id=789,
+ local_id=1,
+ reporter_id=111,
+ owner_id=456,
+ summary='sum',
+ status='Live',
+ issue_id=78901,
+ project_name='proj')
+ issue.assume_stale = False # We will store this issue.
+
+ self.mock_report_tbl.InsertRows(self.cnxn,
+ ['issue_id', 'reported_user_id', 'user_id'],
+ [(78901, 111, 111)], ignore=True)
+
+ self.mock_report_tbl.Select(self.cnxn,
+ cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
+ issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh)])
+ self.mock_verdict_tbl.Select(
+ self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
+ group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([])
+ self.mock_verdict_tbl.InsertRows(
+ self.cnxn, ['issue_id', 'is_spam', 'reason', 'project_id'],
+ [(78901, True, 'threshold', 789)], ignore=True)
+
+ self.mox.ReplayAll()
+ self.spam_service.FlagIssues(
+ self.cnxn, self.issue_service, [issue], 111, True)
+ self.mox.VerifyAll()
+ self.assertIn(issue, self.issue_service.updated_issues)
+
+ self.assertEqual(
+ 1,
+ self.spam_service.issue_actions.get(
+ fields={
+ 'type': 'flag',
+ 'reporter_id': str(111),
+ 'issue': 'proj:1'
+ }))
+
+ def testFlagIssues_underThresh(self):
+ issue = fake.MakeTestIssue(
+ project_id=789,
+ local_id=1,
+ reporter_id=111,
+ owner_id=456,
+ summary='sum',
+ status='Live',
+ issue_id=78901,
+ project_name='proj')
+
+ self.mock_report_tbl.InsertRows(self.cnxn,
+ ['issue_id', 'reported_user_id', 'user_id'],
+ [(78901, 111, 111)], ignore=True)
+
+ self.mock_report_tbl.Select(self.cnxn,
+ cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
+ issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh - 1)])
+
+ self.mock_verdict_tbl.Select(
+ self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
+ group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([])
+
+ self.mox.ReplayAll()
+ self.spam_service.FlagIssues(
+ self.cnxn, self.issue_service, [issue], 111, True)
+ self.mox.VerifyAll()
+
+ self.assertNotIn(issue, self.issue_service.updated_issues)
+ self.assertIsNone(
+ self.spam_service.issue_actions.get(
+ fields={
+ 'type': 'flag',
+ 'reporter_id': str(111),
+ 'issue': 'proj:1'
+ }))
+
+ def testUnflagIssue_overThresh(self):
+ issue = fake.MakeTestIssue(
+ project_id=789, local_id=1, reporter_id=111, owner_id=456,
+ summary='sum', status='Live', issue_id=78901, is_spam=True)
+ self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id],
+ comment_id=None, user_id=111)
+ self.mock_report_tbl.Select(self.cnxn,
+ cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
+ issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh)])
+
+ self.mock_verdict_tbl.Select(
+ self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
+ group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([])
+
+ self.mox.ReplayAll()
+ self.spam_service.FlagIssues(
+ self.cnxn, self.issue_service, [issue], 111, False)
+ self.mox.VerifyAll()
+
+ self.assertNotIn(issue, self.issue_service.updated_issues)
+ self.assertEqual(True, issue.is_spam)
+
+ def testUnflagIssue_underThresh(self):
+ """A non-member un-flagging an issue as spam should not be able
+ to overturn the verdict to ham. This is different from previous
+ behavior. See https://crbug.com/monorail/2232 for details."""
+ issue = fake.MakeTestIssue(
+ project_id=789, local_id=1, reporter_id=111, owner_id=456,
+ summary='sum', status='Live', issue_id=78901, is_spam=True)
+ issue.assume_stale = False # We will store this issue.
+ self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id],
+ comment_id=None, user_id=111)
+ self.mock_report_tbl.Select(self.cnxn,
+ cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
+ issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh - 1)])
+
+ self.mock_verdict_tbl.Select(
+ self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
+ group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([])
+
+ self.mox.ReplayAll()
+ self.spam_service.FlagIssues(
+ self.cnxn, self.issue_service, [issue], 111, False)
+ self.mox.VerifyAll()
+
+ self.assertNotIn(issue, self.issue_service.updated_issues)
+ self.assertEqual(True, issue.is_spam)
+
+ def testUnflagIssue_underThreshNoManualOverride(self):
+ issue = fake.MakeTestIssue(
+ project_id=789, local_id=1, reporter_id=111, owner_id=456,
+ summary='sum', status='Live', issue_id=78901, is_spam=True)
+ self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id],
+ comment_id=None, user_id=111)
+ self.mock_report_tbl.Select(self.cnxn,
+ cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'],
+ issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh - 1)])
+
+ self.mock_verdict_tbl.Select(
+ self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
+ group_by=['issue_id'], comment_id=None,
+ issue_id=[78901]).AndReturn([(78901, 'manual', '')])
+
+ self.mox.ReplayAll()
+ self.spam_service.FlagIssues(
+ self.cnxn, self.issue_service, [issue], 111, False)
+ self.mox.VerifyAll()
+
+ self.assertNotIn(issue, self.issue_service.updated_issues)
+ self.assertEqual(True, issue.is_spam)
+
+ def testGetIssueClassifierQueue_noVerdicts(self):
+ self.mock_verdict_tbl.Select(self.cnxn,
+ cols=['issue_id', 'is_spam', 'reason', 'classifier_confidence',
+ 'created'],
+ where=[
+ ('project_id = %s', [789]),
+ ('classifier_confidence <= %s',
+ [settings.classifier_moderation_thresh]),
+ ('overruled = %s', [False]),
+ ('issue_id IS NOT NULL', []),
+ ],
+ order_by=[
+ ('classifier_confidence ASC', []),
+ ('created ASC', [])
+ ],
+ group_by=['issue_id'],
+ offset=0,
+ limit=10,
+ ).AndReturn([])
+
+ self.mock_verdict_tbl.SelectValue(self.cnxn,
+ col='COUNT(*)',
+ where=[
+ ('project_id = %s', [789]),
+ ('classifier_confidence <= %s',
+ [settings.classifier_moderation_thresh]),
+ ('overruled = %s', [False]),
+ ('issue_id IS NOT NULL', []),
+ ]).AndReturn(0)
+
+ self.mox.ReplayAll()
+ res, count = self.spam_service.GetIssueClassifierQueue(
+ self.cnxn, self.issue_service, 789)
+ self.mox.VerifyAll()
+
+ self.assertEqual([], res)
+ self.assertEqual(0, count)
+
+ def testGetIssueClassifierQueue_someVerdicts(self):
+ self.mock_verdict_tbl.Select(self.cnxn,
+ cols=['issue_id', 'is_spam', 'reason', 'classifier_confidence',
+ 'created'],
+ where=[
+ ('project_id = %s', [789]),
+ ('classifier_confidence <= %s',
+ [settings.classifier_moderation_thresh]),
+ ('overruled = %s', [False]),
+ ('issue_id IS NOT NULL', []),
+ ],
+ order_by=[
+ ('classifier_confidence ASC', []),
+ ('created ASC', [])
+ ],
+ group_by=['issue_id'],
+ offset=0,
+ limit=10,
+ ).AndReturn([[78901, 0, "classifier", 0.9, "2015-12-10 11:06:24"]])
+
+ self.mock_verdict_tbl.SelectValue(self.cnxn,
+ col='COUNT(*)',
+ where=[
+ ('project_id = %s', [789]),
+ ('classifier_confidence <= %s',
+ [settings.classifier_moderation_thresh]),
+ ('overruled = %s', [False]),
+ ('issue_id IS NOT NULL', []),
+ ]).AndReturn(10)
+
+ self.mox.ReplayAll()
+ res, count = self.spam_service.GetIssueClassifierQueue(
+ self.cnxn, self.issue_service, 789)
+ self.mox.VerifyAll()
+ self.assertEqual(1, len(res))
+ self.assertEqual(10, count)
+ self.assertEqual(78901, res[0].issue_id)
+ self.assertEqual(False, res[0].is_spam)
+ self.assertEqual("classifier", res[0].reason)
+ self.assertEqual(0.9, res[0].classifier_confidence)
+ self.assertEqual("2015-12-10 11:06:24", res[0].verdict_time)
+
+ def testIsExempt_RegularUser(self):
+ author = user_pb2.MakeUser(111, email='test@example.com')
+ self.assertFalse(self.spam_service._IsExempt(author, False))
+ author = user_pb2.MakeUser(111, email='test@chromium.org.example.com')
+ self.assertFalse(self.spam_service._IsExempt(author, False))
+
+ def testIsExempt_ProjectMember(self):
+ author = user_pb2.MakeUser(111, email='test@example.com')
+ self.assertTrue(self.spam_service._IsExempt(author, True))
+
+ def testIsExempt_AllowlistedDomain(self):
+ author = user_pb2.MakeUser(111, email='test@google.com')
+ self.assertTrue(self.spam_service._IsExempt(author, False))
+
+ def testClassifyIssue_spam(self):
+ issue = fake.MakeTestIssue(
+ project_id=789, local_id=1, reporter_id=111, owner_id=456,
+ summary='sum', status='Live', issue_id=78901, is_spam=True)
+ self.spam_service._predict = lambda body: 1.0
+
+ # Prevent missing service inits to fail the test.
+ self.spam_service.ml_engine = True
+
+ comment_pb = tracker_pb2.IssueComment()
+ comment_pb.content = "this is spam"
+ reporter = user_pb2.MakeUser(111, email='test@test.com')
+ res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
+ self.assertEqual(1.0, res['confidence_is_spam'])
+
+ reporter.email = 'test@chromium.org.spam.com'
+ res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
+ self.assertEqual(1.0, res['confidence_is_spam'])
+
+ reporter.email = 'test.google.com@test.com'
+ res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
+ self.assertEqual(1.0, res['confidence_is_spam'])
+
+ def testClassifyIssue_Allowlisted(self):
+ issue = fake.MakeTestIssue(
+ project_id=789, local_id=1, reporter_id=111, owner_id=456,
+ summary='sum', status='Live', issue_id=78901, is_spam=True)
+ self.spam_service._predict = assert_unreached
+
+ # Prevent missing service inits to fail the test.
+ self.spam_service.ml_engine = True
+
+ comment_pb = tracker_pb2.IssueComment()
+ comment_pb.content = "this is spam"
+ reporter = user_pb2.MakeUser(111, email='test@google.com')
+ res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
+ self.assertEqual(0.0, res['confidence_is_spam'])
+ reporter.email = 'test@chromium.org'
+ res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False)
+ self.assertEqual(0.0, res['confidence_is_spam'])
+
+ def testClassifyComment_spam(self):
+ self.spam_service._predict = lambda body: 1.0
+
+ # Prevent missing service inits to fail the test.
+ self.spam_service.ml_engine = True
+
+ commenter = user_pb2.MakeUser(111, email='test@test.com')
+ res = self.spam_service.ClassifyComment('this is spam', commenter, False)
+ self.assertEqual(1.0, res['confidence_is_spam'])
+
+ commenter.email = 'test@chromium.org.spam.com'
+ res = self.spam_service.ClassifyComment('this is spam', commenter, False)
+ self.assertEqual(1.0, res['confidence_is_spam'])
+
+ commenter.email = 'test.google.com@test.com'
+ res = self.spam_service.ClassifyComment('this is spam', commenter, False)
+ self.assertEqual(1.0, res['confidence_is_spam'])
+
+ def testClassifyComment_Allowlisted(self):
+ self.spam_service._predict = assert_unreached
+
+ # Prevent missing service inits to fail the test.
+ self.spam_service.ml_engine = True
+
+ commenter = user_pb2.MakeUser(111, email='test@google.com')
+ res = self.spam_service.ClassifyComment('this is spam', commenter, False)
+ self.assertEqual(0.0, res['confidence_is_spam'])
+
+ commenter.email = 'test@chromium.org'
+ res = self.spam_service.ClassifyComment('this is spam', commenter, False)
+ self.assertEqual(0.0, res['confidence_is_spam'])
+
+ def test_ham_classification(self):
+ actual = self.spam_service.ham_classification()
+ self.assertEqual(actual['confidence_is_spam'], 0.0)
+ self.assertEqual(actual['failed_open'], False)
+
+ def testExpungeUsersInSpam(self):
+ user_ids = [3, 4, 5]
+ self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=user_ids)
+
+ self.spam_service.report_tbl.Delete.assert_has_calls(
+ [
+ mock.call(self.cnxn, reported_user_id=user_ids, commit=False),
+ mock.call(self.cnxn, user_id=user_ids, commit=False)
+ ])
+ self.spam_service.verdict_tbl.Delete.assert_called_once_with(
+ self.cnxn, user_id=user_ids, commit=False)
+
+ def testLookupIssueVerdicts(self):
+ self.spam_service.verdict_tbl.Select = Mock(return_value=[
+ [5, 10], [4, 11], [6, 12],
+ ])
+ actual = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6])
+
+ self.spam_service.verdict_tbl.Select.assert_called_once_with(
+ self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'],
+ issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id'])
+ self.assertEqual(actual, {
+ 5: 10,
+ 4: 11,
+ 6: 12,
+ })