| # Copyright 2016 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Tests for the spam service.""" |
| from __future__ import print_function |
| from __future__ import division |
| from __future__ import absolute_import |
| |
| import mock |
| import six |
| import time |
| import unittest |
| |
| try: |
| from mox3 import mox |
| except ImportError: |
| import mox |
| |
| from google.appengine.ext import testbed |
| |
| import settings |
| from framework import sql |
| from framework import framework_constants |
| from infra_libs import ts_mon |
| from mrproto import user_pb2 |
| from mrproto import tracker_pb2 |
| from services import spam_svc |
| from testing import fake |
| from mock import Mock |
| |
| |
def assert_unreached(*args, **kwargs):
  """Fail loudly when code that must not run is invoked.

  Used as a stand-in for callables (for example a service's `_predict` hook)
  that a test expects to be skipped entirely.  Any arguments are accepted and
  ignored so that an unexpected call produces the message below rather than a
  TypeError about the call signature.

  Raises:
    Exception: always.
  """
  del args, kwargs  # Accepted only so any call signature reaches the raise.
  raise Exception('This code should not have been called.')  # pragma: no cover
| |
| |
| class SpamServiceTest(unittest.TestCase): |
| |
| def setUp(self): |
| self.testbed = testbed.Testbed() |
| self.testbed.activate() |
| |
| self.mox = mox.Mox() |
| self.mock_report_tbl = self.mox.CreateMock(sql.SQLTableManager) |
| self.mock_verdict_tbl = self.mox.CreateMock(sql.SQLTableManager) |
| self.mock_issue_tbl = self.mox.CreateMock(sql.SQLTableManager) |
| self.cnxn = self.mox.CreateMock(sql.MonorailConnection) |
| self.issue_service = fake.IssueService() |
| self.spam_service = spam_svc.SpamService() |
| self.spam_service.report_tbl = self.mock_report_tbl |
| self.spam_service.verdict_tbl = self.mock_verdict_tbl |
| self.spam_service.issue_tbl = self.mock_issue_tbl |
| |
| self.spam_service.report_tbl.Delete = Mock() |
| self.spam_service.verdict_tbl.Delete = Mock() |
| self.now = int(time.time()) |
| |
| ts_mon.reset_for_unittest() |
| |
| def tearDown(self): |
| self.testbed.deactivate() |
| self.mox.UnsetStubs() |
| self.mox.ResetAll() |
| |
| def testLookupIssuesFlaggers(self): |
| self.mock_report_tbl.Select( |
| self.cnxn, cols=['issue_id', 'user_id', 'comment_id'], |
| issue_id=[234, 567, 890]).AndReturn([ |
| [234, 111, None], |
| [234, 222, 1], |
| [567, 333, None]]) |
| self.mox.ReplayAll() |
| |
| reporters = ( |
| self.spam_service.LookupIssuesFlaggers(self.cnxn, [234, 567, 890])) |
| self.mox.VerifyAll() |
| self.assertEqual({ |
| 234: ([111], {1: [222]}), |
| 567: ([333], {}), |
| }, reporters) |
| |
| def testLookupIssueFlaggers(self): |
| self.mock_report_tbl.Select( |
| self.cnxn, cols=['issue_id', 'user_id', 'comment_id'], |
| issue_id=[234]).AndReturn( |
| [[234, 111, None], [234, 222, 1]]) |
| self.mox.ReplayAll() |
| |
| issue_reporters, comment_reporters = ( |
| self.spam_service.LookupIssueFlaggers(self.cnxn, 234)) |
| self.mox.VerifyAll() |
| six.assertCountEqual(self, [111], issue_reporters) |
| self.assertEqual({1: [222]}, comment_reporters) |
| |
| def testFlagIssues_overThresh(self): |
| issue = fake.MakeTestIssue( |
| project_id=789, |
| local_id=1, |
| reporter_id=111, |
| owner_id=456, |
| summary='sum', |
| status='Live', |
| issue_id=78901, |
| project_name='proj', |
| migration_modified_timestamp=1234567, |
| is_spam=False) |
| issue.assume_stale = False # We will store this issue. |
| |
| self.mock_report_tbl.InsertRows(self.cnxn, |
| ['issue_id', 'reported_user_id', 'user_id'], |
| [(78901, 111, 111)], ignore=True) |
| |
| self.mock_report_tbl.Select(self.cnxn, |
| cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'], |
| issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh)]) |
| self.mock_verdict_tbl.Select( |
| self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'], |
| group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([]) |
| self.mock_verdict_tbl.InsertRows( |
| self.cnxn, ['issue_id', 'is_spam', 'reason', 'project_id'], |
| [(78901, True, 'threshold', 789)], ignore=True) |
| |
| self.mox.ReplayAll() |
| self.spam_service.FlagIssues( |
| self.cnxn, self.issue_service, [issue], 111, True) |
| self.mox.VerifyAll() |
| self.assertIn(issue, self.issue_service.updated_issues) |
| self.assertEqual(issue.migration_modified_timestamp, self.now) |
| self.assertEqual(issue.is_spam, True) |
| |
| self.assertEqual( |
| 1, |
| self.spam_service.issue_actions.get( |
| fields={ |
| 'type': 'flag', |
| 'reporter_id': str(111), |
| 'issue': 'proj:1' |
| })) |
| |
| def testFlagIssues_underThresh(self): |
| issue = fake.MakeTestIssue( |
| project_id=789, |
| local_id=1, |
| reporter_id=111, |
| owner_id=456, |
| summary='sum', |
| status='Live', |
| issue_id=78901, |
| project_name='proj', |
| migration_modified_timestamp=1234567, |
| is_spam=False) |
| |
| self.mock_report_tbl.InsertRows(self.cnxn, |
| ['issue_id', 'reported_user_id', 'user_id'], |
| [(78901, 111, 111)], ignore=True) |
| |
| self.mock_report_tbl.Select(self.cnxn, |
| cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'], |
| issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh - 1)]) |
| |
| self.mock_verdict_tbl.Select( |
| self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'], |
| group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([]) |
| |
| self.mox.ReplayAll() |
| self.spam_service.FlagIssues( |
| self.cnxn, self.issue_service, [issue], 111, True) |
| self.mox.VerifyAll() |
| |
| self.assertNotIn(issue, self.issue_service.updated_issues) |
| self.assertEqual(issue.migration_modified_timestamp, 1234567) |
| self.assertEqual(issue.is_spam, False) |
| self.assertIsNone( |
| self.spam_service.issue_actions.get( |
| fields={ |
| 'type': 'flag', |
| 'reporter_id': str(111), |
| 'issue': 'proj:1' |
| })) |
| |
| def testUnflagIssue_overThresh(self): |
| issue = fake.MakeTestIssue( |
| project_id=789, |
| local_id=1, |
| reporter_id=111, |
| owner_id=456, |
| summary='sum', |
| status='Live', |
| issue_id=78901, |
| migration_modified_timestamp=1234567, |
| is_spam=True) |
| self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id], |
| comment_id=None, user_id=111) |
| self.mock_report_tbl.Select(self.cnxn, |
| cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'], |
| issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh)]) |
| |
| self.mock_verdict_tbl.Select( |
| self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'], |
| group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([]) |
| |
| self.mox.ReplayAll() |
| self.spam_service.FlagIssues( |
| self.cnxn, self.issue_service, [issue], 111, False) |
| self.mox.VerifyAll() |
| |
| self.assertNotIn(issue, self.issue_service.updated_issues) |
| self.assertEqual(issue.migration_modified_timestamp, 1234567) |
| self.assertEqual(issue.is_spam, True) |
| |
| def testUnflagIssue_underThresh(self): |
| """A non-member un-flagging an issue as spam should not be able |
| to overturn the verdict to ham. This is different from previous |
| behavior. See https://crbug.com/monorail/2232 for details.""" |
| issue = fake.MakeTestIssue( |
| project_id=789, |
| local_id=1, |
| reporter_id=111, |
| owner_id=456, |
| summary='sum', |
| status='Live', |
| issue_id=78901, |
| migration_modified_timestamp=1234567, |
| is_spam=True) |
| issue.assume_stale = False # We will store this issue. |
| self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id], |
| comment_id=None, user_id=111) |
| self.mock_report_tbl.Select(self.cnxn, |
| cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'], |
| issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh - 1)]) |
| |
| self.mock_verdict_tbl.Select( |
| self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'], |
| group_by=['issue_id'], issue_id=[78901], comment_id=None).AndReturn([]) |
| |
| self.mox.ReplayAll() |
| self.spam_service.FlagIssues( |
| self.cnxn, self.issue_service, [issue], 111, False) |
| self.mox.VerifyAll() |
| |
| self.assertNotIn(issue, self.issue_service.updated_issues) |
| self.assertEqual(issue.migration_modified_timestamp, 1234567) |
| self.assertEqual(issue.is_spam, True) |
| |
| def testUnflagIssue_underThreshNoManualOverride(self): |
| issue = fake.MakeTestIssue( |
| project_id=789, |
| local_id=1, |
| reporter_id=111, |
| owner_id=456, |
| summary='sum', |
| status='Live', |
| issue_id=78901, |
| migration_modified_timestamp=1234567, |
| is_spam=True) |
| self.mock_report_tbl.Delete(self.cnxn, issue_id=[issue.issue_id], |
| comment_id=None, user_id=111) |
| self.mock_report_tbl.Select(self.cnxn, |
| cols=['issue_id', 'COUNT(*)'], group_by=['issue_id'], |
| issue_id=[78901]).AndReturn([(78901, settings.spam_flag_thresh - 1)]) |
| |
| self.mock_verdict_tbl.Select( |
| self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'], |
| group_by=['issue_id'], comment_id=None, |
| issue_id=[78901]).AndReturn([(78901, 'manual', '')]) |
| |
| self.mox.ReplayAll() |
| self.spam_service.FlagIssues( |
| self.cnxn, self.issue_service, [issue], 111, False) |
| self.mox.VerifyAll() |
| |
| self.assertNotIn(issue, self.issue_service.updated_issues) |
| self.assertEqual(issue.migration_modified_timestamp, 1234567) |
| self.assertEqual(issue.is_spam, True) |
| |
| def testIsExempt_RegularUser(self): |
| author = user_pb2.MakeUser(111, email='test@example.com') |
| self.assertFalse(self.spam_service._IsExempt(author, False)) |
| author = user_pb2.MakeUser(111, email='test@chromium.org.example.com') |
| self.assertFalse(self.spam_service._IsExempt(author, False)) |
| |
| def testIsExempt_ProjectMember(self): |
| author = user_pb2.MakeUser(111, email='test@example.com') |
| self.assertTrue(self.spam_service._IsExempt(author, True)) |
| |
| def testIsExempt_AllowlistedDomain(self): |
| author = user_pb2.MakeUser(111, email='test@google.com') |
| self.assertTrue(self.spam_service._IsExempt(author, False)) |
| |
| def testClassifyIssue_spam(self): |
| issue = fake.MakeTestIssue( |
| project_id=789, local_id=1, reporter_id=111, owner_id=456, |
| summary='sum', status='Live', issue_id=78901, is_spam=True) |
| self.spam_service._predict = lambda body: 1.0 |
| |
| # Prevent missing service inits to fail the test. |
| self.spam_service.ml_engine = True |
| |
| comment_pb = tracker_pb2.IssueComment() |
| comment_pb.content = "this is spam" |
| reporter = user_pb2.MakeUser(111, email='test@test.com') |
| res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False) |
| self.assertEqual(1.0, res['confidence_is_spam']) |
| |
| reporter.email = 'test@chromium.org.spam.com' |
| res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False) |
| self.assertEqual(1.0, res['confidence_is_spam']) |
| |
| reporter.email = 'test.google.com@test.com' |
| res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False) |
| self.assertEqual(1.0, res['confidence_is_spam']) |
| |
| def testClassifyIssue_Allowlisted(self): |
| issue = fake.MakeTestIssue( |
| project_id=789, local_id=1, reporter_id=111, owner_id=456, |
| summary='sum', status='Live', issue_id=78901, is_spam=True) |
| self.spam_service._predict = assert_unreached |
| |
| # Prevent missing service inits to fail the test. |
| self.spam_service.ml_engine = True |
| |
| comment_pb = tracker_pb2.IssueComment() |
| comment_pb.content = "this is spam" |
| reporter = user_pb2.MakeUser(111, email='test@google.com') |
| res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False) |
| self.assertEqual(0.0, res['confidence_is_spam']) |
| reporter.email = 'test@chromium.org' |
| res = self.spam_service.ClassifyIssue(issue, comment_pb, reporter, False) |
| self.assertEqual(0.0, res['confidence_is_spam']) |
| |
| def testClassifyComment_spam(self): |
| self.spam_service._predict = lambda body: 1.0 |
| |
| # Prevent missing service inits to fail the test. |
| self.spam_service.ml_engine = True |
| |
| commenter = user_pb2.MakeUser(111, email='test@test.com') |
| res = self.spam_service.ClassifyComment('this is spam', commenter, False) |
| self.assertEqual(1.0, res['confidence_is_spam']) |
| |
| commenter.email = 'test@chromium.org.spam.com' |
| res = self.spam_service.ClassifyComment('this is spam', commenter, False) |
| self.assertEqual(1.0, res['confidence_is_spam']) |
| |
| commenter.email = 'test.google.com@test.com' |
| res = self.spam_service.ClassifyComment('this is spam', commenter, False) |
| self.assertEqual(1.0, res['confidence_is_spam']) |
| |
| def testClassifyComment_Allowlisted(self): |
| self.spam_service._predict = assert_unreached |
| |
| # Prevent missing service inits to fail the test. |
| self.spam_service.ml_engine = True |
| |
| commenter = user_pb2.MakeUser(111, email='test@google.com') |
| res = self.spam_service.ClassifyComment('this is spam', commenter, False) |
| self.assertEqual(0.0, res['confidence_is_spam']) |
| |
| commenter.email = 'test@chromium.org' |
| res = self.spam_service.ClassifyComment('this is spam', commenter, False) |
| self.assertEqual(0.0, res['confidence_is_spam']) |
| |
| def test_ham_classification(self): |
| actual = self.spam_service.ham_classification() |
| self.assertEqual(actual['confidence_is_spam'], 0.0) |
| self.assertEqual(actual['failed_open'], False) |
| |
| def testExpungeUsersInSpam(self): |
| user_ids = [3, 4, 5] |
| self.spam_service.ExpungeUsersInSpam(self.cnxn, user_ids=user_ids) |
| |
| self.spam_service.report_tbl.Delete.assert_has_calls( |
| [ |
| mock.call(self.cnxn, reported_user_id=user_ids, commit=False), |
| mock.call(self.cnxn, user_id=user_ids, commit=False) |
| ]) |
| self.spam_service.verdict_tbl.Delete.assert_called_once_with( |
| self.cnxn, user_id=user_ids, commit=False) |
| |
| def testLookupIssueVerdicts(self): |
| self.spam_service.verdict_tbl.Select = Mock(return_value=[ |
| [5, 10], [4, 11], [6, 12], |
| ]) |
| actual = self.spam_service.LookupIssueVerdicts(self.cnxn, [4, 5, 6]) |
| |
| self.spam_service.verdict_tbl.Select.assert_called_once_with( |
| self.cnxn, cols=['issue_id', 'reason', 'MAX(created)'], |
| issue_id=[4, 5, 6], comment_id=None, group_by=['issue_id']) |
| self.assertEqual(actual, { |
| 5: 10, |
| 4: 11, |
| 6: 12, |
| }) |