blob: cadb6a85d595faa0bbe51a91037314a768482919 [file] [log] [blame]
Copybara854996b2021-09-07 19:36:02 +00001# Copyright 2018 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style
3# license that can be found in the LICENSE file or at
4# https://developers.google.com/open-source/licenses/bsd
5""" Tasks and handlers for exporting component training data. These
6 should be run via cron and task queue rather than manually.
7"""
8from __future__ import print_function
9from __future__ import division
10from __future__ import absolute_import
11
12import cloudstorage
13import datetime
14import logging
15import webapp2
16
17from google.appengine.api import app_identity
18
19from features.generate_dataset import build_component_dataset
20from framework import cloud_tasks_helpers
21from framework import servlet
22from framework import urls
23
24
class ComponentTrainingDataExport(webapp2.RequestHandler):
  """Request handler that kicks off a training data export task."""

  def get(self):
    """Enqueue a GET task for the export task URL.

    The task targets urls.COMPONENT_DATA_EXPORT_TASK and is placed on the
    'componentexport' queue via cloud_tasks_helpers.create_task.
    """
    logging.info('Training data export requested.')
    http_request = {
        'http_method': 'GET',
        'relative_uri': urls.COMPONENT_DATA_EXPORT_TASK,
    }
    export_task = {'app_engine_http_request': http_request}
    cloud_tasks_helpers.create_task(export_task, queue='componentexport')
37
38
class ComponentTrainingDataExportTask(servlet.Servlet):
  """Write training data (issues and their assigned components) to GCS.

  The exported CSV is meant to be used later to train a model.
  """

  def get(self):
    """Open a timestamped CSV in the default GCS bucket and fill it.

    The object is written under '/<bucket>/component_training_data/' and is
    named after the current local timestamp. The file handle is passed to
    build_component_dataset together with the issue service, and whatever
    that call returns is closed afterwards (presumably the same handle --
    confirm against features.generate_dataset).
    """
    logging.info('Training data export initiated.')
    bucket_name = app_identity.get_default_gcs_bucket_name()
    logging.info('Bucket name: %s', bucket_name)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    object_path = '/%s/component_training_data/%s.csv' % (
        bucket_name, timestamp)

    logging.info('Opening cloud storage')
    out_file = cloudstorage.open(
        object_path, content_type='text/csv', mode='w')
    logging.info('GCS file opened')

    # The builder's return value, not necessarily the handle opened above,
    # is what gets closed; this mirrors the original reassignment.
    written_file = build_component_dataset(self.services.issue, out_file)
    written_file.close()