Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 1 | # Copyright 2018 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style |
| 3 | # license that can be found in the LICENSE file or at |
| 4 | # https://developers.google.com/open-source/licenses/bsd |
| 5 | """ Tasks and handlers for exporting component training data. These |
| 6 | should be run via cron and task queue rather than manually. |
| 7 | """ |
| 8 | from __future__ import print_function |
| 9 | from __future__ import division |
| 10 | from __future__ import absolute_import |
| 11 | |
| 12 | import cloudstorage |
| 13 | import datetime |
| 14 | import logging |
| 15 | import webapp2 |
| 16 | |
| 17 | from google.appengine.api import app_identity |
| 18 | |
| 19 | from features.generate_dataset import build_component_dataset |
| 20 | from framework import cloud_tasks_helpers |
| 21 | from framework import servlet |
| 22 | from framework import urls |
| 23 | |
| 24 | |
class ComponentTrainingDataExport(webapp2.RequestHandler):
    """Request handler that enqueues a training data export task."""

    def get(self):
        """Create a task on the 'componentexport' queue.

        The task is an App Engine HTTP GET request aimed at
        urls.COMPONENT_DATA_EXPORT_TASK; the export itself is not
        performed here.
        """
        logging.info('Training data export requested.')

        # Describe the HTTP request the queued task should issue.
        http_request = {
            'http_method': 'GET',
            'relative_uri': urls.COMPONENT_DATA_EXPORT_TASK,
        }
        export_task = {'app_engine_http_request': http_request}

        cloud_tasks_helpers.create_task(export_task, queue='componentexport')
| 37 | |
| 38 | |
class ComponentTrainingDataExportTask(servlet.Servlet):
    """Write training data for issues and their assigned components to a CSV
    file in the app's default GCS bucket, to be used to train a model later.
    """

    def get(self):
        """Build the component dataset and store it as a timestamped CSV.

        The object is written to
        /<default bucket>/component_training_data/<YYYY-MM-DD HH:MM:SS>.csv,
        where the timestamp comes from datetime.datetime.now().
        """
        logging.info('Training data export initiated.')
        bucket_name = app_identity.get_default_gcs_bucket_name()
        logging.info('Bucket name: %s', bucket_name)
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        logging.info('Opening cloud storage')
        object_path = (
            '/' + bucket_name + '/component_training_data/' + timestamp + '.csv')
        csv_file = cloudstorage.open(
            object_path, content_type='text/csv', mode='w')
        logging.info('GCS file opened')

        # The file object returned by build_component_dataset is the one
        # that gets closed, not necessarily the one opened above.
        # NOTE(review): if build_component_dataset raises, close() is never
        # called -- confirm that leaving the file unclosed is intended.
        written_file = build_component_dataset(self.services.issue, csv_file)
        written_file.close()