blob: cadb6a85d595faa0bbe51a91037314a768482919 [file] [log] [blame]
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
""" Tasks and handlers for exporting component training data. These
should be run via cron and task queue rather than manually.
"""
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import cloudstorage
import datetime
import logging
import webapp2
from google.appengine.api import app_identity
from features.generate_dataset import build_component_dataset
from framework import cloud_tasks_helpers
from framework import servlet
from framework import urls
class ComponentTrainingDataExport(webapp2.RequestHandler):
  """Request handler that enqueues the component training data export task."""

  def get(self):
    """Queue a GET to the export task URL on the 'componentexport' queue."""
    logging.info('Training data export requested.')

    # The task simply asks App Engine to issue a GET against the export
    # task handler's URL; no payload is attached.
    http_request = {
        'http_method': 'GET',
        'relative_uri': urls.COMPONENT_DATA_EXPORT_TASK,
    }
    export_task = {'app_engine_http_request': http_request}

    cloud_tasks_helpers.create_task(export_task, queue='componentexport')
class ComponentTrainingDataExportTask(servlet.Servlet):
  """Write the component training dataset to a timestamped CSV in GCS.

  The data is produced by build_component_dataset and is meant to be used
  to train a model later.
  """

  def get(self):
    """Open a new CSV object in the default bucket and fill it with the dataset.

    The object is created under 'component_training_data/' in the app's
    default GCS bucket and is named after the current local timestamp.
    """
    logging.info('Training data export initiated.')

    bucket_name = app_identity.get_default_gcs_bucket_name()
    logging.info('Bucket name: %s', bucket_name)

    # Second-resolution timestamp that becomes the object's base name.
    date_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    logging.info('Opening cloud storage')
    object_path = (
        '/' + bucket_name + '/component_training_data/' + date_str + '.csv')
    export_file = cloudstorage.open(
        object_path, content_type='text/csv', mode='w')
    logging.info('GCS file opened')

    # close() is called on whatever build_component_dataset returns --
    # presumably the same handle that was passed in; verify against
    # features.generate_dataset.
    written_file = build_component_dataset(self.services.issue, export_file)
    written_file.close()