blob: 625fa5302accd0c6dd538c3be025214396d24fda [file] [log] [blame]
Copybara854996b2021-09-07 19:36:02 +00001"""Cron job to train spam model with all spam data."""
2from __future__ import print_function
3from __future__ import division
4from __future__ import absolute_import
5
6import logging
7import settings
8import time
9
10from googleapiclient import discovery
11from googleapiclient import errors
12from google.appengine.api import app_identity
13from oauth2client.client import GoogleCredentials
14import webapp2
15
class TrainSpamModelCron(webapp2.RequestHandler):
  """Submit a job to ML Engine which uploads a spam classification model by
  training on an already packaged trainer.
  """

  def get(self):
    """Build a spam-trainer job description and submit it to ML Engine.

    The job id is derived from the current Unix time, and the job directory
    is placed under the ``gs://<app_id>-mlengine/`` bucket. The trainer
    package comes from ``settings.trainer_staging`` when the app id is
    "monorail-staging" and from ``settings.trainer_prod`` otherwise.

    On success the API response is logged at INFO level. An ``HttpError``
    from the API is logged at ERROR level and is not re-raised.
    """
    credentials = GoogleCredentials.get_application_default()
    ml = discovery.build('ml', 'v1', credentials=credentials)

    app_id = app_identity.get_application_id()
    project_id = 'projects/%s' % (app_id)
    # '%d' truncates the float timestamp to whole seconds.
    job_id = 'spam_trainer_%d' % time.time()
    training_input = {
      'scaleTier': 'BASIC',
      'packageUris': [
        settings.trainer_staging
        if app_id == "monorail-staging" else
        settings.trainer_prod
      ],
      'pythonModule': 'trainer.task',
      'args': [
        '--train-steps',
        '1000',
        '--verbosity',
        'DEBUG',
        # The training-data bucket is the prod bucket for every app id.
        '--gcs-bucket',
        'monorail-prod.appspot.com',
        '--gcs-prefix',
        'spam_training_data',
        '--trainer-type',
        'spam'
      ],
      'region': 'us-central1',
      'jobDir': 'gs://%s-mlengine/%s' % (app_id, job_id),
      'runtimeVersion': '1.2'
    }
    job_info = {
      'jobId': job_id,
      'trainingInput': training_input
    }
    request = ml.projects().jobs().create(parent=project_id, body=job_info)

    try:
      response = request.execute()
      logging.info(response)
    # 'as' form is valid on Python 2.6+ and Python 3; the comma form is a
    # SyntaxError on Python 3.
    except errors.HttpError as err:
      logging.error(err._get_reason())