Project import generated by Copybara.
GitOrigin-RevId: d9e9e3fb4e31372ec1fb43b178994ca78fa8fe70
diff --git a/tools/ml/Makefile b/tools/ml/Makefile
new file mode 100644
index 0000000..b0a8684
--- /dev/null
+++ b/tools/ml/Makefile
@@ -0,0 +1,222 @@
+# Copyright 2019 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Or at https://developers.google.com/open-source/licenses/bsd
+
+# Use 'make help' for a list of commands.
+# Shared defaults (TIMESTAMP is `date +%s` when the Makefile is parsed). NOTE(review): {TIMESTAMP} in MODEL_DIR has no '$', so Make keeps it literal; presumably override MODEL_DIR=<export dir> on the command line - confirm.
+OUTPUT_DIR := /tmp/monospam-local-training/
+TIMESTAMP := $(shell date +%s)
+MODEL_DIR := /tmp/monospam-local-training/export/Servo/{TIMESTAMP}/
+SPAM_JOB_NAME := spam_trainer_$(TIMESTAMP)
+COMP_JOB_NAME := comp_trainer_$(TIMESTAMP)
+.PHONY: default help train_local_spam train_local_spam_2 predict_local_spam train_from_prod_data_spam train_from_prod_data_spam_2 submit_train_job_spam submit_train_job_spam_2 upload_model_prod_spam submit_pred_spam train_from_prod_data_component submit_train_job_component submit_train_job_component_2 upload_model_prod_component submit_pred_component tf2_train_local_spam tf2_train_local_component
+default: help ## Running plain make prints the command list.
+
+help: ## Print the name of every target defined in this Makefile (names starting with a dot, such as .PHONY, are skipped).
+	@echo "Available commands:"
+	@sed -n '/^[a-zA-Z0-9_][a-zA-Z0-9_.]*:/s/:.*//p' $(MAKEFILE_LIST)
+
+train_local_spam: ## Local gcloud training run of trainer/ (trainer type spam) on TRAIN_FILE, which must be set or make stops with an error.
+	gcloud ai-platform local train \
+		--package-path trainer/ \
+		--module-name trainer.task \
+		--job-dir $(OUTPUT_DIR) \
+		-- \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--train-file $(if $(TRAIN_FILE),$(TRAIN_FILE),$(error TRAIN_FILE not set)) \
+		--trainer-type spam
+
+train_local_spam_2: ## Local gcloud training run of trainer2/ (trainer type spam) on TRAIN_FILE, which must be set or make stops with an error.
+	gcloud ai-platform local train \
+		--package-path trainer2/ \
+		--module-name trainer2.task \
+		--job-dir $(OUTPUT_DIR) \
+		-- \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--train-file $(if $(TRAIN_FILE),$(TRAIN_FILE),$(error TRAIN_FILE not set)) \
+		--trainer-type spam
+
+predict_local_spam: ## Run ./spam.py local-predict, then gcloud local predict on /tmp/instances.json. MODEL_DIR must be overridden with a real export directory.
+	./spam.py local-predict
+	gcloud ai-platform local predict \
+		--model-dir $(if $(findstring {TIMESTAMP},$(MODEL_DIR)),$(error MODEL_DIR still contains the {TIMESTAMP} placeholder; pass MODEL_DIR=<exported model dir>),$(MODEL_DIR)) \
+		--json-instances /tmp/instances.json
+
+train_from_prod_data_spam: ## Local gcloud training run of trainer/ (trainer type spam) given GCS bucket monorail-prod.appspot.com and prefix spam_training_data.
+	gcloud ai-platform local train \
+		--package-path trainer/ \
+		--module-name trainer.task \
+		--job-dir $(OUTPUT_DIR) \
+		-- \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix spam_training_data \
+		--trainer-type spam
+
+train_from_prod_data_spam_2: ## Local gcloud training run of trainer2/ (trainer type spam) given GCS bucket monorail-prod.appspot.com and prefix spam_training_data.
+	gcloud ai-platform local train \
+		--package-path trainer2/ \
+		--module-name trainer2.task \
+		--job-dir $(OUTPUT_DIR) \
+		-- \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix spam_training_data \
+		--trainer-type spam
+
+submit_train_job_spam: ## Print TIMESTAMP, then submit AI Platform training job SPAM_JOB_NAME for trainer/ (runtime 1.2, trainer type spam).
+	@echo $(TIMESTAMP)
+	gcloud ai-platform jobs submit training $(SPAM_JOB_NAME) \
+		--package-path trainer/ \
+		--module-name trainer.task \
+		--runtime-version 1.2 \
+		--job-dir gs://monorail-prod-mlengine/$(SPAM_JOB_NAME) \
+		--region us-central1 \
+		-- \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix spam_training_data \
+		--trainer-type spam
+
+submit_train_job_spam_2: ## Print TIMESTAMP, then submit AI Platform training job SPAM_JOB_NAME for trainer2/ (runtime 2.1, Python 3.7, trainer type spam).
+	@echo $(TIMESTAMP)
+	gcloud ai-platform jobs submit training $(SPAM_JOB_NAME) \
+		--package-path trainer2/ \
+		--module-name trainer2.task \
+		--runtime-version 2.1 \
+		--python-version 3.7 \
+		--job-dir gs://monorail-prod-mlengine/$(SPAM_JOB_NAME) \
+		--region us-central1 \
+		-- \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix spam_training_data \
+		--trainer-type spam
+
+# Requires MODEL_BINARIES and VERSION. VERSION of format 'v_TIMESTAMP' should match TIMESTAMP in SPAM_JOB_NAME and MODEL_BINARIES; SPAM_JOB_NAME defaults to the current time, so pass TIMESTAMP=<training job's timestamp> for the listing below to target that job.
+upload_model_prod_spam: ## List the job directory in GCS, create version VERSION of model spam_only_words from MODEL_BINARIES, then make it the default version.
+ifndef MODEL_BINARIES
+	$(error MODEL_BINARIES not set)
+endif
+ifndef VERSION
+	$(error VERSION not set)
+endif
+	gsutil ls -r gs://monorail-prod-mlengine/$(SPAM_JOB_NAME)
+	gcloud ai-platform versions create $(VERSION) \
+		--model spam_only_words \
+		--origin $(MODEL_BINARIES) \
+		--runtime-version 1.2
+	gcloud ai-platform versions set-default $(VERSION) --model spam_only_words
+
+submit_pred_spam: ## Run ./spam.py predict with SUMMARY_PATH and CONTENT_PATH, both of which must be set.
+ifndef SUMMARY_PATH
+	$(error SUMMARY_PATH not set)
+endif
+ifndef CONTENT_PATH
+	$(error CONTENT_PATH not set)
+endif
+	./spam.py predict --summary $(SUMMARY_PATH) --content $(CONTENT_PATH)
+
+
+train_from_prod_data_component: ## Local gcloud training run of trainer/ (trainer type component) given GCS bucket monorail-prod.appspot.com and prefix component_training_data.
+	gcloud ai-platform local train \
+		--package-path trainer/ \
+		--module-name trainer.task \
+		--job-dir $(OUTPUT_DIR) \
+		-- \
+		--train-steps 10000 \
+		--eval-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix component_training_data \
+		--trainer-type component
+
+submit_train_job_component: ## Run gcloud init, then submit AI Platform training job COMP_JOB_NAME for trainer/ (runtime 1.2, custom scale tier with config.json, trainer type component).
+	gcloud init
+	gcloud ai-platform jobs submit training $(COMP_JOB_NAME) \
+		--package-path trainer/ \
+		--module-name trainer.task \
+		--runtime-version 1.2 \
+		--job-dir gs://monorail-prod-mlengine/$(COMP_JOB_NAME) \
+		--region us-central1 \
+		--scale-tier custom \
+		--config config.json \
+		-- \
+		--train-steps 10000 \
+		--eval-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix component_training_data \
+		--trainer-type component
+
+submit_train_job_component_2: ## Submit AI Platform training job COMP_JOB_NAME for trainer2/ (runtime 2.1, Python 3.7, custom scale tier with an n1-highmem-8 master, trainer type component).
+	gcloud ai-platform jobs submit training $(COMP_JOB_NAME) \
+		--package-path trainer2/ \
+		--module-name trainer2.task \
+		--runtime-version 2.1 \
+		--python-version 3.7 \
+		--job-dir gs://monorail-prod-mlengine/$(COMP_JOB_NAME) \
+		--region us-central1 \
+		--scale-tier custom \
+		--master-machine-type n1-highmem-8 \
+		-- \
+		--train-steps 10000 \
+		--eval-steps 1000 \
+		--verbosity DEBUG \
+		--gcs-bucket monorail-prod.appspot.com \
+		--gcs-prefix component_training_data \
+		--trainer-type component
+
+# Requires MODEL_BINARIES and VERSION. VERSION of format 'v_TIMESTAMP' should match TIMESTAMP in COMP_JOB_NAME and MODEL_BINARIES; COMP_JOB_NAME defaults to the current time, so pass TIMESTAMP=<training job's timestamp> for the listing below to target that job.
+upload_model_prod_component: ## List the job directory in GCS, create version VERSION of model component_top_words from MODEL_BINARIES, then make it the default version.
+ifndef MODEL_BINARIES
+	$(error MODEL_BINARIES not set)
+endif
+ifndef VERSION
+	$(error VERSION not set)
+endif
+	gsutil ls -r gs://monorail-prod-mlengine/$(COMP_JOB_NAME)
+	gcloud ai-platform versions create $(VERSION) \
+		--model component_top_words \
+		--origin $(MODEL_BINARIES) \
+		--runtime-version 1.2
+	gcloud ai-platform versions set-default $(VERSION) --model component_top_words
+
+submit_pred_component: ## Run ./component.py for project monorail-prod with CONTENT_PATH, which must be set.
+ifndef CONTENT_PATH
+	$(error CONTENT_PATH not set)
+endif
+	./component.py --project monorail-prod --content $(CONTENT_PATH)
+
+
+### Local Training in TF 2.0
+
+tf2_train_local_spam: ## Run trainer2/task.py directly with python3 (trainer type spam, 1000 train steps) on TRAIN_FILE, which must be set.
+ifndef TRAIN_FILE
+	$(error TRAIN_FILE not set)
+endif
+	python3 ./trainer2/task.py \
+		--train-file $(TRAIN_FILE) \
+		--job-dir $(OUTPUT_DIR) \
+		--train-steps 1000 \
+		--verbosity DEBUG \
+		--trainer-type spam
+
+tf2_train_local_component: ## Run trainer2/task.py directly with python3 (trainer type component, 10000 train steps, 1000 eval steps) on TRAIN_FILE, which must be set.
+ifndef TRAIN_FILE
+	$(error TRAIN_FILE not set)
+endif
+	python3 ./trainer2/task.py \
+		--train-file $(TRAIN_FILE) \
+		--job-dir $(OUTPUT_DIR) \
+		--train-steps 10000 \
+		--eval-steps 1000 \
+		--verbosity DEBUG \
+		--trainer-type component