blob: b0a8684aab756ce14a2e7c4f7fd41846f7d73568 [file] [log] [blame]
Copybara854996b2021-09-07 19:36:02 +00001# Copyright 2019 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4# Or at https://developers.google.com/open-source/licenses/bsd
5
6# Use 'make help' for a list of commands.
7
# Directory passed as --job-dir to the local training targets.
OUTPUT_DIR := /tmp/monospam-local-training/
# Seconds since the epoch, captured once when the Makefile is parsed; it makes
# the default job names below unique per invocation.
TIMESTAMP := $(shell date +%s)
# Model directory read by predict_local_spam. Derived from OUTPUT_DIR so the
# two stay in sync when OUTPUT_DIR is overridden on the command line.
# NOTE(review): `{TIMESTAMP}` is a literal placeholder, not a Make expansion;
# presumably MODEL_DIR is meant to be overridden with the real export
# directory (make MODEL_DIR=...) -- confirm.
MODEL_DIR := $(OUTPUT_DIR)export/Servo/{TIMESTAMP}/
# Default names for submitted training jobs; override on the command line to
# refer to a job submitted by an earlier invocation.
SPAM_JOB_NAME := spam_trainer_$(TIMESTAMP)
COMP_JOB_NAME := comp_trainer_$(TIMESTAMP)
13
# Every target in this file is a command, not a file to build. Declaring them
# phony keeps a stray file or directory with the same name from silently
# turning the target into a no-op.
.PHONY: default help \
	train_local_spam train_local_spam_2 predict_local_spam \
	train_from_prod_data_spam train_from_prod_data_spam_2 \
	submit_train_job_spam submit_train_job_spam_2 \
	upload_model_prod_spam submit_pred_spam \
	train_from_prod_data_component \
	submit_train_job_component submit_train_job_component_2 \
	upload_model_prod_component submit_pred_component \
	tf2_train_local_spam tf2_train_local_component

# Running plain `make` prints the list of commands.
default: help

# List the targets defined in this Makefile. The file is located through
# MAKEFILE_LIST so this also works with `make -f <path>` or from another
# directory. Names starting with '.' (special targets such as .PHONY) are
# skipped.
help:
	@echo "Available commands:"
	@sed -n '/^[a-zA-Z0-9_][a-zA-Z0-9_.]*:/s/:.*//p' "$(firstword $(MAKEFILE_LIST))"
19
# Train the spam model locally with the `trainer` package, reading training
# data from TRAIN_FILE and using OUTPUT_DIR as the job directory.
# Usage: make train_local_spam TRAIN_FILE=<path>
train_local_spam:
# Fail early: with TRAIN_FILE empty, --train-file would consume the next flag.
ifndef TRAIN_FILE
	$(error TRAIN_FILE not set)
endif
	gcloud ai-platform local train \
		--package-path trainer/ \
		--module-name trainer.task \
		--job-dir $(OUTPUT_DIR) \
		-- \
		--train-steps 1000 \
		--verbosity DEBUG \
		--train-file $(TRAIN_FILE) \
		--trainer-type spam
30
# Train the spam model locally with the `trainer2` package, reading training
# data from TRAIN_FILE and using OUTPUT_DIR as the job directory.
# Usage: make train_local_spam_2 TRAIN_FILE=<path>
train_local_spam_2:
# Fail early: with TRAIN_FILE empty, --train-file would consume the next flag.
ifndef TRAIN_FILE
	$(error TRAIN_FILE not set)
endif
	gcloud ai-platform local train \
		--package-path trainer2/ \
		--module-name trainer2.task \
		--job-dir $(OUTPUT_DIR) \
		-- \
		--train-steps 1000 \
		--verbosity DEBUG \
		--train-file $(TRAIN_FILE) \
		--trainer-type spam
41
# Run `./spam.py local-predict`, then request a local prediction from the
# model in MODEL_DIR with /tmp/instances.json as the instances file.
# NOTE(review): presumably the spam.py step produces /tmp/instances.json --
# confirm.
predict_local_spam:
	./spam.py local-predict
	gcloud ai-platform local predict --model-dir ${MODEL_DIR} \
		--json-instances /tmp/instances.json
47
# Local training run of the spam model (`trainer` package) against the
# spam_training_data prefix of the monorail-prod.appspot.com bucket.
# Arguments before the bare `--` go to gcloud; the rest go to the trainer.
train_from_prod_data_spam:
	gcloud ai-platform local train \
		--package-path trainer/ --module-name trainer.task \
		--job-dir ${OUTPUT_DIR} \
		-- \
		--train-steps 1000 --verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix spam_training_data \
		--trainer-type spam
59
# Local training run of the spam model (`trainer2` package) against the
# spam_training_data prefix of the monorail-prod.appspot.com bucket.
# Arguments before the bare `--` go to gcloud; the rest go to the trainer.
train_from_prod_data_spam_2:
	gcloud ai-platform local train \
		--package-path trainer2/ --module-name trainer2.task \
		--job-dir ${OUTPUT_DIR} \
		-- \
		--train-steps 1000 --verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix spam_training_data \
		--trainer-type spam
71
# Submit a spam training job named SPAM_JOB_NAME (`trainer` package, runtime
# 1.2) in us-central1, with gs://monorail-prod-mlengine/<job name> as the job
# directory. TIMESTAMP is printed first.
submit_train_job_spam:
	@echo $(TIMESTAMP)
	gcloud ai-platform jobs submit training ${SPAM_JOB_NAME} \
		--package-path trainer/ --module-name trainer.task \
		--runtime-version 1.2 \
		--job-dir gs://monorail-prod-mlengine/${SPAM_JOB_NAME} \
		--region us-central1 \
		-- \
		--train-steps 1000 --verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix spam_training_data \
		--trainer-type spam
86
# Submit a spam training job named SPAM_JOB_NAME (`trainer2` package, runtime
# 2.1 on Python 3.7) in us-central1, with
# gs://monorail-prod-mlengine/<job name> as the job directory. TIMESTAMP is
# printed first.
submit_train_job_spam_2:
	@echo $(TIMESTAMP)
	gcloud ai-platform jobs submit training ${SPAM_JOB_NAME} \
		--package-path trainer2/ --module-name trainer2.task \
		--runtime-version 2.1 --python-version 3.7 \
		--job-dir gs://monorail-prod-mlengine/${SPAM_JOB_NAME} \
		--region us-central1 \
		-- \
		--train-steps 1000 --verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix spam_training_data \
		--trainer-type spam
102
# Create version VERSION of the spam_only_words model from MODEL_BINARIES and
# make it the default version.
# VERSION of format 'v_TIMESTAMP' should match TIMESTAMP in SPAM_JOB_NAME and MODEL_BINARIES.
# Usage: make upload_model_prod_spam SPAM_JOB_NAME=<job> \
#            MODEL_BINARIES=<gs:// path> VERSION=v_<timestamp>
upload_model_prod_spam:
ifndef MODEL_BINARIES
	$(error MODEL_BINARIES not set)
endif
ifndef VERSION
	$(error VERSION not set)
endif
# The in-file default for SPAM_JOB_NAME embeds the current time, so it cannot
# name a job submitted earlier; require an explicit override rather than
# letting `gsutil ls` fail on a path that does not exist.
ifeq ($(origin SPAM_JOB_NAME),file)
	$(error SPAM_JOB_NAME not set)
endif
	gsutil ls -r gs://monorail-prod-mlengine/$(SPAM_JOB_NAME)
	gcloud ai-platform versions create $(VERSION) \
		--model spam_only_words \
		--origin $(MODEL_BINARIES) \
		--runtime-version 1.2
	gcloud ai-platform versions set-default $(VERSION) --model spam_only_words
117
# Run `./spam.py predict` on the files named by SUMMARY_PATH and CONTENT_PATH.
# Both variables are required; the recipe aborts if either is unset.
# Usage: make submit_pred_spam SUMMARY_PATH=<path> CONTENT_PATH=<path>
submit_pred_spam:
ifndef SUMMARY_PATH
	$(error SUMMARY_PATH not set)
endif
ifndef CONTENT_PATH
	$(error CONTENT_PATH not set)
endif
	./spam.py predict \
		--summary ${SUMMARY_PATH} \
		--content ${CONTENT_PATH}
126
127
# Local training run of the component model (`trainer` package) against the
# component_training_data prefix of the monorail-prod.appspot.com bucket.
# Arguments before the bare `--` go to gcloud; the rest go to the trainer.
train_from_prod_data_component:
	gcloud ai-platform local train \
		--package-path trainer/ --module-name trainer.task \
		--job-dir ${OUTPUT_DIR} \
		-- \
		--train-steps 10000 --eval-steps 1000 \
		--verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix component_training_data \
		--trainer-type component
140
# Submit a component training job named COMP_JOB_NAME (`trainer` package,
# runtime 1.2) in us-central1 with a custom scale tier configured by
# config.json. `gcloud init` runs first.
submit_train_job_component:
	gcloud init
	gcloud ai-platform jobs submit training ${COMP_JOB_NAME} \
		--package-path trainer/ --module-name trainer.task \
		--runtime-version 1.2 \
		--job-dir gs://monorail-prod-mlengine/${COMP_JOB_NAME} \
		--region us-central1 \
		--scale-tier custom --config config.json \
		-- \
		--train-steps 10000 --eval-steps 1000 \
		--verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix component_training_data \
		--trainer-type component
158
# Submit a component training job named COMP_JOB_NAME (`trainer2` package,
# runtime 2.1 on Python 3.7) in us-central1, using a custom scale tier with an
# n1-highmem-8 master machine.
submit_train_job_component_2:
	gcloud ai-platform jobs submit training ${COMP_JOB_NAME} \
		--package-path trainer2/ --module-name trainer2.task \
		--runtime-version 2.1 --python-version 3.7 \
		--job-dir gs://monorail-prod-mlengine/${COMP_JOB_NAME} \
		--region us-central1 \
		--scale-tier custom --master-machine-type n1-highmem-8 \
		-- \
		--train-steps 10000 --eval-steps 1000 \
		--verbosity DEBUG \
		--gcs-bucket monorail-prod.appspot.com --gcs-prefix component_training_data \
		--trainer-type component
176
# Create version VERSION of the component_top_words model from MODEL_BINARIES
# and make it the default version.
# VERSION of format 'v_TIMESTAMP' should match TIMESTAMP in COMP_JOB_NAME and MODEL_BINARIES.
# Usage: make upload_model_prod_component COMP_JOB_NAME=<job> \
#            MODEL_BINARIES=<gs:// path> VERSION=v_<timestamp>
upload_model_prod_component:
ifndef MODEL_BINARIES
	$(error MODEL_BINARIES not set)
endif
ifndef VERSION
	$(error VERSION not set)
endif
# The in-file default for COMP_JOB_NAME embeds the current time, so it cannot
# name a job submitted earlier; require an explicit override rather than
# letting `gsutil ls` fail on a path that does not exist.
ifeq ($(origin COMP_JOB_NAME),file)
	$(error COMP_JOB_NAME not set)
endif
	gsutil ls -r gs://monorail-prod-mlengine/$(COMP_JOB_NAME)
	gcloud ai-platform versions create $(VERSION) \
		--model component_top_words \
		--origin $(MODEL_BINARIES) \
		--runtime-version 1.2
	gcloud ai-platform versions set-default $(VERSION) --model component_top_words
191
# Run `./component.py` for project monorail-prod on the file named by
# CONTENT_PATH. The recipe aborts if CONTENT_PATH is unset.
# Usage: make submit_pred_component CONTENT_PATH=<path>
submit_pred_component:
ifndef CONTENT_PATH
	$(error CONTENT_PATH not set)
endif
	./component.py \
		--project monorail-prod \
		--content ${CONTENT_PATH}
197
198
199### Local Training in TF 2.0
200
# Run trainer2/task.py directly with python3 to train the spam model on
# TRAIN_FILE, using OUTPUT_DIR as the job directory. The recipe aborts if
# TRAIN_FILE is unset.
# Usage: make tf2_train_local_spam TRAIN_FILE=<path>
tf2_train_local_spam:
ifndef TRAIN_FILE
	$(error TRAIN_FILE not set)
endif
	python3 ./trainer2/task.py \
		--train-file ${TRAIN_FILE} --job-dir ${OUTPUT_DIR} \
		--train-steps 1000 --verbosity DEBUG \
		--trainer-type spam
211
# Run trainer2/task.py directly with python3 to train the component model on
# TRAIN_FILE, using OUTPUT_DIR as the job directory. The recipe aborts if
# TRAIN_FILE is unset.
# Usage: make tf2_train_local_component TRAIN_FILE=<path>
tf2_train_local_component:
ifndef TRAIN_FILE
	$(error TRAIN_FILE not set)
endif
	python3 ./trainer2/task.py \
		--train-file ${TRAIN_FILE} --job-dir ${OUTPUT_DIR} \
		--train-steps 10000 --eval-steps 1000 \
		--verbosity DEBUG \
		--trainer-type component