#!/bin/bash
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

# Restores a backup onto the Cloud SQL instance named "primary" and rebuilds
# its read replicas. The steps, in order:
#   1. Delete every instance whose name starts with "${REPLICA_PREFIX}-".
#   2. List the SUCCESSFUL backups of "primary" and ask which one to restore.
#   3. Restore the chosen backup onto "primary".
#   4. If the latest backup (index 0) was chosen: wait for the restore
#      operation, then create replicas ${NEW_REPLICA_PREFIX}-00 .. -09.
#      Otherwise print manual follow-up instructions and exit.
#
# While DRY_RUN is "true" the mutating gcloud commands (delete, restore, wait,
# create) are only printed; the read-only list/describe commands still run.
#
# Usage: edit the settings below, then run the script interactively.

# The existing replicas all have this prefix:
REPLICA_PREFIX="replica"

# The new replicas made from the restored primary will have this prefix:
NEW_REPLICA_PREFIX="replica-1"

CLOUD_PROJECT="monorail-staging"

DRY_RUN=true

echo "Restoring backups to primary for ${CLOUD_PROJECT}. Dry run: ${DRY_RUN}"
echo "This will delete all read replicas with the prefix ${REPLICA_PREFIX}"
echo "and create a new set of replicas with the prefix ${NEW_REPLICA_PREFIX}"
echo
echo "Checking for existing read replicas to delete:"

# Match the prefix against the start of the instance name (first column) only.
# These instances get deleted, so a match elsewhere on the line must not count.
# Word splitting of the command output is intentional: one name per word.
EXISTING_REPLICAS=($(gcloud sql instances list "--project=${CLOUD_PROJECT}" \
  | awk -v prefix="${REPLICA_PREFIX}-" 'index($1, prefix) == 1 {print $1}'))

if [[ ${#EXISTING_REPLICAS[@]} -eq 0 ]]; then
  echo "No replicas found with prefix ${REPLICA_PREFIX}"
  echo "List instances to find the replica prefix by running:"
  echo "gcloud sql instances list --project=${CLOUD_PROJECT}"
  exit 1
fi

echo "Deleting ${#EXISTING_REPLICAS[@]} existing replicas found with the prefix ${REPLICA_PREFIX}"

for r in "${EXISTING_REPLICAS[@]}"; do
  echo "Deleting ${r}"
  # Commands are kept as arrays so each argument stays a single word when run.
  cmd=(gcloud sql instances delete "${r}" "--project=${CLOUD_PROJECT}")
  echo "${cmd[*]}"
  if [[ "${DRY_RUN}" == false ]]; then
    "${cmd[@]}"
  fi
done

echo "Checking for available backups:"

# First column of each SUCCESSFUL row; word splitting is intentional.
DUE_TIMES=($(gcloud sql backups list --instance primary "--project=${CLOUD_PROJECT}" \
  | awk '/SUCCESSFUL/ {print $1}'))

if [[ ${#DUE_TIMES[@]} -eq 0 ]]; then
  echo "No SUCCESSFUL backups found for instance primary in ${CLOUD_PROJECT}." >&2
  exit 1
fi

for index in "${!DUE_TIMES[@]}"; do
  echo "[${index}] ${DUE_TIMES[${index}]}"
done

echo "Choose one of the above due_time values."
echo "NOTE: selecting anything besides 0 will require you to manually"
echo "complete the rest of the restore process."
echo "Recover from date [0: ${DUE_TIMES[0]}]:"
if ! read -r DUE_TIME_INDEX; then
  # No answer could be read (e.g. stdin closed): do not guess on a restore.
  echo "No selection could be read from stdin; aborting." >&2
  exit 1
fi

# The prompt advertises 0 as the default, so an empty answer selects it.
DUE_TIME_INDEX="${DUE_TIME_INDEX:-0}"

# Reject anything that is not a plain non-negative integer. Otherwise bash
# would evaluate the text arithmetically as an array subscript (typically
# yielding 0) and silently restore the latest backup.
if ! [[ "${DUE_TIME_INDEX}" =~ ^[0-9]+$ ]]; then
  echo "Invalid selection '${DUE_TIME_INDEX}': expected a number between 0 and $(( ${#DUE_TIMES[@]} - 1 ))." >&2
  exit 1
fi

# Force base 10 so answers such as "08" are not parsed as invalid octal.
DUE_TIME_INDEX=$(( 10#${DUE_TIME_INDEX} ))

if (( DUE_TIME_INDEX >= ${#DUE_TIMES[@]} )); then
  echo "Invalid selection '${DUE_TIME_INDEX}': expected a number between 0 and $(( ${#DUE_TIMES[@]} - 1 ))." >&2
  exit 1
fi

DUE_TIME="${DUE_TIMES[${DUE_TIME_INDEX}]}"

cmd=(gcloud sql backups restore "${DUE_TIME}" "--project=${CLOUD_PROJECT}" --restore-instance=primary)
echo "${cmd[*]}"
if [[ "${DRY_RUN}" == false ]]; then
  # The exit status is deliberately not checked: the operation is looked up
  # and waited on explicitly below.
  # NOTE(review): presumably the restore command can return before the
  # operation completes -- confirm before adding an abort-on-failure here.
  "${cmd[@]}"
fi

if (( DUE_TIME_INDEX != 0 )); then
  echo "You've restored an older-than-latest backup. Please contact speckle-oncall@"
  echo "to request an on-demand backup of the primary before attempting to restart replicas,"
  echo "which this script does not do automatically in this case."
  echo "run 'gcloud sql instances create' commands to create new replicas manually after"
  echo "you have confirmed with speckle-oncall@ the on-demand backup is complete."
  echo "Exiting"
  exit 0
fi

echo "Finding restore operation ID..."

RESTORE_OP_IDS=($(gcloud sql operations list --instance=primary "--project=${CLOUD_PROJECT}" \
  | awk '/RESTORE_VOLUME/ {print $1}'))

# Assume the first RESTORE_VOLUME is the operation we want; they're listed in reverse chronological order.
RESTORE_OP_ID="${RESTORE_OP_IDS[0]:-}"
echo "Waiting on restore operation ID: ${RESTORE_OP_ID}"

if [[ "${DRY_RUN}" == false ]]; then
  if [[ -z "${RESTORE_OP_ID}" ]]; then
    echo "No RESTORE_VOLUME operation found for instance primary; cannot wait for the restore." >&2
    exit 1
  fi
  gcloud sql operations wait "${RESTORE_OP_ID}" "--project=${CLOUD_PROJECT}"
fi

echo "Restore is finished on primary. Now create the new set of read replicas with the new name prefix ${NEW_REPLICA_PREFIX}:"

# Second field of each line mentioning "tier" in the describe output; only the
# first such value is used for the new replicas.
TIER=($(gcloud sql instances describe primary "--project=${CLOUD_PROJECT}" \
  | awk '/tier/ {print $2}'))

if [[ -z "${TIER[0]:-}" ]]; then
  echo "Could not determine the tier of instance primary; cannot create replicas." >&2
  exit 1
fi

for i in {00..09}; do
  cmd=(
    gcloud sql instances create "${NEW_REPLICA_PREFIX}-${i}"
    --master-instance-name=primary
    "--project=${CLOUD_PROJECT}"
    "--tier=${TIER[0]}"
    --region=us-central1
  )
  echo "${cmd[*]}"
  if [[ "${DRY_RUN}" == false ]]; then
    # Best effort: a failed creation does not stop the loop; the closing
    # instructions tell the operator to retry failed creations manually.
    "${cmd[@]}"
  fi
done

echo "If the replica creation steps above did not succeed due to authentication"
echo "errors, you may need to retry them manually."
echo
echo
echo "Backup restore is nearly complete. Check the instances page on developer console to see when"
echo "all of the replicas are Runnable status. Until then, you may encounter errors in issue search."
echo "In the mean time:"
echo "- edit settings.py to change the db_replica_prefix variable to be ${NEW_REPLICA_PREFIX}-"
echo "Then either make deploy_prod or make deploy_staging for search to pick up the new prefix."
echo "Then set the newly deploy version for besearch and besearch2 on the dev console Versons page."
echo "Follow-up:"
echo "- Submit the change."
echo "- Delete old versions of besearch because they run up the GAE bill."