Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 1 | # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style |
| 3 | # license that can be found in the LICENSE file or at |
| 4 | # https://developers.google.com/open-source/licenses/bsd |
| 5 | |
| 6 | """Set of helpers for interacting with Google Cloud Storage.""" |
| 7 | from __future__ import print_function |
| 8 | from __future__ import division |
| 9 | from __future__ import absolute_import |
| 10 | |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 11 | import logging |
| 12 | import os |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 13 | from six.moves import urllib |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 14 | import uuid |
| 15 | |
| 16 | from datetime import datetime, timedelta |
| 17 | |
| 18 | from google.appengine.api import app_identity |
| 19 | from google.appengine.api import images |
| 20 | from google.appengine.api import memcache |
| 21 | from google.appengine.api import urlfetch |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 22 | from google.cloud import storage |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 23 | |
| 24 | from framework import filecontent |
| 25 | from framework import framework_constants |
| 26 | from framework import framework_helpers |
| 27 | |
| 28 | |
ATTACHMENT_TTL = timedelta(seconds=30)

# True when the SERVER_SOFTWARE environment variable mentions "development"
# (case-insensitive); an unset variable counts as not-development.
IS_DEV_APPSERVER = 'development' in (
    os.environ.get('SERVER_SOFTWARE', '').lower())

# Mime types for which a thumbnail is generated on upload.
RESIZABLE_MIME_TYPES = [
    'image/png',
    'image/jpg',
    'image/jpeg',
    'image/gif',
    'image/webp',
]

# Thumbnail dimensions handed to the image resizer.
DEFAULT_THUMB_WIDTH = 250
DEFAULT_THUMB_HEIGHT = 200
LOGO_THUMB_WIDTH = 110
LOGO_THUMB_HEIGHT = 30

# Objects larger than this many bytes (10 MB) get no "-download" copy.
MAX_ATTACH_SIZE_TO_COPY = 10 * 1024 ** 2

# GCS signatures are valid for 10 minutes by default, but cache them for
# 5 minutes (expressed in seconds) just to be on the safe side.
GCS_SIG_TTL = 5 * 60
| 46 | |
| 47 | |
| 48 | def _Now(): |
| 49 | return datetime.utcnow() |
| 50 | |
| 51 | |
class UnsupportedMimeType(Exception):
  """Raised when a mime type is not one of the resizable image types."""
| 54 | |
| 55 | |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 56 | def _RemoveLeadingSlash(text): |
| 57 | if text.startswith('/'): |
| 58 | return text[1:] |
| 59 | return text |
| 60 | |
| 61 | |
def DeleteObjectFromGCS(blob_name):
  """Delete an object from the app's default GCS bucket.

  Args:
    blob_name: string name of the object. A single leading slash, if
        present, is removed before the lookup.

  Returns:
    None. If no such object exists, a warning is logged and nothing is
    deleted.
  """
  storage_client = storage.Client()
  bucket_name = app_identity.get_default_gcs_bucket_name()
  bucket = storage_client.bucket(bucket_name)
  validated_blob_name = _RemoveLeadingSlash(blob_name)
  blob = bucket.get_blob(validated_blob_name)
  if blob is None:
    # get_blob returns None for a missing object; calling .delete() on it
    # would fail with AttributeError instead of a meaningful error.
    logging.warning(
        'Cannot delete %r from bucket %r: object not found',
        validated_blob_name, bucket_name)
    return
  blob.delete()
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 69 | |
| 70 | |
def StoreObjectInGCS(
    content, mime_type, project_id, thumb_width=DEFAULT_THUMB_WIDTH,
    thumb_height=DEFAULT_THUMB_HEIGHT, filename=None):
  """Upload content to the default GCS bucket, plus a thumbnail for images.

  Args:
    content: the data to upload.
    mime_type: string content type stored on the uploaded object.
    project_id: value used as the first component of the object name.
    thumb_width: width passed to images.resize when making a thumbnail.
    thumb_height: height passed to images.resize when making a thumbnail.
    filename: optional file name for the Content-Disposition header. A name
        that does not match framework_constants.FILENAME_RE is replaced
        with 'attachment.dat'.

  Returns:
    The object name with a leading slash, i.e.
    '/<project_id>/attachments/<uuid>'.
  """
  storage_client = storage.Client()
  bucket_name = app_identity.get_default_gcs_bucket_name()
  bucket = storage_client.bucket(bucket_name)
  guid = uuid.uuid4()
  blob_name = '%s/attachments/%s' % (project_id, guid)

  blob = bucket.blob(blob_name)
  if filename:
    if not framework_constants.FILENAME_RE.match(filename):
      logging.info('bad file name: %s', filename)
      filename = 'attachment.dat'
    content_disposition = 'inline; filename="%s"' % filename
    blob.content_disposition = content_disposition
    logging.info('Writing with content_disposition %r', content_disposition)
  blob.upload_from_string(content, content_type=mime_type)

  if mime_type in RESIZABLE_MIME_TYPES:
    # Create and save a thumbnail too.
    thumb_content = None
    try:
      thumb_content = images.resize(content, thumb_width, thumb_height)
    except images.LargeImageError:
      # Don't log the whole exception because we don't need to see
      # this on the Cloud Error Reporting page.
      logging.info('Got LargeImageError on image with %d bytes', len(content))
    except Exception as e:
      # Do not raise exception for incorrectly formed images.
      # See https://bugs.chromium.org/p/monorail/issues/detail?id=597 for more
      # detail.
      logging.exception(e)
    if thumb_content:
      # The thumbnail lives next to the original under a '-thumbnail' suffix.
      thumb_blob_name = '%s-thumbnail' % blob_name
      thumb_blob = bucket.blob(thumb_blob_name)
      thumb_blob.upload_from_string(thumb_content, content_type='image/png')

  # Our database, sadly, stores these with the leading slash.
  return '/%s' % blob_name
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 111 | |
| 112 | |
def CheckMimeTypeResizable(mime_type):
  """Raise UnsupportedMimeType unless mime_type is in RESIZABLE_MIME_TYPES.

  Args:
    mime_type: string mime type to check.

  Raises:
    UnsupportedMimeType: mime_type is not a resizable image type; the
        message lists the accepted types.
  """
  if mime_type in RESIZABLE_MIME_TYPES:
    return
  accepted_types = ', '.join(RESIZABLE_MIME_TYPES)
  raise UnsupportedMimeType(
      'Please upload a logo with one of the following mime types:\n%s' %
      accepted_types)
| 118 | |
| 119 | |
def StoreLogoInGCS(file_name, content, project_id):
  """Store a logo image in GCS using the logo thumbnail dimensions.

  Args:
    file_name: name of the uploaded file; its mime type is guessed from it.
    content: the image data to upload.
    project_id: passed through to StoreObjectInGCS.

  Returns:
    Whatever StoreObjectInGCS returns for the stored logo.

  Raises:
    UnsupportedMimeType: the guessed mime type is not resizable.
  """
  logo_mime_type = filecontent.GuessContentTypeFromFilename(file_name)
  CheckMimeTypeResizable(logo_mime_type)
  if '\\' in file_name:  # IE insists on giving us the whole path.
    # NOTE(review): the stripped name is not used after this point;
    # confirm whether it was meant to be passed on as filename=.
    file_name = file_name.rsplit('\\', 1)[-1]
  return StoreObjectInGCS(
      content,
      logo_mime_type,
      project_id,
      thumb_width=LOGO_THUMB_WIDTH,
      thumb_height=LOGO_THUMB_HEIGHT)
| 128 | |
| 129 | |
@framework_helpers.retry(3, delay=0.25, backoff=1.25)
def _FetchSignedURL(url):
  """Fetch url without following redirects and return its Location header.

  Used to ask the devstorage API for a signed GCS content URL. A response
  without a Location header raises KeyError.
  """
  response = urlfetch.fetch(url, follow_redirects=False)
  return response.headers["Location"]
| 136 | |
| 137 | |
def SignUrl(bucket, object_id):
  """Return a URL from which a GCS object can be downloaded.

  A successful result is stored in memcache for GCS_SIG_TTL seconds and
  reused on later calls with the same object_id.

  Args:
    bucket: string name of the GCS bucket.
    object_id: string object ID of the file within that bucket.

  Returns:
    A signed URL, or '/missing-gcs-url' if signing failed.
  """
  # NOTE(review): the cache key is built from object_id only, not the
  # bucket; confirm that object IDs are unique across buckets.
  try:
    memcache_key = 'gcs-object-url-%s' % object_id
    cached_url = memcache.get(key=memcache_key)
    if cached_url is not None:
      return cached_url

    if IS_DEV_APPSERVER:
      download_url = '/_ah/gcs/%s%s' % (bucket, object_id)
    else:
      # The storage API path wants the object name without a leading slash.
      if object_id[0] == '/':
        object_id = object_id[1:]
      read_only_scopes = [
          'https://www.googleapis.com/auth/devstorage.read_only']
      url_template = (
          'https://www.googleapis.com/storage/v1/b/'
          '{bucket}/o/{object_id}?access_token={token}&alt=media')
      unsigned_url = url_template.format(
          bucket=bucket,
          object_id=urllib.parse.quote_plus(object_id),
          token=app_identity.get_access_token(read_only_scopes)[0])
      download_url = _FetchSignedURL(unsigned_url)

    was_cached = memcache.set(
        key=memcache_key, value=download_url, time=GCS_SIG_TTL)
    if not was_cached:
      logging.error('Could not cache gcs url %s for %s', download_url,
                    object_id)

    return download_url

  except Exception as e:
    # Any failure is logged and turned into a placeholder URL.
    logging.exception(e)
    return '/missing-gcs-url'
| 177 | |
| 178 | |
def MaybeCreateDownload(bucket_name, blob_name, filename):
  """Create the "-download" copy of an object unless it is huge or exists.

  Args:
    bucket_name: string name of the GCS bucket holding the object.
    blob_name: string name of the "view" object; a single leading slash is
        removed before use.
    filename: file name placed in the copy's Content-Disposition header.

  Returns:
    True if the download copy already existed or was created. False on the
    dev appserver, when the source object is missing, or when it is larger
    than MAX_ATTACH_SIZE_TO_COPY.
  """
  view_name = _RemoveLeadingSlash(blob_name)
  download_name = '%s-download' % view_name
  logging.info('Maybe create %r from %r', download_name, view_name)

  if IS_DEV_APPSERVER:
    logging.info('dev environment never makes download copies.')
    return False

  bucket = storage.Client().bucket(bucket_name)

  # The "View" object must exist before anything can be copied.
  view_blob = bucket.get_blob(view_name)
  if not view_blob:
    return False

  # `Bucket.blob` doesn't make an HTTP request; `exists()` does the check.
  if bucket.blob(download_name).exists():
    logging.info('Download version of attachment already exists')
    return True

  # Huge "View" objects are never copied.
  if view_blob.size > MAX_ATTACH_SIZE_TO_COPY:
    logging.info('Download version of attachment would be too big')
    return False

  download_blob = bucket.copy_blob(view_blob, bucket, download_name)
  disposition = 'attachment; filename="%s"' % filename
  logging.info('Copying with content_disposition %r', disposition)
  # Switch the copy to an attachment disposition and persist the change.
  download_blob.content_disposition = disposition
  download_blob.patch()
  logging.info('done writing')

  return True