Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame] | 1 | # Copyright 2020 The Chromium Authors |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | """Classes to manage cached values. |
| 5 | |
| 6 | Monorail makes full use of the RAM of GAE frontends to reduce latency |
| 7 | and load on the database. |
| 8 | |
| 9 | Even though these caches do invalidation, there are rare race conditions |
| 10 | that can cause a somewhat stale object to be retrieved from memcache and |
| 11 | then put into a RAM cache and used by a given GAE instance for some time. |
| 12 | So, we only use these caches for operations that can tolerate somewhat |
| 13 | stale data. For example, displaying issues in a list or displaying brief |
| 14 | info about related issues. We never use the cache to load objects as |
| 15 | part of a read-modify-save sequence because that could cause stored data |
| 16 | to revert to a previous state. |
| 17 | """ |
| 18 | from __future__ import print_function |
| 19 | from __future__ import division |
| 20 | from __future__ import absolute_import |
| 21 | |
| 22 | import logging |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 23 | |
| 24 | from protorpc import protobuf |
| 25 | |
| 26 | from google.appengine.api import memcache |
| 27 | |
| 28 | import settings |
| 29 | from framework import framework_constants |
Adrià Vilanova Martínez | 9f9ade5 | 2022-10-10 23:20:11 +0200 | [diff] [blame] | 30 | from framework import logger |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 31 | |
| 32 | |
| 33 | DEFAULT_MAX_SIZE = 10000 |
| 34 | |
| 35 | |
| 36 | class RamCache(object): |
| 37 | """An in-RAM cache with distributed invalidation.""" |
| 38 | |
| 39 | def __init__(self, cache_manager, kind, max_size=None): |
| 40 | self.cache_manager = cache_manager |
| 41 | self.kind = kind |
| 42 | self.cache = {} |
| 43 | self.max_size = max_size or DEFAULT_MAX_SIZE |
| 44 | cache_manager.RegisterCache(self, kind) |
| 45 | |
| 46 | def CacheItem(self, key, item): |
| 47 | """Store item at key in this cache, discarding a random item if needed.""" |
| 48 | if len(self.cache) >= self.max_size: |
| 49 | self.cache.popitem() |
| 50 | |
| 51 | self.cache[key] = item |
| 52 | |
| 53 | def CacheAll(self, new_item_dict): |
| 54 | """Cache all items in the given dict, dropping old items if needed.""" |
| 55 | if len(new_item_dict) >= self.max_size: |
Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame] | 56 | logging.warning('Dumping the entire cache! %s', self.kind) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 57 | self.cache = {} |
| 58 | else: |
| 59 | while len(self.cache) + len(new_item_dict) > self.max_size: |
| 60 | self.cache.popitem() |
| 61 | |
| 62 | self.cache.update(new_item_dict) |
| 63 | |
| 64 | def GetItem(self, key): |
| 65 | """Return the cached item if present, otherwise None.""" |
| 66 | return self.cache.get(key) |
| 67 | |
| 68 | def HasItem(self, key): |
| 69 | """Return True if there is a value cached at the given key.""" |
| 70 | return key in self.cache |
| 71 | |
| 72 | def GetAll(self, keys): |
| 73 | """Look up the given keys. |
| 74 | |
| 75 | Args: |
| 76 | keys: a list of cache keys to look up. |
| 77 | |
| 78 | Returns: |
| 79 | A pair: (hits_dict, misses_list) where hits_dict is a dictionary of |
| 80 | all the given keys and the values that were found in the cache, and |
| 81 | misses_list is a list of given keys that were not in the cache. |
| 82 | """ |
| 83 | hits, misses = {}, [] |
| 84 | for key in keys: |
| 85 | try: |
| 86 | hits[key] = self.cache[key] |
| 87 | except KeyError: |
| 88 | misses.append(key) |
| 89 | |
| 90 | return hits, misses |
| 91 | |
| 92 | def LocalInvalidate(self, key): |
| 93 | """Drop the given key from this cache, without distributed notification.""" |
| 94 | if key in self.cache: |
| 95 | logging.info('Locally invalidating %r in kind=%r', key, self.kind) |
| 96 | self.cache.pop(key, None) |
| 97 | |
| 98 | def Invalidate(self, cnxn, key): |
| 99 | """Drop key locally, and append it to the Invalidate DB table.""" |
| 100 | self.InvalidateKeys(cnxn, [key]) |
| 101 | |
| 102 | def InvalidateKeys(self, cnxn, keys): |
| 103 | """Drop keys locally, and append them to the Invalidate DB table.""" |
| 104 | for key in keys: |
| 105 | self.LocalInvalidate(key) |
| 106 | if self.cache_manager: |
| 107 | self.cache_manager.StoreInvalidateRows(cnxn, self.kind, keys) |
| 108 | |
| 109 | def LocalInvalidateAll(self): |
| 110 | """Invalidate all keys locally: just start over with an empty dict.""" |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 111 | self.cache = {} |
| 112 | |
| 113 | def InvalidateAll(self, cnxn): |
| 114 | """Invalidate all keys in this cache.""" |
| 115 | self.LocalInvalidateAll() |
| 116 | if self.cache_manager: |
| 117 | self.cache_manager.StoreInvalidateAll(cnxn, self.kind) |
| 118 | |
| 119 | |
| 120 | class ShardedRamCache(RamCache): |
| 121 | """Specialized version of RamCache that stores values in parts. |
| 122 | |
| 123 | Instead of the cache keys being simple integers, they are pairs, e.g., |
| 124 | (project_id, shard_id). Invalidation will invalidate all shards for |
| 125 | a given main key, e.g, invalidating project_id 16 will drop keys |
| 126 | (16, 0), (16, 1), (16, 2), ... (16, 9). |
| 127 | """ |
| 128 | |
| 129 | def __init__(self, cache_manager, kind, max_size=None, num_shards=10): |
| 130 | super(ShardedRamCache, self).__init__( |
| 131 | cache_manager, kind, max_size=max_size) |
| 132 | self.num_shards = num_shards |
| 133 | |
| 134 | def LocalInvalidate(self, key): |
| 135 | """Use the specified value to drop entries from the local cache.""" |
| 136 | logging.info('About to invalidate shared RAM keys %r', |
| 137 | [(key, shard_id) for shard_id in range(self.num_shards) |
| 138 | if (key, shard_id) in self.cache]) |
| 139 | for shard_id in range(self.num_shards): |
| 140 | self.cache.pop((key, shard_id), None) |
| 141 | |
| 142 | |
| 143 | class ValueCentricRamCache(RamCache): |
| 144 | """Specialized version of RamCache that stores values in InvalidateTable. |
| 145 | |
| 146 | This is useful for caches that have non integer keys. |
| 147 | """ |
| 148 | |
| 149 | def LocalInvalidate(self, value): |
| 150 | """Use the specified value to drop entries from the local cache.""" |
| 151 | keys_to_drop = [] |
| 152 | # Loop through and collect all keys with the specified value. |
| 153 | for k, v in self.cache.items(): |
| 154 | if v == value: |
| 155 | keys_to_drop.append(k) |
| 156 | for k in keys_to_drop: |
| 157 | self.cache.pop(k, None) |
| 158 | |
| 159 | def InvalidateKeys(self, cnxn, keys): |
| 160 | """Drop keys locally, and append their values to the Invalidate DB table.""" |
| 161 | # Find values to invalidate. |
Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame] | 162 | values = [self.cache[key] for key in keys if key in self.cache] |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 163 | if len(values) == len(keys): |
| 164 | for value in values: |
| 165 | self.LocalInvalidate(value) |
| 166 | if self.cache_manager: |
| 167 | self.cache_manager.StoreInvalidateRows(cnxn, self.kind, values) |
| 168 | else: |
| 169 | # If a value is not found in the cache then invalidate the whole cache. |
| 170 | # This is done to ensure that we are not in an inconsistent state or in a |
| 171 | # race condition. |
| 172 | self.InvalidateAll(cnxn) |
| 173 | |
| 174 | |
| 175 | class AbstractTwoLevelCache(object): |
| 176 | """A class to manage both RAM and secondary-caching layer to retrieve objects. |
| 177 | |
| 178 | Subclasses must implement the FetchItems() method to get objects from |
| 179 | the database when both caches miss. |
| 180 | """ |
| 181 | |
| 182 | # When loading a huge number of issues from the database, do it in chunks |
| 183 | # so as to avoid timeouts. |
| 184 | _FETCH_BATCH_SIZE = 10000 |
| 185 | |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 186 | def __init__(self, cache_manager, kind, prefix, pb_class, max_size=None): |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 187 | |
| 188 | self.cache = self._MakeCache(cache_manager, kind, max_size=max_size) |
| 189 | self.prefix = prefix |
| 190 | self.pb_class = pb_class |
| 191 | |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 192 | def _MakeCache(self, cache_manager, kind, max_size=None): |
| 193 | """Make the RAM cache and register it with the cache_manager.""" |
| 194 | return RamCache(cache_manager, kind, max_size=max_size) |
| 195 | |
| 196 | def CacheItem(self, key, value): |
| 197 | """Add the given key-value pair to RAM and L2 cache.""" |
| 198 | self.cache.CacheItem(key, value) |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 199 | self._WriteToMemcache({key: value}) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 200 | |
| 201 | def HasItem(self, key): |
| 202 | """Return True if the given key is in the RAM cache.""" |
| 203 | return self.cache.HasItem(key) |
| 204 | |
| 205 | def GetAnyOnHandItem(self, keys, start=None, end=None): |
| 206 | """Try to find one of the specified items in RAM.""" |
| 207 | if start is None: |
| 208 | start = 0 |
| 209 | if end is None: |
| 210 | end = len(keys) |
| 211 | for i in range(start, end): |
| 212 | key = keys[i] |
| 213 | if self.cache.HasItem(key): |
| 214 | return self.cache.GetItem(key) |
| 215 | |
| 216 | # Note: We could check L2 here too, but the round-trips to L2 |
| 217 | # are kind of slow. And, getting too many hits from L2 actually |
| 218 | # fills our RAM cache too quickly and could lead to thrashing. |
| 219 | |
| 220 | return None |
| 221 | |
| 222 | def GetAll(self, cnxn, keys, use_cache=True, **kwargs): |
| 223 | """Get values for the given keys from RAM, the L2 cache, or the DB. |
| 224 | |
| 225 | Args: |
| 226 | cnxn: connection to the database. |
| 227 | keys: list of integer keys to look up. |
| 228 | use_cache: set to False to always hit the database. |
| 229 | **kwargs: any additional keywords are passed to FetchItems(). |
| 230 | |
| 231 | Returns: |
| 232 | A pair: hits, misses. Where hits is {key: value} and misses is |
| 233 | a list of any keys that were not found anywhere. |
| 234 | """ |
| 235 | if use_cache: |
| 236 | result_dict, missed_keys = self.cache.GetAll(keys) |
| 237 | else: |
| 238 | result_dict, missed_keys = {}, list(keys) |
| 239 | |
| 240 | if missed_keys: |
| 241 | if use_cache: |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 242 | cache_hits, missed_keys = self._ReadFromMemcache(missed_keys) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 243 | result_dict.update(cache_hits) |
| 244 | self.cache.CacheAll(cache_hits) |
| 245 | |
| 246 | while missed_keys: |
| 247 | missed_batch = missed_keys[:self._FETCH_BATCH_SIZE] |
| 248 | missed_keys = missed_keys[self._FETCH_BATCH_SIZE:] |
| 249 | retrieved_dict = self.FetchItems(cnxn, missed_batch, **kwargs) |
| 250 | result_dict.update(retrieved_dict) |
| 251 | if use_cache: |
| 252 | self.cache.CacheAll(retrieved_dict) |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 253 | self._WriteToMemcache(retrieved_dict) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 254 | |
| 255 | still_missing_keys = [key for key in keys if key not in result_dict] |
Adrià Vilanova Martínez | 9f9ade5 | 2022-10-10 23:20:11 +0200 | [diff] [blame] | 256 | if still_missing_keys: |
| 257 | # The keys were not found in the caches or the DB. |
| 258 | logger.log( |
| 259 | { |
| 260 | 'log_type': 'database/missing_keys', |
| 261 | 'kind': self.cache.kind, |
| 262 | 'prefix': self.prefix, |
| 263 | 'count': len(still_missing_keys), |
| 264 | 'keys': str(still_missing_keys) |
| 265 | }) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 266 | return result_dict, still_missing_keys |
| 267 | |
| 268 | def LocalInvalidateAll(self): |
| 269 | self.cache.LocalInvalidateAll() |
| 270 | |
| 271 | def LocalInvalidate(self, key): |
| 272 | self.cache.LocalInvalidate(key) |
| 273 | |
| 274 | def InvalidateKeys(self, cnxn, keys): |
| 275 | """Drop the given keys from both RAM and L2 cache.""" |
| 276 | self.cache.InvalidateKeys(cnxn, keys) |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 277 | self._DeleteFromMemcache(keys) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 278 | |
| 279 | def InvalidateAllKeys(self, cnxn, keys): |
| 280 | """Drop the given keys from L2 cache and invalidate all keys in RAM. |
| 281 | |
| 282 | Useful for avoiding inserting many rows into the Invalidate table when |
| 283 | invalidating a large group of keys all at once. Only use when necessary. |
| 284 | """ |
| 285 | self.cache.InvalidateAll(cnxn) |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 286 | self._DeleteFromMemcache(keys) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 287 | |
| 288 | def GetAllAlreadyInRam(self, keys): |
| 289 | """Look only in RAM to return {key: values}, missed_keys.""" |
| 290 | result_dict, missed_keys = self.cache.GetAll(keys) |
| 291 | return result_dict, missed_keys |
| 292 | |
| 293 | def InvalidateAllRamEntries(self, cnxn): |
| 294 | """Drop all RAM cache entries. It will refill as needed from L2 cache.""" |
| 295 | self.cache.InvalidateAll(cnxn) |
| 296 | |
| 297 | def FetchItems(self, cnxn, keys, **kwargs): |
| 298 | """On RAM and L2 cache miss, hit the database.""" |
| 299 | raise NotImplementedError() |
| 300 | |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 301 | def _ReadFromMemcache(self, keys): |
| 302 | # type: (Sequence[int]) -> Mapping[str, Any], Sequence[int] |
| 303 | """Read the given keys from memcache, return {key: value}, missing_keys.""" |
| 304 | cache_hits = {} |
| 305 | cached_dict = memcache.get_multi( |
| 306 | [self._KeyToStr(key) for key in keys], |
| 307 | key_prefix=self.prefix, |
| 308 | namespace=settings.memcache_namespace) |
| 309 | |
| 310 | for key_str, serialized_value in cached_dict.items(): |
| 311 | value = self._StrToValue(serialized_value) |
| 312 | key = self._StrToKey(key_str) |
| 313 | cache_hits[key] = value |
| 314 | self.cache.CacheItem(key, value) |
| 315 | |
| 316 | still_missing_keys = [key for key in keys if key not in cache_hits] |
| 317 | return cache_hits, still_missing_keys |
| 318 | |
| 319 | def _WriteToMemcache(self, retrieved_dict): |
| 320 | # type: (Mapping[int, int]) -> None |
| 321 | """Write entries for each key-value pair to memcache. Encode PBs.""" |
| 322 | strs_to_cache = { |
| 323 | self._KeyToStr(key): self._ValueToStr(value) |
| 324 | for key, value in retrieved_dict.items()} |
| 325 | |
| 326 | try: |
| 327 | memcache.add_multi( |
| 328 | strs_to_cache, |
| 329 | key_prefix=self.prefix, |
| 330 | time=framework_constants.CACHE_EXPIRATION, |
| 331 | namespace=settings.memcache_namespace) |
| 332 | except ValueError as identifier: |
| 333 | # If memcache does not accept the values, ensure that no stale |
| 334 | # values are left, then bail out. |
| 335 | logging.error('Got memcache error: %r', identifier) |
| 336 | self._DeleteFromMemcache(list(strs_to_cache.keys())) |
| 337 | return |
| 338 | |
| 339 | def _DeleteFromMemcache(self, keys): |
| 340 | # type: (Sequence[str]) -> None |
| 341 | """Delete key-values from memcache. """ |
Adrià Vilanova Martínez | 9f9ade5 | 2022-10-10 23:20:11 +0200 | [diff] [blame] | 342 | logger.log( |
| 343 | { |
| 344 | 'log_type': 'cache/memcache/delete', |
| 345 | 'kind': self.cache.kind, |
| 346 | 'prefix': self.prefix, |
| 347 | 'count': len(keys), |
Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame] | 348 | 'keys': str(keys)[:100000] |
Adrià Vilanova Martínez | 9f9ade5 | 2022-10-10 23:20:11 +0200 | [diff] [blame] | 349 | }) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 350 | memcache.delete_multi( |
| 351 | [self._KeyToStr(key) for key in keys], |
| 352 | seconds=5, |
| 353 | key_prefix=self.prefix, |
| 354 | namespace=settings.memcache_namespace) |
| 355 | |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 356 | def _KeyToStr(self, key): |
| 357 | # type: (int) -> str |
| 358 | """Convert our int IDs to strings for use as memcache keys.""" |
| 359 | return str(key) |
| 360 | |
| 361 | def _StrToKey(self, key_str): |
| 362 | # type: (str) -> int |
| 363 | """Convert memcache keys back to the ints that we use as IDs.""" |
| 364 | return int(key_str) |
| 365 | |
| 366 | def _ValueToStr(self, value): |
| 367 | # type: (Any) -> str |
| 368 | """Serialize an application object so that it can be stored in L2 cache.""" |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 369 | if not self.pb_class: |
| 370 | return value |
| 371 | elif self.pb_class == int: |
| 372 | return str(value) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 373 | else: |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 374 | return protobuf.encode_message(value) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 375 | |
| 376 | def _StrToValue(self, serialized_value): |
| 377 | # type: (str) -> Any |
| 378 | """Deserialize L2 cache string into an application object.""" |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 379 | if not self.pb_class: |
| 380 | return serialized_value |
| 381 | elif self.pb_class == int: |
| 382 | return int(serialized_value) |
Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 383 | else: |
Adrià Vilanova Martínez | de94280 | 2022-07-15 14:06:55 +0200 | [diff] [blame] | 384 | return protobuf.decode_message(self.pb_class, serialized_value) |