Adrià Vilanova Martínez | f19ea43 | 2024-01-23 20:20:52 +0100 | [diff] [blame^] | 1 | # Copyright 2016 Google Inc. All Rights Reserved. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | |
| 15 | """Utility library for reading user information from an id_token. |
| 16 | |
| 17 | This is an experimental library that can temporarily be used to extract |
| 18 | a user from an id_token. The functionality provided by this library |
| 19 | will be provided elsewhere in the future. |
| 20 | """ |
| 21 | |
| 22 | from __future__ import absolute_import |
| 23 | |
| 24 | import base64 |
| 25 | import binascii |
| 26 | import hmac |
| 27 | import json |
| 28 | import logging |
| 29 | import os |
| 30 | import re |
| 31 | import six |
| 32 | import time |
| 33 | from six.moves import urllib |
| 34 | from collections.abc import Container as _Container |
| 35 | from collections.abc import Iterable as _Iterable |
| 36 | from collections.abc import Mapping as _Mapping |
| 37 | |
| 38 | from google.appengine.api import memcache |
| 39 | from google.appengine.api import oauth |
| 40 | from google.appengine.api import urlfetch |
| 41 | from google.appengine.api import users |
| 42 | |
| 43 | from . import constants |
| 44 | from . import types as endpoints_types |
| 45 | |
| 46 | try: |
| 47 | # PyCrypto may not be installed for the import_aeta_test or in dev's |
| 48 | # individual Python installations. It is available on AppEngine in prod. |
| 49 | |
| 50 | # Disable "Import not at top of file" warning. |
| 51 | # pylint: disable=g-import-not-at-top |
| 52 | from Crypto.Hash import SHA256 |
| 53 | from Crypto.PublicKey import RSA |
| 54 | # pylint: enable=g-import-not-at-top |
| 55 | _CRYPTO_LOADED = True |
| 56 | except ImportError: |
| 57 | _CRYPTO_LOADED = False |
| 58 | |
| 59 | |
# Public names of this module.
__all__ = [
    'convert_jwks_uri',
    'get_current_user',
    'get_verified_jwt',
    'InvalidGetUserCall',
    'SKIP_CLIENT_ID_CHECK',
]

_logger = logging.getLogger(__name__)

SKIP_CLIENT_ID_CHECK = ['*']  # This needs to be a list, for comparisons.
_CLOCK_SKEW_SECS = 300  # 5 minutes in seconds
_MAX_TOKEN_LIFETIME_SECS = 86400  # 1 day in seconds
# Default location of the signing certs, in modulus/exponent JSON form.
_DEFAULT_CERT_URI = ('https://www.googleapis.com/service_accounts/v1/metadata/'
                     'raw/federated-signon@system.gserviceaccount.com')
# Keys looked up in os.environ to find the user resolved for this request.
_ENDPOINTS_USER_INFO = 'google.api.auth.user_info'
_ENV_USE_OAUTH_SCOPE = 'ENDPOINTS_USE_OAUTH_SCOPE'
_ENV_AUTH_EMAIL = 'ENDPOINTS_AUTH_EMAIL'
_ENV_AUTH_DOMAIN = 'ENDPOINTS_AUTH_DOMAIN'
_EMAIL_SCOPE = 'https://www.googleapis.com/auth/userinfo.email'
# Endpoint queried to validate bearer tokens on the local dev server.
_TOKENINFO_URL = 'https://www.googleapis.com/oauth2/v3/tokeninfo'
# Matches one `max-age=<seconds>` entry of a Cache-Control header.
_MAX_AGE_REGEX = re.compile(r'\s*max-age\s*=\s*(\d+)\s*')
# Cache namespace under which fetched certs are stored.
_CERT_NAMESPACE = '__verify_jwt'
# Issuer values accepted for Google-issued id_tokens.
_ISSUERS = ('accounts.google.com', 'https://accounts.google.com')
# Issuer configuration used when an API declares no (Google) issuer itself.
_DEFAULT_GOOGLE_ISSUER = {
    'google_id_token': endpoints_types.Issuer(_ISSUERS, _DEFAULT_CERT_URI)
}
| 87 | |
| 88 | |
class _AppIdentityError(Exception):
    """Raised when a JWT fails one of the verification checks."""
| 91 | |
| 92 | |
class InvalidGetUserCall(Exception):
    """Raised when get_current_user runs without auth info in the environment."""
| 95 | |
| 96 | |
| 97 | # pylint: disable=g-bad-name |
def get_current_user():
    """Return the user identified by the request's id_token or oauth token.

    Call this only from inside an Endpoints request handler decorated with
    @endpoints.method, whose decorator includes the
    https://www.googleapis.com/auth/userinfo.email scope.

    The lookup order is:
      1. User info stored by
         `endpoints_management.control.wsgi.AuthenticationMiddleware`.
      2. An oauth scope cached in the environment, in which case the user
         comes from oauth.get_current_user for that scope.
      3. The email/auth-domain pair stored in the environment after an
         id_token (or local-dev bearer token) was validated.

    Returns:
      None if there is no token or it is invalid; otherwise a User. Only the
      user's email field is guaranteed to be set, other fields may be empty.

    Raises:
      InvalidGetUserCall: if none of the environment variables that describe
        the endpoints user are set, which most likely means this was called
        from outside an Endpoints request handler.
    """
    env = os.environ
    if not _is_auth_info_available():
        raise InvalidGetUserCall('No valid endpoints user in environment.')

    # Case 1: the authentication middleware already decoded the user.
    if _ENDPOINTS_USER_INFO in env:
        return users.User(env[_ENDPOINTS_USER_INFO].email)

    # Case 2: a usable oauth scope was cached, so the oauth API can supply
    # richer user information than we could derive ourselves.
    if _ENV_USE_OAUTH_SCOPE in env:
        cached_scopes = env[_ENV_USE_OAUTH_SCOPE].split()
        return oauth.get_current_user(cached_scopes)

    # Case 3: email/domain pair. Both variables must be present; this guard
    # should always pass because _is_auth_info_available() succeeded above.
    if _ENV_AUTH_EMAIL not in env or _ENV_AUTH_DOMAIN not in env:
        return None

    email = env[_ENV_AUTH_EMAIL]
    if not email:
        # No id token was supplied, or it could not be validated.
        return None
    return users.User(email, env[_ENV_AUTH_DOMAIN] or None)
| 150 | |
| 151 | |
| 152 | # pylint: disable=g-bad-name |
def _is_auth_info_available():
    """Tell whether user auth info is already present in os.environ.

    Returns:
      True if the middleware user info, the cached oauth scope, or both the
      auth email and auth domain variables are set; False otherwise.
    """
    env = os.environ
    if _ENDPOINTS_USER_INFO in env or _ENV_USE_OAUTH_SCOPE in env:
        return True
    return _ENV_AUTH_EMAIL in env and _ENV_AUTH_DOMAIN in env
| 158 | |
| 159 | |
def _maybe_set_current_user_vars(method, api_info=None, request=None):
    """Resolve the request's user and record it in environment variables.

    Used internally by Endpoints to set up environment variables for user
    authentication. Does nothing if auth info is already present.

    Args:
      method: The class method that's handling this request. This method
        should be annotated with @endpoints.method.
      api_info: An api_config._ApiInfo instance. Optional. If None, will
        attempt to read api_info from the instance the method is bound to.
      request: The current request, or None.
    """
    if _is_auth_info_available():
        return

    # By default, there's no user.
    os.environ[_ENV_AUTH_EMAIL] = ''
    os.environ[_ENV_AUTH_DOMAIN] = ''

    # Choose settings on the method, if specified. Otherwise, choose settings
    # from the API. Specifically check for None, so that methods can override
    # with empty lists.
    try:
        if not api_info:
            # Bound methods expose their instance as `im_self` on Python 2
            # and as `__self__` on Python 3; accept either spelling.
            handler = getattr(method, 'im_self', None)
            if handler is None:
                handler = method.__self__
            api_info = handler.api_info
    except AttributeError:
        # The most common case for this is someone passing an unbound method
        # to this function, which most likely only happens in our unit tests.
        # We could propagate the exception, but this results in some really
        # difficult to debug behavior. Better to log a warning and pretend
        # there are no API-level settings.
        _logger.warning('AttributeError when accessing %s.im_self. An unbound '
                        'method was probably passed as an endpoints handler.',
                        method.__name__)
        api_info = None
        scopes = method.method_info.scopes
        audiences = method.method_info.audiences
        allowed_client_ids = method.method_info.allowed_client_ids
    else:
        method_info = method.method_info
        scopes = (method_info.scopes
                  if method_info.scopes is not None
                  else api_info.scopes)
        audiences = (method_info.audiences
                     if method_info.audiences is not None
                     else api_info.audiences)
        allowed_client_ids = (method_info.allowed_client_ids
                              if method_info.allowed_client_ids is not None
                              else api_info.allowed_client_ids)

    if not scopes and not audiences and not allowed_client_ids:
        # The user hasn't provided any information to allow us to parse either
        # an id_token or an Oauth token. They appear not to be interested in
        # auth.
        return

    token = _get_token(request)
    if not token:
        return

    if allowed_client_ids and _is_local_dev():
        allowed_client_ids = (
            (constants.API_EXPLORER_CLIENT_ID,) + tuple(allowed_client_ids))

    # When every item in the acceptable scopes list is
    # "https://www.googleapis.com/auth/userinfo.email", and there is a
    # non-empty allowed_client_ids list, the API code will first attempt
    # OAuth 2/OpenID Connect ID token processing for any incoming bearer token.
    if ((scopes == [_EMAIL_SCOPE] or scopes == (_EMAIL_SCOPE,)) and
            allowed_client_ids):
        _logger.debug('Checking for id_token.')
        # api_info is None when the unbound-method fallback above was taken.
        issuers = getattr(api_info, 'issuers', None)
        if issuers is None:
            issuers = _DEFAULT_GOOGLE_ISSUER
        elif 'google_id_token' not in issuers:
            # Work on a copy so the API's own configuration is not mutated
            # as a side effect of handling a request.
            issuers = dict(issuers)
            issuers.update(_DEFAULT_GOOGLE_ISSUER)
        time_now = int(time.time())
        user = _get_id_token_user(token, issuers, audiences,
                                  allowed_client_ids, time_now, memcache)
        if user:
            os.environ[_ENV_AUTH_EMAIL] = user.email()
            os.environ[_ENV_AUTH_DOMAIN] = user.auth_domain()
            return

    # Check if the user is interested in an oauth token.
    if scopes:
        _logger.debug('Checking for oauth token.')
        if _is_local_dev():
            _set_bearer_user_vars_local(token, allowed_client_ids, scopes)
        else:
            _set_bearer_user_vars(allowed_client_ids, scopes)
| 248 | |
| 249 | |
def _get_token(
        request=None, allowed_auth_schemes=('OAuth', 'Bearer'),
        allowed_query_keys=('bearer_token', 'access_token')):
    """Extract the auth token from the current request.

    The token may be given in the Authorization header or as a query
    parameter. With the default arguments the lookup order is:
      1. Authorization header.
      2. bearer_token query param.
      3. access_token query param.

    Args:
      request: The current request, or None.
      allowed_auth_schemes: Scheme prefixes accepted at the start of the
        Authorization header value.
      allowed_query_keys: Query keys, in priority order, looked up through
        request.get_unrecognized_field_info.

    Returns:
      The token in the request or None.
    """
    schemes = _listlike_guard(
        allowed_auth_schemes, 'allowed_auth_schemes', iterable_only=True)

    header_value = os.environ.get('HTTP_AUTHORIZATION')
    if header_value:
        for scheme in schemes:
            if header_value.startswith(scheme):
                # Skip the scheme plus the one separator character after it.
                return header_value[len(scheme) + 1:]
        # An Authorization header was sent, so even though it is unusable we
        # deliberately do not fall back to the query string.
        return None

    if not request:
        return None

    query_keys = _listlike_guard(
        allowed_query_keys, 'allowed_query_keys', iterable_only=True)
    for query_key in query_keys:
        candidate, _ = request.get_unrecognized_field_info(query_key)
        if candidate:
            return candidate
    return None
| 288 | |
| 289 | |
def _get_id_token_user(token, issuers, audiences, allowed_client_ids, time_now, cache):
    """Get a User for the given id token, if the token is valid.

    Each configured issuer is tried in turn; the first issuer whose certs
    verify the token signature and whose claims check out wins.

    Args:
      token: The id_token to check.
      issuers: dict of Issuers, keyed by issuer name.
      audiences: Either a list of acceptable audiences shared by all issuers,
        or a mapping from issuer key to that issuer's acceptable audiences.
      allowed_client_ids: List of client IDs that are acceptable.
      time_now: The current time as an int (eg. int(time.time())).
      cache: Cache to use (eg. the memcache module).

    Returns:
      A User if the token is valid, None otherwise.
    """
    # Verify that the token is valid before we try to extract anything from
    # it. This verifies the signature and some of the basic info in the token.
    for issuer_key, issuer in issuers.items():
        issuer_cert_uri = convert_jwks_uri(issuer.jwks_uri)
        try:
            parsed_token = _verify_signed_jwt_with_certs(
                token, time_now, cache, cert_uri=issuer_cert_uri)
        except Exception:  # pylint: disable=broad-except
            _logger.debug(
                'id_token verification failed for issuer %s', issuer_key,
                exc_info=True)
            continue

        issuer_values = _listlike_guard(
            issuer.issuer, 'issuer', log_warning=False)

        # Resolve the audiences for THIS issuer into a separate local name.
        # Rebinding `audiences` itself would make every later issuer reuse
        # the first issuer's audience list. An issuer with no entry in the
        # mapping simply has no acceptable audiences.
        if isinstance(audiences, _Mapping):
            issuer_audiences = audiences.get(issuer_key, ())
        else:
            issuer_audiences = audiences

        if _verify_parsed_token(
                parsed_token, issuer_values, issuer_audiences,
                allowed_client_ids,
                # There's some special handling we do for Google issuers.
                # ESP doesn't do this, and it's both unnecessary and invalid
                # for other issuers. So we'll turn it off except in the
                # Google issuer case.
                is_legacy_google_auth=(issuer.issuer == _ISSUERS)):
            email = parsed_token['email']
            # The token might have an id, but it's a Gaia ID that's been
            # obfuscated with the Focus key, rather than the AppEngine
            # (igoogle) key. If the developer ever put this email into the
            # user DB and retrieved the ID from that, it'd be different from
            # the ID we'd return here, so it's safer to not return the ID.
            # Instead, we'll only return the email.
            return users.User(email)
    return None
| 333 | |
| 334 | |
| 335 | # pylint: disable=unused-argument |
def _set_oauth_user_vars(token_info, audiences, allowed_client_ids, scopes,
                         local_dev):
    """Deprecated alias that forwards to _set_bearer_user_vars.

    Only allowed_client_ids and scopes are used; token_info, audiences and
    local_dev are accepted for signature compatibility and ignored.
    """
    deprecation_notice = ('_set_oauth_user_vars is deprecated and will be '
                          'removed soon.')
    _logger.warning(deprecation_notice)
    return _set_bearer_user_vars(allowed_client_ids, scopes)
| 341 | # pylint: enable=unused-argument |
| 342 | |
| 343 | |
def _process_scopes(scopes):
    """Split a scopes list into individual scopes and sufficient scope sets.

    Args:
      scopes: A list of strings, each of which is a space-separated list of
        scopes. Examples: ['scope1']
                          ['scope1', 'scope2']
                          ['scope1', 'scope2 scope3']

    Returns:
      all_scopes: a set of strings, each of which is one scope to check for
      sufficient_scopes: a set of frozensets of strings; each inner set is
        a group of scopes which together are sufficient for access.
        Example: {{'scope1'}, {'scope2', 'scope3'}}
    """
    # Each entry of `scopes` is one alternative; all of its space-separated
    # members are required together.
    sufficient_scopes = {frozenset(entry.split()) for entry in scopes}
    all_scopes = set().union(*sufficient_scopes)
    return all_scopes, sufficient_scopes
| 365 | |
| 366 | |
def _are_scopes_sufficient(authorized_scopes, sufficient_scopes):
    """Check whether the authorized scopes cover any sufficient scope set.

    Args:
      authorized_scopes: a list of strings, return value from
        oauth.get_authorized_scopes
      sufficient_scopes: a set of sets of strings, return value from
        _process_scopes

    Returns:
      True if at least one set in sufficient_scopes is entirely contained in
      authorized_scopes, False otherwise.
    """
    return any(required.issubset(authorized_scopes)
               for required in sufficient_scopes)
| 378 | |
| 379 | |
| 380 | |
def _set_bearer_user_vars(allowed_client_ids, scopes):
    """Validate the oauth bearer token and set endpoints auth user variables.

    On success this stores the authorized scopes in ENDPOINTS_USE_OAUTH_SCOPE,
    which is all endpoints.get_current_user() needs to look the user up. On
    any failure the environment is left untouched.

    Args:
      allowed_client_ids: List of client IDs that are acceptable.
      scopes: List of acceptable scopes.
    """
    requested_scopes, acceptable_scope_sets = _process_scopes(scopes)

    try:
        granted_scopes = oauth.get_authorized_scopes(sorted(requested_scopes))
    except oauth.Error:
        _logger.debug('Unable to get authorized scopes.', exc_info=True)
        return

    if not _are_scopes_sufficient(granted_scopes, acceptable_scope_sets):
        _logger.warning('Authorized scopes did not satisfy scope requirements.')
        return

    client_id = oauth.get_client_id(granted_scopes)

    # The client ID must be in allowed_client_ids. An empty allowed_client_ids
    # allows no client at all, while SKIP_CLIENT_ID_CHECK allows every client.
    skip_client_check = list(allowed_client_ids) == SKIP_CLIENT_ID_CHECK
    if not skip_client_check and client_id not in allowed_client_ids:
        _logger.warning('Client ID is not allowed: %s', client_id)
        return

    os.environ[_ENV_USE_OAUTH_SCOPE] = ' '.join(granted_scopes)
    _logger.debug('get_current_user() will return user from matched oauth_user.')
| 413 | |
| 414 | |
def _set_bearer_user_vars_local(token, allowed_client_ids, scopes):
    """Validate the oauth bearer token on the dev server.

    The oauth module only returns example results in local development, so
    the token is checked against the tokeninfo endpoint instead. When it is
    valid, _ENV_AUTH_EMAIL and _ENV_AUTH_DOMAIN are set so that the user can
    be read back from the environment.

    Args:
      token: String with the oauth token to validate.
      allowed_client_ids: List of client IDs that are acceptable.
      scopes: List of acceptable scopes.
    """
    # Ask the tokeninfo endpoint about the token.
    query = urllib.parse.urlencode({'access_token': token})
    response = urlfetch.fetch('%s?%s' % (_TOKENINFO_URL, query))
    if response.status_code != 200:
        try:
            description = json.loads(response.content)['error_description']
        except (ValueError, KeyError):
            description = ''
        _logger.error('Token info endpoint returned status %s: %s',
                      response.status_code, description)
        return
    token_info = json.loads(response.content)

    # The token must carry a verified email address.
    if 'email' not in token_info:
        _logger.warning('Oauth token doesn\'t include an email address.')
        return
    if token_info.get('email_verified') != 'true':
        _logger.warning('Oauth token email isn\'t verified.')
        return

    # The token must have been issued to an allowed client.
    client_id = token_info.get('azp')
    skip_client_check = list(allowed_client_ids) == SKIP_CLIENT_ID_CHECK
    if not skip_client_check and client_id not in allowed_client_ids:
        _logger.warning('Client ID is not allowed: %s', client_id)
        return

    # The token's scopes must satisfy at least one acceptable scope set.
    _, acceptable_scope_sets = _process_scopes(scopes)
    granted_scopes = token_info.get('scope', '').split(' ')
    if not _are_scopes_sufficient(granted_scopes, acceptable_scope_sets):
        _logger.warning('Oauth token scopes don\'t match any acceptable scopes.')
        return

    os.environ[_ENV_AUTH_EMAIL] = token_info['email']
    os.environ[_ENV_AUTH_DOMAIN] = ''
    _logger.debug('Local dev returning user from token.')
| 466 | |
| 467 | |
def _is_local_dev():
    """Return True when SERVER_SOFTWARE starts with 'Development'."""
    server_software = os.environ.get('SERVER_SOFTWARE', '')
    return server_software.startswith('Development')
| 470 | |
| 471 | |
def _verify_parsed_token(parsed_token, issuers, audiences, allowed_client_ids, is_legacy_google_auth=True):
    """Verify a parsed user ID token.

    Args:
      parsed_token: The parsed token information.
      issuers: A list of allowed issuers
      audiences: The allowed audiences. None is treated as "no audiences
        configured".
      allowed_client_ids: The allowed client IDs. Only consulted when
        is_legacy_google_auth is true.
      is_legacy_google_auth: When true, apply the legacy Google handling: an
        audience equal to the token's client ID ('azp') is accepted, and the
        client ID must be listed in allowed_client_ids.

    Returns:
      True if the token is verified, False otherwise.
    """
    # Verify the issuer.
    if parsed_token.get('iss') not in issuers:
        _logger.warning('Issuer was not valid: %s', parsed_token.get('iss'))
        return False

    # Check audiences.
    aud = parsed_token.get('aud')
    if not aud:
        _logger.warning('No aud field in token')
        return False
    # Special legacy handling if aud == cid. This occurs with iOS and
    # browsers. As long as audience == client_id and cid is allowed, we need
    # to accept the audience for compatibility.
    cid = parsed_token.get('azp')
    # Neither the method nor the API may have configured audiences, in which
    # case None arrives here; a membership test against None would raise.
    configured_audiences = audiences if audiences is not None else ()
    audience_allowed = ((aud in configured_audiences) or
                        (is_legacy_google_auth and aud == cid))
    if not audience_allowed:
        _logger.warning('Audience not allowed: %s', aud)
        return False

    # Check allowed client IDs, for legacy auth.
    if is_legacy_google_auth:
        if list(allowed_client_ids) == SKIP_CLIENT_ID_CHECK:
            _logger.warning('Client ID check can\'t be skipped for ID tokens.  '
                            'Id_token cannot be verified.')
            return False
        elif not cid or cid not in allowed_client_ids:
            _logger.warning('Client ID is not allowed: %s', cid)
            return False

    # Callers read the email out of a verified token, so it must be present.
    if 'email' not in parsed_token:
        return False

    return True
| 517 | |
| 518 | |
def _urlsafe_b64decode(b64string):
    """Decode URL-safe base64 that may be missing its trailing '=' padding.

    Args:
      b64string: The encoded value, as ASCII text or as bytes.

    Returns:
      The decoded bytes.
    """
    # Work on bytes throughout: text input is converted once here, and the
    # padding below must be bytes too, since bytes and str cannot be
    # concatenated on Python 3.
    if isinstance(b64string, str):
        b64string = b64string.encode('ascii')
    padded = b64string + b'=' * ((4 - len(b64string)) % 4)
    return base64.urlsafe_b64decode(padded)
| 524 | |
| 525 | |
def _get_cert_expiration_time(headers):
    """Compute how long a fetched cert may be cached, from response headers.

    The lifetime is the first max-age value found in Cache-Control, minus
    the Age header when that header holds an integer. If no max-age is
    present the result is 0, meaning the cert shouldn't be cached.

    Args:
      headers: A dict containing the response headers from the request to get
        certs.

    Returns:
      An integer with the number of seconds the cert should be cached.  This
      value is guaranteed to be >= 0.
    """
    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 indicates
    # only a comma-separated header is valid, so splitting on commas is fine.
    lifetime = None
    for directive in headers.get('Cache-Control', '').split(','):
        found = _MAX_AGE_REGEX.match(directive)
        if found:
            lifetime = int(found.group(1))
            break
    if lifetime is None:
        return 0

    # Part of the lifetime may already have elapsed upstream.
    reported_age = headers.get('Age')
    if reported_age is not None:
        try:
            lifetime -= int(reported_age)
        except ValueError:
            # An unparseable Age counts as zero.
            pass

    return max(0, lifetime)
| 564 | |
| 565 | |
def _get_cached_certs(cert_uri, cache):
    """Return the certs for cert_uri, fetching and caching them on a miss.

    Args:
      cert_uri: URI from which to retrieve certs if cache is stale or empty.
      cache: Cache of pre-fetched certs.

    Returns:
      The retrieved certs, or None if they could not be fetched.
    """
    certs = cache.get(cert_uri, namespace=_CERT_NAMESPACE)
    if certs is not None:
        return certs

    _logger.debug('Cert cache miss for %s', cert_uri)
    try:
        response = urlfetch.fetch(cert_uri)
    except AssertionError:
        # This happens in unit tests.  Act as if we couldn't get any certs.
        return None

    if response.status_code != 200:
        _logger.error(
            'Certs not available, HTTP request returned %d',
            response.status_code)
        return None

    certs = json.loads(response.content)
    # A lifetime of 0 means the response gave no usable caching information,
    # so the certs are returned without being stored.
    lifetime_seconds = _get_cert_expiration_time(response.headers)
    if lifetime_seconds:
        cache.set(cert_uri, certs, time=lifetime_seconds,
                  namespace=_CERT_NAMESPACE)
    return certs
| 596 | |
| 597 | |
def _b64_to_int(b):
    """Decode a (possibly unpadded) base64 value into a big-endian integer.

    Args:
      b: The base64-encoded value, as ASCII text or bytes.

    Returns:
      The decoded bytes interpreted as an unsigned integer.
    """
    encoded = six.ensure_binary(b, 'ascii')
    missing_padding = (4 - len(encoded)) % 4
    raw = base64.b64decode(encoded + b'=' * missing_padding)
    return int(binascii.hexlify(raw), 16)
| 603 | |
| 604 | |
def _verify_signed_jwt_with_certs(
        jwt, time_now, cache,
        cert_uri=_DEFAULT_CERT_URI):
    """Verify a JWT against public certs.

    See http://self-issued.info/docs/draft-jones-json-web-token.html.

    The PyCrypto library included with Google App Engine is severely limited
    and so you have to use it very carefully to verify JWT signatures.  The
    first issue is that the library can't read X.509 files, so we make a call
    to a special URI that has the public cert in modulus/exponent form in
    JSON.

    The second issue is that the RSA.verify method doesn't work, at least for
    how the JWT tokens are signed, so we have to manually verify the signature
    of the JWT, which means hashing the signed part of the JWT and comparing
    that to the signature after applying the RSA public-key operation to it.

    Args:
      jwt: string, A JWT.
      time_now: The current time, as an int (eg. int(time.time())).
      cache: Cache to use (eg. the memcache module).
      cert_uri: string, URI to get cert modulus and exponent in JSON format.

    Returns:
      dict, The deserialized JSON payload in the JWT.

    Raises:
      _AppIdentityError: if any checks are failed.
    """

    segments = jwt.split('.')

    if len(segments) != 3:
        # Note that anywhere we print the jwt or its json body, we need to use
        # %r instead of %s, so that non-printable characters are escaped
        # safely.
        raise _AppIdentityError('Token is not an id_token (Wrong number of '
                                'segments)')
    signed = '%s.%s' % (segments[0], segments[1])

    signature = _urlsafe_b64decode(segments[2])

    # The RSA operation works on integers, so convert the signature bytes
    # into an int.
    lsignature = int(binascii.hexlify(signature), 16)

    # Verify expected header.
    header_body = _urlsafe_b64decode(segments[0])
    try:
        header = json.loads(header_body)
    except (TypeError, ValueError):
        raise _AppIdentityError("Can't parse header")
    if not isinstance(header, dict):
        # Valid JSON that is not an object cannot be a JWT header.
        raise _AppIdentityError("Can't parse header")
    if header.get('alg') != 'RS256':
        raise _AppIdentityError('Unexpected encryption algorithm: %r' %
                                header.get('alg'))

    # Formerly we would parse the token body here.
    # However, it's not safe to do that without first checking the signature.

    certs = _get_cached_certs(cert_uri, cache)
    if certs is None:
        raise _AppIdentityError(
            'Unable to retrieve certs needed to verify the signed JWT')

    # Verify that we were able to load the Crypto libraries, before we try
    # to use them.
    if not _CRYPTO_LOADED:
        raise _AppIdentityError('Unable to load pycrypto library.  Can\'t verify '
                                'id_token signature.  See http://www.pycrypto.org '
                                'for more information on pycrypto.')

    # SHA256 hash of the already 'signed' segment from the JWT.  The hash
    # function consumes bytes, so the text is encoded first.  A SHA256 hex
    # digest always has length 64.
    local_hash = SHA256.new(signed.encode('utf-8')).hexdigest()

    # Check signature.
    verified = False
    for keyvalue in certs['keyvalues']:
        try:
            modulus = _b64_to_int(keyvalue['modulus'])
            exponent = _b64_to_int(keyvalue['exponent'])
            # Still build the key object so that whatever validation the
            # Crypto library performs on the components keeps applying.
            RSA.construct((modulus, exponent))

            # A signature representative must be smaller than the modulus;
            # anything else cannot have been produced by this key.
            if lsignature >= modulus:
                continue

            # Apply the raw RSA public-key operation directly.  The key
            # object's encrypt() method is not available in PyCryptodome,
            # and modular exponentiation is exactly what it computed.
            hexsig = '%064x' % pow(lsignature, exponent, modulus)
            # Keep only the last 64 hex chars, i.e. the embedded digest.
            # NOTE(review): this compares just the trailing SHA256 digest and
            # does not validate the PKCS#1 v1.5 padding / DigestInfo prefix
            # in front of it -- consider a full EMSA-PKCS1-v1_5 comparison.
            hexsig = hexsig[-64:]

            # Check the signature on 'signed' by confirming the recovered
            # digest matches the SHA256 hash of 'signed'.
            # hmac.compare_digest(a, b) is used to avoid timing attacks.
            verified = hmac.compare_digest(hexsig, local_hash)
            if verified:
                break
        except Exception as e:  # pylint: disable=broad-except
            # Log the exception for debugging purpose.
            _logger.debug(
                'Signature verification error: %s; continuing with the next cert.', e)
            continue
    if not verified:
        raise _AppIdentityError('Invalid token signature')

    # Parse token.
    json_body = _urlsafe_b64decode(segments[1])
    try:
        parsed = json.loads(json_body)
    except (TypeError, ValueError):
        raise _AppIdentityError("Can't parse token body")

    # Check creation timestamp.
    iat = parsed.get('iat')
    if iat is None:
        raise _AppIdentityError('No iat field in token')
    earliest = iat - _CLOCK_SKEW_SECS

    # Check expiration timestamp.
    exp = parsed.get('exp')
    if exp is None:
        raise _AppIdentityError('No exp field in token')
    if exp >= time_now + _MAX_TOKEN_LIFETIME_SECS:
        raise _AppIdentityError('exp field too far in future')
    latest = exp + _CLOCK_SKEW_SECS

    if time_now < earliest:
        raise _AppIdentityError('Token used too early, %d < %d' %
                                (time_now, earliest))
    if time_now > latest:
        raise _AppIdentityError('Token used too late, %d > %d' %
                                (time_now, latest))

    return parsed
| 735 | |
| 736 | |
_TEXT_CERT_PREFIX = 'https://www.googleapis.com/robot/v1/metadata/x509/'
_JSON_CERT_PREFIX = 'https://www.googleapis.com/service_accounts/v1/metadata/raw/'


def convert_jwks_uri(jwks_uri):
    """Map an X.509 cert URI to the equivalent modulus/exponent JSON URI.

    The PyCrypto library included with Google App Engine is severely limited
    and can't read X.509 files, so we change the URI to a special URI that has
    the public cert in modulus/exponent form in JSON.

    Args:
      jwks_uri: string, the cert URI configured for an issuer.

    Returns:
      The URI with its leading X.509 prefix swapped for the JSON prefix, or
      jwks_uri unchanged if it does not start with the X.509 prefix.
    """
    if not jwks_uri.startswith(_TEXT_CERT_PREFIX):
        return jwks_uri
    # Swap only the leading prefix; any later occurrence of the same text
    # (for example inside a query string) is left untouched.
    return _JSON_CERT_PREFIX + jwks_uri[len(_TEXT_CERT_PREFIX):]
| 750 | |
| 751 | |
def get_verified_jwt(
    providers, audiences,
    check_authorization_header=True, check_query_arg=True,
    request=None, cache=memcache):
    """Extract, verify, and parse a JWT carried by the current request.

    The token is taken from the 'Authorization: Bearer' header and/or the
    'access_token' query argument, depending on the two check_* flags.

    The JWT is assumed to contain an issuer and audience claim, as well as
    issued-at and expiration timestamps. Each provider is tried in order; the
    parsed body from the first provider that accepts the token is returned.

    Args:
      providers: An iterable of dicts, each containing 'issuer' and
        'cert_uri' keys.
      audiences: An iterable of valid audiences.
      check_authorization_header: Boolean; check the 'Authorization: Bearer'
        header.
      check_query_arg: Boolean; check the 'access_token' query arg.
      request: Must be the request object if check_query_arg is true.
      cache: In testing, override the certificate cache.

    Returns:
      The parsed JWT body, or None if no token was found or no provider
      accepted it.

    Raises:
      ValueError: If both check_* flags are false, or if check_query_arg is
        true but no request was supplied.
    """
    if not check_authorization_header and not check_query_arg:
        raise ValueError(
            'Either check_authorization_header or check_query_arg must be True.')
    if check_query_arg and request is None:
        raise ValueError('Cannot check query arg without request object.')

    # Only look in the places the caller asked for.
    if check_authorization_header:
        auth_schemes = ('Bearer',)
    else:
        auth_schemes = ()
    if check_query_arg:
        query_keys = ('access_token',)
    else:
        query_keys = ()

    raw_token = _get_token(
        request=request,
        allowed_auth_schemes=auth_schemes,
        allowed_query_keys=query_keys)
    if raw_token is None:
        return None

    # One timestamp for all providers so every attempt sees the same "now".
    now = int(time.time())
    for entry in providers:
        claims = _parse_and_verify_jwt(
            raw_token, now, (entry['issuer'],), audiences, entry['cert_uri'],
            cache)
        if claims is None:
            continue
        return claims
    return None
| 796 | |
| 797 | |
def _parse_and_verify_jwt(token, time_now, issuers, audiences, cert_uri, cache):
    """Verify a JWT's signature, then check its issuer and audience claims.

    Args:
      token: The encoded JWT, handed to _verify_signed_jwt_with_certs.
      time_now: The current time, handed to _verify_signed_jwt_with_certs.
      issuers: Accepted values for the 'iss' claim. A bare string is treated
        as a single issuer.
      audiences: Accepted values for the 'aud' claim. A bare string is treated
        as a single audience.
      cert_uri: Certificate URI, handed to _verify_signed_jwt_with_certs.
      cache: Certificate cache, handed to _verify_signed_jwt_with_certs.

    Returns:
      The parsed token body, or None if signature verification fails, the
      issuer is not accepted, the audience is missing, or no audience in the
      token is accepted.

    Raises:
      ValueError: If issuers or audiences is neither a container nor an
        iterable (raised by _listlike_guard).
    """
    try:
        parsed_token = _verify_signed_jwt_with_certs(
            token, time_now, cache, cert_uri)
    except (_AppIdentityError, TypeError) as e:
        _logger.debug('id_token verification failed: %s', e)
        return None

    issuers = _listlike_guard(issuers, 'issuers')
    audiences = _listlike_guard(audiences, 'audiences')
    # We can't use _verify_parsed_token because there's no client id (azp) or
    # email in these JWTs.
    # Verify the issuer.
    if parsed_token.get('iss') not in issuers:
        _logger.warning('Issuer was not valid: %s', parsed_token.get('iss'))
        return None

    # Check audiences.
    aud = parsed_token.get('aud')
    if not aud:
        _logger.warning('No aud field in token')
        return None
    if isinstance(aud, list):
        # RFC 7519 section 4.1.3: 'aud' may be an array of strings, and the
        # token is meant for us if any one of them identifies us. Non-string
        # elements are ignored so that an unhashable value can never reach a
        # set-based `audiences` and raise TypeError.
        aud_allowed = any(
            isinstance(candidate, six.string_types) and candidate in audiences
            for candidate in aud)
    else:
        aud_allowed = aud in audiences
    if not aud_allowed:
        _logger.warning('Audience not allowed: %s', aud)
        return None

    return parsed_token
| 823 | |
| 824 | |
def _listlike_guard(obj, name, iterable_only=False, log_warning=True):
    """Ensure an argument is list-like, wrapping a bare string in a tuple.

    Callers often need objects that support iteration or containment tests but
    are not strings. (Strings support both, but not usefully.) A string is
    therefore wrapped in a one-element tuple; any other acceptable object is
    returned untouched.

    Args:
      obj: The value to check.
      name: Label for obj, used in the error and warning messages.
      iterable_only: If true, obj must be an Iterable; otherwise either a
        Container or an Iterable is accepted.
      log_warning: If true, log a warning when obj turns out to be a string.

    Returns:
      (obj,) if obj is a string, otherwise obj itself.

    Raises:
      ValueError: If obj is not an instance of the accepted type(s).
    """
    if iterable_only:
        accepted = (_Iterable,)
    else:
        accepted = (_Container, _Iterable)

    if not isinstance(obj, accepted):
        accepted_names = ' or '.join(kind.__name__ for kind in accepted)
        raise ValueError('{} must be of type {}'.format(name, accepted_names))

    # The type check has passed, but strings pass it too; anything that is
    # not a string is already usable as-is.
    if not isinstance(obj, six.string_types):
        return obj
    if log_warning:
        _logger.warning('{} passed as a string; should be list-like'.format(name))
    return (obj,)