blob: 2080805e5b8ed0ff7034dd551a97f05bc15d7f5a [file] [log] [blame]
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
15"""Utility library for reading user information from an id_token.
16
17This is an experimental library that can temporarily be used to extract
18a user from an id_token. The functionality provided by this library
19will be provided elsewhere in the future.
20"""
21
22from __future__ import absolute_import
23
24import base64
25import binascii
26import hmac
27import json
28import logging
29import os
30import re
31import six
32import time
33from six.moves import urllib
34from collections.abc import Container as _Container
35from collections.abc import Iterable as _Iterable
36from collections.abc import Mapping as _Mapping
37
38from google.appengine.api import memcache
39from google.appengine.api import oauth
40from google.appengine.api import urlfetch
41from google.appengine.api import users
42
43from . import constants
44from . import types as endpoints_types
45
46try:
47 # PyCrypto may not be installed for the import_aeta_test or in dev's
48 # individual Python installations. It is available on AppEngine in prod.
49
50 # Disable "Import not at top of file" warning.
51 # pylint: disable=g-import-not-at-top
52 from Crypto.Hash import SHA256
53 from Crypto.PublicKey import RSA
54 # pylint: enable=g-import-not-at-top
55 _CRYPTO_LOADED = True
56except ImportError:
57 _CRYPTO_LOADED = False
58
59
# Names exported by `from <module> import *`.
__all__ = [
    'convert_jwks_uri',
    'get_current_user',
    'get_verified_jwt',
    'InvalidGetUserCall',
    'SKIP_CLIENT_ID_CHECK',
]

# Module-level logger shared by every function in this file.
_logger = logging.getLogger(__name__)

# Sentinel value for allowed_client_ids: the bearer-token code paths compare
# list(allowed_client_ids) against this value to decide whether to skip the
# client ID check.
SKIP_CLIENT_ID_CHECK = ['*']  # This needs to be a list, for comparisons.
# Tolerance applied on both sides of a token's iat/exp validity window.
_CLOCK_SKEW_SECS = 300  # 5 minutes in seconds
# Upper bound on how far in the future a token's exp claim may lie.
_MAX_TOKEN_LIFETIME_SECS = 86400  # 1 day in seconds
# Default location of the signing keys, in modulus/exponent JSON form.
_DEFAULT_CERT_URI = ('https://www.googleapis.com/service_accounts/v1/metadata/'
                     'raw/federated-signon@system.gserviceaccount.com')
# Keys looked up in os.environ by get_current_user() and
# _is_auth_info_available().
_ENDPOINTS_USER_INFO = 'google.api.auth.user_info'
_ENV_USE_OAUTH_SCOPE = 'ENDPOINTS_USE_OAUTH_SCOPE'
_ENV_AUTH_EMAIL = 'ENDPOINTS_AUTH_EMAIL'
_ENV_AUTH_DOMAIN = 'ENDPOINTS_AUTH_DOMAIN'
# When this is the only configured scope (and client IDs are configured),
# _maybe_set_current_user_vars() tries id_token processing first.
_EMAIL_SCOPE = 'https://www.googleapis.com/auth/userinfo.email'
# Endpoint queried by _set_bearer_user_vars_local() on the dev server.
_TOKENINFO_URL = 'https://www.googleapis.com/oauth2/v3/tokeninfo'
# Matches one comma-separated Cache-Control entry of the form "max-age=N" and
# captures N; used by _get_cert_expiration_time().
_MAX_AGE_REGEX = re.compile(r'\s*max-age\s*=\s*(\d+)\s*')
# Cache namespace under which _get_cached_certs() stores fetched certs.
_CERT_NAMESPACE = '__verify_jwt'
# Issuer strings accepted for Google-issued id_tokens.
_ISSUERS = ('accounts.google.com', 'https://accounts.google.com')
# Issuer configuration used when the API declares no issuers, and merged in
# when the declared issuers lack a 'google_id_token' entry.
_DEFAULT_GOOGLE_ISSUER = {
    'google_id_token': endpoints_types.Issuer(_ISSUERS, _DEFAULT_CERT_URI)
}
87
88
class _AppIdentityError(Exception):
  """Internal error raised when a JWT cannot be parsed or verified."""
91
92
class InvalidGetUserCall(Exception):
  """Raised by get_current_user when no auth info is present in the environment.

  This normally means get_current_user was called outside of an Endpoints
  request handler.
  """
95
96
97# pylint: disable=g-bad-name
def get_current_user():
  """Return the user for the current Endpoints request, if there is one.

  Call this only from within an Endpoints request handler decorated with
  @endpoints.method, and include the
  https://www.googleapis.com/auth/userinfo.email scope in that decorator.

  The user is looked up from the request environment in this order:
    1. User info stored by
       `endpoints_management.control.wsgi.AuthenticationMiddleware`.
    2. A cached OAuth scope, which is handed to oauth.get_current_user.
    3. The email / auth domain recorded after id_token validation.

  Returns:
    A User if a valid token was found, otherwise None. Only the email field
    of the returned User is guaranteed to be populated.

  Raises:
    InvalidGetUserCall: if none of the environment variables used to carry
      Endpoints auth information are set, which usually means this was called
      from outside an Endpoints request handler.
  """
  if not _is_auth_info_available():
    raise InvalidGetUserCall('No valid endpoints user in environment.')

  environ = os.environ

  # Middleware already decoded the user for us.
  if _ENDPOINTS_USER_INFO in environ:
    middleware_user = environ[_ENDPOINTS_USER_INFO]
    return users.User(middleware_user.email)

  # A validated OAuth scope was cached; the oauth API can supply the full
  # user as long as it is asked with that scope.
  if _ENV_USE_OAUTH_SCOPE in environ:
    cached_scopes = environ[_ENV_USE_OAUTH_SCOPE].split()
    return oauth.get_current_user(cached_scopes)

  # Shouldn't normally happen given the availability check above, but be
  # defensive and report "no user".
  if _ENV_AUTH_EMAIL not in environ or _ENV_AUTH_DOMAIN not in environ:
    return None

  email = environ[_ENV_AUTH_EMAIL]
  if not email:
    # No id token was supplied, or it could not be validated.
    return None
  return users.User(email, environ[_ENV_AUTH_DOMAIN] or None)
150
151
152# pylint: disable=g-bad-name
def _is_auth_info_available():
  """Report whether any form of user auth info is present in os.environ.

  Returns:
    True if the middleware user info key is set, or both the auth email and
    auth domain keys are set, or the OAuth scope key is set; False otherwise.
  """
  environ = os.environ
  if _ENDPOINTS_USER_INFO in environ:
    return True
  if _ENV_AUTH_EMAIL in environ and _ENV_AUTH_DOMAIN in environ:
    return True
  return _ENV_USE_OAUTH_SCOPE in environ
158
159
def _maybe_set_current_user_vars(method, api_info=None, request=None):
  """Get user information from the id_token or oauth token in the request.

  Used internally by Endpoints to set up environment variables for user
  authentication. Does nothing if auth information is already present in the
  environment.

  Args:
    method: The class method that's handling this request.  This method
      should be annotated with @endpoints.method.
    api_info: An api_config._ApiInfo instance. Optional. If None, will attempt
      to parse api_info from the implicit instance of the method.
    request: The current request, or None.

  Returns:
    None. Results are communicated through os.environ.
  """
  if _is_auth_info_available():
    return

  # By default, there's no user.
  os.environ[_ENV_AUTH_EMAIL] = ''
  os.environ[_ENV_AUTH_DOMAIN] = ''

  # Choose settings on the method, if specified.  Otherwise, choose settings
  # from the API.  Specifically check for None, so that methods can override
  # with empty lists.
  try:
    if not api_info:
      # Bound methods expose their instance as __self__; im_self is the
      # Python 2 only spelling and is kept as a fallback.
      bound_instance = getattr(method, '__self__', None)
      if bound_instance is None:
        bound_instance = method.im_self
      api_info = bound_instance.api_info
  except AttributeError:
    # The most common case for this is someone passing an unbound method
    # to this function, which most likely only happens in our unit tests.
    # We could propagate the exception, but this results in some really
    # difficult to debug behavior.  Better to log a warning and pretend
    # there are no API-level settings.
    _logger.warning('AttributeError when accessing %s.im_self.  An unbound '
                    'method was probably passed as an endpoints handler.',
                    method.__name__)
    scopes = method.method_info.scopes
    audiences = method.method_info.audiences
    allowed_client_ids = method.method_info.allowed_client_ids
  else:
    scopes = (method.method_info.scopes
              if method.method_info.scopes is not None
              else api_info.scopes)
    audiences = (method.method_info.audiences
                 if method.method_info.audiences is not None
                 else api_info.audiences)
    allowed_client_ids = (method.method_info.allowed_client_ids
                          if method.method_info.allowed_client_ids is not None
                          else api_info.allowed_client_ids)

  if not scopes and not audiences and not allowed_client_ids:
    # The user hasn't provided any information to allow us to parse either
    # an id_token or an Oauth token.  They appear not to be interested in
    # auth.
    return

  token = _get_token(request)
  if not token:
    return None

  if allowed_client_ids and _is_local_dev():
    allowed_client_ids = (
        (constants.API_EXPLORER_CLIENT_ID,) + tuple(allowed_client_ids))

  # When every item in the acceptable scopes list is
  # "https://www.googleapis.com/auth/userinfo.email", and there is a non-empty
  # allowed_client_ids list, the API code will first attempt OAuth 2/OpenID
  # Connect ID token processing for any incoming bearer token.
  if ((scopes == [_EMAIL_SCOPE] or scopes == (_EMAIL_SCOPE,)) and
      allowed_client_ids):
    _logger.debug('Checking for id_token.')
    # api_info is still None when the fallback lookup above failed; treat
    # that the same as an API that declares no issuers.
    issuers = getattr(api_info, 'issuers', None)
    if issuers is None:
      issuers = _DEFAULT_GOOGLE_ISSUER
    elif 'google_id_token' not in issuers:
      # NOTE: this updates the API's own issuers mapping in place.
      issuers.update(_DEFAULT_GOOGLE_ISSUER)
    time_now = int(time.time())
    user = _get_id_token_user(token, issuers, audiences, allowed_client_ids,
                              time_now, memcache)
    if user:
      os.environ[_ENV_AUTH_EMAIL] = user.email()
      os.environ[_ENV_AUTH_DOMAIN] = user.auth_domain()
      return

  # Check if the user is interested in an oauth token.
  if scopes:
    _logger.debug('Checking for oauth token.')
    if _is_local_dev():
      _set_bearer_user_vars_local(token, allowed_client_ids, scopes)
    else:
      _set_bearer_user_vars(allowed_client_ids, scopes)
248
249
def _get_token(
    request=None, allowed_auth_schemes=('OAuth', 'Bearer'),
    allowed_query_keys=('bearer_token', 'access_token')):
  """Extract the auth token from the current request.

  The token is looked for in the Authorization header first. Only when no
  Authorization header is present at all are the query parameters consulted,
  in the order given by allowed_query_keys.

  Args:
    request: The current request, or None.
    allowed_auth_schemes: Authorization header prefixes to accept. A single
      string is treated as a one-element sequence.
    allowed_query_keys: Query parameter names to check, in order. A single
      string is treated as a one-element sequence.

  Returns:
    The token in the request or None.
  """
  schemes = _listlike_guard(
      allowed_auth_schemes, 'allowed_auth_schemes', iterable_only=True)

  header_value = os.environ.get('HTTP_AUTHORIZATION')
  if header_value:
    matched_scheme = next(
        (scheme for scheme in schemes if header_value.startswith(scheme)),
        None)
    if matched_scheme is None:
      # An Authorization header was sent, so even though it is not one we
      # accept, the query string is deliberately not consulted.
      return None
    # Skip the scheme and the single separator character that follows it.
    return header_value[len(matched_scheme) + 1:]

  if not request:
    return None

  query_keys = _listlike_guard(
      allowed_query_keys, 'allowed_query_keys', iterable_only=True)
  for query_key in query_keys:
    candidate, _ = request.get_unrecognized_field_info(query_key)
    if candidate:
      return candidate
  return None
287 return token
288
289
def _get_id_token_user(token, issuers, audiences, allowed_client_ids, time_now, cache):
  """Get a User for the given id token, if the token is valid.

  Each configured issuer is tried in turn; the first issuer for which both the
  signature and the claims check out determines the returned user.

  Args:
    token: The id_token to check.
    issuers: dict of Issuers
    audiences: List of audiences that are acceptable, or a mapping from issuer
      key to such a list.
    allowed_client_ids: List of client IDs that are acceptable.
    time_now: The current time as an int (eg. int(time.time())).
    cache: Cache to use (eg. the memcache module).

  Returns:
    A User if the token is valid, None otherwise.
  """
  # Verify that the token is valid before we try to extract anything from it.
  # This verifies the signature and some of the basic info in the token.
  for issuer_key, issuer in issuers.items():
    issuer_cert_uri = convert_jwks_uri(issuer.jwks_uri)
    try:
      parsed_token = _verify_signed_jwt_with_certs(
          token, time_now, cache, cert_uri=issuer_cert_uri)
    except Exception:  # pylint: disable=broad-except
      _logger.debug(
          'id_token verification failed for issuer %s', issuer_key, exc_info=True)
      continue

    issuer_values = _listlike_guard(issuer.issuer, 'issuer', log_warning=False)
    # Resolve the audiences for *this* issuer into a separate name. Rebinding
    # `audiences` itself would discard the per-issuer mapping, and every later
    # issuer would then be checked against the first issuer's audiences.
    if isinstance(audiences, _Mapping):
      issuer_audiences = audiences[issuer_key]
    else:
      issuer_audiences = audiences
    if _verify_parsed_token(
        parsed_token, issuer_values, issuer_audiences, allowed_client_ids,
        # There's some special handling we do for Google issuers.
        # ESP doesn't do this, and it's both unnecessary and invalid for other issuers.
        # So we'll turn it off except in the Google issuer case.
        is_legacy_google_auth=(issuer.issuer == _ISSUERS)):
      email = parsed_token['email']
      # The token might have an id, but it's a Gaia ID that's been
      # obfuscated with the Focus key, rather than the AppEngine (igoogle)
      # key.  If the developer ever put this email into the user DB
      # and retrieved the ID from that, it'd be different from the ID we'd
      # return here, so it's safer to not return the ID.
      # Instead, we'll only return the email.
      return users.User(email)
  return None
333
334
335# pylint: disable=unused-argument
def _set_oauth_user_vars(token_info, audiences, allowed_client_ids, scopes,
                         local_dev):
  """Deprecated alias that forwards to _set_bearer_user_vars.

  Only allowed_client_ids and scopes are used; token_info, audiences and
  local_dev are accepted for signature compatibility and ignored.

  Returns:
    Whatever _set_bearer_user_vars returns.
  """
  _logger.warning('_set_oauth_user_vars is deprecated and will be removed '
                  'soon.')
  outcome = _set_bearer_user_vars(allowed_client_ids, scopes)
  return outcome
341# pylint: enable=unused-argument
342
343
def _process_scopes(scopes):
  """Split scope specifications into individual scopes and sufficient sets.

  Args:
    scopes: A list of strings, each of which is a whitespace-separated list of
      scopes that together are sufficient for access.
      Examples: ['scope1']
                ['scope1', 'scope2']
                ['scope1', 'scope2 scope3']

  Returns:
    A (all_scopes, sufficient_scopes) tuple, where
      all_scopes: a set of strings, each of which is one scope to check for
      sufficient_scopes: a set of frozensets of strings; each inner set is
        a set of scopes which are sufficient for access.
        Example: {{'scope1'}, {'scope2', 'scope3'}}
  """
  scope_groups = [frozenset(spec.split()) for spec in scopes]
  every_scope = set().union(*scope_groups)
  return every_scope, set(scope_groups)
365
366
def _are_scopes_sufficient(authorized_scopes, sufficient_scopes):
  """Tell whether the authorized scopes cover at least one sufficient set.

  Args:
    authorized_scopes: a list of strings, return value from
      oauth.get_authorized_scopes
    sufficient_scopes: a set of sets of strings, return value from
      _process_scopes

  Returns:
    True if every scope of at least one set in sufficient_scopes appears in
    authorized_scopes, False otherwise.
  """
  return any(
      required.issubset(authorized_scopes) for required in sufficient_scopes)
378
379
380
def _set_bearer_user_vars(allowed_client_ids, scopes):
  """Validate the oauth bearer token and record the scope to use for lookups.

  On success this stores the authorized scopes in ENDPOINTS_USE_OAUTH_SCOPE,
  which is all endpoints.get_current_user() needs to fetch the user. On any
  failure the environment is left untouched.

  Args:
    allowed_client_ids: List of client IDs that are acceptable.
    scopes: List of acceptable scopes.
  """
  every_scope, acceptable_scope_sets = _process_scopes(scopes)

  try:
    granted_scopes = oauth.get_authorized_scopes(sorted(every_scope))
  except oauth.Error:
    _logger.debug('Unable to get authorized scopes.', exc_info=True)
    return

  if not _are_scopes_sufficient(granted_scopes, acceptable_scope_sets):
    _logger.warning('Authorized scopes did not satisfy scope requirements.')
    return

  client_id = oauth.get_client_id(granted_scopes)

  # An empty allowed_client_ids rejects every client; SKIP_CLIENT_ID_CHECK
  # accepts every client; otherwise the client ID has to be listed.
  skip_client_check = list(allowed_client_ids) == SKIP_CLIENT_ID_CHECK
  if not skip_client_check and client_id not in allowed_client_ids:
    _logger.warning('Client ID is not allowed: %s', client_id)
    return

  os.environ[_ENV_USE_OAUTH_SCOPE] = ' '.join(granted_scopes)
  _logger.debug('get_current_user() will return user from matched oauth_user.')
412 _logger.debug('get_current_user() will return user from matched oauth_user.')
413
414
def _set_bearer_user_vars_local(token, allowed_client_ids, scopes):
  """Validate the oauth bearer token on the dev server.

  The oauth module only returns example results in local development, so the
  token is checked against the tokeninfo endpoint instead. When the token is
  acceptable, _ENV_AUTH_EMAIL and _ENV_AUTH_DOMAIN are set so the user can be
  derived from them; otherwise the environment is left untouched.

  Args:
    token: String with the oauth token to validate.
    allowed_client_ids: List of client IDs that are acceptable.
    scopes: List of acceptable scopes.
  """
  # Ask the tokeninfo endpoint about this token.
  query = urllib.parse.urlencode({'access_token': token})
  response = urlfetch.fetch('%s?%s' % (_TOKENINFO_URL, query))
  if response.status_code != 200:
    try:
      error_description = json.loads(response.content)['error_description']
    except (ValueError, KeyError):
      error_description = ''
    _logger.error('Token info endpoint returned status %s: %s',
                  response.status_code, error_description)
    return
  token_info = json.loads(response.content)

  # The token has to carry a verified email address.
  if 'email' not in token_info:
    _logger.warning('Oauth token doesn\'t include an email address.')
    return
  if token_info.get('email_verified') != 'true':
    _logger.warning('Oauth token email isn\'t verified.')
    return

  # The issuing client has to be allowed, unless the check is skipped.
  client_id = token_info.get('azp')
  skip_client_check = list(allowed_client_ids) == SKIP_CLIENT_ID_CHECK
  if not skip_client_check and client_id not in allowed_client_ids:
    _logger.warning('Client ID is not allowed: %s', client_id)
    return

  # At least one sufficient scope set has to be covered by the token.
  _, acceptable_scope_sets = _process_scopes(scopes)
  granted_scopes = token_info.get('scope', '').split(' ')
  if not _are_scopes_sufficient(granted_scopes, acceptable_scope_sets):
    _logger.warning('Oauth token scopes don\'t match any acceptable scopes.')
    return

  os.environ[_ENV_AUTH_EMAIL] = token_info['email']
  os.environ[_ENV_AUTH_DOMAIN] = ''
  _logger.debug('Local dev returning user from token.')
466
467
def _is_local_dev():
  """Return True when SERVER_SOFTWARE in the environment starts with 'Development'."""
  server_software = os.environ.get('SERVER_SOFTWARE', '')
  return server_software.startswith('Development')
470
471
def _verify_parsed_token(parsed_token, issuers, audiences, allowed_client_ids, is_legacy_google_auth=True):
  """Verify a parsed user ID token.

  Args:
    parsed_token: The parsed token information.
    issuers: A list of allowed issuers
    audiences: The allowed audiences. May be None, in which case no audience
      is accepted through this list.
    allowed_client_ids: The allowed client IDs.
    is_legacy_google_auth: When True, a token whose audience equals its own
      client ID (azp) is accepted regardless of `audiences`, and the client ID
      must appear in allowed_client_ids. When False, neither applies.

  Returns:
    True if the token is verified, False otherwise.
  """
  # Verify the issuer.
  if parsed_token.get('iss') not in issuers:
    _logger.warning('Issuer was not valid: %s', parsed_token.get('iss'))
    return False

  # Check audiences.
  aud = parsed_token.get('aud')
  if not aud:
    _logger.warning('No aud field in token')
    return False
  # Special legacy handling if aud == cid.  This occurs with iOS and browsers.
  # As long as audience == client_id and cid is allowed, we need to accept
  # the audience for compatibility.
  cid = parsed_token.get('azp')
  # Guard the containment test: with no audiences configured, `aud in None`
  # would raise before the legacy aud == cid case could be considered.
  audience_allowed = (
      (audiences is not None and aud in audiences) or
      (is_legacy_google_auth and aud == cid))
  if not audience_allowed:
    _logger.warning('Audience not allowed: %s', aud)
    return False

  # Check allowed client IDs, for legacy auth.
  if is_legacy_google_auth:
    if list(allowed_client_ids) == SKIP_CLIENT_ID_CHECK:
      _logger.warning('Client ID check can\'t be skipped for ID tokens.  '
                      'Id_token cannot be verified.')
      return False
    elif not cid or cid not in allowed_client_ids:
      _logger.warning('Client ID is not allowed: %s', cid)
      return False

  if 'email' not in parsed_token:
    return False

  return True
516 return True
517
518
def _urlsafe_b64decode(b64string):
  """Decode URL-safe base64 data whose trailing '=' padding may be missing.

  Args:
    b64string: The encoded value, as bytes or as an ASCII-only text string.

  Returns:
    The decoded bytes.

  Raises:
    TypeError: if b64string is neither bytes nor text.
    UnicodeEncodeError: if a text b64string contains non-ASCII characters.
  """
  # Normalize to bytes first so that the padding below is bytes + bytes;
  # mixing bytes with a text '=' raises TypeError.
  if isinstance(b64string, bytes):
    raw = b64string
  elif isinstance(b64string, str):
    raw = b64string.encode('ascii')
  else:
    raise TypeError("not expecting type '%s'" % type(b64string))
  # JWT segments are transmitted without padding; restore it to a multiple
  # of four characters before decoding.
  padded = raw + b'=' * ((4 - len(raw)) % 4)
  return base64.urlsafe_b64decode(padded)
523 return base64.urlsafe_b64decode(padded)
524
525
def _get_cert_expiration_time(headers):
  """Work out how long a fetched cert may be cached, from response headers.

  The lifetime is the first max-age value found in the Cache-Control header,
  reduced by the Age header when that is present and numeric. When no max-age
  can be found the result is 0, meaning the cert shouldn't be cached.

  Args:
    headers: A dict containing the response headers from the request to get
      certs.

  Returns:
    An integer with the number of seconds the cert should be cached.  This
    value is guaranteed to be >= 0.
  """
  cache_control = headers.get('Cache-Control', '')
  # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 indicates only
  # a comma-separated header is valid, so splitting on commas yields the
  # individual directives.
  directive_matches = (
      _MAX_AGE_REGEX.match(directive)
      for directive in cache_control.split(','))
  max_age_match = next((m for m in directive_matches if m), None)
  if max_age_match is None:
    return 0
  remaining_seconds = int(max_age_match.group(1))

  # Account for the time the response already spent in upstream caches.
  age_header = headers.get('Age')
  if age_header is not None:
    try:
      elapsed_seconds = int(age_header)
    except ValueError:
      elapsed_seconds = 0
    remaining_seconds -= elapsed_seconds

  return max(0, remaining_seconds)
563 return max(0, cache_time_seconds)
564
565
def _get_cached_certs(cert_uri, cache):
  """Return the certs for cert_uri, fetching and caching them on a cache miss.

  Args:
    cert_uri: URI from which to retrieve certs if cache is stale or empty.
    cache: Cache of pre-fetched certs.

  Returns:
    The retrieved certs, or None if they could not be fetched.
  """
  cached_certs = cache.get(cert_uri, namespace=_CERT_NAMESPACE)
  if cached_certs is not None:
    return cached_certs

  _logger.debug('Cert cache miss for %s', cert_uri)
  try:
    response = urlfetch.fetch(cert_uri)
  except AssertionError:
    # This happens in unit tests.  Act as if we couldn't get any certs.
    return None

  if response.status_code != 200:
    _logger.error(
        'Certs not available, HTTP request returned %d', response.status_code)
    return None

  fetched_certs = json.loads(response.content)
  # Only cache when the response headers grant a positive lifetime.
  lifetime_seconds = _get_cert_expiration_time(response.headers)
  if lifetime_seconds:
    cache.set(cert_uri, fetched_certs, time=lifetime_seconds,
              namespace=_CERT_NAMESPACE)
  return fetched_certs
595 return certs
596
597
def _b64_to_int(b):
  """Decode a possibly unpadded base64 value into a big-endian integer.

  Args:
    b: The base64 encoded value, as bytes or ASCII text.

  Returns:
    The integer whose big-endian byte representation is the decoded value.
  """
  encoded = six.ensure_binary(b, 'ascii')
  missing_padding = (4 - len(encoded)) % 4
  decoded = base64.b64decode(encoded + b'=' * missing_padding)
  return int(binascii.hexlify(decoded), 16)
602 return int(binascii.hexlify(b), 16)
603
604
def _verify_signed_jwt_with_certs(
    jwt, time_now, cache,
    cert_uri=_DEFAULT_CERT_URI):
  """Verify a JWT against public certs.

  See http://self-issued.info/docs/draft-jones-json-web-token.html.

  The PyCrypto library included with Google App Engine is severely limited and
  so you have to use it very carefully to verify JWT signatures.  The first
  issue is that the library can't read X.509 files, so we make a call to a
  special URI that has the public cert in modulus/exponent form in JSON.

  The second issue is that the RSA.verify method doesn't work, at least for
  how the JWT tokens are signed, so we have to manually verify the signature
  of the JWT, which means hashing the signed part of the JWT and comparing
  that to the signature that's been encrypted with the public key.

  Args:
    jwt: string, A JWT.
    time_now: The current time, as an int (eg. int(time.time())).
    cache: Cache to use (eg. the memcache module).
    cert_uri: string, URI to get cert modulus and exponent in JSON format.

  Returns:
    dict, The deserialized JSON payload in the JWT.

  Raises:
    _AppIdentityError: if any checks are failed, including when a segment of
      the token cannot be decoded or parsed.
  """

  segments = jwt.split('.')

  if len(segments) != 3:
    # Note that anywhere we print the jwt or its json body, we need to use
    # %r instead of %s, so that non-printable characters are escaped safely.
    raise _AppIdentityError('Token is not an id_token (Wrong number of '
                            'segments)')

  # Everything in the token is untrusted at this point, so decoding failures
  # are reported as _AppIdentityError instead of leaking ValueError and
  # friends to the caller.
  try:
    # The hash below operates on bytes, not text.
    signed = six.ensure_binary(
        '%s.%s' % (segments[0], segments[1]), 'ascii')
    signature = _urlsafe_b64decode(segments[2])
    # pycrypto only deals in integers, so we have to convert the string of
    # bytes into an int.  An empty signature fails this conversion.
    lsignature = int(binascii.hexlify(signature), 16)
  except (TypeError, ValueError):
    raise _AppIdentityError("Can't parse token signature")

  # Verify expected header.
  try:
    header = json.loads(_urlsafe_b64decode(segments[0]))
  except Exception:  # pylint: disable=broad-except
    raise _AppIdentityError("Can't parse header")
  if not isinstance(header, dict):
    # Valid JSON that isn't an object (e.g. a list) has no 'alg' to inspect.
    raise _AppIdentityError("Can't parse header")
  if header.get('alg') != 'RS256':
    raise _AppIdentityError('Unexpected encryption algorithm: %r' %
                            header.get('alg'))

  # Formerly we would parse the token body here.
  # However, it's not safe to do that without first checking the signature.

  certs = _get_cached_certs(cert_uri, cache)
  if certs is None:
    raise _AppIdentityError(
        'Unable to retrieve certs needed to verify the signed JWT')

  # Verify that we were able to load the Crypto libraries, before we try
  # to use them.
  if not _CRYPTO_LOADED:
    raise _AppIdentityError('Unable to load pycrypto library.  Can\'t verify '
                            'id_token signature.  See http://www.pycrypto.org '
                            'for more information on pycrypto.')

  # SHA256 hash of the already 'signed' segment from the JWT. Since a SHA256
  # hash, will always have length 64.
  local_hash = SHA256.new(signed).hexdigest()

  # Check signature.
  verified = False
  for keyvalue in certs['keyvalues']:
    try:
      modulus = _b64_to_int(keyvalue['modulus'])
      exponent = _b64_to_int(keyvalue['exponent'])
      key = RSA.construct((modulus, exponent))

      # Encrypt, and convert to a hex string.
      hexsig = '%064x' % key.encrypt(lsignature, '')[0]
      # Make sure we have only last 64 base64 chars
      # NOTE(review): only these trailing 64 hex characters are compared; the
      # leading part of the RSA result (the signature padding) is not
      # inspected here.
      hexsig = hexsig[-64:]

      # Check the signature on 'signed' by encrypting 'signature' with the
      # public key and confirming the result matches the SHA256 hash of
      # 'signed'. hmac.compare_digest(a, b) is used to avoid timing attacks.
      verified = hmac.compare_digest(hexsig, local_hash)
      if verified:
        break
    except Exception as e:  # pylint: disable=broad-except
      # Log the exception for debugging purpose.
      _logger.debug(
          'Signature verification error: %s; continuing with the next cert.', e)
      continue
  if not verified:
    raise _AppIdentityError('Invalid token signature')

  # Parse token.
  try:
    parsed = json.loads(_urlsafe_b64decode(segments[1]))
  except Exception:  # pylint: disable=broad-except
    raise _AppIdentityError("Can't parse token body")
  if not isinstance(parsed, dict):
    raise _AppIdentityError("Can't parse token body")

  # Check creation timestamp.
  iat = parsed.get('iat')
  if iat is None:
    raise _AppIdentityError('No iat field in token')
  earliest = iat - _CLOCK_SKEW_SECS

  # Check expiration timestamp.
  exp = parsed.get('exp')
  if exp is None:
    raise _AppIdentityError('No exp field in token')
  if exp >= time_now + _MAX_TOKEN_LIFETIME_SECS:
    raise _AppIdentityError('exp field too far in future')
  latest = exp + _CLOCK_SKEW_SECS

  if time_now < earliest:
    raise _AppIdentityError('Token used too early, %d < %d' %
                            (time_now, earliest))
  if time_now > latest:
    raise _AppIdentityError('Token used too late, %d > %d' %
                            (time_now, latest))

  return parsed
735
736
_TEXT_CERT_PREFIX = 'https://www.googleapis.com/robot/v1/metadata/x509/'
_JSON_CERT_PREFIX = 'https://www.googleapis.com/service_accounts/v1/metadata/raw/'


def convert_jwks_uri(jwks_uri):
  """Map an X.509 cert URI onto its modulus/exponent JSON counterpart.

  The PyCrypto library included with Google App Engine is severely limited and
  can't read X.509 files, so URIs that start with the X.509 metadata prefix
  are rewritten to the URI that serves the same public cert in
  modulus/exponent form in JSON.

  Args:
    jwks_uri: string, the cert URI configured for an issuer.

  Returns:
    The rewritten URI, or jwks_uri unchanged when it does not start with the
    X.509 metadata prefix.
  """
  if jwks_uri.startswith(_TEXT_CERT_PREFIX):
    return jwks_uri.replace(_TEXT_CERT_PREFIX, _JSON_CERT_PREFIX)
  return jwks_uri
750
751
def get_verified_jwt(
    providers, audiences,
    check_authorization_header=True, check_query_arg=True,
    request=None, cache=memcache):
  """Extract, verify and parse a JWT from the current request.

  The token is taken from the 'Authorization: Bearer' header and/or the
  access_token query argument, depending on the check_* flags.

  The JWT is assumed to contain an issuer and audience claim, as well
  as issued-at and expiration timestamps. The signature will be
  cryptographically verified, the claims and timestamps will be
  checked, and the resulting parsed JWT body is returned. Providers are tried
  in order and the first one that accepts the token wins.

  Args:
    providers: An iterable of dicts each containing 'issuer' and 'cert_uri'
      keys.
    audiences: An iterable of valid audiences.
    check_authorization_header: Boolean; check 'Authorization: Bearer' header.
    check_query_arg: Boolean; check 'access_token' query arg.
    request: Must be the request object if check_query_arg is true; otherwise
      ignored.
    cache: In testing, override the certificate cache.

  Returns:
    The parsed JWT body, or None if the JWT is missing or found to be invalid.

  Raises:
    ValueError: if both check flags are false, or if check_query_arg is true
      and no request object was supplied.
  """
  if not check_authorization_header and not check_query_arg:
    raise ValueError(
        'Either check_authorization_header or check_query_arg must be True.')
  if request is None and check_query_arg:
    raise ValueError('Cannot check query arg without request object.')

  header_schemes = ('Bearer',) if check_authorization_header else ()
  query_keys = ('access_token',) if check_query_arg else ()
  token = _get_token(
      request=request, allowed_auth_schemes=header_schemes,
      allowed_query_keys=query_keys)
  if token is None:
    return None

  time_now = int(time.time())
  # Lazy, so that providers after the first successful one are never tried.
  verification_results = (
      _parse_and_verify_jwt(
          token, time_now, (provider['issuer'],), audiences,
          provider['cert_uri'], cache)
      for provider in providers)
  return next(
      (result for result in verification_results if result is not None), None)
795 return None
796
797
def _parse_and_verify_jwt(token, time_now, issuers, audiences, cert_uri, cache):
  """Verify a JWT's signature, issuer and audience and return its claims.

  Args:
    token: string, the JWT to check.
    time_now: The current time, as an int (eg. int(time.time())).
    issuers: The acceptable issuers; a single string is treated as one issuer.
    audiences: The acceptable audiences; a single string is treated as one
      audience.
    cert_uri: string, URI to get the certs used for signature verification.
    cache: Cache to use (eg. the memcache module).

  Returns:
    The parsed token body, or None if any check fails.
  """
  try:
    claims = _verify_signed_jwt_with_certs(token, time_now, cache, cert_uri)
  except (_AppIdentityError, TypeError) as e:
    _logger.debug('id_token verification failed: %s', e)
    return None

  allowed_issuers = _listlike_guard(issuers, 'issuers')
  allowed_audiences = _listlike_guard(audiences, 'audiences')

  # _verify_parsed_token can't be used here, because these JWTs carry no
  # client id (azp) or email.
  token_issuer = claims.get('iss')
  if token_issuer not in allowed_issuers:
    _logger.warning('Issuer was not valid: %s', token_issuer)
    return None

  token_audience = claims.get('aud')
  if not token_audience:
    _logger.warning('No aud field in token')
    return None
  if token_audience not in allowed_audiences:
    _logger.warning('Audience not allowed: %s', token_audience)
    return None

  return claims
822 return parsed_token
823
824
def _listlike_guard(obj, name, iterable_only=False, log_warning=True):
  """Make sure obj can be iterated or searched, wrapping bare strings.

  We frequently require passed objects to support iteration or
  containment expressions, but not be strings. (Of course, strings
  support iteration and containment, but not usefully.)

  Args:
    obj: The object to check.
    name: Name used for obj in error and warning messages.
    iterable_only: If True, obj has to be iterable; otherwise supporting
      either containment or iteration is enough.
    log_warning: Whether to log a warning when obj is a string.

  Returns:
    A one-element tuple holding obj if obj is a string, otherwise obj itself.

  Raises:
    ValueError: if obj is not of a required type.
  """
  if iterable_only:
    accepted_types = (_Iterable,)
  else:
    accepted_types = (_Container, _Iterable)

  if not isinstance(obj, accepted_types):
    accepted_names = ' or '.join(t.__name__ for t in accepted_types)
    raise ValueError('{} must be of type {}'.format(name, accepted_names))

  # The type is acceptable from here on; only strings need special treatment.
  if not isinstance(obj, six.string_types):
    return obj
  if log_warning:
    _logger.warning('{} passed as a string; should be list-like'.format(name))
  return (obj,)
844 return obj