Copybara | 854996b | 2021-09-07 19:36:02 +0000 | [diff] [blame] | 1 | # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style |
| 3 | # license that can be found in the LICENSE file or at |
| 4 | # https://developers.google.com/open-source/licenses/bsd |
| 5 | |
| 6 | """A simple profiler object to track how time is spent on a request. |
| 7 | |
| 8 | The profiler is called from application code at the begining and |
| 9 | end of each major phase and subphase of processing. The profiler |
| 10 | object keeps track of how much time was spent on each phase or subphase. |
| 11 | |
| 12 | This class is useful when developers need to understand where |
| 13 | server-side time is being spent. It includes durations in |
| 14 | milliseconds, and a simple bar chart on the HTML page. |
| 15 | |
| 16 | On-page debugging and performance info is useful because it makes it easier |
| 17 | to explore performance interactively. |
| 18 | """ |
| 19 | |
| 20 | from __future__ import division |
| 21 | from __future__ import print_function |
| 22 | from __future__ import absolute_import |
| 23 | |
| 24 | import datetime |
| 25 | import logging |
| 26 | import random |
| 27 | import re |
| 28 | import threading |
| 29 | import time |
| 30 | |
| 31 | from infra_libs import ts_mon |
| 32 | |
| 33 | from contextlib import contextmanager |
| 34 | |
| 35 | from google.appengine.api import app_identity |
| 36 | |
| 37 | PHASE_TIME = ts_mon.CumulativeDistributionMetric( |
| 38 | 'monorail/servlet/phase_time', |
| 39 | 'Time spent in profiler phases, in ms', |
| 40 | [ts_mon.StringField('phase')]) |
| 41 | |
| 42 | # trace_service requires names less than 128 bytes |
| 43 | # https://cloud.google.com/trace/docs/reference/v1/rest/v1/projects.traces#Trace |
| 44 | MAX_PHASE_NAME_LENGTH = 128 |
| 45 | |
| 46 | |
| 47 | class Profiler(object): |
| 48 | """Object to record and help display request processing profiling info. |
| 49 | |
| 50 | The Profiler class holds a list of phase objects, which can hold additional |
| 51 | phase objects (which are subphases). Each phase or subphase represents some |
| 52 | meaningful part of this application's HTTP request processing. |
| 53 | """ |
| 54 | |
| 55 | _COLORS = ['900', '090', '009', '360', '306', '036', |
| 56 | '630', '630', '063', '333'] |
| 57 | |
| 58 | def __init__(self, opt_trace_context=None, opt_trace_service=None): |
| 59 | """Each request processing profile begins with an empty list of phases.""" |
| 60 | self.top_phase = _Phase('overall profile', -1, None) |
| 61 | self.current_phase = self.top_phase |
| 62 | self.next_color = 0 |
| 63 | self.original_thread_id = threading.current_thread().ident |
| 64 | self.trace_context = opt_trace_context |
| 65 | self.trace_service = opt_trace_service |
| 66 | self.project_id = app_identity.get_application_id() |
| 67 | |
| 68 | def StartPhase(self, name='unspecified phase'): |
| 69 | """Begin a (sub)phase by pushing a new phase onto a stack.""" |
| 70 | if self.original_thread_id != threading.current_thread().ident: |
| 71 | return # We only profile the main thread. |
| 72 | color = self._COLORS[self.next_color % len(self._COLORS)] |
| 73 | self.next_color += 1 |
| 74 | self.current_phase = _Phase(name, color, self.current_phase) |
| 75 | |
| 76 | def EndPhase(self): |
| 77 | """End a (sub)phase by poping the phase stack.""" |
| 78 | if self.original_thread_id != threading.current_thread().ident: |
| 79 | return # We only profile the main thread. |
| 80 | self.current_phase = self.current_phase.End() |
| 81 | |
| 82 | @contextmanager |
| 83 | def Phase(self, name='unspecified phase'): |
| 84 | """Context manager to automatically begin and end (sub)phases.""" |
| 85 | self.StartPhase(name) |
| 86 | try: |
| 87 | yield |
| 88 | finally: |
| 89 | self.EndPhase() |
| 90 | |
| 91 | def LogStats(self): |
| 92 | """Log sufficiently-long phases and subphases, for debugging purposes.""" |
| 93 | self.top_phase.End() |
| 94 | lines = ['Stats:'] |
| 95 | self.top_phase.AccumulateStatLines(self.top_phase.elapsed_seconds, lines) |
| 96 | logging.info('\n'.join(lines)) |
| 97 | |
| 98 | def ReportTrace(self): |
| 99 | """Send a profile trace to Google Cloud Tracing.""" |
| 100 | self.top_phase.End() |
| 101 | spans = self.top_phase.SpanJson() |
| 102 | if not self.trace_service or not self.trace_context: |
| 103 | logging.info('would have sent trace: %s', spans) |
| 104 | return |
| 105 | |
| 106 | # Format of trace_context: 'TRACE_ID/SPAN_ID;o=TRACE_TRUE' |
| 107 | # (from https://cloud.google.com/trace/docs/troubleshooting#force-trace) |
| 108 | # TODO(crbug/monorail:7086): Respect the o=TRACE_TRUE part. |
| 109 | # Note: on Appngine it seems ';o=1' is omitted rather than set to 0. |
| 110 | trace_id, root_span_id = self.trace_context.split(';')[0].split('/') |
| 111 | for s in spans: |
| 112 | # TODO(crbug/monorail:7087): Consider setting `parentSpanId` to |
| 113 | # `root_span_id` for the children of `top_phase`. |
| 114 | if not 'parentSpanId' in s: |
| 115 | s['parentSpanId'] = root_span_id |
| 116 | traces_body = { |
| 117 | 'projectId': self.project_id, |
| 118 | 'traceId': trace_id, |
| 119 | 'spans': spans, |
| 120 | } |
| 121 | body = { |
| 122 | 'traces': [traces_body] |
| 123 | } |
| 124 | # TODO(crbug/monorail:7088): Do this async so it doesn't delay the response. |
| 125 | request = self.trace_service.projects().patchTraces( |
| 126 | projectId=self.project_id, body=body) |
| 127 | _res = request.execute() |
| 128 | |
| 129 | |
| 130 | class _Phase(object): |
| 131 | """A _Phase instance represents a period of time during request processing.""" |
| 132 | |
| 133 | def __init__(self, name, color, parent): |
| 134 | """Initialize a (sub)phase with the given name and current system clock.""" |
| 135 | self.start = time.time() |
| 136 | self.name = name[:MAX_PHASE_NAME_LENGTH] |
| 137 | self.color = color |
| 138 | self.subphases = [] |
| 139 | self.elapsed_seconds = None |
| 140 | self.ms = 'in_progress' # shown if the phase never records a finish. |
| 141 | self.uncategorized_ms = None |
| 142 | self.parent = parent |
| 143 | if self.parent is not None: |
| 144 | self.parent._RegisterSubphase(self) |
| 145 | |
| 146 | self.id = str(random.getrandbits(64)) |
| 147 | |
| 148 | |
| 149 | def _RegisterSubphase(self, subphase): |
| 150 | """Add a subphase to this phase.""" |
| 151 | self.subphases.append(subphase) |
| 152 | |
| 153 | def End(self): |
| 154 | """Record the time between the start and end of this (sub)phase.""" |
| 155 | self.elapsed_seconds = time.time() - self.start |
| 156 | self.ms = int(self.elapsed_seconds * 1000) |
| 157 | for sub in self.subphases: |
| 158 | if sub.elapsed_seconds is None: |
| 159 | logging.warn('issue3182: subphase is %r', sub and sub.name) |
| 160 | categorized = sum(sub.elapsed_seconds or 0.0 for sub in self.subphases) |
| 161 | self.uncategorized_ms = int((self.elapsed_seconds - categorized) * 1000) |
| 162 | return self.parent |
| 163 | |
| 164 | def AccumulateStatLines(self, total_seconds, lines, indent=''): |
| 165 | # Only phases that took longer than 30ms are interesting. |
| 166 | if self.ms <= 30: |
| 167 | return |
| 168 | |
| 169 | percent = self.elapsed_seconds // total_seconds * 100 |
| 170 | lines.append('%s%5d ms (%2d%%): %s' % (indent, self.ms, percent, self.name)) |
| 171 | |
| 172 | # Remove IDs etc to reduce the phase name cardinality for ts_mon. |
| 173 | normalized_phase = re.sub('[0-9]+', '', self.name) |
| 174 | PHASE_TIME.add(self.ms, {'phase': normalized_phase}) |
| 175 | |
| 176 | for subphase in self.subphases: |
| 177 | subphase.AccumulateStatLines(total_seconds, lines, indent=indent + ' ') |
| 178 | |
| 179 | def SpanJson(self): |
| 180 | """Return a json representation of this phase as a GCP Cloud Trace object. |
| 181 | """ |
| 182 | endTime = self.start + self.elapsed_seconds |
| 183 | |
| 184 | span = { |
| 185 | 'kind': 'RPC_SERVER', |
| 186 | 'name': self.name, |
| 187 | 'spanId': self.id, |
| 188 | 'startTime': |
| 189 | datetime.datetime.fromtimestamp(self.start).isoformat() + 'Z', |
| 190 | 'endTime': datetime.datetime.fromtimestamp(endTime).isoformat() + 'Z', |
| 191 | } |
| 192 | |
| 193 | if self.parent is not None and self.parent.id is not None: |
| 194 | span['parentSpanId'] = self.parent.id |
| 195 | |
| 196 | spans = [span] |
| 197 | for s in self.subphases: |
| 198 | spans.extend(s.SpanJson()) |
| 199 | |
| 200 | return spans |