blob: 362585fe6b3f74e2d2bbe4139a54cca77bc2b53c [file] [log] [blame]
Copybara854996b2021-09-07 19:36:02 +00001# Copyright 2016 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style
3# license that can be found in the LICENSE file or at
4# https://developers.google.com/open-source/licenses/bsd
5
6"""A simple profiler object to track how time is spent on a request.
7
8The profiler is called from application code at the begining and
9end of each major phase and subphase of processing. The profiler
10object keeps track of how much time was spent on each phase or subphase.
11
12This class is useful when developers need to understand where
13server-side time is being spent. It includes durations in
14milliseconds, and a simple bar chart on the HTML page.
15
16On-page debugging and performance info is useful because it makes it easier
17to explore performance interactively.
18"""
19
20from __future__ import division
21from __future__ import print_function
22from __future__ import absolute_import
23
24import datetime
25import logging
26import random
27import re
28import threading
29import time
30
31from infra_libs import ts_mon
32
33from contextlib import contextmanager
34
35from google.appengine.api import app_identity
36
37PHASE_TIME = ts_mon.CumulativeDistributionMetric(
38 'monorail/servlet/phase_time',
39 'Time spent in profiler phases, in ms',
40 [ts_mon.StringField('phase')])
41
42# trace_service requires names less than 128 bytes
43# https://cloud.google.com/trace/docs/reference/v1/rest/v1/projects.traces#Trace
44MAX_PHASE_NAME_LENGTH = 128
45
46
47class Profiler(object):
48 """Object to record and help display request processing profiling info.
49
50 The Profiler class holds a list of phase objects, which can hold additional
51 phase objects (which are subphases). Each phase or subphase represents some
52 meaningful part of this application's HTTP request processing.
53 """
54
55 _COLORS = ['900', '090', '009', '360', '306', '036',
56 '630', '630', '063', '333']
57
58 def __init__(self, opt_trace_context=None, opt_trace_service=None):
59 """Each request processing profile begins with an empty list of phases."""
60 self.top_phase = _Phase('overall profile', -1, None)
61 self.current_phase = self.top_phase
62 self.next_color = 0
63 self.original_thread_id = threading.current_thread().ident
64 self.trace_context = opt_trace_context
65 self.trace_service = opt_trace_service
66 self.project_id = app_identity.get_application_id()
67
68 def StartPhase(self, name='unspecified phase'):
69 """Begin a (sub)phase by pushing a new phase onto a stack."""
70 if self.original_thread_id != threading.current_thread().ident:
71 return # We only profile the main thread.
72 color = self._COLORS[self.next_color % len(self._COLORS)]
73 self.next_color += 1
74 self.current_phase = _Phase(name, color, self.current_phase)
75
76 def EndPhase(self):
77 """End a (sub)phase by poping the phase stack."""
78 if self.original_thread_id != threading.current_thread().ident:
79 return # We only profile the main thread.
80 self.current_phase = self.current_phase.End()
81
82 @contextmanager
83 def Phase(self, name='unspecified phase'):
84 """Context manager to automatically begin and end (sub)phases."""
85 self.StartPhase(name)
86 try:
87 yield
88 finally:
89 self.EndPhase()
90
91 def LogStats(self):
92 """Log sufficiently-long phases and subphases, for debugging purposes."""
93 self.top_phase.End()
94 lines = ['Stats:']
95 self.top_phase.AccumulateStatLines(self.top_phase.elapsed_seconds, lines)
96 logging.info('\n'.join(lines))
97
98 def ReportTrace(self):
99 """Send a profile trace to Google Cloud Tracing."""
100 self.top_phase.End()
101 spans = self.top_phase.SpanJson()
102 if not self.trace_service or not self.trace_context:
103 logging.info('would have sent trace: %s', spans)
104 return
105
106 # Format of trace_context: 'TRACE_ID/SPAN_ID;o=TRACE_TRUE'
107 # (from https://cloud.google.com/trace/docs/troubleshooting#force-trace)
108 # TODO(crbug/monorail:7086): Respect the o=TRACE_TRUE part.
109 # Note: on Appngine it seems ';o=1' is omitted rather than set to 0.
110 trace_id, root_span_id = self.trace_context.split(';')[0].split('/')
111 for s in spans:
112 # TODO(crbug/monorail:7087): Consider setting `parentSpanId` to
113 # `root_span_id` for the children of `top_phase`.
114 if not 'parentSpanId' in s:
115 s['parentSpanId'] = root_span_id
116 traces_body = {
117 'projectId': self.project_id,
118 'traceId': trace_id,
119 'spans': spans,
120 }
121 body = {
122 'traces': [traces_body]
123 }
124 # TODO(crbug/monorail:7088): Do this async so it doesn't delay the response.
125 request = self.trace_service.projects().patchTraces(
126 projectId=self.project_id, body=body)
127 _res = request.execute()
128
129
130class _Phase(object):
131 """A _Phase instance represents a period of time during request processing."""
132
133 def __init__(self, name, color, parent):
134 """Initialize a (sub)phase with the given name and current system clock."""
135 self.start = time.time()
136 self.name = name[:MAX_PHASE_NAME_LENGTH]
137 self.color = color
138 self.subphases = []
139 self.elapsed_seconds = None
140 self.ms = 'in_progress' # shown if the phase never records a finish.
141 self.uncategorized_ms = None
142 self.parent = parent
143 if self.parent is not None:
144 self.parent._RegisterSubphase(self)
145
146 self.id = str(random.getrandbits(64))
147
148
149 def _RegisterSubphase(self, subphase):
150 """Add a subphase to this phase."""
151 self.subphases.append(subphase)
152
153 def End(self):
154 """Record the time between the start and end of this (sub)phase."""
155 self.elapsed_seconds = time.time() - self.start
156 self.ms = int(self.elapsed_seconds * 1000)
157 for sub in self.subphases:
158 if sub.elapsed_seconds is None:
159 logging.warn('issue3182: subphase is %r', sub and sub.name)
160 categorized = sum(sub.elapsed_seconds or 0.0 for sub in self.subphases)
161 self.uncategorized_ms = int((self.elapsed_seconds - categorized) * 1000)
162 return self.parent
163
164 def AccumulateStatLines(self, total_seconds, lines, indent=''):
165 # Only phases that took longer than 30ms are interesting.
166 if self.ms <= 30:
167 return
168
169 percent = self.elapsed_seconds // total_seconds * 100
170 lines.append('%s%5d ms (%2d%%): %s' % (indent, self.ms, percent, self.name))
171
172 # Remove IDs etc to reduce the phase name cardinality for ts_mon.
173 normalized_phase = re.sub('[0-9]+', '', self.name)
174 PHASE_TIME.add(self.ms, {'phase': normalized_phase})
175
176 for subphase in self.subphases:
177 subphase.AccumulateStatLines(total_seconds, lines, indent=indent + ' ')
178
179 def SpanJson(self):
180 """Return a json representation of this phase as a GCP Cloud Trace object.
181 """
182 endTime = self.start + self.elapsed_seconds
183
184 span = {
185 'kind': 'RPC_SERVER',
186 'name': self.name,
187 'spanId': self.id,
188 'startTime':
189 datetime.datetime.fromtimestamp(self.start).isoformat() + 'Z',
190 'endTime': datetime.datetime.fromtimestamp(endTime).isoformat() + 'Z',
191 }
192
193 if self.parent is not None and self.parent.id is not None:
194 span['parentSpanId'] = self.parent.id
195
196 spans = [span]
197 for s in self.subphases:
198 spans.extend(s.SpanJson())
199
200 return spans