From 7ae7810c3a5dec01fb87a8015570c4db9852c55f Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Wed, 13 Nov 2024 10:27:34 -0500 Subject: [PATCH 01/14] Move files from dev2 branch --- src/splunk_otel/profile.py | 369 ++++++++++++++++++++++++ src/splunk_otel/profile_pb2.py | 44 +++ tests/fixtures/pb_profile.out.json | 417 +++++++++++++++++++++++++++ tests/fixtures/stacktraces.in.json | 157 ++++++++++ tests/fixtures/thread_states.in.json | 1 + tests/ott_profile.py | 0 tests/test_profile.py | 104 +++++++ 7 files changed, 1092 insertions(+) create mode 100644 src/splunk_otel/profile.py create mode 100644 src/splunk_otel/profile_pb2.py create mode 100644 tests/fixtures/pb_profile.out.json create mode 100644 tests/fixtures/stacktraces.in.json create mode 100644 tests/fixtures/thread_states.in.json create mode 100644 tests/ott_profile.py create mode 100644 tests/test_profile.py diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py new file mode 100644 index 00000000..9942ec5f --- /dev/null +++ b/src/splunk_otel/profile.py @@ -0,0 +1,369 @@ +import base64 +import gzip +import sys +import threading +import time +import traceback +from collections import OrderedDict +from traceback import StackSummary + +import opentelemetry.context +import wrapt +from opentelemetry._logs import get_logger, Logger, set_logger_provider, SeverityNumber +from opentelemetry.context import Context +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter +from opentelemetry.sdk._logs import LoggerProvider, LogRecord +from opentelemetry.sdk._logs._internal.export import BatchLogRecordProcessor +from opentelemetry.sdk.resources import Resource +from opentelemetry.trace import TraceFlags +from opentelemetry.trace.propagation import _SPAN_KEY +from splunk_otel import profile_pb2 + +_profiling_timer = None + + +def start_profiling(): + tcm = ThreadContextMapping() + tcm.wrap_context_methods() + + logger = get_logger("splunk-profiler") + + period_millis = 100 + scraper = ProfilingScraper( + mk_resource(), + tcm.get_thread_states(), + period_millis, + logger + ) + + global _profiling_timer + _profiling_timer = PeriodicTimer(period_millis, scraper.tick) + _profiling_timer.start() + + +def stop_profiling(): + global _profiling_timer + _profiling_timer.stop() + + +class ThreadContextMapping: + + def __init__(self): + self.thread_states = {} + + def get_thread_states(self): + return self.thread_states + + def wrap_context_methods(self): + wrapt.wrap_function_wrapper( + opentelemetry.context, "attach", self.wrap_context_attach() + ) + wrapt.wrap_function_wrapper( + opentelemetry.context, "detach", self.wrap_context_detach() + ) + + def wrap_context_attach(self): + def wrapper(wrapped, _instance, args, kwargs): + token = wrapped(*args, **kwargs) + + maybe_context = args[0] if args else None + + if maybe_context: + span = maybe_context.get(_SPAN_KEY) + + if span: + thread_id = threading.get_ident() + context = span.get_span_context() + self.thread_states[thread_id] = ( + context.trace_id, + context.span_id, + ) + + return token + + return wrapper + + def wrap_context_detach(self): + def wrapper(wrapped, _instance, args, kwargs): + token = args[0] if args else None + + if token: + prev = token.old_value + thread_id = threading.get_ident() + if isinstance(prev, Context): + span = prev.get(_SPAN_KEY) + + if span: + context = span.get_span_context() + self.thread_states[thread_id] = ( + context.trace_id, + context.span_id, + ) + else: + self.thread_states[thread_id] = None + else: + self.thread_states[thread_id] = None + return wrapped(*args, **kwargs) + + return wrapper + + +def collect_stacktraces(): + out = [] + frames = sys._current_frames() + + for thread_id, frame in frames.items(): + stack_summary = extract_stack_summary(frame) + frames = [(sf.filename, sf.name, sf.lineno) for sf in stack_summary] + out.append({ + "frames": frames, + "tid": thread_id, + }) + return out + + +class ProfilingScraper: + + def __init__( + self, + resource, + thread_states, + period_millis, + logger: Logger, + collect_stacktraces_func=collect_stacktraces, + time_func=time.time, + ): + self.resource = resource + self.thread_states = thread_states + self.period_millis = period_millis + self.collect_stacktraces = collect_stacktraces_func + self.time = time_func + self.logger = logger + + def tick(self): + stacktraces = self.collect_stacktraces() + log_record = self.mk_log_record(stacktraces) + self.logger.emit(log_record) + + def mk_log_record(self, stacktraces): + lengths = (len(trace["frames"]) for trace in stacktraces) + total_frame_count = sum(lengths) + + time_seconds = self.time() + + pb_profile = stacktraces_to_cpu_profile( + stacktraces, + self.thread_states, + self.period_millis, + time_seconds + ) + pb_profile_str = pb_profile_to_str(pb_profile) + + return LogRecord( + timestamp=int(time_seconds * 1e9), + trace_id=0, + span_id=0, + trace_flags=TraceFlags(0x01), + severity_number=SeverityNumber.UNSPECIFIED, + body=pb_profile_str, + resource=self.resource, + attributes={ + "profiling.data.format": "pprof-gzip-base64", + "profiling.data.type": "cpu", + "com.splunk.sourcetype": "otel.profiling", + "profiling.data.total.frame.count": total_frame_count, + }, + ) + + +class PeriodicTimer: + + def __init__(self, period_millis, target): + self.period_seconds = period_millis / 1e3 + self.target = target + self.cancel = threading.Event() + self.thread = threading.Thread(target=self._loop, daemon=True) + self.sleep = time.sleep + + def start(self): + self.thread.start() + + def _loop(self): + while True: + start_time_seconds = time.time() + self.target() + elapsed_seconds = time.time() - start_time_seconds + sleep_seconds = max(0, self.period_seconds - elapsed_seconds) + time.sleep(sleep_seconds) + + def stop(self): + self.cancel.set() + self.thread.join() + + +def emit_log_record(log_record): + print(f"emitting {log_record}") + + +def mk_resource(): + return Resource({}) + + +class StringTable: + def __init__(self): + self.strings = OrderedDict() + + def index(self, token): + idx = self.strings.get(token) + + if idx: + return idx + + idx = len(self.strings) + self.strings[token] = idx + return idx + + def keys(self): + return list(self.strings.keys()) + + +def get_location(functions_table, str_table, locations_table, frame): + (file_name, function_name, line_no) = frame + key = f"{file_name}:{function_name}:{line_no}" + location = locations_table.get(key) + + if location is None: + location = profile_pb2.Location() + location.id = len(locations_table) + 1 + line = get_line(functions_table, str_table, file_name, function_name, line_no) + location.line.append(line) + locations_table[key] = location + + return location + + +def get_line(functions_table, str_table, file_name, function_name, line_no): + line = profile_pb2.Line() + line.function_id = get_function(functions_table, str_table, file_name, function_name).id + line.line = line_no if line_no != 0 else -1 + return line + + +def get_function(functions_table, str_table, file_name, function_name): + key = f"{file_name}:{function_name}" + func = functions_table.get(key) + + if func is None: + name_id = str_table.index(function_name) + func = profile_pb2.Function() + func.id = len(functions_table) + 1 + func.name = name_id + func.system_name = name_id + func.filename = str_table.index(file_name) + functions_table[key] = func + + return func + + +def stacktraces_to_cpu_profile(stacktraces, thread_states, period_millis, time_seconds): + str_table = StringTable() + locations_table = OrderedDict() + functions_table = OrderedDict() + + timestamp_unix_millis = int(time_seconds * 1e3) + + timestamp_key = str_table.index("source.event.time") + trace_id_key = str_table.index("trace_id") + span_id_key = str_table.index("span_id") + thread_id_key = str_table.index("thread.id") + event_period_key = str_table.index("source.event.period") + + pb_profile = profile_pb2.Profile() + + event_period_label = profile_pb2.Label() + event_period_label.key = event_period_key + event_period_label.num = period_millis + + samples = [] + for stacktrace in stacktraces: + thread_id = stacktrace["tid"] + + timestamp_label = profile_pb2.Label() + timestamp_label.key = timestamp_key + timestamp_label.num = timestamp_unix_millis + + thread_id_label = profile_pb2.Label() + thread_id_label.key = thread_id_key + thread_id_label.num = thread_id + + labels = [timestamp_label, event_period_label, thread_id_label] + + trace_context = thread_states.get(thread_id) + if trace_context: + (trace_id, span_id) = trace_context + + trace_id_label = profile_pb2.Label() + trace_id_label.key = trace_id_key + trace_id_label.str = str_table.index(f"{trace_id:016x}") + labels.append(trace_id_label) + + span_id_label = profile_pb2.Label() + span_id_label.key = span_id_key + span_id_label.str = str_table.index(f"{span_id:08x}") + labels.append(span_id_label) + + sample = profile_pb2.Sample() + + location_ids = [] + + for frame in reversed(stacktrace["frames"]): + location = get_location(functions_table, str_table, locations_table, frame) + location_ids.append(location.id) + + sample.location_id.extend(location_ids) + sample.label.extend(labels) + + samples.append(sample) + + pb_profile.sample.extend(samples) + pb_profile.string_table.extend(str_table.keys()) + pb_profile.function.extend(list(functions_table.values())) + pb_profile.location.extend(list(locations_table.values())) + + return pb_profile + + +def pb_profile_to_str(pb_profile) -> str: + serialized = pb_profile.SerializeToString() + compressed = gzip.compress(serialized) + b64encoded = base64.b64encode(compressed) + stringified = b64encoded.decode() + return stringified + + +def pb_profile_from_str(stringified: str) -> profile_pb2.Profile: + byte_array = base64.b64decode(stringified) + decompressed = gzip.decompress(byte_array) + out = profile_pb2.Profile() + out.ParseFromString(decompressed) + return out + + +def extract_stack_summary(frame): + stack_iterator = traceback.walk_stack(frame) + out = StackSummary.extract(stack_iterator, limit=None, lookup_lines=False) + out.reverse() + return out + + +def configure_otel(): + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor(BatchLogRecordProcessor(OTLPLogExporter())) + set_logger_provider(logger_provider) + + +if __name__ == "__main__": + configure_otel() + start_profiling() + time.sleep(12) + stop_profiling() diff --git a/src/splunk_otel/profile_pb2.py b/src/splunk_otel/profile_pb2.py new file mode 100644 index 00000000..a597ba98 --- /dev/null +++ b/src/splunk_otel/profile_pb2.py @@ -0,0 +1,44 @@ +# pylint: skip-file +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: profile.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\rprofile.proto\x12\x12perftools.profiles"\xd5\x03\n\x07Profile\x12\x32\n\x0bsample_type\x18\x01 \x03(\x0b\x32\x1d.perftools.profiles.ValueType\x12*\n\x06sample\x18\x02 \x03(\x0b\x32\x1a.perftools.profiles.Sample\x12,\n\x07mapping\x18\x03 \x03(\x0b\x32\x1b.perftools.profiles.Mapping\x12.\n\x08location\x18\x04 \x03(\x0b\x32\x1c.perftools.profiles.Location\x12.\n\x08\x66unction\x18\x05 \x03(\x0b\x32\x1c.perftools.profiles.Function\x12\x14\n\x0cstring_table\x18\x06 \x03(\t\x12\x13\n\x0b\x64rop_frames\x18\x07 \x01(\x03\x12\x13\n\x0bkeep_frames\x18\x08 \x01(\x03\x12\x12\n\ntime_nanos\x18\t \x01(\x03\x12\x16\n\x0e\x64uration_nanos\x18\n \x01(\x03\x12\x32\n\x0bperiod_type\x18\x0b \x01(\x0b\x32\x1d.perftools.profiles.ValueType\x12\x0e\n\x06period\x18\x0c \x01(\x03\x12\x0f\n\x07\x63omment\x18\r \x03(\x03\x12\x1b\n\x13\x64\x65\x66\x61ult_sample_type\x18\x0e \x01(\x03"\'\n\tValueType\x12\x0c\n\x04type\x18\x01 \x01(\x03\x12\x0c\n\x04unit\x18\x02 \x01(\x03"V\n\x06Sample\x12\x13\n\x0blocation_id\x18\x01 \x03(\x04\x12\r\n\x05value\x18\x02 \x03(\x03\x12(\n\x05label\x18\x03 \x03(\x0b\x32\x19.perftools.profiles.Label"@\n\x05Label\x12\x0b\n\x03key\x18\x01 \x01(\x03\x12\x0b\n\x03str\x18\x02 \x01(\x03\x12\x0b\n\x03num\x18\x03 \x01(\x03\x12\x10\n\x08num_unit\x18\x04 \x01(\x03"\xdd\x01\n\x07Mapping\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x14\n\x0cmemory_start\x18\x02 \x01(\x04\x12\x14\n\x0cmemory_limit\x18\x03 \x01(\x04\x12\x13\n\x0b\x66ile_offset\x18\x04 \x01(\x04\x12\x10\n\x08\x66ilename\x18\x05 \x01(\x03\x12\x10\n\x08\x62uild_id\x18\x06 \x01(\x03\x12\x15\n\rhas_functions\x18\x07 \x01(\x08\x12\x15\n\rhas_filenames\x18\x08 \x01(\x08\x12\x18\n\x10has_line_numbers\x18\t \x01(\x08\x12\x19\n\x11has_inline_frames\x18\n \x01(\x08"v\n\x08Location\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nmapping_id\x18\x02 \x01(\x04\x12\x0f\n\x07\x61\x64\x64ress\x18\x03 \x01(\x04\x12&\n\x04line\x18\x04 \x03(\x0b\x32\x18.perftools.profiles.Line\x12\x11\n\tis_folded\x18\x05 \x01(\x08")\n\x04Line\x12\x13\n\x0b\x66unction_id\x18\x01 \x01(\x04\x12\x0c\n\x04line\x18\x02 \x01(\x03"_\n\x08\x46unction\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\x03\x12\x13\n\x0bsystem_name\x18\x03 \x01(\x03\x12\x10\n\x08\x66ilename\x18\x04 \x01(\x03\x12\x12\n\nstart_line\x18\x05 \x01(\x03\x42-\n\x1d\x63om.google.perftools.profilesB\x0cProfileProtob\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "profile_pb2", _globals) +if _descriptor._USE_C_DESCRIPTORS is False: + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = ( + b"\n\035com.google.perftools.profilesB\014ProfileProto" + ) + _globals["_PROFILE"]._serialized_start = 38 + _globals["_PROFILE"]._serialized_end = 507 + _globals["_VALUETYPE"]._serialized_start = 509 + _globals["_VALUETYPE"]._serialized_end = 548 + _globals["_SAMPLE"]._serialized_start = 550 + _globals["_SAMPLE"]._serialized_end = 636 + _globals["_LABEL"]._serialized_start = 638 + _globals["_LABEL"]._serialized_end = 702 + _globals["_MAPPING"]._serialized_start = 705 + _globals["_MAPPING"]._serialized_end = 926 + _globals["_LOCATION"]._serialized_start = 928 + _globals["_LOCATION"]._serialized_end = 1046 + _globals["_LINE"]._serialized_start = 1048 + _globals["_LINE"]._serialized_end = 1089 + _globals["_FUNCTION"]._serialized_start = 1091 + _globals["_FUNCTION"]._serialized_end = 1186 +# @@protoc_insertion_point(module_scope) diff --git a/tests/fixtures/pb_profile.out.json b/tests/fixtures/pb_profile.out.json new file mode 100644 index 00000000..a68a9691 --- /dev/null +++ b/tests/fixtures/pb_profile.out.json @@ -0,0 +1,417 @@ +{ + "function": [ + { + "filename": "8", + "id": "1", + "name": "7", + "systemName": "7" + }, + { + "filename": "8", + "id": "2", + "name": "9", + "systemName": "9" + }, + { + "filename": "11", + "id": "3", + "name": "10", + "systemName": "10" + }, + { + "filename": "11", + "id": "4", + "name": "12", + "systemName": "12" + }, + { + "filename": "11", + "id": "5", + "name": "13", + "systemName": "13" + }, + { + "filename": "15", + "id": "6", + "name": "14", + "systemName": "14" + }, + { + "filename": "17", + "id": "7", + "name": "16", + "systemName": "16" + }, + { + "filename": "19", + "id": "8", + "name": "18", + "systemName": "18" + }, + { + "filename": "21", + "id": "9", + "name": "20", + "systemName": "20" + }, + { + "filename": "22", + "id": "10", + "name": "13", + "systemName": "13" + }, + { + "filename": "17", + "id": "11", + "name": "23", + "systemName": "23" + }, + { + "filename": "17", + "id": "12", + "name": "24", + "systemName": "24" + }, + { + "filename": "17", + "id": "13", + "name": "25", + "systemName": "25" + }, + { + "filename": "17", + "id": "14", + "name": "26", + "systemName": "26" + }, + { + "filename": "17", + "id": "15", + "name": "27", + "systemName": "27" + }, + { + "filename": "29", + "id": "16", + "name": "28", + "systemName": "28" + }, + { + "filename": "29", + "id": "17", + "name": "30", + "systemName": "30" + }, + { + "filename": "29", + "id": "18", + "name": "31", + "systemName": "31" + }, + { + "filename": "29", + "id": "19", + "name": "32", + "systemName": "32" + }, + { + "filename": "34", + "id": "20", + "name": "33", + "systemName": "33" + }, + { + "filename": "36", + "id": "21", + "name": "35", + "systemName": "35" + } + ], + "location": [ + { + "id": "1", + "line": [ + { + "functionId": "1", + "line": "52" + } + ] + }, + { + "id": "2", + "line": [ + { + "functionId": "2", + "line": "193" + } + ] + }, + { + "id": "3", + "line": [ + { + "functionId": "3", + "line": "633" + } + ] + }, + { + "id": "4", + "line": [ + { + "functionId": "4", + "line": "676" + } + ] + }, + { + "id": "5", + "line": [ + { + "functionId": "5", + "line": "736" + } + ] + }, + { + "id": "6", + "line": [ + { + "functionId": "6", + "line": "351" + } + ] + }, + { + "id": "7", + "line": [ + { + "functionId": "7", + "line": "174" + } + ] + }, + { + "id": "8", + "line": [ + { + "functionId": "8", + "line": "103" + } + ] + }, + { + "id": "9", + "line": [ + { + "functionId": "9", + "line": "120" + } + ] + }, + { + "id": "10", + "line": [ + { + "functionId": "10", + "line": "513" + } + ] + }, + { + "id": "11", + "line": [ + { + "functionId": "11", + "line": "242" + } + ] + }, + { + "id": "12", + "line": [ + { + "functionId": "12", + "line": "341" + } + ] + }, + { + "id": "13", + "line": [ + { + "functionId": "13", + "line": "241" + } + ] + }, + { + "id": "14", + "line": [ + { + "functionId": "14", + "line": "132" + } + ] + }, + { + "id": "15", + "line": [ + { + "functionId": "15", + "line": "113" + } + ] + }, + { + "id": "16", + "line": [ + { + "functionId": "16", + "line": "362" + } + ] + }, + { + "id": "17", + "line": [ + { + "functionId": "17", + "line": "337" + } + ] + }, + { + "id": "18", + "line": [ + { + "functionId": "18", + "line": "283" + } + ] + }, + { + "id": "19", + "line": [ + { + "functionId": "19", + "line": "330" + } + ] + }, + { + "id": "20", + "line": [ + { + "functionId": "20", + "line": "175" + } + ] + }, + { + "id": "21", + "line": [ + { + "functionId": "21", + "line": "75" + } + ] + } + ], + "sample": [ + { + "label": [ + { + "num": "1726760000000" + }, + { + "key": "4", + "num": "100" + }, + { + "key": "3", + "num": "140704486641088" + }, + { + "key": "1", + "str": "5" + }, + { + "key": "2", + "str": "6" + } + ], + "locationId": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "8", + "9", + "10", + "16", + "8", + "9", + "10", + "17", + "18", + "19", + "8", + "9", + "10", + "20", + "21" + ] + } + ], + "stringTable": [ + "source.event.time", + "trace_id", + "span_id", + "thread.id", + "source.event.period", + "1a7b69a6755c414461f4ec7fcdb41612", + "967d91eadf220b83", + "do_work", + "/Users/pcollins/github/signalfx/sop-worktree/tests/unit/test_profiling.py", + "test_profiling_export", + "_callTestMethod", + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "run", + "__call__", + "runtest", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/unittest.py", + "pytest_runtest_call", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "_multicall", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "_hookexec", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "", + "from_call", + "call_and_report", + "runtestprotocol", + "pytest_runtest_protocol", + "pytest_runtestloop", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "_main", + "wrap_session", + "pytest_cmdline_main", + "main", + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/config/__init__.py", + "", + "/Applications/PyCharm.app/Contents/plugins/python-ce/helpers/pycharm/_jb_pytest_runner.py" + ] +} \ No newline at end of file diff --git a/tests/fixtures/stacktraces.in.json b/tests/fixtures/stacktraces.in.json new file mode 100644 index 00000000..d118be4b --- /dev/null +++ b/tests/fixtures/stacktraces.in.json @@ -0,0 +1,157 @@ +[ + { + "frames": [ + [ + "/Applications/PyCharm.app/Contents/plugins/python-ce/helpers/pycharm/_jb_pytest_runner.py", + "", + 75 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/config/__init__.py", + "main", + 175 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "__call__", + 513 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "_hookexec", + 120 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "_multicall", + 103 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "pytest_cmdline_main", + 330 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "wrap_session", + 283 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "_main", + 337 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "__call__", + 513 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "_hookexec", + 120 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "_multicall", + 103 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "pytest_runtestloop", + 362 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "__call__", + 513 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "_hookexec", + 120 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "_multicall", + 103 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "pytest_runtest_protocol", + 113 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "runtestprotocol", + 132 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "call_and_report", + 241 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "from_call", + 341 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "", + 242 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "__call__", + 513 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "_hookexec", + 120 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "_multicall", + 103 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "pytest_runtest_call", + 174 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/unittest.py", + "runtest", + 351 + ], + [ + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "__call__", + 736 + ], + [ + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "run", + 676 + ], + [ + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "_callTestMethod", + 633 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/tests/unit/test_profiling.py", + "test_profiling_export", + 193 + ], + [ + "/Users/pcollins/github/signalfx/sop-worktree/tests/unit/test_profiling.py", + "do_work", + 52 + ] + ], + "tid": 140704486641088 + } +] \ No newline at end of file diff --git a/tests/fixtures/thread_states.in.json b/tests/fixtures/thread_states.in.json new file mode 100644 index 00000000..a43eb777 --- /dev/null +++ b/tests/fixtures/thread_states.in.json @@ -0,0 +1 @@ +{"140704486641088": [35200723245212232826843218619177113106, 10843983915729947523]} \ No newline at end of file diff --git a/tests/ott_profile.py b/tests/ott_profile.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_profile.py b/tests/test_profile.py new file mode 100644 index 00000000..e37e1218 --- /dev/null +++ b/tests/test_profile.py @@ -0,0 +1,104 @@ +import json +import random +import time + +import pytest +from google.protobuf.json_format import MessageToDict +from opentelemetry._logs import Logger +from opentelemetry.sdk.resources import Resource + +from splunk_otel import profile_pb2 +from splunk_otel.profile import pb_profile_from_str, pb_profile_to_str, ProfilingScraper, \ + stacktraces_to_cpu_profile + + +@pytest.fixture +def stacktraces_fixture(): + return load_json("stacktraces.in.json") + + +@pytest.fixture +def thread_states_fixture(): + out = {} + og = load_json("thread_states.in.json") + for k, v in og.items(): + out[int(k)] = v + return out + + +@pytest.fixture +def pb_profile_fixture(): + return load_json("pb_profile.out.json") + + +def load_json(fname): + with open(f"fixtures/{fname}", "r") as f: + return json.load(f) + + +def test_basic_proto_serialization(): + # noinspection PyUnresolvedReferences + profile = profile_pb2.Profile() + serialized = pb_profile_to_str(profile) + decoded_profile = pb_profile_from_str(serialized) + assert profile == decoded_profile + + +def test_stacktraces_to_cpu_profile(stacktraces_fixture, pb_profile_fixture, thread_states_fixture): + time_seconds = 1726760000 # corresponds to the timestamp in the fixture + interval_millis = 100 + profile = stacktraces_to_cpu_profile( + stacktraces_fixture, + thread_states_fixture, + interval_millis, + time_seconds + ) + assert pb_profile_fixture == MessageToDict(profile) + + +def test_profile_scraper(stacktraces_fixture, pb_profile_fixture): + time_seconds = 1726760000 + logger = FakeLogger() + ps = ProfilingScraper( + Resource({}), + {}, + 100, + logger, + collect_stacktraces_func=lambda: stacktraces_fixture, + time_func=lambda: time_seconds, + ) + ps.tick() + + log_record = logger.log_records[0] + + assert log_record.timestamp == int(time_seconds * 1e9) + assert len(MessageToDict(pb_profile_from_str(log_record.body))) == 4 # sanity check + assert log_record.attributes["profiling.data.total.frame.count"] == 30 + + +def do_work(time_ms): + now = time.time() + target = now + time_ms / 1000.0 + + total = 0.0 + while now < target: + value = random.random() + for _ in range(0, 10000): + value = value + random.random() + + total = total + value + + now = time.time() + time.sleep(0.01) + + return total + + +class FakeLogger(Logger): + + def __init__(self): + super().__init__("fake-logger") + self.log_records = [] + + def emit(self, record: "LogRecord") -> None: + self.log_records.append(record) From b0ccc566615b5017bab9954af851fbd509447b4a Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Thu, 14 Nov 2024 17:20:28 -0500 Subject: [PATCH 02/14] Add profiling to configurator Set profiling env in distro Add profiling oteltest Make fixture names generic Make fixture loading wd independent --- src/splunk_otel/configurator.py | 9 ++++- src/splunk_otel/distro.py | 17 ++++----- src/splunk_otel/env.py | 5 +++ tests/fixtures/pb_profile.out.json | 20 +++++----- tests/fixtures/stacktraces.in.json | 60 +++++++++++++++--------------- tests/ott_profile.py | 29 +++++++++++++++ tests/test_distro.py | 2 +- tests/test_profile.py | 4 +- 8 files changed, 93 insertions(+), 53 deletions(-) diff --git a/src/splunk_otel/configurator.py b/src/splunk_otel/configurator.py index fe17da94..86fd6852 100644 --- a/src/splunk_otel/configurator.py +++ b/src/splunk_otel/configurator.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from opentelemetry.sdk._configuration import _OTelSDKConfigurator +from opentelemetry.sdk._configuration import _initialize_components, _OTelSDKConfigurator + +from splunk_otel.profile import start_profiling class SplunkConfigurator(_OTelSDKConfigurator): - pass + + def _configure(self, **kwargs): + super()._configure(**kwargs) + start_profiling() diff --git a/src/splunk_otel/distro.py b/src/splunk_otel/distro.py index fb727f24..be274b13 100644 --- a/src/splunk_otel/distro.py +++ b/src/splunk_otel/distro.py @@ -19,17 +19,10 @@ from opentelemetry.instrumentation.system_metrics import SystemMetricsInstrumentor from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_HEADERS -from splunk_otel.env import ( - DEFAULTS, - OTEL_METRICS_ENABLED, - SPLUNK_ACCESS_TOKEN, - SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, - Env, -) +from splunk_otel.env import (DEFAULTS, Env, OTEL_METRICS_ENABLED, SPLUNK_ACCESS_TOKEN, + SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, X_SF_TOKEN) from splunk_otel.propagator import ServerTimingResponsePropagator -X_SF_TOKEN = "x-sf-token" # noqa S105 - class SplunkDistro(BaseDistro): """ @@ -43,6 +36,7 @@ def __init__(self): def _configure(self, **kwargs): # noqa: ARG002 self.set_env_defaults() + self.set_profiling_env() self.configure_headers() self.set_server_timing_propagator() @@ -50,6 +44,11 @@ def set_env_defaults(self): for key, value in DEFAULTS.items(): self.env.setdefault(key, value) + def set_profiling_env(self): + if self.env.is_true("SPLUNK_PROFILER_ENABLED"): + self.env.setdefault("OTEL_LOGS_ENABLED", "true") + self.env.setdefault("OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED", "true") + def configure_headers(self): tok = self.env.getval(SPLUNK_ACCESS_TOKEN).strip() if tok: diff --git a/src/splunk_otel/env.py b/src/splunk_otel/env.py index f3ffa67c..f61e1f70 100644 --- a/src/splunk_otel/env.py +++ b/src/splunk_otel/env.py @@ -18,6 +18,7 @@ "OTEL_PYTHON_LOG_CORRELATION": "true", # FIXME: revisit "OTEL_TRACES_EXPORTER": "otlp", "OTEL_METRICS_EXPORTER": "otlp", + "OTEL_LOGS_EXPORTER": "otlp", "OTEL_EXPORTER_OTLP_PROTOCOL": "grpc", # FIXME: revisit "OTEL_ATTRIBUTE_COUNT_LIMIT": "", "OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT": "", @@ -31,9 +32,13 @@ SPLUNK_OTEL_SYSTEM_METRICS_ENABLED = "SPLUNK_OTEL_SYSTEM_METRICS_ENABLED" SPLUNK_ACCESS_TOKEN = "SPLUNK_ACCESS_TOKEN" # noqa: S105 SPLUNK_TRACE_RESPONSE_HEADER_ENABLED = "SPLUNK_TRACE_RESPONSE_HEADER_ENABLED" +SPLUNK_PROFILER_ENABLED = "SPLUNK_PROFILER_ENABLED" +OTEL_TRACE_ENABLED = "OTEL_TRACE_ENABLED" OTEL_METRICS_ENABLED = "OTEL_METRICS_ENABLED" +X_SF_TOKEN = "x-sf-token" # noqa S105 + class Env: """ diff --git a/tests/fixtures/pb_profile.out.json b/tests/fixtures/pb_profile.out.json index a68a9691..df658601 100644 --- a/tests/fixtures/pb_profile.out.json +++ b/tests/fixtures/pb_profile.out.json @@ -384,34 +384,34 @@ "1a7b69a6755c414461f4ec7fcdb41612", "967d91eadf220b83", "do_work", - "/Users/pcollins/github/signalfx/sop-worktree/tests/unit/test_profiling.py", + "/worktree/tests/unit/app.py", "test_profiling_export", "_callTestMethod", - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "/python/unittest/case.py", "run", "__call__", "runtest", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/unittest.py", + "/site-packages/aaa/unittest.py", "pytest_runtest_call", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "_multicall", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "/site-packages/bbb/_callers.py", "_hookexec", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "/site-packages/bbb/_manager.py", + "/site-packages/bbb/_hooks.py", "", "from_call", "call_and_report", "runtestprotocol", "pytest_runtest_protocol", "pytest_runtestloop", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "/site-packages/aaa/main.py", "_main", "wrap_session", "pytest_cmdline_main", "main", - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/config/__init__.py", + "/site-packages/aaa/config/__init__.py", "", - "/Applications/PyCharm.app/Contents/plugins/python-ce/helpers/pycharm/_jb_pytest_runner.py" + "/myapp/runner.py" ] } \ No newline at end of file diff --git a/tests/fixtures/stacktraces.in.json b/tests/fixtures/stacktraces.in.json index d118be4b..43691759 100644 --- a/tests/fixtures/stacktraces.in.json +++ b/tests/fixtures/stacktraces.in.json @@ -2,152 +2,152 @@ { "frames": [ [ - "/Applications/PyCharm.app/Contents/plugins/python-ce/helpers/pycharm/_jb_pytest_runner.py", + "/myapp/runner.py", "", 75 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/config/__init__.py", + "/site-packages/aaa/config/__init__.py", "main", 175 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "/site-packages/bbb/_hooks.py", "__call__", 513 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "/site-packages/bbb/_manager.py", "_hookexec", 120 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "/site-packages/bbb/_callers.py", "_multicall", 103 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "/site-packages/aaa/main.py", "pytest_cmdline_main", 330 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "/site-packages/aaa/main.py", "wrap_session", 283 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "/site-packages/aaa/main.py", "_main", 337 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "/site-packages/bbb/_hooks.py", "__call__", 513 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "/site-packages/bbb/_manager.py", "_hookexec", 120 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "/site-packages/bbb/_callers.py", "_multicall", 103 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/main.py", + "/site-packages/aaa/main.py", "pytest_runtestloop", 362 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "/site-packages/bbb/_hooks.py", "__call__", 513 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "/site-packages/bbb/_manager.py", "_hookexec", 120 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "/site-packages/bbb/_callers.py", "_multicall", 103 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "pytest_runtest_protocol", 113 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "runtestprotocol", 132 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "call_and_report", 241 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "from_call", 341 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "", 242 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_hooks.py", + "/site-packages/bbb/_hooks.py", "__call__", 513 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_manager.py", + "/site-packages/bbb/_manager.py", "_hookexec", 120 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/pluggy/_callers.py", + "/site-packages/bbb/_callers.py", "_multicall", 103 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/runner.py", + "/site-packages/aaa/runner.py", "pytest_runtest_call", 174 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/.venv/lib/python3.8/site-packages/_pytest/unittest.py", + "/site-packages/aaa/unittest.py", "runtest", 351 ], [ - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "/python/unittest/case.py", "__call__", 736 ], [ - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "/python/unittest/case.py", "run", 676 ], [ - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/unittest/case.py", + "/python/unittest/case.py", "_callTestMethod", 633 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/tests/unit/test_profiling.py", + "/worktree/tests/unit/app.py", "test_profiling_export", 193 ], [ - "/Users/pcollins/github/signalfx/sop-worktree/tests/unit/test_profiling.py", + "/worktree/tests/unit/app.py", "do_work", 52 ] diff --git a/tests/ott_profile.py b/tests/ott_profile.py index e69de29b..c058521e 100644 --- a/tests/ott_profile.py +++ b/tests/ott_profile.py @@ -0,0 +1,29 @@ +from oteltest import Telemetry +from oteltest.telemetry import num_logs + +from ott_lib import project_path, trace_loop + +if __name__ == "__main__": + trace_loop(12) + + +class ProfileOtelTest: + def environment_variables(self): + return { + "SPLUNK_PROFILER_ENABLED": "true", + } + + def requirements(self): + return [project_path(), "oteltest"] + + def wrapper_command(self): + return "opentelemetry-instrument" + + def on_start(self): + pass + + def on_stop(self, tel: Telemetry, stdout: str, stderr: str, returncode: int): + assert num_logs(tel) + + def is_http(self): + return False diff --git a/tests/test_distro.py b/tests/test_distro.py index c2b72b16..9b9480ef 100644 --- a/tests/test_distro.py +++ b/tests/test_distro.py @@ -20,7 +20,7 @@ def test_distro_env(): env_store = {} configure_distro(env_store) assert env_store["OTEL_TRACES_EXPORTER"] == "otlp" - assert len(env_store) == 11 + assert len(env_store) == 12 def test_access_token(): diff --git a/tests/test_profile.py b/tests/test_profile.py index e37e1218..57fb297d 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -1,6 +1,7 @@ import json import random import time +from os.path import abspath, dirname import pytest from google.protobuf.json_format import MessageToDict @@ -32,7 +33,8 @@ def pb_profile_fixture(): def load_json(fname): - with open(f"fixtures/{fname}", "r") as f: + parent_dir = dirname(abspath(__file__)) + with open(f"{parent_dir}/fixtures/{fname}", "r") as f: return json.load(f) From cc96d56ee4b844149627d89fe3e22e0fede214d0 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Fri, 15 Nov 2024 13:55:03 -0500 Subject: [PATCH 03/14] Only enable profiler if env var is true --- src/splunk_otel/configurator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/splunk_otel/configurator.py b/src/splunk_otel/configurator.py index 86fd6852..79f18089 100644 --- a/src/splunk_otel/configurator.py +++ b/src/splunk_otel/configurator.py @@ -14,6 +14,7 @@ from opentelemetry.sdk._configuration import _initialize_components, _OTelSDKConfigurator +from splunk_otel.env import Env from splunk_otel.profile import start_profiling @@ -21,4 +22,5 @@ class SplunkConfigurator(_OTelSDKConfigurator): def _configure(self, **kwargs): super()._configure(**kwargs) - start_profiling() + if Env().is_true("SPLUNK_PROFILER_ENABLED"): + start_profiling() From 75a80058e4de57e51550e11ef9466f5d3d501cfe Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Fri, 15 Nov 2024 13:58:40 -0500 Subject: [PATCH 04/14] Run hatch run fmt --- src/splunk_otel/configurator.py | 2 +- src/splunk_otel/distro.py | 10 ++++++++-- src/splunk_otel/profile.py | 3 ++- src/splunk_otel/profile_pb2.py | 1 - tests/ott_profile.py | 1 - tests/test_profile.py | 8 +++----- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/splunk_otel/configurator.py b/src/splunk_otel/configurator.py index 79f18089..6b97df90 100644 --- a/src/splunk_otel/configurator.py +++ b/src/splunk_otel/configurator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from opentelemetry.sdk._configuration import _initialize_components, _OTelSDKConfigurator +from opentelemetry.sdk._configuration import _OTelSDKConfigurator from splunk_otel.env import Env from splunk_otel.profile import start_profiling diff --git a/src/splunk_otel/distro.py b/src/splunk_otel/distro.py index be274b13..682929d2 100644 --- a/src/splunk_otel/distro.py +++ b/src/splunk_otel/distro.py @@ -19,8 +19,14 @@ from opentelemetry.instrumentation.system_metrics import SystemMetricsInstrumentor from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_HEADERS -from splunk_otel.env import (DEFAULTS, Env, OTEL_METRICS_ENABLED, SPLUNK_ACCESS_TOKEN, - SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, X_SF_TOKEN) +from splunk_otel.env import ( + DEFAULTS, + OTEL_METRICS_ENABLED, + SPLUNK_ACCESS_TOKEN, + SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, + X_SF_TOKEN, + Env, +) from splunk_otel.propagator import ServerTimingResponsePropagator diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index 9942ec5f..bb103c36 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -9,7 +9,7 @@ import opentelemetry.context import wrapt -from opentelemetry._logs import get_logger, Logger, set_logger_provider, SeverityNumber +from opentelemetry._logs import Logger, SeverityNumber, get_logger, set_logger_provider from opentelemetry.context import Context from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter from opentelemetry.sdk._logs import LoggerProvider, LogRecord @@ -17,6 +17,7 @@ from opentelemetry.sdk.resources import Resource from opentelemetry.trace import TraceFlags from opentelemetry.trace.propagation import _SPAN_KEY + from splunk_otel import profile_pb2 _profiling_timer = None diff --git a/src/splunk_otel/profile_pb2.py b/src/splunk_otel/profile_pb2.py index a597ba98..93d8bc57 100644 --- a/src/splunk_otel/profile_pb2.py +++ b/src/splunk_otel/profile_pb2.py @@ -1,5 +1,4 @@ # pylint: skip-file -# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: profile.proto """Generated protocol buffer code.""" diff --git a/tests/ott_profile.py b/tests/ott_profile.py index c058521e..d047a18a 100644 --- a/tests/ott_profile.py +++ b/tests/ott_profile.py @@ -1,6 +1,5 @@ from oteltest import Telemetry from oteltest.telemetry import num_logs - from ott_lib import project_path, trace_loop if __name__ == "__main__": diff --git a/tests/test_profile.py b/tests/test_profile.py index 57fb297d..27c04ccd 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -7,10 +7,8 @@ from google.protobuf.json_format import MessageToDict from opentelemetry._logs import Logger from opentelemetry.sdk.resources import Resource - from splunk_otel import profile_pb2 -from splunk_otel.profile import pb_profile_from_str, pb_profile_to_str, ProfilingScraper, \ - stacktraces_to_cpu_profile +from splunk_otel.profile import ProfilingScraper, pb_profile_from_str, pb_profile_to_str, stacktraces_to_cpu_profile @pytest.fixture @@ -34,7 +32,7 @@ def pb_profile_fixture(): def load_json(fname): parent_dir = dirname(abspath(__file__)) - with open(f"{parent_dir}/fixtures/{fname}", "r") as f: + with open(f"{parent_dir}/fixtures/{fname}") as f: return json.load(f) @@ -85,7 +83,7 @@ def do_work(time_ms): total = 0.0 while now < target: value = random.random() - for _ in range(0, 10000): + for _ in range(10000): value = value + random.random() total = total + value From beab79c0ab0cf0305783d38b9cdef0868253dd81 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Fri, 15 Nov 2024 14:08:05 -0500 Subject: [PATCH 05/14] Fix lint --- pyproject.toml | 4 +++ src/splunk_otel/configurator.py | 1 - src/splunk_otel/distro.py | 2 +- src/splunk_otel/profile.py | 47 +++++++++------------------------ src/splunk_otel/profile_pb2.py | 5 ++-- src/splunk_otel/propagator.py | 2 +- tests/ott_propagator.py | 2 +- tests/ott_sf_token.py | 2 +- tests/ott_trace_loop.py | 2 +- tests/test_profile.py | 12 +++------ 10 files changed, 27 insertions(+), 52 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 37e776ad..440aeba7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,3 +101,7 @@ exclude_lines = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] + +[tool.ruff.lint] +ignore = ["FA100", "ARG002"] +exclude = ["src/splunk_otel/profile_pb2.py"] diff --git a/src/splunk_otel/configurator.py b/src/splunk_otel/configurator.py index 6b97df90..07412735 100644 --- a/src/splunk_otel/configurator.py +++ b/src/splunk_otel/configurator.py @@ -19,7 +19,6 @@ class SplunkConfigurator(_OTelSDKConfigurator): - def _configure(self, **kwargs): super()._configure(**kwargs) if Env().is_true("SPLUNK_PROFILER_ENABLED"): diff --git a/src/splunk_otel/distro.py b/src/splunk_otel/distro.py index 682929d2..c91520ad 100644 --- a/src/splunk_otel/distro.py +++ b/src/splunk_otel/distro.py @@ -40,7 +40,7 @@ def __init__(self): self.env = Env() self.logger = logging.getLogger(__name__) - def _configure(self, **kwargs): # noqa: ARG002 + def _configure(self, **kwargs): self.set_env_defaults() self.set_profiling_env() self.configure_headers() diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index bb103c36..35808b2e 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -30,25 +30,18 @@ def start_profiling(): logger = get_logger("splunk-profiler") period_millis = 100 - scraper = ProfilingScraper( - mk_resource(), - tcm.get_thread_states(), - period_millis, - logger - ) + scraper = ProfilingScraper(mk_resource(), tcm.get_thread_states(), period_millis, logger) - global _profiling_timer + global _profiling_timer # noqa PLW0603 _profiling_timer = PeriodicTimer(period_millis, scraper.tick) _profiling_timer.start() def stop_profiling(): - global _profiling_timer _profiling_timer.stop() class ThreadContextMapping: - def __init__(self): self.thread_states = {} @@ -56,12 +49,8 @@ def get_thread_states(self): return self.thread_states def wrap_context_methods(self): - wrapt.wrap_function_wrapper( - opentelemetry.context, "attach", self.wrap_context_attach() - ) - wrapt.wrap_function_wrapper( - opentelemetry.context, "detach", self.wrap_context_detach() - ) + wrapt.wrap_function_wrapper(opentelemetry.context, "attach", self.wrap_context_attach()) + wrapt.wrap_function_wrapper(opentelemetry.context, "detach", self.wrap_context_detach()) def wrap_context_attach(self): def wrapper(wrapped, _instance, args, kwargs): @@ -111,20 +100,21 @@ def wrapper(wrapped, _instance, args, kwargs): def collect_stacktraces(): out = [] - frames = sys._current_frames() + frames = sys._current_frames() # noqa SLF001 for thread_id, frame in frames.items(): stack_summary = extract_stack_summary(frame) frames = [(sf.filename, sf.name, sf.lineno) for sf in stack_summary] - out.append({ - "frames": frames, - "tid": thread_id, - }) + out.append( + { + "frames": frames, + "tid": thread_id, + } + ) return out class ProfilingScraper: - def __init__( self, resource, @@ -152,12 +142,7 @@ def mk_log_record(self, stacktraces): time_seconds = self.time() - pb_profile = stacktraces_to_cpu_profile( - stacktraces, - self.thread_states, - self.period_millis, - time_seconds - ) + pb_profile = stacktraces_to_cpu_profile(stacktraces, self.thread_states, self.period_millis, time_seconds) pb_profile_str = pb_profile_to_str(pb_profile) return LogRecord( @@ -178,7 +163,6 @@ def mk_log_record(self, stacktraces): class PeriodicTimer: - def __init__(self, period_millis, target): self.period_seconds = period_millis / 1e3 self.target = target @@ -202,10 +186,6 @@ def stop(self): self.thread.join() -def emit_log_record(log_record): - print(f"emitting {log_record}") - - def mk_resource(): return Resource({}) @@ -338,8 +318,7 @@ def pb_profile_to_str(pb_profile) -> str: serialized = pb_profile.SerializeToString() compressed = gzip.compress(serialized) b64encoded = base64.b64encode(compressed) - stringified = b64encoded.decode() - return stringified + return b64encoded.decode() def pb_profile_from_str(stringified: str) -> profile_pb2.Profile: diff --git a/src/splunk_otel/profile_pb2.py b/src/splunk_otel/profile_pb2.py index 93d8bc57..15aadbb2 100644 --- a/src/splunk_otel/profile_pb2.py +++ b/src/splunk_otel/profile_pb2.py @@ -2,6 +2,7 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # source: profile.proto """Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database @@ -21,9 +22,7 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "profile_pb2", _globals) if _descriptor._USE_C_DESCRIPTORS is False: DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = ( - b"\n\035com.google.perftools.profilesB\014ProfileProto" - ) + DESCRIPTOR._serialized_options = b"\n\035com.google.perftools.profilesB\014ProfileProto" _globals["_PROFILE"]._serialized_start = 38 _globals["_PROFILE"]._serialized_end = 507 _globals["_VALUETYPE"]._serialized_start = 509 diff --git a/src/splunk_otel/propagator.py b/src/splunk_otel/propagator.py index 9c81963c..5f2b51ae 100644 --- a/src/splunk_otel/propagator.py +++ b/src/splunk_otel/propagator.py @@ -29,7 +29,7 @@ class ServerTimingResponsePropagator(ResponsePropagator): def inject( self, carrier: textmap.CarrierT, - context: typing.Optional[Context] = None, # noqa: FA100 + context: typing.Optional[Context] = None, setter: textmap.Setter = default_setter, # type: ignore ) -> None: """ diff --git a/tests/ott_propagator.py b/tests/ott_propagator.py index 874bced8..da7f882c 100644 --- a/tests/ott_propagator.py +++ b/tests/ott_propagator.py @@ -42,7 +42,7 @@ def requirements(self) -> Sequence[str]: def wrapper_command(self) -> str: return "opentelemetry-instrument" - def on_start(self) -> Optional[float]: # noqa: FA100 + def on_start(self) -> Optional[float]: import http.client import time diff --git a/tests/ott_sf_token.py b/tests/ott_sf_token.py index 8d94209e..20d921e5 100644 --- a/tests/ott_sf_token.py +++ b/tests/ott_sf_token.py @@ -21,7 +21,7 @@ def wrapper_command(self): def on_start(self): return None - def on_stop(self, telemetry: Telemetry, stdout: str, stderr: str, returncode: int) -> None: # noqa: ARG002 + def on_stop(self, telemetry: Telemetry, stdout: str, stderr: str, returncode: int) -> None: for request in telemetry.get_trace_requests(): assert request.headers.get("x-sf-token") == "s3cr3t" diff --git a/tests/ott_trace_loop.py b/tests/ott_trace_loop.py index 0b8434e2..62a03ed0 100644 --- a/tests/ott_trace_loop.py +++ b/tests/ott_trace_loop.py @@ -22,7 +22,7 @@ def wrapper_command(self): def on_start(self): return None - def on_stop(self, telemetry, stdout: str, stderr: str, returncode: int) -> None: # noqa: ARG002 + def on_stop(self, telemetry, stdout: str, stderr: str, returncode: int) -> None: assert num_spans(telemetry) == NUM_SPANS def is_http(self): diff --git a/tests/test_profile.py b/tests/test_profile.py index 27c04ccd..3f6352d4 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -47,16 +47,11 @@ def test_basic_proto_serialization(): def test_stacktraces_to_cpu_profile(stacktraces_fixture, pb_profile_fixture, thread_states_fixture): time_seconds = 1726760000 # corresponds to the timestamp in the fixture interval_millis = 100 - profile = stacktraces_to_cpu_profile( - stacktraces_fixture, - thread_states_fixture, - interval_millis, - time_seconds - ) + profile = stacktraces_to_cpu_profile(stacktraces_fixture, thread_states_fixture, interval_millis, time_seconds) assert pb_profile_fixture == MessageToDict(profile) -def test_profile_scraper(stacktraces_fixture, pb_profile_fixture): +def test_profile_scraper(stacktraces_fixture): time_seconds = 1726760000 logger = FakeLogger() ps = ProfilingScraper( @@ -95,10 +90,9 @@ def do_work(time_ms): class FakeLogger(Logger): - def __init__(self): super().__init__("fake-logger") self.log_records = [] - def emit(self, record: "LogRecord") -> None: + def emit(self, record) -> None: self.log_records.append(record) From d42782f3972a2ebc3708b95044842a6cd31b855d Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Fri, 15 Nov 2024 16:01:37 -0500 Subject: [PATCH 06/14] Fix logging and mk resource --- src/splunk_otel/env.py | 12 ++++++++---- src/splunk_otel/profile.py | 35 +++++++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/splunk_otel/env.py b/src/splunk_otel/env.py index f61e1f70..c02f6402 100644 --- a/src/splunk_otel/env.py +++ b/src/splunk_otel/env.py @@ -29,14 +29,16 @@ "OTEL_ATTRIBUTE_VALUE_LENGTH_LIMIT": "12000", } +OTEL_TRACE_ENABLED = "OTEL_TRACE_ENABLED" +OTEL_METRICS_ENABLED = "OTEL_METRICS_ENABLED" +OTEL_LOGS_ENABLED = "OTEL_LOGS_ENABLED" +OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED = "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED" + SPLUNK_OTEL_SYSTEM_METRICS_ENABLED = "SPLUNK_OTEL_SYSTEM_METRICS_ENABLED" SPLUNK_ACCESS_TOKEN = "SPLUNK_ACCESS_TOKEN" # noqa: S105 SPLUNK_TRACE_RESPONSE_HEADER_ENABLED = "SPLUNK_TRACE_RESPONSE_HEADER_ENABLED" SPLUNK_PROFILER_ENABLED = "SPLUNK_PROFILER_ENABLED" -OTEL_TRACE_ENABLED = "OTEL_TRACE_ENABLED" -OTEL_METRICS_ENABLED = "OTEL_METRICS_ENABLED" - X_SF_TOKEN = "x-sf-token" # noqa S105 @@ -50,7 +52,7 @@ def __init__(self, store=None): self.store = os.environ if store is None else store def is_true(self, key, default=""): - return is_true_str(self.getval(key, default).strip()) + return is_true_str(self.getval(key, default)) def list_append(self, key, value): curr = self.getval(key) @@ -70,3 +72,5 @@ def setdefault(self, key, value): def is_true_str(s: str) -> bool: return s.strip().lower() == "true" + + diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index 35808b2e..0dbad743 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -1,5 +1,6 @@ import base64 import gzip +import logging import sys import threading import time @@ -9,9 +10,10 @@ import opentelemetry.context import wrapt -from opentelemetry._logs import Logger, SeverityNumber, get_logger, set_logger_provider +from opentelemetry._logs import get_logger, Logger, set_logger_provider, SeverityNumber from opentelemetry.context import Context from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter +from opentelemetry.instrumentation.version import __version__ as version from opentelemetry.sdk._logs import LoggerProvider, LogRecord from opentelemetry.sdk._logs._internal.export import BatchLogRecordProcessor from opentelemetry.sdk.resources import Resource @@ -19,18 +21,29 @@ from opentelemetry.trace.propagation import _SPAN_KEY from splunk_otel import profile_pb2 +from splunk_otel.env import Env + +_SERVICE_NAME_ATTR = "service.name" +_SPLUNK_DISTRO_VERSION_ATTR = "splunk.distro.version" +_DEFAULT_OTEL_SERVICE_NAME = "unknown_service" +_NO_SERVICE_NAME_WARNING = """service.name attribute is not set, your service is unnamed and will be difficult to identify. +set your service name using the OTEL_SERVICE_NAME environment variable. +E.g. `OTEL_SERVICE_NAME=""`""" +_DEFAULT_SERVICE_NAME = "unnamed-python-service" _profiling_timer = None +_pylogger = logging.getLogger(__name__) + def start_profiling(): tcm = ThreadContextMapping() tcm.wrap_context_methods() - logger = get_logger("splunk-profiler") - period_millis = 100 - scraper = ProfilingScraper(mk_resource(), tcm.get_thread_states(), period_millis, logger) + resource = mk_resource(Env().getval("OTEL_SERVICE_NAME")) + logger = get_logger("splunk-profiler") + scraper = ProfilingScraper(resource, tcm.get_thread_states(), period_millis, logger) global _profiling_timer # noqa PLW0603 _profiling_timer = PeriodicTimer(period_millis, scraper.tick) @@ -166,7 +179,6 @@ class PeriodicTimer: def __init__(self, period_millis, target): self.period_seconds = period_millis / 1e3 self.target = target - self.cancel = threading.Event() self.thread = threading.Thread(target=self._loop, daemon=True) self.sleep = time.sleep @@ -182,12 +194,19 @@ def _loop(self): time.sleep(sleep_seconds) def stop(self): - self.cancel.set() self.thread.join() -def mk_resource(): - return Resource({}) +def mk_resource(service_name) -> Resource: + if service_name: + resolved_name = service_name + else: + _pylogger.warning(_NO_SERVICE_NAME_WARNING) + resolved_name = _DEFAULT_SERVICE_NAME + return Resource.create({ + _SPLUNK_DISTRO_VERSION_ATTR: version, + _SERVICE_NAME_ATTR: resolved_name, + }) class StringTable: From d501e928a8670526f8c5ede416f17264dae6ebe1 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Fri, 15 Nov 2024 16:56:41 -0500 Subject: [PATCH 07/14] Fix lint --- src/splunk_otel/env.py | 2 -- src/splunk_otel/profile.py | 12 +++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/splunk_otel/env.py b/src/splunk_otel/env.py index c02f6402..9fee7d21 100644 --- a/src/splunk_otel/env.py +++ b/src/splunk_otel/env.py @@ -72,5 +72,3 @@ def setdefault(self, key, value): def is_true_str(s: str) -> bool: return s.strip().lower() == "true" - - diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index 0dbad743..53d660d0 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -10,7 +10,7 @@ import opentelemetry.context import wrapt -from opentelemetry._logs import get_logger, Logger, set_logger_provider, SeverityNumber +from opentelemetry._logs import Logger, SeverityNumber, get_logger, set_logger_provider from opentelemetry.context import Context from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter from opentelemetry.instrumentation.version import __version__ as version @@ -203,10 +203,12 @@ def mk_resource(service_name) -> Resource: else: _pylogger.warning(_NO_SERVICE_NAME_WARNING) resolved_name = _DEFAULT_SERVICE_NAME - return Resource.create({ - _SPLUNK_DISTRO_VERSION_ATTR: version, - _SERVICE_NAME_ATTR: resolved_name, - }) + return Resource.create( + { + _SPLUNK_DISTRO_VERSION_ATTR: version, + _SERVICE_NAME_ATTR: resolved_name, + } + ) class StringTable: From a3657b6b058bc2620a057583b8e9e3a5937c7bb7 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Sat, 16 Nov 2024 11:46:44 -0500 Subject: [PATCH 08/14] Make stuff private, move things around --- src/splunk_otel/profile.py | 136 ++++++++++++++++--------------------- tests/test_profile.py | 30 +++++--- 2 files changed, 78 insertions(+), 88 deletions(-) diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index 53d660d0..b68b0954 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -10,12 +10,10 @@ import opentelemetry.context import wrapt -from opentelemetry._logs import Logger, SeverityNumber, get_logger, set_logger_provider +from opentelemetry._logs import get_logger, Logger, SeverityNumber from opentelemetry.context import Context -from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter from opentelemetry.instrumentation.version import __version__ as version -from opentelemetry.sdk._logs import LoggerProvider, LogRecord -from opentelemetry.sdk._logs._internal.export import BatchLogRecordProcessor +from opentelemetry.sdk._logs import LogRecord from opentelemetry.sdk.resources import Resource from opentelemetry.trace import TraceFlags from opentelemetry.trace.propagation import _SPAN_KEY @@ -24,6 +22,7 @@ from splunk_otel.env import Env _SERVICE_NAME_ATTR = "service.name" + _SPLUNK_DISTRO_VERSION_ATTR = "splunk.distro.version" _DEFAULT_OTEL_SERVICE_NAME = "unknown_service" _NO_SERVICE_NAME_WARNING = """service.name attribute is not set, your service is unnamed and will be difficult to identify. @@ -31,30 +30,43 @@ E.g. `OTEL_SERVICE_NAME=""`""" _DEFAULT_SERVICE_NAME = "unnamed-python-service" -_profiling_timer = None - +_profile_timer = None _pylogger = logging.getLogger(__name__) def start_profiling(): - tcm = ThreadContextMapping() + tcm = _ThreadContextMapping() tcm.wrap_context_methods() period_millis = 100 - resource = mk_resource(Env().getval("OTEL_SERVICE_NAME")) + resource = _mk_resource(Env().getval("OTEL_SERVICE_NAME")) logger = get_logger("splunk-profiler") - scraper = ProfilingScraper(resource, tcm.get_thread_states(), period_millis, logger) + scraper = _ProfileScraper(resource, tcm.get_thread_states(), period_millis, logger) - global _profiling_timer # noqa PLW0603 - _profiling_timer = PeriodicTimer(period_millis, scraper.tick) - _profiling_timer.start() + global _profile_timer # noqa PLW0603 + _profile_timer = _PeriodicTimer(period_millis, scraper.tick) + _profile_timer.start() def stop_profiling(): - _profiling_timer.stop() + _profile_timer.stop() + + +def _mk_resource(service_name) -> Resource: + if service_name: + resolved_name = service_name + else: + _pylogger.warning(_NO_SERVICE_NAME_WARNING) + resolved_name = _DEFAULT_SERVICE_NAME + return Resource.create( + { + _SPLUNK_DISTRO_VERSION_ATTR: version, + _SERVICE_NAME_ATTR: resolved_name, + } + ) -class ThreadContextMapping: +class _ThreadContextMapping: def __init__(self): self.thread_states = {} @@ -111,12 +123,12 @@ def wrapper(wrapped, _instance, args, kwargs): return wrapper -def collect_stacktraces(): +def _collect_stacktraces(): out = [] frames = sys._current_frames() # noqa SLF001 for thread_id, frame in frames.items(): - stack_summary = extract_stack_summary(frame) + stack_summary = _extract_stack_summary(frame) frames = [(sf.filename, sf.name, sf.lineno) for sf in stack_summary] out.append( { @@ -127,14 +139,14 @@ def collect_stacktraces(): return out -class ProfilingScraper: +class _ProfileScraper: def __init__( self, resource, thread_states, period_millis, logger: Logger, - collect_stacktraces_func=collect_stacktraces, + collect_stacktraces_func=_collect_stacktraces, time_func=time.time, ): self.resource = resource @@ -155,8 +167,8 @@ def mk_log_record(self, stacktraces): time_seconds = self.time() - pb_profile = stacktraces_to_cpu_profile(stacktraces, self.thread_states, self.period_millis, time_seconds) - pb_profile_str = pb_profile_to_str(pb_profile) + pb_profile = _stacktraces_to_cpu_profile(stacktraces, self.thread_states, self.period_millis, time_seconds) + pb_profile_str = _pb_profile_to_str(pb_profile) return LogRecord( timestamp=int(time_seconds * 1e9), @@ -175,7 +187,15 @@ def mk_log_record(self, stacktraces): ) -class PeriodicTimer: +def _pb_profile_to_str(pb_profile) -> str: + serialized = pb_profile.SerializeToString() + compressed = gzip.compress(serialized) + b64encoded = base64.b64encode(compressed) + return b64encoded.decode() + + +class _PeriodicTimer: + def __init__(self, period_millis, target): self.period_seconds = period_millis / 1e3 self.target = target @@ -197,21 +217,7 @@ def stop(self): self.thread.join() -def mk_resource(service_name) -> Resource: - if service_name: - resolved_name = service_name - else: - _pylogger.warning(_NO_SERVICE_NAME_WARNING) - resolved_name = _DEFAULT_SERVICE_NAME - return Resource.create( - { - _SPLUNK_DISTRO_VERSION_ATTR: version, - _SERVICE_NAME_ATTR: resolved_name, - } - ) - - -class StringTable: +class _StringTable: def __init__(self): self.strings = OrderedDict() @@ -229,7 +235,7 @@ def keys(self): return list(self.strings.keys()) -def get_location(functions_table, str_table, locations_table, frame): +def _get_location(functions_table, str_table, locations_table, frame): (file_name, function_name, line_no) = frame key = f"{file_name}:{function_name}:{line_no}" location = locations_table.get(key) @@ -237,21 +243,21 @@ def get_location(functions_table, str_table, locations_table, frame): if location is None: location = profile_pb2.Location() location.id = len(locations_table) + 1 - line = get_line(functions_table, str_table, file_name, function_name, line_no) + line = _get_line(functions_table, str_table, file_name, function_name, line_no) location.line.append(line) locations_table[key] = location return location -def get_line(functions_table, str_table, file_name, function_name, line_no): +def _get_line(functions_table, str_table, file_name, function_name, line_no): line = profile_pb2.Line() - line.function_id = get_function(functions_table, str_table, file_name, function_name).id + line.function_id = _get_function(functions_table, str_table, file_name, function_name).id line.line = line_no if line_no != 0 else -1 return line -def get_function(functions_table, str_table, file_name, function_name): +def _get_function(functions_table, str_table, file_name, function_name): key = f"{file_name}:{function_name}" func = functions_table.get(key) @@ -267,8 +273,15 @@ def get_function(functions_table, str_table, file_name, function_name): return func -def stacktraces_to_cpu_profile(stacktraces, thread_states, period_millis, time_seconds): - str_table = StringTable() +def _extract_stack_summary(frame): + stack_iterator = traceback.walk_stack(frame) + out = StackSummary.extract(stack_iterator, limit=None, lookup_lines=False) + out.reverse() + return out + + +def _stacktraces_to_cpu_profile(stacktraces, thread_states, period_millis, time_seconds): + str_table = _StringTable() locations_table = OrderedDict() functions_table = OrderedDict() @@ -319,7 +332,7 @@ def stacktraces_to_cpu_profile(stacktraces, thread_states, period_millis, time_s location_ids = [] for frame in reversed(stacktrace["frames"]): - location = get_location(functions_table, str_table, locations_table, frame) + location = _get_location(functions_table, str_table, locations_table, frame) location_ids.append(location.id) sample.location_id.extend(location_ids) @@ -333,38 +346,3 @@ def stacktraces_to_cpu_profile(stacktraces, thread_states, period_millis, time_s pb_profile.location.extend(list(locations_table.values())) return pb_profile - - -def pb_profile_to_str(pb_profile) -> str: - serialized = pb_profile.SerializeToString() - compressed = gzip.compress(serialized) - b64encoded = base64.b64encode(compressed) - return b64encoded.decode() - - -def pb_profile_from_str(stringified: str) -> profile_pb2.Profile: - byte_array = base64.b64decode(stringified) - decompressed = gzip.decompress(byte_array) - out = profile_pb2.Profile() - out.ParseFromString(decompressed) - return out - - -def extract_stack_summary(frame): - stack_iterator = traceback.walk_stack(frame) - out = StackSummary.extract(stack_iterator, limit=None, lookup_lines=False) - out.reverse() - return out - - -def configure_otel(): - logger_provider = LoggerProvider() - logger_provider.add_log_record_processor(BatchLogRecordProcessor(OTLPLogExporter())) - set_logger_provider(logger_provider) - - -if __name__ == "__main__": - configure_otel() - start_profiling() - time.sleep(12) - stop_profiling() diff --git a/tests/test_profile.py b/tests/test_profile.py index 3f6352d4..0ab9b7d0 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -1,3 +1,5 @@ +import base64 +import gzip import json import random import time @@ -7,8 +9,9 @@ from google.protobuf.json_format import MessageToDict from opentelemetry._logs import Logger from opentelemetry.sdk.resources import Resource + from splunk_otel import profile_pb2 -from splunk_otel.profile import ProfilingScraper, pb_profile_from_str, pb_profile_to_str, stacktraces_to_cpu_profile +from splunk_otel.profile import _pb_profile_to_str, _ProfileScraper, _stacktraces_to_cpu_profile @pytest.fixture @@ -39,22 +42,22 @@ def load_json(fname): def test_basic_proto_serialization(): # noinspection PyUnresolvedReferences profile = profile_pb2.Profile() - serialized = pb_profile_to_str(profile) - decoded_profile = pb_profile_from_str(serialized) + serialized = _pb_profile_to_str(profile) + decoded_profile = _pb_profile_from_str(serialized) assert profile == decoded_profile def test_stacktraces_to_cpu_profile(stacktraces_fixture, pb_profile_fixture, thread_states_fixture): time_seconds = 1726760000 # corresponds to the timestamp in the fixture interval_millis = 100 - profile = stacktraces_to_cpu_profile(stacktraces_fixture, thread_states_fixture, interval_millis, time_seconds) + profile = _stacktraces_to_cpu_profile(stacktraces_fixture, thread_states_fixture, interval_millis, time_seconds) assert pb_profile_fixture == MessageToDict(profile) def test_profile_scraper(stacktraces_fixture): time_seconds = 1726760000 - logger = FakeLogger() - ps = ProfilingScraper( + logger = _FakeLogger() + ps = _ProfileScraper( Resource({}), {}, 100, @@ -67,11 +70,19 @@ def test_profile_scraper(stacktraces_fixture): log_record = logger.log_records[0] assert log_record.timestamp == int(time_seconds * 1e9) - assert len(MessageToDict(pb_profile_from_str(log_record.body))) == 4 # sanity check + assert len(MessageToDict(_pb_profile_from_str(log_record.body))) == 4 # sanity check assert log_record.attributes["profiling.data.total.frame.count"] == 30 -def do_work(time_ms): +def _pb_profile_from_str(stringified: str) -> profile_pb2.Profile: + byte_array = base64.b64decode(stringified) + decompressed = gzip.decompress(byte_array) + out = profile_pb2.Profile() + out.ParseFromString(decompressed) + return out + + +def _do_work(time_ms): now = time.time() target = now + time_ms / 1000.0 @@ -89,7 +100,8 @@ def do_work(time_ms): return total -class FakeLogger(Logger): +class _FakeLogger(Logger): + def __init__(self): super().__init__("fake-logger") self.log_records = [] From d759f5ae8923d8088a9b4ad508fec3cd41694e94 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Sat, 16 Nov 2024 11:47:02 -0500 Subject: [PATCH 09/14] Fix lint --- src/splunk_otel/profile.py | 3 +-- tests/test_profile.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index b68b0954..e0fc44b1 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -10,7 +10,7 @@ import opentelemetry.context import wrapt -from opentelemetry._logs import get_logger, Logger, SeverityNumber +from opentelemetry._logs import Logger, SeverityNumber, get_logger from opentelemetry.context import Context from opentelemetry.instrumentation.version import __version__ as version from opentelemetry.sdk._logs import LogRecord @@ -195,7 +195,6 @@ def _pb_profile_to_str(pb_profile) -> str: class _PeriodicTimer: - def __init__(self, period_millis, target): self.period_seconds = period_millis / 1e3 self.target = target diff --git a/tests/test_profile.py b/tests/test_profile.py index 0ab9b7d0..f038efac 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -9,7 +9,6 @@ from google.protobuf.json_format import MessageToDict from opentelemetry._logs import Logger from opentelemetry.sdk.resources import Resource - from splunk_otel import profile_pb2 from splunk_otel.profile import _pb_profile_to_str, _ProfileScraper, _stacktraces_to_cpu_profile @@ -101,7 +100,6 @@ def _do_work(time_ms): class _FakeLogger(Logger): - def __init__(self): super().__init__("fake-logger") self.log_records = [] From dfdad0fc95254e04687ca1fe95f715a25e92d0f8 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Mon, 18 Nov 2024 09:44:57 -0500 Subject: [PATCH 10/14] Use env var constants --- src/splunk_otel/configurator.py | 4 ++-- src/splunk_otel/distro.py | 12 ++++++------ src/splunk_otel/profile.py | 8 +++----- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/splunk_otel/configurator.py b/src/splunk_otel/configurator.py index 07412735..39df2e07 100644 --- a/src/splunk_otel/configurator.py +++ b/src/splunk_otel/configurator.py @@ -14,12 +14,12 @@ from opentelemetry.sdk._configuration import _OTelSDKConfigurator -from splunk_otel.env import Env +from splunk_otel.env import Env, SPLUNK_PROFILER_ENABLED from splunk_otel.profile import start_profiling class SplunkConfigurator(_OTelSDKConfigurator): def _configure(self, **kwargs): super()._configure(**kwargs) - if Env().is_true("SPLUNK_PROFILER_ENABLED"): + if Env().is_true(SPLUNK_PROFILER_ENABLED): start_profiling() diff --git a/src/splunk_otel/distro.py b/src/splunk_otel/distro.py index c91520ad..c55bb92b 100644 --- a/src/splunk_otel/distro.py +++ b/src/splunk_otel/distro.py @@ -21,9 +21,9 @@ from splunk_otel.env import ( DEFAULTS, - OTEL_METRICS_ENABLED, - SPLUNK_ACCESS_TOKEN, - SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, + OTEL_LOGS_ENABLED, OTEL_METRICS_ENABLED, + OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, SPLUNK_ACCESS_TOKEN, + SPLUNK_PROFILER_ENABLED, SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, X_SF_TOKEN, Env, ) @@ -51,9 +51,9 @@ def set_env_defaults(self): self.env.setdefault(key, value) def set_profiling_env(self): - if self.env.is_true("SPLUNK_PROFILER_ENABLED"): - self.env.setdefault("OTEL_LOGS_ENABLED", "true") - self.env.setdefault("OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED", "true") + if self.env.is_true(SPLUNK_PROFILER_ENABLED): + self.env.setdefault(OTEL_LOGS_ENABLED, "true") + self.env.setdefault(OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, "true") def configure_headers(self): tok = self.env.getval(SPLUNK_ACCESS_TOKEN).strip() diff --git a/src/splunk_otel/profile.py b/src/splunk_otel/profile.py index e0fc44b1..5430b826 100644 --- a/src/splunk_otel/profile.py +++ b/src/splunk_otel/profile.py @@ -22,12 +22,10 @@ from splunk_otel.env import Env _SERVICE_NAME_ATTR = "service.name" - _SPLUNK_DISTRO_VERSION_ATTR = "splunk.distro.version" -_DEFAULT_OTEL_SERVICE_NAME = "unknown_service" -_NO_SERVICE_NAME_WARNING = """service.name attribute is not set, your service is unnamed and will be difficult to identify. -set your service name using the OTEL_SERVICE_NAME environment variable. -E.g. `OTEL_SERVICE_NAME=""`""" +_NO_SERVICE_NAME_WARNING = """The service.name attribute is not set, which may make your service difficult to identify. +Set your service name using the OTEL_SERVICE_NAME environment variable. +e.g. `OTEL_SERVICE_NAME=""`""" _DEFAULT_SERVICE_NAME = "unnamed-python-service" _profile_timer = None From f74c43b9246c4c2107d59f2d726a8de952cca700 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Mon, 18 Nov 2024 09:46:27 -0500 Subject: [PATCH 11/14] Lint --- src/splunk_otel/configurator.py | 2 +- src/splunk_otel/distro.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/splunk_otel/configurator.py b/src/splunk_otel/configurator.py index 39df2e07..cf8c7bc4 100644 --- a/src/splunk_otel/configurator.py +++ b/src/splunk_otel/configurator.py @@ -14,7 +14,7 @@ from opentelemetry.sdk._configuration import _OTelSDKConfigurator -from splunk_otel.env import Env, SPLUNK_PROFILER_ENABLED +from splunk_otel.env import SPLUNK_PROFILER_ENABLED, Env from splunk_otel.profile import start_profiling diff --git a/src/splunk_otel/distro.py b/src/splunk_otel/distro.py index c55bb92b..232688aa 100644 --- a/src/splunk_otel/distro.py +++ b/src/splunk_otel/distro.py @@ -21,9 +21,12 @@ from splunk_otel.env import ( DEFAULTS, - OTEL_LOGS_ENABLED, OTEL_METRICS_ENABLED, - OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, SPLUNK_ACCESS_TOKEN, - SPLUNK_PROFILER_ENABLED, SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, + OTEL_LOGS_ENABLED, + OTEL_METRICS_ENABLED, + OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, + SPLUNK_ACCESS_TOKEN, + SPLUNK_PROFILER_ENABLED, + SPLUNK_TRACE_RESPONSE_HEADER_ENABLED, X_SF_TOKEN, Env, ) From 51f77fc638258e3c4d0f4cde124c2aa3a7e067d5 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Mon, 18 Nov 2024 21:52:09 -0500 Subject: [PATCH 12/14] Add profiling env tests Improve profile ott assertions Explicitly state default env value of false --- src/splunk_otel/distro.py | 2 +- tests/ott_profile.py | 6 ++++-- tests/test_distro.py | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/splunk_otel/distro.py b/src/splunk_otel/distro.py index 232688aa..4ade9047 100644 --- a/src/splunk_otel/distro.py +++ b/src/splunk_otel/distro.py @@ -54,7 +54,7 @@ def set_env_defaults(self): self.env.setdefault(key, value) def set_profiling_env(self): - if self.env.is_true(SPLUNK_PROFILER_ENABLED): + if self.env.is_true(SPLUNK_PROFILER_ENABLED, "false"): self.env.setdefault(OTEL_LOGS_ENABLED, "true") self.env.setdefault(OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED, "true") diff --git a/tests/ott_profile.py b/tests/ott_profile.py index d047a18a..17a61978 100644 --- a/tests/ott_profile.py +++ b/tests/ott_profile.py @@ -1,5 +1,6 @@ from oteltest import Telemetry -from oteltest.telemetry import num_logs +from oteltest.telemetry import count_logs, has_log_attribute + from ott_lib import project_path, trace_loop if __name__ == "__main__": @@ -22,7 +23,8 @@ def on_start(self): pass def on_stop(self, tel: Telemetry, stdout: str, stderr: str, returncode: int): - assert num_logs(tel) + assert count_logs(tel) + assert has_log_attribute(tel, "profiling.data.format") def is_http(self): return False diff --git a/tests/test_distro.py b/tests/test_distro.py index 9b9480ef..ff3e4157 100644 --- a/tests/test_distro.py +++ b/tests/test_distro.py @@ -68,6 +68,27 @@ def test_server_timing_resp_prop_false(): assert get_global_response_propagator() is None +def test_profiling_enabled(): + env_store = {"SPLUNK_PROFILER_ENABLED": "true"} + configure_distro(env_store) + assert env_store["OTEL_LOGS_ENABLED"] == "true" + assert env_store["OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED"] == "true" + + +def test_profiling_disabled(): + env_store = {"SPLUNK_PROFILER_ENABLED": "false"} + configure_distro(env_store) + assert "OTEL_LOGS_ENABLED" not in env_store + assert "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED" not in env_store + + +def test_profiling_notset(): + env_store = {} + configure_distro(env_store) + assert "OTEL_LOGS_ENABLED" not in env_store + assert "OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED" not in env_store + + def configure_distro(env_store): sd = SplunkDistro() sd.env = Env(env_store) From a878e357536fcf18ba92dd0152b94b6e93a50ed2 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Mon, 18 Nov 2024 21:54:12 -0500 Subject: [PATCH 13/14] Lint --- tests/ott_profile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ott_profile.py b/tests/ott_profile.py index 17a61978..f91d4223 100644 --- a/tests/ott_profile.py +++ b/tests/ott_profile.py @@ -1,6 +1,5 @@ from oteltest import Telemetry from oteltest.telemetry import count_logs, has_log_attribute - from ott_lib import project_path, trace_loop if __name__ == "__main__": From b29d728ed3656bdf5bee50be2574dd1f5f89e4e4 Mon Sep 17 00:00:00 2001 From: Pablo Collins Date: Mon, 18 Nov 2024 21:59:40 -0500 Subject: [PATCH 14/14] Update ott fcn name --- tests/ott_trace_loop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ott_trace_loop.py b/tests/ott_trace_loop.py index 62a03ed0..84cf9b01 100644 --- a/tests/ott_trace_loop.py +++ b/tests/ott_trace_loop.py @@ -1,4 +1,4 @@ -from oteltest.telemetry import num_spans +from oteltest.telemetry import count_spans from ott_lib import project_path, trace_loop NUM_SPANS = 12 @@ -23,7 +23,7 @@ def on_start(self): return None def on_stop(self, telemetry, stdout: str, stderr: str, returncode: int) -> None: - assert num_spans(telemetry) == NUM_SPANS + assert count_spans(telemetry) == NUM_SPANS def is_http(self): return False