From 7940c9d4fbce2ec8674733e43d742411e26488c6 Mon Sep 17 00:00:00 2001 From: Roger Yang <80478925+RogerHYang@users.noreply.github.com> Date: Tue, 19 Mar 2024 15:37:54 -0700 Subject: [PATCH] fix(trace): redefine root span (#2632) --- app/src/pages/trace/TracePage.tsx | 22 ++++++++++++++++--- pyproject.toml | 1 - src/phoenix/core/project.py | 35 +++++++++++++++++++++---------- tests/core/test_project.py | 18 +++++++++++++++- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/app/src/pages/trace/TracePage.tsx b/app/src/pages/trace/TracePage.tsx index be238c1fc0..04e7821dcd 100644 --- a/app/src/pages/trace/TracePage.tsx +++ b/app/src/pages/trace/TracePage.tsx @@ -151,6 +151,24 @@ const defaultCardProps: Partial = { collapsible: true, }; +/** + * A root span is defined to be a span whose parent span is not in our collection. + * But if more than one such span exists, return null. + */ +function findRootSpan(spansList: Span[]): Span | null { + // If there is a span whose parent is null, then it is the root span. + const rootSpan = spansList.find((span) => span.parentId == null); + if (rootSpan) return rootSpan; + // Otherwise we need to find all spans whose parent span is not in our collection. + const spanIds = new Set(spansList.map((span) => span.context.spanId)); + const rootSpans = spansList.filter( + (span) => span.parentId != null && !spanIds.has(span.parentId) + ); + // If only one such span exists, then return it, otherwise, return null. + if (rootSpans.length === 1) return rootSpans[0]; + return null; +} + /** * A page that shows the details of a trace (e.g. a collection of spans) */ @@ -234,9 +252,7 @@ export function TracePage() { const selectedSpan = spansList.find( (span) => span.context.spanId === selectedSpanId ); - const rootSpan = useMemo(() => { - return spansList.find((span) => span.parentId == null) || null; - }, [spansList]); + const rootSpan = useMemo(() => findRootSpan(spansList), [spansList]); return ( =3.20, <5.0", - "ddsketch", "tqdm", "requests", "opentelemetry-sdk", diff --git a/src/phoenix/core/project.py b/src/phoenix/core/project.py index fc2baeaf4b..5592cee276 100644 --- a/src/phoenix/core/project.py +++ b/src/phoenix/core/project.py @@ -20,7 +20,6 @@ ) import numpy as np -from ddsketch import DDSketch from google.protobuf.json_format import MessageToDict from openinference.semconv.trace import SpanAttributes from pandas import DataFrame, Index, MultiIndex @@ -216,10 +215,15 @@ def __init__(self) -> None: self._start_time_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList( key=lambda span: span.start_time, ) + """ + A root span is defined to be a span whose parent span is not in our collection. + This includes spans whose parent is None and spans whose parent has not arrived + (or will not arrive). For spans whose parent is not None, the root span status + is temporary and will be revoked when its parent span arrives. + """ self._latency_sorted_root_spans: SortedKeyList[WrappedSpan] = SortedKeyList( key=lambda span: span[ComputedAttributes.LATENCY_MS], ) - self._root_span_latency_ms_sketch = DDSketch() self._token_count_total: int = 0 self._last_updated_at: Optional[datetime] = None @@ -285,7 +289,15 @@ def get_num_documents(self, span_id: SpanID) -> int: def root_span_latency_ms_quantiles(self, probability: float) -> Optional[float]: """Root span latency quantiles in milliseconds""" with self._lock: - return self._root_span_latency_ms_sketch.get_quantile_value(probability) + spans = self._latency_sorted_root_spans + if not (n := len(spans)): + return None + if probability >= 1: + return cast(float, spans[-1][ComputedAttributes.LATENCY_MS]) + if probability <= 0: + return cast(float, spans[0][ComputedAttributes.LATENCY_MS]) + k = max(0, round(n * probability) - 1) + return cast(float, spans[k][ComputedAttributes.LATENCY_MS]) def get_descendant_spans(self, span_id: SpanID) -> Iterator[WrappedSpan]: for span in self._get_descendant_spans(span_id): @@ -353,26 +365,27 @@ def _add_span(self, span: WrappedSpan) -> None: return parent_span_id = span.parent_id - is_root_span = parent_span_id is None - if not is_root_span: + if parent_span_id is not None: self._child_spans[parent_span_id].add(span) self._parent_span_ids[span_id] = parent_span_id + for child_span in self._child_spans.get(span_id, ()): + # A root span is a span whose parent span is not in our collection. + # Now that their parent span has arrived, they are no longer root spans. + self._start_time_sorted_root_spans.remove(child_span) + self._latency_sorted_root_spans.remove(child_span) + # Add computed attributes to span start_time = span.start_time end_time = span.end_time - span[ComputedAttributes.LATENCY_MS] = latency = ( - end_time - start_time - ).total_seconds() * 1000 - if is_root_span: - self._root_span_latency_ms_sketch.add(latency) + span[ComputedAttributes.LATENCY_MS] = (end_time - start_time).total_seconds() * 1000 span[ComputedAttributes.ERROR_COUNT] = int(span.status_code is SpanStatusCode.ERROR) # Store the new span (after adding computed attributes) self._spans[span_id] = span self._traces[span.context.trace_id].add(span) self._start_time_sorted_spans.add(span) - if is_root_span: + if parent_span_id is None or parent_span_id not in self._spans: self._start_time_sorted_root_spans.add(span) self._latency_sorted_root_spans.add(span) self._propagate_cumulative_values(span) diff --git a/tests/core/test_project.py b/tests/core/test_project.py index 46799c231a..3baf8d4b13 100644 --- a/tests/core/test_project.py +++ b/tests/core/test_project.py @@ -1,7 +1,7 @@ from binascii import hexlify from collections import defaultdict, namedtuple from itertools import count, islice, permutations -from typing import DefaultDict, Iterable, Iterator, Set, Tuple +from typing import DefaultDict, Dict, Iterable, Iterator, Set, Tuple import opentelemetry.proto.trace.v1.trace_pb2 as otlp import pytest @@ -17,6 +17,7 @@ def test_ingestion( otlp_trace: Tuple[otlp.Span, ...], permutation: Tuple[int, ...], child_ids: DefaultDict[str, Set[str]], + parent_ids: Dict[str, str], ) -> None: project = Project() _spans = project._spans._spans @@ -51,6 +52,12 @@ def test_ingestion( ) ), f"{i=}, {s=}, {span_id=}" + # Check that root spans are correctly designated: a root span is a span whose parent + # has not been ingested. + assert set(span.context.span_id for span in project.get_spans(root_spans_only=True)) == { + span_id for span_id in ingested_ids if parent_ids.get(span_id) not in ingested_ids + } + def test_get_descendant_span_ids(spans) -> None: spans = list(islice(spans, 6)) @@ -120,6 +127,15 @@ def child_ids(otlp_trace: Iterable[otlp.Span]) -> DefaultDict[str, Set[str]]: return ans +@pytest.fixture(scope="module") +def parent_ids(child_ids: DefaultDict[str, Set[str]]) -> Dict[str, str]: + return { + child_span_id: parent_span_id + for parent_span_id, child_span_ids in child_ids.items() + for child_span_id in child_span_ids + } + + def _connected_descendant_ids( span_id: str, child_ids: DefaultDict[str, Set[str]],