From 6e9d04773c1296d004d95857bd883dea87542f23 Mon Sep 17 00:00:00 2001 From: Pavel Tomskikh Date: Fri, 8 Nov 2024 19:50:51 +0700 Subject: [PATCH] Upgrade savant-rs to 0.4 (#881) * #870 update savant-rs to 0.4.1 * #870 update telemetry sample * #870 add TLS support in telemetry * #870 update template * #870 update jaeger docker image * #870 configure TLS in telemetry sample * #870 fix Jaeger with TLS sample --- .gitignore | 3 + .../0_dead_stream_eviction.rst | 2 +- docs/source/advanced_topics/10_client_sdk.rst | 30 +++++- .../advanced_topics/9_open_telemetry.rst | 19 ++-- docs/source/reference/api/client.rst | 57 ++++++++++- .../savant_101/12_module_definition.rst | 11 ++- docs/source/savant_101/12_pipeline.rst | 2 +- samples/telemetry/README.md | 82 +++++++++++++++- .../telemetry/docker-compose-with-tls.l4t.yml | 86 ++++++++++++++++ .../telemetry/docker-compose-with-tls.x86.yml | 98 +++++++++++++++++++ samples/telemetry/docker-compose.l4t.yml | 14 ++- samples/telemetry/docker-compose.x86.yml | 14 ++- samples/telemetry/module.yml | 32 +++--- samples/template/docker-compose.l4t.yml | 11 +-- samples/template/docker-compose.x86.yml | 11 +-- samples/template/src/client/run.py | 28 +++++- samples/template/src/module/module.yml | 9 +- savant/VERSION | 2 +- savant/client/log_provider/jaeger.py | 2 +- savant/config/default.yml | 9 +- savant/config/schema.py | 18 +++- savant/deepstream/pipeline.py | 5 +- savant/deepstream/utils/metrics.py | 28 ++++++ savant/deepstream/utils/pipeline.py | 61 +----------- savant/deepstream/utils/telemetry.py | 94 ++++++++++++++++++ 25 files changed, 598 insertions(+), 130 deletions(-) create mode 100644 samples/telemetry/docker-compose-with-tls.l4t.yml create mode 100644 samples/telemetry/docker-compose-with-tls.x86.yml create mode 100644 savant/deepstream/utils/metrics.py create mode 100644 savant/deepstream/utils/telemetry.py diff --git a/.gitignore b/.gitignore index 8c5ff94fe..79ba656b4 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,6 @@ data # logs (perf measurement) logs + +# certificates +certs diff --git a/docs/source/advanced_topics/0_dead_stream_eviction.rst b/docs/source/advanced_topics/0_dead_stream_eviction.rst index f85429000..61199fbf4 100644 --- a/docs/source/advanced_topics/0_dead_stream_eviction.rst +++ b/docs/source/advanced_topics/0_dead_stream_eviction.rst @@ -22,6 +22,6 @@ Take a look at the ``default.yml`` for details: .. literalinclude:: ../../../savant/config/default.yml :language: YAML - :lines: 188-221 + :lines: 195-228 You can override only required parameters in your module YAML configuration file. Also, take a look at corresponding environment variables helping to configure the parameters without specifying them in the module config. diff --git a/docs/source/advanced_topics/10_client_sdk.rst b/docs/source/advanced_topics/10_client_sdk.rst index 1bbd4b63b..318d32df6 100644 --- a/docs/source/advanced_topics/10_client_sdk.rst +++ b/docs/source/advanced_topics/10_client_sdk.rst @@ -61,13 +61,35 @@ Sources ingest frames and their metadata to a running module. .. code-block:: python import time - from savant_rs import init_jaeger_tracer + from savant_rs import telemetry + from savant_rs.telemetry import ( + ContextPropagationFormat, + Protocol, + TelemetryConfiguration, + TracerConfiguration, + ) from savant_rs.primitives import VideoFrameBatch, VideoFrameContent from savant.client import JaegerLogProvider, JpegSource, SourceBuilder # Initialize Jaeger tracer to send metrics and logs to Jaeger. # Note: the Jaeger tracer also should be configured in the module. - init_jaeger_tracer('savant-client', 'localhost:6831') + telemetry_config = TelemetryConfiguration( + context_propagation_format=ContextPropagationFormat.Jaeger, + tracer=TracerConfiguration( + service_name='savant-client', + protocol=Protocol.Grpc, + endpoint='http://jaeger:4317', + # tls=ClientTlsConfig( + # certificate='/path/to/ca.crt', + # identity=Identity( + # certificate='/path/to/client.crt', + # key='/path/to/client.key', + # ), + # ), + # timeout=5000, # milliseconds + ), + ) + telemetry.init(telemetry_config) # Build the source source = ( @@ -96,6 +118,8 @@ Sources ingest frames and their metadata to a running module. time.sleep(1) # Wait for the module to process the batch result.logs().pretty_print() + # Shutdown the Jaeger tracer + telemetry.shutdown() Sink Example ^^^^^^^^^^^^ @@ -124,4 +148,4 @@ Sinks retrieve results from a module. -.. youtube:: -oTwsN6Cpxs \ No newline at end of file +.. youtube:: -oTwsN6Cpxs diff --git a/docs/source/advanced_topics/9_open_telemetry.rst b/docs/source/advanced_topics/9_open_telemetry.rst index 2f5a3d7f4..e814820f5 100644 --- a/docs/source/advanced_topics/9_open_telemetry.rst +++ b/docs/source/advanced_topics/9_open_telemetry.rst @@ -58,19 +58,26 @@ Use ``params.telemetry.tracing`` to configure OpenTelemetry for the module. provider: jaeger provider_params: service_name: demo-pipeline - endpoint: jaeger:6831 + # Available protocols: grpc, http_binary, http_json. + # Protocol should be compatible with the port in endpoint. + # See https://www.jaegertracing.io/docs/1.62/deployment/#collector + protocol: grpc + endpoint: "http://jaeger:4317" + timeout: 5000 # milliseconds + tls: + certificate: /path/to/ca.crt + identity: + certificate: /path/to/client.crt + key: /path/to/client.key .. note:: - The module `template `_ already has valid configuration, considering that the Jaeger is launched in the all-in-one mode recommended on the Jaeger `website `_: + The module `template `_ already has valid configuration, considering that the Jaeger is launched in the all-in-one mode recommended on the Jaeger `website `_: .. code-block:: bash docker run -d --name jaeger \ -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ -p 16686:16686 \ -p 4317:4317 \ -p 4318:4318 \ @@ -78,6 +85,6 @@ Use ``params.telemetry.tracing`` to configure OpenTelemetry for the module. -p 14268:14268 \ -p 14269:14269 \ -p 9411:9411 \ - jaegertracing/all-in-one:1.48 + jaegertracing/all-in-one:1.62.0 .. youtube:: DkNifuKg-kY diff --git a/docs/source/reference/api/client.rst b/docs/source/reference/api/client.rst index 20e9580d3..82e432cc3 100644 --- a/docs/source/reference/api/client.rst +++ b/docs/source/reference/api/client.rst @@ -15,12 +15,33 @@ Source usage example: .. code-block:: python import time - from savant_rs import init_jaeger_tracer + from savant_rs.telemetry import ( + ContextPropagationFormat, + Protocol, + TelemetryConfiguration, + TracerConfiguration, + ) from savant.client import JaegerLogProvider, JpegSource, SourceBuilder # Initialize Jaeger tracer to send metrics and logs to Jaeger. # Note: the Jaeger tracer also should be configured in the module. - init_jaeger_tracer('savant-client', 'localhost:6831') + telemetry_config = TelemetryConfiguration( + context_propagation_format=ContextPropagationFormat.Jaeger, + tracer=TracerConfiguration( + service_name='savant-client', + protocol=Protocol.Grpc, + endpoint='http://jaeger:4317', + # tls=ClientTlsConfig( + # certificate='/path/to/ca.crt', + # identity=Identity( + # certificate='/path/to/client.crt', + # key='/path/to/client.key', + # ), + # ), + # timeout=5000, # milliseconds + ), + ) + telemetry.init(telemetry_config) # Build the source source = ( @@ -38,6 +59,9 @@ Source usage example: time.sleep(1) # Wait for the module to process the frame result.logs().pretty_print() + # Shutdown the Jaeger tracer + telemetry.shutdown() + Sink usage example: .. code-block:: python @@ -65,7 +89,12 @@ Async example (both source and sink): .. code-block:: python import asyncio - from savant_rs import init_jaeger_tracer + from savant_rs.telemetry import ( + ContextPropagationFormat, + Protocol, + TelemetryConfiguration, + TracerConfiguration, + ) from savant.client import JaegerLogProvider, JpegSource, SinkBuilder, SourceBuilder @@ -102,9 +131,29 @@ Async example (both source and sink): async def main(): # Initialize Jaeger tracer to send metrics and logs to Jaeger. # Note: the Jaeger tracer also should be configured in the module. - init_jaeger_tracer('savant-client', 'localhost:6831') + telemetry_config = TelemetryConfiguration( + context_propagation_format=ContextPropagationFormat.Jaeger, + tracer=TracerConfiguration( + service_name='savant-client', + protocol=Protocol.Grpc, + endpoint='http://jaeger:4317', + # tls=ClientTlsConfig( + # certificate='/path/to/ca.crt', + # identity=Identity( + # certificate='/path/to/client.crt', + # key='/path/to/client.key', + # ), + # ), + # timeout=5000, # milliseconds + ), + ) + telemetry.init(telemetry_config) + await asyncio.gather(run_sink(), run_source()) + # Shutdown the Jaeger tracer + telemetry.shutdown() + asyncio.run(main()) diff --git a/docs/source/savant_101/12_module_definition.rst b/docs/source/savant_101/12_module_definition.rst index 37a7c1ecb..6ebdde8a0 100644 --- a/docs/source/savant_101/12_module_definition.rst +++ b/docs/source/savant_101/12_module_definition.rst @@ -54,7 +54,7 @@ The following parameters are defined for a Savant module by default: .. literalinclude:: ../../../savant/config/default.yml :language: YAML - :lines: 1-188 + :lines: 1-195 .. note:: @@ -210,7 +210,14 @@ Example: provider: jaeger provider_params: service_name: demo-pipeline - endpoint: jaeger:6831 + protocol: grpc + endpoint: "http://jaeger:4317" + timeout: 5000 # milliseconds + tls: + certificate: /path/to/ca.crt + identity: + certificate: /path/to/client.crt + key: /path/to/client.key Read more on OpenTelemetry in :doc:`/advanced_topics/9_open_telemetry`. diff --git a/docs/source/savant_101/12_pipeline.rst b/docs/source/savant_101/12_pipeline.rst index 9d1029c43..8401debcf 100644 --- a/docs/source/savant_101/12_pipeline.rst +++ b/docs/source/savant_101/12_pipeline.rst @@ -17,7 +17,7 @@ Default module configuration file already defines the :py:attr:`~savant.config.s .. literalinclude:: ../../../savant/config/default.yml :language: YAML - :lines: 188- + :lines: 195- It is possible to redefine them, but the encouraged operation mode assumes the use of ZeroMQ source and sink. diff --git a/samples/telemetry/README.md b/samples/telemetry/README.md index 53400c3b8..46e6a8759 100644 --- a/samples/telemetry/README.md +++ b/samples/telemetry/README.md @@ -1,6 +1,6 @@ # OpenTelemetry Example -A simple pipeline demonstrating the use of OpenTelemetry in Savant. The pipeline contains one element, [Blur PyFunc](blur.py). It applies gaussian blur to the frame and contains OpenTelemetry instrumenting code. OpenTelemetry instrumenting is initialized with environment variables: see compose files for details. The telemetry is collected by the all-in-one Jaeger [container](https://www.jaegertracing.io/docs/1.48/getting-started/#all-in-one). The container also includes the Jaeger UI. +A simple pipeline demonstrating the use of OpenTelemetry in Savant. The pipeline contains one element, [Blur PyFunc](blur.py). It applies gaussian blur to the frame and contains OpenTelemetry instrumenting code. OpenTelemetry instrumenting is initialized with environment variables: see compose files for details. The telemetry is collected by the all-in-one Jaeger [container](https://www.jaegertracing.io/docs/1.62/getting-started/#all-in-one). The container also includes the Jaeger UI. Below are a few screenshots from the Jaeger UI. @@ -63,3 +63,83 @@ docker compose -f samples/telemetry/docker-compose.l4t.yml up # Ctrl+C to stop running the compose bundle ``` + +## Running Jaeger with TLS + +Jaeger and Savant support TLS for telemetry collection. [docker-compose-with-tls.x86.yml](docker-compose-with-tls.x86.yml) and [docker-compose-with-tls.l4t.yml](docker-compose-with-tls.l4t.yml) compose files demonstrate how to run Jaeger with TLS enabled (for both Jaeger and Savant authentication). + +### Generate Certificates + +Create directory for certificates: + +```bash +mkdir samples/telemetry/certs +``` + +Generate CA: + +```bash +openssl genpkey -algorithm RSA -out samples/telemetry/certs/ca.key +openssl req -new -x509 -days 365 \ + -key samples/telemetry/certs/ca.key \ + -out samples/telemetry/certs/ca.crt \ + -subj "/CN=jaeger-ca" +``` + +Generate server key and certificate: + +```bash +openssl genpkey -algorithm RSA -out samples/telemetry/certs/server.key +openssl req -new \ + -key samples/telemetry/certs/server.key \ + -out samples/telemetry/certs/server.csr \ + -subj "/CN=jaeger" +openssl x509 -req -days 365 \ + -in samples/telemetry/certs/server.csr \ + -CA samples/telemetry/certs/ca.crt \ + -CAkey samples/telemetry/certs/ca.key \ + -CAcreateserial \ + -out samples/telemetry/certs/server.crt \ + -extfile <(echo "subjectAltName=DNS:jaeger") +``` + +Make the server key readable by the container: + +```bash +chmod +r samples/telemetry/certs/server.key +``` + +Generate client key and certificate: + +```bash +openssl genpkey -algorithm RSA -out samples/telemetry/certs/client.key +openssl req -new \ + -key samples/telemetry/certs/client.key \ + -out samples/telemetry/certs/client.csr \ + -subj "/CN=module" +openssl x509 -req -days 365 \ + -in samples/telemetry/certs/client.csr \ + -CA samples/telemetry/certs/ca.crt \ + -CAkey samples/telemetry/certs/ca.key \ + -CAcreateserial \ + -out samples/telemetry/certs/client.crt +``` + +### Run Demo with TLS + +```bash +# you are expected to be in Savant/ directory + +# if x86 +docker compose -f samples/telemetry/docker-compose-with-tls.x86.yml up + +# if Jetson +docker compose -f samples/telemetry/docker-compose-with-tls.l4t.yml up + +# open 'rtsp://127.0.0.1:554/stream/0' in your player +# or visit 'http://127.0.0.1:888/stream/0/' (LL-HLS) + +# navigate to 'http://localhost:16686' to access the Jaeger UI + +# Ctrl+C to stop running the compose bundle +``` diff --git a/samples/telemetry/docker-compose-with-tls.l4t.yml b/samples/telemetry/docker-compose-with-tls.l4t.yml new file mode 100644 index 000000000..f7f013f3d --- /dev/null +++ b/samples/telemetry/docker-compose-with-tls.l4t.yml @@ -0,0 +1,86 @@ +services: + + video-loop-source: + image: ghcr.io/insight-platform/savant-adapters-gstreamer-l4t:latest + restart: unless-stopped + volumes: + - zmq_sockets:/tmp/zmq-sockets + - /tmp/video-loop-source-downloads:/tmp/video-loop-source-downloads + environment: + - LOCATION=https://eu-central-1.linodeobjects.com/savant-data/demo/shuffle_dance.mp4 + - DOWNLOAD_PATH=/tmp/video-loop-source-downloads + - ZMQ_ENDPOINT=pub+connect:ipc:///tmp/zmq-sockets/input-video.ipc + - SOURCE_ID=0 + - SYNC_OUTPUT=True + entrypoint: /opt/savant/adapters/gst/sources/video_loop.sh + depends_on: + module: + condition: service_healthy + + module: + image: ghcr.io/insight-platform/savant-deepstream-l4t:latest + restart: unless-stopped + volumes: + - zmq_sockets:/tmp/zmq-sockets + - ..:/opt/savant/samples + command: samples/telemetry/module.yml + environment: + - TRACING_PROVIDER=jaeger + - TRACING_PROVIDER_ENDPOINT=https://jaeger:4317 + - TRACING_PROVIDER_PROTOCOL=grpc + - TRACING_PROVIDER_TLS={"certificate":"/opt/savant/samples/telemetry/certs/ca.crt","identity":{"certificate":"/opt/savant/samples/telemetry/certs/client.crt","key":"/opt/savant/samples/telemetry/certs/client.key"}} + # Remove "identity" key to disable client authentication + # - TRACING_PROVIDER_TLS={"certificate":"/opt/savant/samples/telemetry/certs/ca.crt"} + - TRACING_SAMPLING_PERIOD=100 + - TRACING_APPEND_FRAME_META_TO_SPAN=True + - ZMQ_SRC_ENDPOINT=sub+bind:ipc:///tmp/zmq-sockets/input-video.ipc + - ZMQ_SINK_ENDPOINT=pub+bind:ipc:///tmp/zmq-sockets/output-video.ipc + - METRICS_FRAME_PERIOD=1000 + - CODEC=jpeg + runtime: nvidia + + always-on-sink: + image: ghcr.io/insight-platform/savant-adapters-deepstream-l4t:latest + restart: unless-stopped + ports: + - "554:554" # RTSP + - "1935:1935" # RTMP + - "888:888" # HLS + - "8889:8889" # WebRTC + volumes: + - zmq_sockets:/tmp/zmq-sockets + - ../assets/stub_imgs:/stub_imgs + environment: + - ZMQ_ENDPOINT=sub+connect:ipc:///tmp/zmq-sockets/output-video.ipc + - SOURCE_ID=0 + - FRAMERATE=25/1 + - STUB_FILE_LOCATION=/stub_imgs/smpte100_1280x720.jpeg + - DEV_MODE=True + command: python -m adapters.ds.sinks.always_on_rtsp + + # All-in-One Jaeger + # https://www.jaegertracing.io/docs/1.62/getting-started/#all-in-one + jaeger: + image: jaegertracing/all-in-one:1.62.0 + restart: unless-stopped + ports: + - "16686:16686" # query + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "14250:14250" + - "14268:14268" + - "14269:14269" + - "9411:9411" + environment: + - COLLECTOR_ZIPKIN_HOST_PORT=:9411 + - COLLECTOR_OTLP_ENABLED=true + - COLLECTOR_OTLP_GRPC_TLS_ENABLED=true + - COLLECTOR_OTLP_GRPC_TLS_CERT=/certs/server.crt + - COLLECTOR_OTLP_GRPC_TLS_KEY=/certs/server.key + # Remove the following line to disable client authentication + - COLLECTOR_OTLP_GRPC_TLS_CLIENT_CA=/certs/ca.crt + volumes: + - ./certs:/certs + +volumes: + zmq_sockets: diff --git a/samples/telemetry/docker-compose-with-tls.x86.yml b/samples/telemetry/docker-compose-with-tls.x86.yml new file mode 100644 index 000000000..cd4703ae4 --- /dev/null +++ b/samples/telemetry/docker-compose-with-tls.x86.yml @@ -0,0 +1,98 @@ +services: + + video-loop-source: + image: ghcr.io/insight-platform/savant-adapters-gstreamer:latest + restart: unless-stopped + volumes: + - zmq_sockets:/tmp/zmq-sockets + - /tmp/video-loop-source-downloads:/tmp/video-loop-source-downloads + environment: + - LOCATION=https://eu-central-1.linodeobjects.com/savant-data/demo/shuffle_dance.mp4 + - DOWNLOAD_PATH=/tmp/video-loop-source-downloads + - ZMQ_ENDPOINT=pub+connect:ipc:///tmp/zmq-sockets/input-video.ipc + - SOURCE_ID=0 + - SYNC_OUTPUT=True + entrypoint: /opt/savant/adapters/gst/sources/video_loop.sh + depends_on: + module: + condition: service_healthy + + module: + image: ghcr.io/insight-platform/savant-deepstream:latest + restart: unless-stopped + volumes: + - zmq_sockets:/tmp/zmq-sockets + - ..:/opt/savant/samples + command: samples/telemetry/module.yml + environment: + - TRACING_PROVIDER=jaeger + - TRACING_PROVIDER_ENDPOINT=https://jaeger:4317 + - TRACING_PROVIDER_PROTOCOL=grpc + - TRACING_PROVIDER_TLS={"certificate":"/opt/savant/samples/telemetry/certs/ca.crt","identity":{"certificate":"/opt/savant/samples/telemetry/certs/client.crt","key":"/opt/savant/samples/telemetry/certs/client.key"}} + # Remove "identity" key to disable client authentication + # - TRACING_PROVIDER_TLS={"certificate":"/opt/savant/samples/telemetry/certs/ca.crt"} + - TRACING_SAMPLING_PERIOD=100 + - TRACING_APPEND_FRAME_META_TO_SPAN=True + - ZMQ_SRC_ENDPOINT=sub+bind:ipc:///tmp/zmq-sockets/input-video.ipc + - ZMQ_SINK_ENDPOINT=pub+bind:ipc:///tmp/zmq-sockets/output-video.ipc + - METRICS_FRAME_PERIOD=1000 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [ gpu ] + + always-on-sink: + image: ghcr.io/insight-platform/savant-adapters-deepstream:latest + restart: unless-stopped + ports: + - "554:554" # RTSP + - "1935:1935" # RTMP + - "888:888" # HLS + - "8889:8889" # WebRTC + volumes: + - zmq_sockets:/tmp/zmq-sockets + - ../assets/stub_imgs:/stub_imgs + environment: + - ZMQ_ENDPOINT=sub+connect:ipc:///tmp/zmq-sockets/output-video.ipc + - SOURCE_ID=0 + - FRAMERATE=25/1 + - STUB_FILE_LOCATION=/stub_imgs/smpte100_1280x720.jpeg + - DEV_MODE=True + command: python -m adapters.ds.sinks.always_on_rtsp + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [ gpu ] + + # All-in-One Jaeger + # https://www.jaegertracing.io/docs/1.62/getting-started/#all-in-one + jaeger: + image: jaegertracing/all-in-one:1.62.0 + restart: unless-stopped + ports: + - "16686:16686" # query + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "14250:14250" + - "14268:14268" + - "14269:14269" + - "9411:9411" + environment: + - COLLECTOR_ZIPKIN_HOST_PORT=:9411 + - COLLECTOR_OTLP_ENABLED=true + - COLLECTOR_OTLP_GRPC_TLS_ENABLED=true + - COLLECTOR_OTLP_GRPC_TLS_CERT=/certs/server.crt + - COLLECTOR_OTLP_GRPC_TLS_KEY=/certs/server.key + # Remove the following line to disable client authentication + - COLLECTOR_OTLP_GRPC_TLS_CLIENT_CA=/certs/ca.crt + volumes: + - ./certs:/certs + +volumes: + zmq_sockets: diff --git a/samples/telemetry/docker-compose.l4t.yml b/samples/telemetry/docker-compose.l4t.yml index 4fef45948..09933d6ca 100644 --- a/samples/telemetry/docker-compose.l4t.yml +++ b/samples/telemetry/docker-compose.l4t.yml @@ -26,7 +26,8 @@ services: command: samples/telemetry/module.yml environment: - TRACING_PROVIDER=jaeger - - TRACING_PROVIDER_PARAMS={"endpoint":"jaeger:6831"} + - TRACING_PROVIDER_ENDPOINT=http://jaeger:4317 + - TRACING_PROVIDER_PROTOCOL=grpc - TRACING_SAMPLING_PERIOD=100 - TRACING_APPEND_FRAME_META_TO_SPAN=True - ZMQ_SRC_ENDPOINT=sub+bind:ipc:///tmp/zmq-sockets/input-video.ipc @@ -55,17 +56,14 @@ services: command: python -m adapters.ds.sinks.always_on_rtsp # All-in-One Jaeger - # https://www.jaegertracing.io/docs/1.48/getting-started/#all-in-one + # https://www.jaegertracing.io/docs/1.62/getting-started/#all-in-one jaeger: - image: jaegertracing/all-in-one:1.48 + image: jaegertracing/all-in-one:1.62.0 restart: unless-stopped ports: - - "6831:6831/udp" # agent - - "6832:6832/udp" - - "5778:5778" - "16686:16686" # query - - "4317:4317" - - "4318:4318" + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP - "14250:14250" - "14268:14268" - "14269:14269" diff --git a/samples/telemetry/docker-compose.x86.yml b/samples/telemetry/docker-compose.x86.yml index 3ba3a9cb6..c24603061 100644 --- a/samples/telemetry/docker-compose.x86.yml +++ b/samples/telemetry/docker-compose.x86.yml @@ -26,7 +26,8 @@ services: command: samples/telemetry/module.yml environment: - TRACING_PROVIDER=jaeger - - TRACING_PROVIDER_PARAMS={"endpoint":"jaeger:6831"} + - TRACING_PROVIDER_ENDPOINT=http://jaeger:4317 + - TRACING_PROVIDER_PROTOCOL=grpc - TRACING_SAMPLING_PERIOD=100 - TRACING_APPEND_FRAME_META_TO_SPAN=True - ZMQ_SRC_ENDPOINT=sub+bind:ipc:///tmp/zmq-sockets/input-video.ipc @@ -67,17 +68,14 @@ services: capabilities: [ gpu ] # All-in-One Jaeger - # https://www.jaegertracing.io/docs/1.48/getting-started/#all-in-one + # https://www.jaegertracing.io/docs/1.62/getting-started/#all-in-one jaeger: - image: jaegertracing/all-in-one:1.48 + image: jaegertracing/all-in-one:1.62.0 restart: unless-stopped ports: - - "6831:6831/udp" # agent - - "6832:6832/udp" - - "5778:5778" - "16686:16686" # query - - "4317:4317" - - "4318:4318" + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP - "14250:14250" - "14268:14268" - "14269:14269" diff --git a/samples/telemetry/module.yml b/samples/telemetry/module.yml index d61e2b367..43596388f 100644 --- a/samples/telemetry/module.yml +++ b/samples/telemetry/module.yml @@ -7,22 +7,28 @@ parameters: output_frame: codec: ${oc.env:CODEC, 'h264'} - # We use the default telemetry parameters from `savant/config/default.yml`, + # We use the default telemetry parameters, # which can be configured with environment variables in Docker compose manifests. # # Optionally, you can override them here. - # telemetry: - # tracing: - # # Sampling period in frames - # sampling_period: ${oc.decode:${oc.env:TRACING_SAMPLING_PERIOD, 100}} - # # Append frame metadata to telemetry span - # append_frame_meta_to_span: ${oc.decode:${oc.env:TRACING_APPEND_FRAME_META_TO_SPAN, False}} - # # Name for root span - # root_span_name: ${oc.decode:${oc.env:TRACING_ROOT_SPAN_NAME, null}} - # # Telemetry provider name - # provider: ${oc.decode:${oc.env:TRACING_PROVIDER, null}} - # # Parameters for telemetry provider - # provider_params: ${json:${oc.env:TRACING_PROVIDER_PARAMS, null}} + telemetry: + tracing: + # Sampling period in frames + sampling_period: ${oc.decode:${oc.env:TRACING_SAMPLING_PERIOD, 100}} + # Append frame metadata to telemetry span + append_frame_meta_to_span: ${oc.decode:${oc.env:TRACING_APPEND_FRAME_META_TO_SPAN, False}} + # Name for root span + root_span_name: ${oc.decode:${oc.env:TRACING_ROOT_SPAN_NAME, null}} + # Telemetry provider name + provider: ${oc.decode:${oc.env:TRACING_PROVIDER, null}} + # Parameters for telemetry provider + provider_params: + # Endpoint for telemetry provider + endpoint: ${oc.decode:${oc.env:TRACING_PROVIDER_ENDPOINT, null}} + # Protocol for telemetry provider + protocol: ${oc.decode:${oc.env:TRACING_PROVIDER_PROTOCOL, null}} + # TLS configuration for telemetry provider + tls: ${json:${oc.env:TRACING_PROVIDER_TLS, null}} # pipeline definition pipeline: diff --git a/samples/template/docker-compose.l4t.yml b/samples/template/docker-compose.l4t.yml index e43253ec9..282ac6fbc 100644 --- a/samples/template/docker-compose.l4t.yml +++ b/samples/template/docker-compose.l4t.yml @@ -54,16 +54,13 @@ services: - network jaeger: - image: jaegertracing/all-in-one:1.49 + image: jaegertracing/all-in-one:1.62.0 environment: - COLLECTOR_ZIPKIN_HOST_PORT=:9411 ports: - - "6831:6831/udp" - - "6832:6832/udp" - - "5778:5778" - - "16686:16686" - - "4317:4317" - - "4318:4318" + - "16686:16686" # query + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP - "14250:14250" - "14268:14268" - "14269:14269" diff --git a/samples/template/docker-compose.x86.yml b/samples/template/docker-compose.x86.yml index 3db940f70..ed851c2e1 100644 --- a/samples/template/docker-compose.x86.yml +++ b/samples/template/docker-compose.x86.yml @@ -66,16 +66,13 @@ services: - network jaeger: - image: jaegertracing/all-in-one:1.49 + image: jaegertracing/all-in-one:1.62.0 environment: - COLLECTOR_ZIPKIN_HOST_PORT=:9411 ports: - - "6831:6831/udp" - - "6832:6832/udp" - - "5778:5778" - - "16686:16686" - - "4317:4317" - - "4318:4318" + - "16686:16686" # query + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP - "14250:14250" - "14268:14268" - "14269:14269" diff --git a/samples/template/src/client/run.py b/samples/template/src/client/run.py index cb6863680..68220332c 100644 --- a/samples/template/src/client/run.py +++ b/samples/template/src/client/run.py @@ -4,7 +4,13 @@ import cv2 import numpy as np -from savant_rs import init_jaeger_tracer +from savant_rs import telemetry +from savant_rs.telemetry import ( + ContextPropagationFormat, + Protocol, + TelemetryConfiguration, + TracerConfiguration, +) from savant.api.builder import build_bbox from savant.client import JaegerLogProvider, JpegSource, SinkBuilder, SourceBuilder @@ -12,7 +18,23 @@ print('Starting Savant client...') # Initialize Jaeger tracer to send metrics and logs to Jaeger. # Note: the Jaeger tracer also should be configured in the module. -init_jaeger_tracer('savant-client', 'jaeger:6831') +telemetry_config = TelemetryConfiguration( + context_propagation_format=ContextPropagationFormat.Jaeger, + tracer=TracerConfiguration( + service_name='savant-client', + protocol=Protocol.Grpc, + endpoint='http://jaeger:4317', + # tls=ClientTlsConfig( + # certificate='/path/to/ca.crt', + # identity=Identity( + # certificate='/path/to/client.crt', + # key='/path/to/client.key', + # ), + # ), + # timeout=5000, # milliseconds + ), +) +telemetry.init(telemetry_config) module_hostname = os.environ.get('MODULE_HOSTNAME', 'localhost') jaeger_endpoint = 'http://jaeger:16686' @@ -99,4 +121,6 @@ print('Logs from the module:') result.logs().pretty_print() +# Shutdown the Jaeger tracer +telemetry.shutdown() print('Done.') diff --git a/samples/template/src/module/module.yml b/samples/template/src/module/module.yml index 73d820206..47fda7a18 100644 --- a/samples/template/src/module/module.yml +++ b/samples/template/src/module/module.yml @@ -15,7 +15,14 @@ parameters: provider: jaeger provider_params: service_name: demo-pipeline - endpoint: jaeger:6831 + protocol: grpc + endpoint: "http://jaeger:4317" + # timeout: 5000 # milliseconds + # tls: + # certificate: /path/to/ca.crt + # identity: + # certificate: /path/to/client.crt + # key: /path/to/client.key # pipeline processing frame parameters frame: diff --git a/savant/VERSION b/savant/VERSION index 158e93d53..c3df1635d 100644 --- a/savant/VERSION +++ b/savant/VERSION @@ -1,3 +1,3 @@ SAVANT=0.5.0 -SAVANT_RS=0.3.5 +SAVANT_RS=0.4.1 DEEPSTREAM=7.0 diff --git a/savant/client/log_provider/jaeger.py b/savant/client/log_provider/jaeger.py index 860ec184f..106d365f7 100644 --- a/savant/client/log_provider/jaeger.py +++ b/savant/client/log_provider/jaeger.py @@ -26,7 +26,7 @@ def __init__( def _fetch_logs(self, trace_id: str) -> List[LogEntry]: # TODO: use gRPC API since HTTP is internal - # https://www.jaegertracing.io/docs/1.48/apis/#grpcprotobuf-stable + # https://www.jaegertracing.io/docs/1.62/apis/#grpcprotobuf-stable response = requests.get(f'{self._endpoint}/api/traces/{trace_id}') response.raise_for_status() logs = [] diff --git a/savant/config/default.yml b/savant/config/default.yml index f1a397161..a654684a1 100644 --- a/savant/config/default.yml +++ b/savant/config/default.yml @@ -138,7 +138,14 @@ parameters: # provider: jaeger # provider_params: # service_name: demo-pipeline - # endpoint: jaeger:6831 + # protocol: grpc + # endpoint: "http://jaeger:4317" + # timeout: 5000 # milliseconds + # tls: + # certificate: /path/to/ca.crt + # identity: + # certificate: /path/to/client.crt + # key: /path/to/client.key # metrics: # frame_period: 1000 # time_period: 1 diff --git a/savant/config/schema.py b/savant/config/schema.py index fd97f7c0e..62d9cbbf4 100644 --- a/savant/config/schema.py +++ b/savant/config/schema.py @@ -130,7 +130,14 @@ class TracingParameters: provider: jaeger provider_params: service_name: demo-pipeline - endpoint: jaeger:6831 + protocol: grpc + endpoint: "http://jaeger:4317" + timeout: 5000 # milliseconds + tls: + certificate: /path/to/ca.crt + identity: + certificate: /path/to/client.crt + key: /path/to/client.key """ @@ -195,7 +202,14 @@ class TelemetryParameters: provider: jaeger provider_params: service_name: demo-pipeline - endpoint: jaeger:6831 + protocol: grpc + endpoint: "http://jaeger:4317" + timeout: 5000 # milliseconds + tls: + certificate: /path/to/ca.crt + identity: + certificate: /path/to/client.crt + key: /path/to/client.key metrics: frame_period: 1000 time_period: 1 diff --git a/savant/deepstream/pipeline.py b/savant/deepstream/pipeline.py index 2aab55721..5f47add19 100644 --- a/savant/deepstream/pipeline.py +++ b/savant/deepstream/pipeline.py @@ -64,14 +64,14 @@ nvds_frame_meta_iterator, nvds_obj_meta_iterator, ) +from savant.deepstream.utils.metrics import build_metrics_exporter from savant.deepstream.utils.object import nvds_is_empty_object_meta from savant.deepstream.utils.pipeline import ( add_queues_to_pipeline, - build_metrics_exporter, build_pipeline_stages, get_pipeline_element_stages, - init_tracing, ) +from savant.deepstream.utils.telemetry import init_tracing, shutdown_tracing from savant.gstreamer import GLib, Gst # noqa:F401 from savant.gstreamer.buffer_processor import GstBufferProcessor from savant.gstreamer.pipeline import GstPipeline @@ -331,6 +331,7 @@ def on_shutdown(self): self._video_pipeline.log_final_fps() if self._metrics_exporter is not None: self._metrics_exporter.stop() + shutdown_tracing() super().on_shutdown() def _on_shutdown_signal(self, element: Gst.Element): diff --git a/savant/deepstream/utils/metrics.py b/savant/deepstream/utils/metrics.py new file mode 100644 index 000000000..a1509828d --- /dev/null +++ b/savant/deepstream/utils/metrics.py @@ -0,0 +1,28 @@ +from typing import Optional + +from savant_rs.pipeline2 import VideoPipeline + +from savant.config.schema import MetricsParameters +from savant.metrics.base import BaseMetricsExporter +from savant.metrics.prometheus import ModuleMetricsCollector, PrometheusMetricsExporter + + +def build_metrics_exporter( + pipeline: VideoPipeline, + params: MetricsParameters, +) -> Optional[BaseMetricsExporter]: + """Build metrics exporter.""" + + if params.provider is None: + return None + + if params.provider == 'prometheus': + return PrometheusMetricsExporter( + params.provider_params, + ModuleMetricsCollector( + pipeline, + params.provider_params.get('labels') or {}, + ), + ) + + raise ValueError(f'Unknown metrics provider: {params.provider}') diff --git a/savant/deepstream/utils/pipeline.py b/savant/deepstream/utils/pipeline.py index c5e68ea1e..9d05cd44b 100644 --- a/savant/deepstream/utils/pipeline.py +++ b/savant/deepstream/utils/pipeline.py @@ -1,23 +1,14 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Union -from savant_rs import init_jaeger_tracer, init_noop_tracer -from savant_rs.pipeline2 import ( - StageFunction, - VideoPipeline, - VideoPipelineStagePayloadType, -) +from savant_rs.pipeline2 import StageFunction, VideoPipelineStagePayloadType from savant.config.schema import ( BufferQueuesParameters, ElementGroup, - MetricsParameters, Pipeline, PipelineElement, PyFuncElement, - TracingParameters, ) -from savant.metrics.base import BaseMetricsExporter -from savant.metrics.prometheus import ModuleMetricsCollector, PrometheusMetricsExporter from savant.utils.logging import get_logger logger = get_logger(__name__) @@ -257,51 +248,3 @@ def build_pipeline_stages(element_stages: List[Union[str, List[str]]]): ) return pipeline_stages - - -def init_tracing(module_name: str, tracing: TracingParameters): - """Initialize tracing provider.""" - - provider_params = tracing.provider_params or {} - if tracing.provider == 'jaeger': - service_name = provider_params.get('service_name', module_name) - try: - endpoint = provider_params['endpoint'] - except KeyError: - raise ValueError( - 'Jaeger endpoint is not specified. Please specify it in the config file.' - ) - logger.info( - 'Initializing Jaeger tracer with service name %r and endpoint %r.', - service_name, - endpoint, - ) - init_jaeger_tracer(service_name, endpoint) - - elif tracing.provider is not None: - raise ValueError(f'Unknown tracing provider: {tracing.provider}') - - else: - logger.info('No tracing provider specified. Using noop tracer.') - init_noop_tracer() - - -def build_metrics_exporter( - pipeline: VideoPipeline, - params: MetricsParameters, -) -> Optional[BaseMetricsExporter]: - """Build metrics exporter.""" - - if params.provider is None: - return None - - if params.provider == 'prometheus': - return PrometheusMetricsExporter( - params.provider_params, - ModuleMetricsCollector( - pipeline, - params.provider_params.get('labels') or {}, - ), - ) - - raise ValueError(f'Unknown metrics provider: {params.provider}') diff --git a/savant/deepstream/utils/telemetry.py b/savant/deepstream/utils/telemetry.py new file mode 100644 index 000000000..2e12d6ec2 --- /dev/null +++ b/savant/deepstream/utils/telemetry.py @@ -0,0 +1,94 @@ +from savant_rs.telemetry import ( + ClientTlsConfig, + ContextPropagationFormat, + Identity, + Protocol, + TelemetryConfiguration, + TracerConfiguration, + init, + shutdown, +) + +from savant.config.schema import TracingParameters +from savant.utils.logging import get_logger + +logger = get_logger(__name__) + +PROTOCOLS = { + 'grpc': Protocol.Grpc, + 'http_binary': Protocol.HttpBinary, + 'http_json': Protocol.HttpJson, +} + + +def init_tracing(module_name: str, tracing: TracingParameters): + """Initialize tracing provider.""" + + provider_params = tracing.provider_params or {} + if tracing.provider == 'jaeger': + service_name = provider_params.get('service_name', module_name) + + endpoint = provider_params.get('endpoint') + if not endpoint: + raise ValueError( + 'Jaeger endpoint is not specified. Please specify it in the config file.' + ) + + protocol_name = provider_params.get('protocol') or 'grpc' + try: + protocol = PROTOCOLS[protocol_name] + except KeyError: + raise ValueError(f'Unknown tracing protocol: {protocol_name}') + + tls = provider_params.get('tls') + if tls: + tls_identity = tls.get('identity') + if tls_identity: + tls_identity_config = Identity( + key=tls_identity.get('key'), + certificate=tls_identity.get('certificate'), + ) + else: + tls_identity_config = None + tls_config = ClientTlsConfig( + certificate=tls.get('certificate'), + identity=tls_identity_config, + ) + else: + tls_config = None + + timeout = provider_params.get('timeout') + if timeout is not None: + timeout = int(timeout) + + logger.info( + 'Initializing Jaeger tracer with service name %r and endpoint %r. Protocol: %s.', + service_name, + endpoint, + protocol, + ) + tracer_config = TracerConfiguration( + service_name=service_name, + protocol=protocol, + endpoint=endpoint, + tls=tls_config, + timeout=timeout, + ) + config = TelemetryConfiguration( + # TODO: support W3C + context_propagation_format=ContextPropagationFormat.Jaeger, + tracer=tracer_config, + ) + init(config) + + elif tracing.provider is not None: + raise ValueError(f'Unknown tracing provider: {tracing.provider}') + + else: + logger.info('No tracing provider specified. Using noop tracer.') + init(TelemetryConfiguration.no_op()) + + +def shutdown_tracing(): + """Shutdown tracing provider.""" + shutdown()