Skip to content

Commit

Permalink
Merge pull request #623 from containers/otel-trace-workflow
Browse files Browse the repository at this point in the history
update otel-collector workflow config
  • Loading branch information
rhatdan authored Jun 26, 2024
2 parents 85da91d + d45439b commit 75e03f3
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 171 deletions.
117 changes: 39 additions & 78 deletions .github/workflows/chatbot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,53 +48,55 @@ jobs:
run: |
pip install --no-cache-dir opentelemetry-sdk opentelemetry-exporter-otlp opentelemetry-instrumentation
- name: Download OpenTelemetry Collector Contrib
run: |
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.103.0/otelcol-contrib_0.103.0_linux_amd64.tar.gz
tar -xvf otelcol-contrib_0.103.0_linux_amd64.tar.gz
- name: Write secrets to files
run: |
echo "${{ secrets.ROSA_OTEL_CACERT }}" > /tmp/ca.crt
echo "${{ secrets.ROSA_OTEL_SERVER_CRT }}" > /tmp/server.crt
echo "${{ secrets.ROSA_OTEL_SERVER_KEY }}" > /tmp/server.key
- name: Configure OpenTelemetry Collector
run: |
echo '
extensions:
basicauth/client:
client_auth:
username: "${{ secrets.OTEL_USERNAME }}"
password: "${{ secrets.OTEL_PASSWORD }}"
receivers:
otlp:
protocols:
grpc:
http:
exporters:
otlphttp:
endpoint: https://otc.apps.platform-sts.pcbk.p1.openshiftapps.com
auth:
authenticator: basicauth/client
tls:
insecure: false
ca_pem: "${{ secrets.ROSA_ROOT_CERT }}"
debug:
verbosity: detailed
service:
extensions: [basicauth/client]
pipelines:
traces:
receivers: [otlp]
exporters: [debug, otlphttp]
receivers:
otlp:
protocols:
grpc:
http:
exporters:
otlphttp:
endpoint: "${{ secrets.ROSA_OTEL_ENDPOINT }}"
tls:
insecure: false
cert_file: /tmp/server.crt
key_file: /tmp/server.key
ca_file: /tmp/ca.crt
debug:
verbosity: detailed
service:
pipelines:
traces:
receivers: [otlp]
exporters: [debug, otlphttp]
' > otel-collector-config.yaml
- name: Run OpenTelemetry Collector
run: |
wget https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v0.103.0/otelcol-contrib_0.103.0_linux_amd64.tar.gz
tar -xvf otelcol-contrib_0.103.0_linux_amd64.tar.gz
chmod +x otelcol-contrib
./otelcol-contrib --config otel-collector-config.yaml &
./otelcol-contrib --config otel-collector-config.yaml > otel-collector.log 2>&1 &
- name: Install qemu dependency
run: |
sudo apt-get update
sudo apt-get install -y qemu-user-static
- name: Start build trace
- name: Start job trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="build-image"
export WORKFLOW_NAME="chatbot"
export JOB_NAME="chatbot-build-and-push"
export TRACE_ACTION="start"
python ci/trace-steps.py
Expand All @@ -108,54 +110,19 @@ jobs:
containerfiles: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}/app/Containerfile
context: recipes/natural_language_processing/${{ env.IMAGE_NAME }}/app

- name: End build trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="build-image"
export TRACE_ACTION="end"
python ci/trace-steps.py
- name: Install Dependencies
working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}
run: make install

- name: Start download model trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="download-model"
export TRACE_ACTION="start"
python ci/trace-steps.py
- name: Download model
working-directory: ./models
run: make download-model-granite

- name: End download model trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="download-model"
export TRACE_ACTION="end"
python ci/trace-steps.py
- name: Start functional test run trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="run-functional-tests"
export TRACE_ACTION="start"
python ci/trace-steps.py
- name: Run Functional Tests
shell: bash
run: make functional-tests
working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}

- name: End functional test run trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="run-functional-tests"
export TRACE_ACTION="end"
python ci/trace-steps.py
- name: Login to Registry
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: redhat-actions/[email protected]
Expand All @@ -164,13 +131,6 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Start push image trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="push-image"
export TRACE_ACTION="start"
python ci/trace-steps.py
- name: Push Image
id: push_image
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
Expand All @@ -180,9 +140,10 @@ jobs:
tags: ${{ steps.build_image.outputs.tags }}
registry: ${{ env.REGISTRY }}

- name: End push image trace
- name: End job trace
run: |
export WORKFLOW_NAME="chatbot-build-push"
export STEP_NAME="push-image"
export WORKFLOW_NAME="chatbot"
export JOB_NAME="chatbot-build-and-push"
export TRACE_ACTION="end"
python ci/trace-steps.py
48 changes: 15 additions & 33 deletions .github/workflows/test-trace-steps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ on:
workflow_dispatch:

jobs:
test:
test-build:
if: "!contains(github.event.pull_request.labels.*.name, 'hold-tests')"
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
steps:
- uses: actions/[email protected]
- name: Set up Python
Expand All @@ -33,33 +33,29 @@ jobs:
- name: Write secrets to files
run: |
echo "${{ secrets.ROSA_OTEL_TLS_CERT }}" > /tmp/tls.crt
echo "${{ secrets.ROSA_OTEL_CACERT }}" > /tmp/ca.crt
echo "${{ secrets.ROSA_OTEL_SERVER_CRT }}" > /tmp/server.crt
echo "${{ secrets.ROSA_OTEL_SERVER_KEY }}" > /tmp/server.key
- name: Configure OpenTelemetry Collector
run: |
echo '
extensions:
basicauth/client:
client_auth:
username: "${{ secrets.OTEL_USERNAME }}"
password: "${{ secrets.OTEL_PASSWORD }}"
receivers:
otlp:
protocols:
grpc:
http:
exporters:
otlphttp:
endpoint: https://otc.apps.platform-sts.pcbk.p1.openshiftapps.com:4318
auth:
authenticator: basicauth/client
endpoint: "${{ secrets.ROSA_OTEL_ENDPOINT }}"
tls:
insecure: false
ca_file: /tmp/tls.crt
cert_file: /tmp/server.crt
key_file: /tmp/server.key
ca_file: /tmp/ca.crt
debug:
verbosity: detailed
service:
extensions: [basicauth/client]
pipelines:
traces:
receivers: [otlp]
Expand All @@ -70,10 +66,10 @@ jobs:
run: |
./otelcol-contrib --config otel-collector-config.yaml > otel-collector.log 2>&1 &
- name: Start build trace
- name: Start job trace
run: |
export WORKFLOW_NAME="test-workflow"
export STEP_NAME="build"
export WORKFLOW_NAME="test-trace"
export JOB_NAME="test-build"
export TRACE_ACTION="start"
python ci/trace-steps.py
Expand All @@ -82,29 +78,15 @@ jobs:
echo "Simulating build step..."
sleep 2
- name: End build trace
run: |
export WORKFLOW_NAME="test-workflow"
export STEP_NAME="build"
export TRACE_ACTION="end"
python ci/trace-steps.py
- name: Start test trace
run: |
export WORKFLOW_NAME="test-workflow"
export STEP_NAME="test"
export TRACE_ACTION="start"
python ci/trace-steps.py
- name: Test
run: |
echo "Simulating test step..."
sleep 2
- name: End test trace
- name: End job trace
run: |
export WORKFLOW_NAME="test-workflow"
export STEP_NAME="test"
export WORKFLOW_NAME="test-trace"
export JOB_NAME="test-build"
export TRACE_ACTION="end"
python ci/trace-steps.py
Expand Down
92 changes: 32 additions & 60 deletions ci/trace-steps.py
Original file line number Diff line number Diff line change
@@ -1,75 +1,47 @@
import os
import time
import logging
from datetime import datetime
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
from opentelemetry.trace import SpanContext, TraceFlags, TraceState, NonRecordingSpan
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
service_name = os.getenv("WORKFLOW_NAME", "default_service")
job_name = os.getenv("JOB_NAME", "default_job")

# Set up OpenTelemetry tracing
trace.set_tracer_provider(
TracerProvider(
resource=Resource.create({"service.name": os.getenv("WORKFLOW_NAME")})
)
)
resource = Resource.create({"service.name": service_name})
trace.set_tracer_provider(TracerProvider(resource=resource))
tracer = trace.get_tracer(__name__)

# Set up OTLP exporter to send to OpenTelemetry Collector
otlp_exporter = OTLPSpanExporter(endpoint="http://0.0.0.0:4317", insecure=True)

# Set up span processor
span_processor = BatchSpanProcessor(otlp_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)

# Optionally, export to console for debugging
console_exporter = ConsoleSpanExporter()
trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(console_exporter))

def retry_operation(operation, retries=3, delay=5):
    """Call ``operation`` until it succeeds or the attempts run out.

    Args:
        operation: Zero-argument callable to invoke.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        Whatever ``operation`` returns on its first successful call, or
        ``None`` if ``retries`` is zero or negative (no attempt is made).

    Raises:
        Exception: The error from the final attempt is re-raised unchanged.
    """
    final_index = retries - 1
    attempt = 0
    while attempt < retries:
        try:
            return operation()
        except Exception as err:
            logger.error(f"Attempt {attempt + 1} failed with error: {err}")
            # Out of attempts: propagate the failure that just happened.
            if attempt >= final_index:
                raise
            time.sleep(delay)
        attempt += 1

def start_trace(step_name):
    """Start a span named ``step_name`` on the module-level tracer and return it."""
    return tracer.start_span(name=step_name)

def end_trace(span):
    """Finish ``span`` by calling its ``end()`` method."""
    span.end()
console_span_processor = BatchSpanProcessor(ConsoleSpanExporter())
trace.get_tracer_provider().add_span_processor(console_span_processor)

# Adding OTLP Span Exporter for actual data export
otlp_exporter = OTLPSpanExporter(endpoint="localhost:4317", insecure=True)
otlp_span_processor = BatchSpanProcessor(otlp_exporter)
trace.get_tracer_provider().add_span_processor(otlp_span_processor)

print("Tracer initialized with service name:", service_name)

def set_start_time():
    """Record the current time so a later run of this script can compute elapsed time.

    Writes the current POSIX timestamp (seconds, as text) to
    ``/tmp/start_time.txt``, overwriting any previous contents, and prints a
    confirmation message.
    """
    now_epoch = datetime.now().timestamp()
    marker = open("/tmp/start_time.txt", "w")
    try:
        marker.write(str(now_epoch))
    finally:
        marker.close()
    print("Start time recorded")

def calculate_duration():
    """Compute the time elapsed since the recorded start and attach it to a span.

    Reads the start timestamp from ``/tmp/start_time.txt``, subtracts it from
    the current time, prints the result, and opens a span named after
    ``job_name`` carrying the value in its ``total_duration_s`` attribute.

    Raises:
        FileNotFoundError: If the start-time file does not exist.
        ValueError: If the file contents cannot be parsed as a float.
    """
    marker = open("/tmp/start_time.txt", "r")
    try:
        started_at = float(marker.read())
    finally:
        marker.close()

    duration = datetime.now().timestamp() - started_at
    print(f"Total Duration: {duration}s")

    # The span itself is short-lived; the job's length travels as an attribute.
    with tracer.start_as_current_span(job_name) as job_span:
        job_span.set_attribute("total_duration_s", duration)

if __name__ == "__main__":
step_name = os.getenv("STEP_NAME", "default_step")
action = os.getenv("TRACE_ACTION", "start")

if action == "start":
span = retry_operation(lambda: start_trace(step_name))
with open(f"/tmp/trace_{step_name}.txt", "w") as f:
f.write(str(span.get_span_context().trace_id))
set_start_time()
elif action == "end":
trace_id = os.getenv("TRACE_ID")
if not trace_id:
with open(f"/tmp/trace_{step_name}.txt", "r") as f:
trace_id = f.read().strip()
trace_id = int(trace_id, 16) # Convert trace_id back to int
span_context = SpanContext(
trace_id=trace_id,
span_id=0, # Span ID will be generated
trace_flags=TraceFlags(TraceFlags.SAMPLED),
trace_state=TraceState(),
is_remote=True
)
with tracer.start_as_current_span(name=step_name, context=trace.set_span_in_context(NonRecordingSpan(span_context))):
span = tracer.start_span(name=step_name)
end_trace(span)
calculate_duration()

0 comments on commit 75e03f3

Please sign in to comment.