From 87ec4e6e3b56effd895775f4f8f63e412bde7f75 Mon Sep 17 00:00:00 2001
From: Drew Robbins <drew@drewby.com>
Date: Thu, 3 Oct 2024 00:14:55 +0900
Subject: [PATCH] Add system specific conventions for OpenAI (#1385)

Co-authored-by: Liudmila Molkova <limolkova@microsoft.com>
---
 .chloggen/add_openai_specific_attributes.yaml |   4 +
 docs/attributes-registry/gen-ai.md            |  27 +++
 docs/gen-ai/openai.md                         | 190 ++++++++++++++++++
 model/gen-ai/metrics.yaml                     |   6 +
 model/gen-ai/registry.yaml                    |  48 +++++
 model/gen-ai/spans.yaml                       |  34 +++-
 6 files changed, 306 insertions(+), 3 deletions(-)
 create mode 100644 .chloggen/add_openai_specific_attributes.yaml
 create mode 100644 docs/gen-ai/openai.md

diff --git a/.chloggen/add_openai_specific_attributes.yaml b/.chloggen/add_openai_specific_attributes.yaml
new file mode 100644
index 0000000000..52d41f7b95
--- /dev/null
+++ b/.chloggen/add_openai_specific_attributes.yaml
@@ -0,0 +1,4 @@
+change_type: enhancement
+component: gen_ai  # same namespace prefix as the gen_ai.openai.* attributes introduced by this change
+note: Add system specific conventions for OpenAI.
+issues: [1370]  # NOTE(review): the commit subject references #1385; confirm 1370 is the intended tracking issue
diff --git a/docs/attributes-registry/gen-ai.md b/docs/attributes-registry/gen-ai.md
index fd8e8ee117..0dc935e462 100644
--- a/docs/attributes-registry/gen-ai.md
+++ b/docs/attributes-registry/gen-ai.md
@@ -7,6 +7,7 @@
 # Gen AI
 
 - [GenAI Attributes](#genai-attributes)
+- [OpenAI Attributes](#openai-attributes)
 - [Deprecated GenAI Attributes](#deprecated-genai-attributes)
 
 ## GenAI Attributes
@@ -73,6 +74,32 @@ If none of these options apply, the `gen_ai.system` SHOULD be set to `_OTHER`.
 | `input`  | Input tokens (prompt, input, etc.)         | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
 | `output` | Output tokens (completion, response, etc.) | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
 
+## OpenAI Attributes
+
+This group defines attributes for OpenAI.
+
+| Attribute                               | Type   | Description                                                           | Examples           | Stability                                                        |
+| --------------------------------------- | ------ | --------------------------------------------------------------------- | ------------------ | ---------------------------------------------------------------- |
+| `gen_ai.openai.request.response_format` | string | The response format that is requested.                                | `json_object`      | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `gen_ai.openai.request.seed`            | int    | Requests with same seed value more likely to return same result.      | `100`              | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `gen_ai.openai.request.service_tier`    | string | The service tier requested. May be a specific tier, default, or auto. | `auto`; `default`  | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `gen_ai.openai.response.service_tier`   | string | The service tier used for the response.                               | `scale`; `default` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+`gen_ai.openai.request.response_format` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value         | Description                 | Stability                                                        |
+| ------------- | --------------------------- | ---------------------------------------------------------------- |
+| `json_object` | JSON object response format | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `json_schema` | JSON schema response format | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `text`        | Text response format        | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+`gen_ai.openai.request.service_tier` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value     | Description                                                          | Stability                                                        |
+| --------- | -------------------------------------------------------------------- | ---------------------------------------------------------------- |
+| `auto`    | The system will utilize scale tier credits until they are exhausted. | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `default` | The system will utilize the default scale tier.                      | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
 ## Deprecated GenAI Attributes
 
 Describes deprecated `gen_ai` attributes.
diff --git a/docs/gen-ai/openai.md b/docs/gen-ai/openai.md
new file mode 100644
index 0000000000..0d797ed20b
--- /dev/null
+++ b/docs/gen-ai/openai.md
@@ -0,0 +1,190 @@
+<!--- Hugo front matter used to generate the website version of this page:
+linkTitle: OpenAI traces and metrics
+--->
+
+# Semantic Conventions for OpenAI operations
+
+**Status**: [Experimental][DocumentStatus]
+
+<!-- Re-generate TOC with `markdown-toc --no-first-h1 -i` -->
+
+<!-- toc -->
+
+- [OpenAI Span attributes](#openai-span-attributes)
+- [OpenAI Metric attributes](#openai-metric-attributes)
+  - [Metric: `gen_ai.client.token.usage`](#metric-gen_aiclienttokenusage)
+  - [Metric: `gen_ai.client.operation.duration`](#metric-gen_aiclientoperationduration)
+
+<!-- tocstop -->
+
+The Semantic Conventions for [OpenAI](https://openai.com/) extend and override the semantic conventions
+for [Gen AI Spans](gen-ai-spans.md) and [Gen AI Metrics](gen-ai-metrics.md).
+
+`gen_ai.system` MUST be set to `"openai"`.
+
+## OpenAI Span attributes
+
+These attributes track input data and metadata for a request to an OpenAI model. The attributes include general Generative AI
+attributes and ones specific to OpenAI.
+
+<!-- semconv trace.gen_ai.openai.client -->
+<!-- NOTE: THIS TEXT IS AUTOGENERATED. DO NOT EDIT BY HAND. -->
+<!-- see templates/registry/markdown/snippet.md.j2 -->
+<!-- prettier-ignore-start -->
+<!-- markdownlint-capture -->
+<!-- markdownlint-disable -->
+
+| Attribute  | Type | Description  | Examples  | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability |
+|---|---|---|---|---|---|
+| [`gen_ai.operation.name`](/docs/attributes-registry/gen-ai.md) | string | The name of the operation being performed. [1] | `chat`; `text_completion` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.request.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the GenAI model a request is being made to. [2] | `gpt-4` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.system`](/docs/attributes-registry/gen-ai.md) | string | The Generative AI product as identified by the client or server instrumentation. [3] | `openai` | `Required` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`error.type`](/docs/attributes-registry/error.md) | string | Describes a class of error the operation ended with. [4] | `timeout`; `java.net.UnknownHostException`; `server_certificate_invalid`; `500` | `Conditionally Required` if the operation ended in an error | ![Stable](https://img.shields.io/badge/-stable-lightgreen) |
+| [`gen_ai.openai.request.response_format`](/docs/attributes-registry/gen-ai.md) | string | The response format that is requested. | `json_object` | `Conditionally Required` if the request includes a response_format | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.openai.request.seed`](/docs/attributes-registry/gen-ai.md) | int | Requests with same seed value more likely to return same result. | `100` | `Conditionally Required` if the request includes a seed | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.openai.request.service_tier`](/docs/attributes-registry/gen-ai.md) | string | The service tier requested. May be a specific tier, default, or auto. | `auto`; `default` | `Conditionally Required` [5] | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.openai.response.service_tier`](/docs/attributes-registry/gen-ai.md) | string | The service tier used for the response. | `scale`; `default` | `Conditionally Required` [6] | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`server.port`](/docs/attributes-registry/server.md) | int | GenAI server port. [7] | `80`; `8080`; `443` | `Conditionally Required` If `server.address` is set. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) |
+| [`gen_ai.request.frequency_penalty`](/docs/attributes-registry/gen-ai.md) | double | The frequency penalty setting for the GenAI request. | `0.1` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.request.max_tokens`](/docs/attributes-registry/gen-ai.md) | int | The maximum number of tokens the model generates for a request. | `100` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.request.presence_penalty`](/docs/attributes-registry/gen-ai.md) | double | The presence penalty setting for the GenAI request. | `0.1` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.request.stop_sequences`](/docs/attributes-registry/gen-ai.md) | string[] | List of sequences that the model will use to stop generating further tokens. | `["forest", "lived"]` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.request.temperature`](/docs/attributes-registry/gen-ai.md) | double | The temperature setting for the GenAI request. | `0.0` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.request.top_p`](/docs/attributes-registry/gen-ai.md) | double | The top_p sampling setting for the GenAI request. | `1.0` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.response.finish_reasons`](/docs/attributes-registry/gen-ai.md) | string[] | Array of reasons the model stopped generating tokens, corresponding to each generation received. | `["stop"]`; `["stop", "length"]` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.response.id`](/docs/attributes-registry/gen-ai.md) | string | The unique identifier for the completion. | `chatcmpl-123` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.response.model`](/docs/attributes-registry/gen-ai.md) | string | The name of the model that generated the response. [8] | `gpt-4-0613` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.usage.input_tokens`](/docs/attributes-registry/gen-ai.md) | int | The number of tokens used in the prompt sent to OpenAI. | `100` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`gen_ai.usage.output_tokens`](/docs/attributes-registry/gen-ai.md) | int | The number of tokens used in the completions from OpenAI. | `180` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| [`server.address`](/docs/attributes-registry/server.md) | string | GenAI server address. [9] | `example.com`; `10.1.2.80`; `/tmp/my.sock` | `Recommended` | ![Stable](https://img.shields.io/badge/-stable-lightgreen) |
+
+**[1]:** If one of the predefined values applies, but specific system uses a different name it's RECOMMENDED to document it in the semantic conventions for specific GenAI system and use system-specific name in the instrumentation. If a different name is not documented, instrumentation libraries SHOULD use applicable predefined value.
+
+**[2]:** The name of the GenAI model a request is being made to. If the model is supplied by a vendor, then the value must be the exact name of the model requested. If the model is a fine-tuned custom model, the value should have a more specific name than the base model that's been fine-tuned.
+
+**[3]:** The `gen_ai.system` describes a family of GenAI models with specific model identified
+by `gen_ai.request.model` and `gen_ai.response.model` attributes.
+
+The actual GenAI product may differ from the one identified by the client.
+For example, when using OpenAI client libraries to communicate with Mistral, the `gen_ai.system`
+is set to `openai` based on the instrumentation's best knowledge.
+
+For custom model, a custom friendly name SHOULD be used.
+If none of these options apply, the `gen_ai.system` SHOULD be set to `_OTHER`.
+
+**[4]:** The `error.type` SHOULD match the error code returned by the Generative AI provider or the client library,
+the canonical name of exception that occurred, or another low-cardinality error identifier.
+Instrumentations SHOULD document the list of errors they report.
+
+**[5]:** if the request includes a service_tier and the value is not 'auto'
+
+**[6]:** if the response was received and includes a service_tier
+
+**[7]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries, for example proxies, if it's available.
+
+**[8]:** If available. The name of the GenAI model that provided the response. If the model is supplied by a vendor, then the value must be the exact name of the model actually used. If the model is a fine-tuned custom model, the value should have a more specific name than the base model that's been fine-tuned.
+
+**[9]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries, for example proxies, if it's available.
+
+
+
+`error.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value  | Description | Stability |
+|---|---|---|
+| `_OTHER` | A fallback error value to be used when the instrumentation doesn't define a custom value. | ![Stable](https://img.shields.io/badge/-stable-lightgreen) |
+
+
+`gen_ai.openai.request.response_format` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value  | Description | Stability |
+|---|---|---|
+| `json_object` | JSON object response format | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `json_schema` | JSON schema response format | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `text` | Text response format | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+
+`gen_ai.openai.request.service_tier` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value  | Description | Stability |
+|---|---|---|
+| `auto` | The system will utilize scale tier credits until they are exhausted. | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `default` | The system will utilize the default scale tier. | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+
+`gen_ai.operation.name` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value  | Description | Stability |
+|---|---|---|
+| `chat` | Chat completion operation such as [OpenAI Chat API](https://platform.openai.com/docs/api-reference/chat) | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `text_completion` | Text completions operation such as [OpenAI Completions API (Legacy)](https://platform.openai.com/docs/api-reference/completions) | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+
+`gen_ai.system` has the following list of well-known values. If one of them applies, then the respective value MUST be used; otherwise, a custom value MAY be used.
+
+| Value  | Description | Stability |
+|---|---|---|
+| `anthropic` | Anthropic | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `cohere` | Cohere | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `openai` | OpenAI | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+| `vertex_ai` | Vertex AI | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+
+
+<!-- markdownlint-restore -->
+<!-- prettier-ignore-end -->
+<!-- END AUTOGENERATED TEXT -->
+<!-- endsemconv -->
+
+## OpenAI Metric attributes
+
+OpenAI metrics follow [Generative AI metrics](gen-ai-metrics.md) with the noted additional attributes.
+Individual systems may include additional system-specific attributes. It is recommended to check system-specific documentation, if available.
+
+### Metric: `gen_ai.client.token.usage`
+
+Reports the usage of tokens following the common [gen_ai.client.token.usage](./gen-ai-metrics.md#metric-gen_aiclienttokenusage) definition.
+
+Additional attributes:
+
+<!-- semconv metric_attributes.gen_ai.openai -->
+<!-- NOTE: THIS TEXT IS AUTOGENERATED. DO NOT EDIT BY HAND. -->
+<!-- see templates/registry/markdown/snippet.md.j2 -->
+<!-- prettier-ignore-start -->
+<!-- markdownlint-capture -->
+<!-- markdownlint-disable -->
+
+| Attribute  | Type | Description  | Examples  | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability |
+|---|---|---|---|---|---|
+| [`gen_ai.openai.response.service_tier`](/docs/attributes-registry/gen-ai.md) | string | The service tier used for the response. | `scale`; `default` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+
+<!-- markdownlint-restore -->
+<!-- prettier-ignore-end -->
+<!-- END AUTOGENERATED TEXT -->
+<!-- endsemconv -->
+
+### Metric: `gen_ai.client.operation.duration`
+
+Measures the time it takes to complete an operation following the common [gen_ai.client.operation.duration](./gen-ai-metrics.md#metric-gen_aiclientoperationduration) definition.
+
+Additional attributes:
+
+<!-- semconv metric_attributes.gen_ai.openai -->
+<!-- NOTE: THIS TEXT IS AUTOGENERATED. DO NOT EDIT BY HAND. -->
+<!-- see templates/registry/markdown/snippet.md.j2 -->
+<!-- prettier-ignore-start -->
+<!-- markdownlint-capture -->
+<!-- markdownlint-disable -->
+
+| Attribute  | Type | Description  | Examples  | [Requirement Level](https://opentelemetry.io/docs/specs/semconv/general/attribute-requirement-level/) | Stability |
+|---|---|---|---|---|---|
+| [`gen_ai.openai.response.service_tier`](/docs/attributes-registry/gen-ai.md) | string | The service tier used for the response. | `scale`; `default` | `Recommended` | ![Experimental](https://img.shields.io/badge/-experimental-blue) |
+
+
+<!-- markdownlint-restore -->
+<!-- prettier-ignore-end -->
+<!-- END AUTOGENERATED TEXT -->
+<!-- endsemconv -->
+
+[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md
diff --git a/model/gen-ai/metrics.yaml b/model/gen-ai/metrics.yaml
index 2c7035876c..cbd22ebc70 100644
--- a/model/gen-ai/metrics.yaml
+++ b/model/gen-ai/metrics.yaml
@@ -30,6 +30,12 @@ groups:
           The `error.type` SHOULD match the error code returned by the Generative AI service,
           the canonical name of exception that occurred, or another low-cardinality error identifier.
           Instrumentations SHOULD document the list of errors they report.
+  - id: metric_attributes.gen_ai.openai
+    type: attribute_group
+    brief: 'This group describes OpenAI-specific attributes for GenAI client metrics'
+    attributes:
+      - ref: gen_ai.openai.response.service_tier  # reported in addition to the common GenAI metric attributes
+        requirement_level: recommended
   - id: metric.gen_ai.client.token.usage
     type: metric
     metric_name: gen_ai.client.token.usage
diff --git a/model/gen-ai/registry.yaml b/model/gen-ai/registry.yaml
index 1470ca1cb3..5b3d1cff79 100644
--- a/model/gen-ai/registry.yaml
+++ b/model/gen-ai/registry.yaml
@@ -148,3 +148,51 @@ groups:
           If one of the predefined values applies, but specific system uses a different name it's RECOMMENDED to document it in the semantic
           conventions for specific GenAI system and use system-specific name in the instrumentation.
           If a different name is not documented, instrumentation libraries SHOULD use applicable predefined value.
+  - id: registry.gen_ai.openai
+    type: attribute_group
+    display_name: OpenAI Attributes
+    brief: >
+      This group defines attributes for OpenAI.
+    attributes:
+      - id: gen_ai.openai.request.seed
+        stability: experimental
+        type: int
+        brief: Requests with same seed value more likely to return same result.
+        examples: [100]
+      - id: gen_ai.openai.request.response_format
+        stability: experimental
+        type:
+          members:  # well-known values; the generated docs state custom values MAY still be used
+            - id: text
+              value: "text"
+              brief: 'Text response format'
+              stability: experimental
+            - id: json_object
+              value: "json_object"
+              brief: 'JSON object response format'
+              stability: experimental
+            - id: json_schema
+              value: "json_schema"
+              brief: 'JSON schema response format'
+              stability: experimental
+        brief: The response format that is requested.
+        examples: ['json_object']  # example uses one of the well-known members declared above
+      - id: gen_ai.openai.request.service_tier
+        stability: experimental
+        type:
+          members:  # well-known values; the generated docs state custom values MAY still be used
+            - id: auto
+              value: "auto"
+              brief: The system will utilize scale tier credits until they are exhausted.
+              stability: experimental
+            - id: default
+              value: "default"
+              brief: The system will utilize the default scale tier.
+              stability: experimental
+        brief: The service tier requested. May be a specific tier, default, or auto.
+        examples: ['auto', 'default']
+      - id: gen_ai.openai.response.service_tier
+        stability: experimental
+        type: string  # free-form string: its examples include 'scale', which the request enum does not list
+        brief: The service tier used for the response.
+        examples: ['scale', 'default']
diff --git a/model/gen-ai/spans.yaml b/model/gen-ai/spans.yaml
index 9345f4f249..d634d94473 100644
--- a/model/gen-ai/spans.yaml
+++ b/model/gen-ai/spans.yaml
@@ -1,5 +1,5 @@
 groups:
-  - id: trace.gen_ai.client
+  - id: trace.gen_ai.client.common
     type: span
     brief: >
       Describes GenAI operation span.
@@ -20,8 +20,6 @@ groups:
         requirement_level: recommended
       - ref: gen_ai.request.top_p
         requirement_level: recommended
-      - ref: gen_ai.request.top_k
-        requirement_level: recommended
       - ref: gen_ai.request.stop_sequences
         requirement_level: recommended
       - ref: gen_ai.request.frequency_penalty
@@ -85,3 +83,33 @@ groups:
           conditionally_required: if and only if corresponding event is enabled
         note: >
           It's RECOMMENDED to format completions as JSON string matching [OpenAI messages format](https://platform.openai.com/docs/guides/text-generation)
+
+  - id: trace.gen_ai.client
+    extends: trace.gen_ai.client.common  # generic GenAI span: shared base attributes plus the additions below
+    brief: >
+      Describes a GenAI operation span.
+    attributes:
+      - ref: gen_ai.request.top_k  # listed here rather than in the common group, so groups that extend only the common group do not carry it
+        requirement_level: recommended
+
+  - id: trace.gen_ai.openai.client
+    extends: trace.gen_ai.client.common  # OpenAI span: shared base attributes plus the OpenAI-specific ones below
+    brief: >
+      Describes an OpenAI operation span.
+    attributes:
+      - ref: gen_ai.openai.request.seed
+        requirement_level:
+          conditionally_required: if the request includes a seed
+      - ref: gen_ai.openai.request.response_format
+        requirement_level:
+          conditionally_required: if the request includes a response_format
+      - ref: gen_ai.openai.request.service_tier
+        requirement_level:
+          conditionally_required: if the request includes a service_tier and the value is not 'auto'
+      - ref: gen_ai.openai.response.service_tier
+        requirement_level:
+          conditionally_required: if the response was received and includes a service_tier
+      - ref: gen_ai.usage.input_tokens  # only the brief is overridden here, with OpenAI-specific wording
+        brief: The number of tokens used in the prompt sent to OpenAI.
+      - ref: gen_ai.usage.output_tokens  # only the brief is overridden here, with OpenAI-specific wording
+        brief: The number of tokens used in the completions from OpenAI.