fix(openai): handle image_url option on chat completion request gracefully (#4445)

* add support for image_url tagging

* test

* test

* change variable names

* prompt token count excludes image_url
sabrenner authored and juan-fernandez committed Jul 11, 2024
1 parent a028b62 commit e79deb1
Showing 2 changed files with 146 additions and 5 deletions.
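
For context, the request shape this change handles is the multi-part message content used with vision-capable models, where content is an array of text and image_url parts instead of a plain string. A rough illustration (model name, text, and URL are placeholders, not values from the diff below):

const params = {
  model: 'gpt-4o',
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        { type: 'image_url', image_url: { url: 'https://example.com/photo.png' } }
      ]
    }
  ]
}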
47 changes: 42 additions & 5 deletions packages/datadog-plugin-openai/src/index.js
@@ -309,6 +309,7 @@ class OpenApiPlugin extends TracingPlugin {
   }

   sendLog (methodName, span, tags, store, error) {
+    if (!store) return
     if (!Object.keys(store).length) return
     if (!this.sampler.isSampled()) return

@@ -329,9 +330,22 @@ function countPromptTokens (methodName, payload, model) {
     const messages = payload.messages
     for (const message of messages) {
       const content = message.content
-      const { tokens, estimated } = countTokens(content, model)
-      promptTokens += tokens
-      promptEstimated = estimated
+      if (typeof content === 'string') {
+        const { tokens, estimated } = countTokens(content, model)
+        promptTokens += tokens
+        promptEstimated = estimated
+      } else if (Array.isArray(content)) {
+        for (const c of content) {
+          if (c.type === 'text') {
+            const { tokens, estimated } = countTokens(c.text, model)
+            promptTokens += tokens
+            promptEstimated = estimated
+          }
+          // unsupported token computation for image_url
+          // as even though URL is a string, its true token count
+          // is based on the image itself, something onerous to do client-side
+        }
+      }
     }
   } else if (methodName === 'completions.create') {
     let prompt = payload.prompt
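
The hunk above skips image_url parts when estimating prompt tokens, because an image's true token cost depends on the image data rather than on the URL string. A self-contained sketch of the same idea (approximateTokens and approximatePromptTokens are hypothetical helpers for illustration, not part of this diff):

function approximateTokens (text) {
  // crude heuristic: roughly four characters per token
  return Math.ceil(text.length / 4)
}

function approximatePromptTokens (messages) {
  let tokens = 0
  for (const message of messages) {
    const content = message.content
    if (typeof content === 'string') {
      tokens += approximateTokens(content)
    } else if (Array.isArray(content)) {
      for (const part of content) {
        // only text parts contribute; image_url parts are skipped entirely
        if (part.type === 'text') tokens += approximateTokens(part.text)
      }
    }
  }
  return tokens
}
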
@@ -403,7 +417,7 @@ function createChatCompletionRequestExtraction (tags, payload, store) {
   store.messages = payload.messages
   for (let i = 0; i < payload.messages.length; i++) {
     const message = payload.messages[i]
-    tags[`openai.request.messages.${i}.content`] = truncateText(message.content)
+    tagChatCompletionRequestContent(message.content, i, tags)
     tags[`openai.request.messages.${i}.role`] = message.role
     tags[`openai.request.messages.${i}.name`] = message.name
     tags[`openai.request.messages.${i}.finish_reason`] = message.finish_reason
@@ -692,7 +706,7 @@ function commonCreateResponseExtraction (tags, body, store, methodName) {
   for (let choiceIdx = 0; choiceIdx < body.choices.length; choiceIdx++) {
     const choice = body.choices[choiceIdx]

-    // logprobs can be nullm and we still want to tag it as 'returned' even when set to 'null'
+    // logprobs can be null and we still want to tag it as 'returned' even when set to 'null'
     const specifiesLogProb = Object.keys(choice).indexOf('logprobs') !== -1

     tags[`openai.response.choices.${choiceIdx}.finish_reason`] = choice.finish_reason
@@ -766,6 +780,7 @@ function truncateApiKey (apiKey) {
  */
 function truncateText (text) {
   if (!text) return
+  if (typeof text !== 'string' || !text || (typeof text === 'string' && text.length === 0)) return

   text = text
     .replace(RE_NEWLINE, '\\n')
@@ -778,6 +793,28 @@
   return text
 }

+function tagChatCompletionRequestContent (contents, messageIdx, tags) {
+  if (typeof contents === 'string') {
+    tags[`openai.request.messages.${messageIdx}.content`] = contents
+  } else if (Array.isArray(contents)) {
+    // content can also be an array of objects
+    // which represent text input or image url
+    for (const contentIdx in contents) {
+      const content = contents[contentIdx]
+      const type = content.type
+      tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.type`] = content.type
+      if (type === 'text') {
+        tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.text`] = truncateText(content.text)
+      } else if (type === 'image_url') {
+        tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.image_url.url`] =
+          truncateText(content.image_url.url)
+      }
+      // unsupported type otherwise, won't be tagged
+    }
+  }
+  // unsupported type otherwise, won't be tagged
+}
+
 // The server almost always responds with JSON
 function coerceResponseBody (body, methodName) {
   switch (methodName) {
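
To make the new tagging concrete, here is a rough usage sketch of tagChatCompletionRequestContent for a mixed text/image_url message at index 0. The resulting keys follow the tag names built in the function above; the message values and the commented output are illustrative:

const tags = {}
tagChatCompletionRequestContent([
  { type: 'text', text: 'What is in this image?' },
  { type: 'image_url', image_url: { url: 'https://example.com/photo.png' } }
], 0, tags)
// tags would then hold entries along the lines of:
//   'openai.request.messages.0.content.0.type'          => 'text'
//   'openai.request.messages.0.content.0.text'          => 'What is in this image?'
//   'openai.request.messages.0.content.1.type'          => 'image_url'
//   'openai.request.messages.0.content.1.image_url.url' => 'https://example.com/photo.png'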
104 changes: 104 additions & 0 deletions packages/datadog-plugin-openai/test/index.spec.js
@@ -2703,6 +2703,62 @@ describe('Plugin', () => {

           await checkTraces
         })
+
+        it('should tag image_url', async () => {
+          const checkTraces = agent
+            .use(traces => {
+              const span = traces[0][0]
+              // image_url is only relevant on request/input, output has the same shape as a normal chat completion
+              expect(span.meta).to.have.property('openai.request.messages.0.content.0.type', 'text')
+              expect(span.meta).to.have.property(
+                'openai.request.messages.0.content.0.text', 'I\'m allergic to peanuts. Should I avoid this food?'
+              )
+              expect(span.meta).to.have.property('openai.request.messages.0.content.1.type', 'image_url')
+              expect(span.meta).to.have.property(
+                'openai.request.messages.0.content.1.image_url.url', 'dummy/url/peanut_food.png'
+              )
+            })
+
+          const params = {
+            model: 'gpt-4-visual-preview',
+            messages: [
+              {
+                role: 'user',
+                name: 'hunter2',
+                content: [
+                  {
+                    type: 'text',
+                    text: 'I\'m allergic to peanuts. Should I avoid this food?'
+                  },
+                  {
+                    type: 'image_url',
+                    image_url: {
+                      url: 'dummy/url/peanut_food.png'
+                    }
+                  }
+                ]
+              }
+            ]
+          }
+
+          if (semver.satisfies(realVersion, '>=4.0.0')) {
+            const result = await openai.chat.completions.create(params)
+
+            expect(result.id).to.eql('chatcmpl-7GaWqyMTD9BLmkmy8SxyjUGX3KSRN')
+            expect(result.choices[0].message.role).to.eql('assistant')
+            expect(result.choices[0].message.content).to.eql('In that case, it\'s best to avoid peanut')
+            expect(result.choices[0].finish_reason).to.eql('length')
+          } else {
+            const result = await openai.createChatCompletion(params)
+
+            expect(result.data.id).to.eql('chatcmpl-7GaWqyMTD9BLmkmy8SxyjUGX3KSRN')
+            expect(result.data.choices[0].message.role).to.eql('assistant')
+            expect(result.data.choices[0].message.content).to.eql('In that case, it\'s best to avoid peanut')
+            expect(result.data.choices[0].finish_reason).to.eql('length')
+          }
+
+          await checkTraces
+        })
       })

       describe('create chat completion with tools', () => {
@@ -3267,6 +3323,54 @@
           expect(metricStub).to.have.been.calledWith('openai.tokens.total', 16, 'd', expectedTags)
         })

+        it('makes a successful chat completion call without image_url usage computed', async () => {
+          nock('https://api.openai.com:443')
+            .post('/v1/chat/completions')
+            .reply(200, function () {
+              return fs.createReadStream(Path.join(__dirname, 'streamed-responses/chat.completions.simple.txt'))
+            }, {
+              'Content-Type': 'text/plain',
+              'openai-organization': 'kill-9'
+            })
+
+          const checkTraces = agent
+            .use(traces => {
+              const span = traces[0][0]
+
+              // we shouldn't be trying to capture the image_url tokens
+              expect(span.metrics).to.have.property('openai.response.usage.prompt_tokens', 1)
+            })
+
+          const stream = await openai.chat.completions.create({
+            stream: 1,
+            model: 'gpt-4o',
+            messages: [
+              {
+                role: 'user',
+                name: 'hunter2',
+                content: [
+                  {
+                    type: 'text',
+                    text: 'One' // one token, for ease of testing
+                  },
+                  {
+                    type: 'image_url',
+                    image_url: {
+                      url: 'dummy/url/peanut_food.png'
+                    }
+                  }
+                ]
+              }
+            ]
+          })
+
+          for await (const part of stream) {
+            expect(part).to.have.property('choices')
+          }
+
+          await checkTraces
+        })
+
         it('makes a successful completion call', async () => {
           nock('https://api.openai.com:443')
             .post('/v1/completions')
