Merge pull request #3982 from janhq/chore/jan-api-proxies-to-cortex

chore: proxies Jan APIs to cortex.cpp
janhq · Nov 9, 2024 · 8378e0d
2 parents b877e84 + 487fd27
commit 8378e0d
Show file tree

Hide file tree

Showing 4 changed files with 48 additions and 63 deletions.
diff --git a/core/src/node/api/restful/common.ts b/core/src/node/api/restful/common.ts
@@ -10,6 +10,7 @@ import {
   getMessages,
   retrieveMessage,
   updateThread,
+  models,
 } from './helper/builder'
 
 import { JanApiRouteConfiguration } from './helper/configuration'
@@ -26,9 +27,12 @@ export const commonRouter = async (app: HttpServer) => {
   // Common Routes
   // Read & Delete :: Threads | Models | Assistants
   Object.keys(JanApiRouteConfiguration).forEach((key) => {
-    app.get(`/${key}`, async (_request) =>
-      getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
-    )
+    app.get(`/${key}`, async (_req, _res) => {
+      if (key === 'models') {
+        return models(_req, _res)
+      }
+      return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
+    })
 
     app.get(`/${key}/:id`, async (request: any) =>
       retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)

diff --git a/core/src/node/api/restful/helper/builder.test.ts b/core/src/node/api/restful/helper/builder.test.ts
@@ -220,22 +220,6 @@ describe('builder helper functions', () => {
   })
 
   describe('chatCompletions', () => {
-    it('should return an error if model is not found', async () => {
-      const request = { body: { model: 'nonexistentModel' } }
-      const reply = { code: jest.fn().mockReturnThis(), send: jest.fn() }
-
-      await chatCompletions(request, reply)
-      expect(reply.code).toHaveBeenCalledWith(404)
-      expect(reply.send).toHaveBeenCalledWith({
-        error: {
-          message: 'The model nonexistentModel does not exist',
-          type: 'invalid_request_error',
-          param: null,
-          code: 'model_not_found',
-        },
-      })
-    })
-
     it('should return the error on status not ok', async () => {
       const request = { body: { model: 'model1' } }
       const mockSend = jest.fn()

diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
@@ -10,9 +10,9 @@ import {
 } from 'fs'
 import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
 import { join } from 'path'
-import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types'
-import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper'
-import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
+import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types'
+import { getJanDataFolderPath } from '../../../helper'
+import { CORTEX_API_URL } from './consts'
 
 // TODO: Refactor these
 export const getBuilder = async (configuration: RouteConfiguration) => {
@@ -297,57 +297,56 @@ export const downloadModel = async (
   }
 }
 
-export const chatCompletions = async (request: any, reply: any) => {
-  const modelList = await getBuilder(JanApiRouteConfiguration.models)
-  const modelId = request.body.model
-
-  const matchedModels = modelList.filter((model: Model) => model.id === modelId)
-  if (matchedModels.length === 0) {
-    const error = {
-      error: {
-        message: `The model ${request.body.model} does not exist`,
-        type: 'invalid_request_error',
-        param: null,
-        code: 'model_not_found',
-      },
-    }
-    reply.code(404).send(error)
-    return
+/**
+ * Proxy /models to cortex
+ * @param request
+ * @param reply
+ */
+export const models = async (request: any, reply: any) => {
+  const fetch = require('node-fetch')
+  const headers: Record<string, any> = {
+    'Content-Type': 'application/json',
   }
 
-  const requestedModel = matchedModels[0]
-
-  const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
-
-  let apiKey: string | undefined = undefined
-  let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
+  const response = await fetch(`${CORTEX_API_URL}/models`, {
+    method: request.method,
+    headers: headers,
+    body: JSON.stringify(request.body),
+  })
 
-  if (engineConfiguration) {
-    apiKey = engineConfiguration.api_key
-    apiUrl = engineConfiguration.full_url ?? DEFAULT_CHAT_COMPLETION_URL
+  if (response.status !== 200) {
+    // Forward the error response to client via reply
+    const responseBody = await response.text()
+    const responseHeaders = Object.fromEntries(response.headers)
+    reply.code(response.status).headers(responseHeaders).send(responseBody)
+  } else {
+    reply.raw.writeHead(200, {
+      'Content-Type': 'application/json',
+      'Cache-Control': 'no-cache',
+      'Connection': 'keep-alive',
+      'Access-Control-Allow-Origin': '*',
+    })
+    response.body.pipe(reply.raw)
   }
+}
 
+/**
+ * Proxy chat completions
+ * @param request
+ * @param reply
+ */
+export const chatCompletions = async (request: any, reply: any) => {
   const headers: Record<string, any> = {
     'Content-Type': 'application/json',
   }
 
-  if (apiKey) {
-    headers['Authorization'] = `Bearer ${apiKey}`
-    headers['api-key'] = apiKey
-  }
-
-  if (requestedModel.engine === 'openai' && request.body.stop) {
-    // openai only allows max 4 stop words
-    request.body.stop = request.body.stop.slice(0, 4)
-  }
-
   // add engine for new cortex cpp engine
-  if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'llama-cpp'
+  if (request.body.engine === InferenceEngine.nitro) {
+    request.body.engine = InferenceEngine.cortex_llamacpp
   }
 
   const fetch = require('node-fetch')
-  const response = await fetch(apiUrl, {
+  const response = await fetch(`${CORTEX_API_URL}/chat/completions`, {
     method: 'POST',
     headers: headers,
     body: JSON.stringify(request.body),

diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts
@@ -1,9 +1,7 @@
-// The PORT to use for the Nitro subprocess
 export const CORTEX_DEFAULT_PORT = 39291
 
-// The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'
 
 export const SUPPORTED_MODEL_FORMAT = '.gguf'
 
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
+export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1`