Commit

Fix issue #4196: '[Feature request]: Allow LiteLLM to track cost when accessing models from LiteLLM Proxy'
openhands-agent committed Oct 3, 2024
1 parent 909e332 commit 52a51ee
Showing 1 changed file with 47 additions and 3 deletions.
50 changes: 47 additions & 3 deletions openhands/llm/llm.py
@@ -188,14 +188,19 @@ def wrapper(*args, **kwargs):
            # we don't support streaming here, thus we get a ModelResponse
            resp: ModelResponse = completion_unwrapped(*args, **kwargs)

            # Check for x-litellm-response-cost header
            completion_cost = None
            if hasattr(resp, 'headers') and 'x-litellm-response-cost' in resp.headers:
                completion_cost = float(resp.headers['x-litellm-response-cost'])

            # log for evals or other scripts that need the raw completion
            if self.config.log_completions:
                self.llm_completions.append(
                    {
                        'messages': messages,
                        'response': resp,
                        'timestamp': time.time(),
                        'cost': completion_cost if completion_cost is not None else self._completion_cost(resp),
                    }
                )

@@ -337,10 +342,49 @@ def _is_local(self):
                return True
        return False

    def _completion_cost(self, response):
        """Calculate the cost of a completion response based on the model. Local models are treated as free.
        Add the current cost into total cost in metrics.

        Args:
            response: A response from a model invocation.

        Returns:
            number: The cost of the response.
        """
        if not self.cost_metric_supported:
            return 0.0

        # Check for x-litellm-response-cost header
        if hasattr(response, 'headers') and 'x-litellm-response-cost' in response.headers:
            cost = float(response.headers['x-litellm-response-cost'])
            self.metrics.add_cost(cost)
            return cost

        extra_kwargs = {}
        if (
            self.config.input_cost_per_token is not None
            and self.config.output_cost_per_token is not None
        ):
            cost_per_token = CostPerToken(
                input_cost_per_token=self.config.input_cost_per_token,
                output_cost_per_token=self.config.output_cost_per_token,
            )
            logger.info(f'Using custom cost per token: {cost_per_token}')
            extra_kwargs['custom_cost_per_token'] = cost_per_token

        if not self._is_local():
            try:
                cost = litellm_completion_cost(
                    completion_response=response, **extra_kwargs
                )
                self.metrics.add_cost(cost)
                return cost
            except Exception:
                self.cost_metric_supported = False
                logger.warning('Cost calculation not supported for this model.')
        return 0.0

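For reference, the LiteLLM Proxy reports the computed cost of each request in an 'x-litellm-response-cost' response header, which is what the new code reads off the completion response. Below is a minimal illustrative sketch of checking that header directly against a proxy's OpenAI-compatible endpoint; the base URL, API key, and model name are placeholder assumptions, not values from this commit:

    # Illustrative sketch only (not part of the commit): call a LiteLLM Proxy's
    # OpenAI-compatible endpoint over plain HTTP and read the cost header.
    # PROXY_BASE_URL, PROXY_API_KEY and the model name are assumed placeholders.
    import requests

    PROXY_BASE_URL = 'http://localhost:4000'
    PROXY_API_KEY = 'sk-1234'

    resp = requests.post(
        f'{PROXY_BASE_URL}/chat/completions',
        headers={'Authorization': f'Bearer {PROXY_API_KEY}'},
        json={
            'model': 'gpt-4o-mini',
            'messages': [{'role': 'user', 'content': 'Hello'}],
        },
        timeout=60,
    )
    resp.raise_for_status()

    # The proxy attaches the computed cost of the call as a response header.
    cost_header = resp.headers.get('x-litellm-response-cost')
    cost = float(cost_header) if cost_header is not None else None
    print(f'Cost reported by the proxy: {cost}')

When the call goes through the litellm SDK instead of raw HTTP, the header is only usable if it is surfaced on the returned ModelResponse, which is why the diff guards the lookup with hasattr(resp, 'headers').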
