This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit 8869ec6: Capture output directly

dbarbuzzi committed May 29, 2024 (parent: 21fa864)
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions neuralmagic/benchmarks/run_benchmark_serving.py
@@ -6,7 +6,6 @@
 import sys
 import time
 from pathlib import Path
-from tempfile import TemporaryFile
 from typing import Dict, List, NamedTuple, Optional
 
 import requests
@@ -30,28 +29,23 @@ def __init__(self, args: Dict, max_ready_wait: int = 600):
             self.cmd.extend([f"--{k}", str(v)])
         self.max_ready_wait = max_ready_wait
         self.proc = None
-        self.output_file = TemporaryFile()
 
     def __enter__(self):
         log_banner(self.logger, "server startup command", shlex.join(self.cmd))
         self.proc = subprocess.Popen(self.cmd,
                                      stderr=subprocess.STDOUT,
-                                     stdout=self.output_file.fileno())
+                                     stdout=subprocess.PIPE)
         self._wait_for_server_ready()
 
     def __exit__(self, exc_type, exc_value, exc_traceback):
         if self.proc and self.proc.poll() is None:
             self.logger.info("killing server")
             self.proc.kill()
 
         if exc_type is None:
             return  # only log if an exception occurred
 
-        self.output_file.seek(0)
-        self.output = self.output_file.read()
-        self.output_file.close()
-
-        log_banner(self.logger, "server output", self.output)
         log_banner(
             self.logger, "context exit args", f"exc_type={exc_type}\n"
             f"exc_value={exc_value}\n"
             f"exc_traceback={exc_traceback}")
+        log_banner(self.logger, "server output", self.proc.stdout)
 
     def _wait_for_server_ready(self):
         self.logger.info("waiting for server to become ready")
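For context outside GitHub, here is a minimal self-contained sketch of the pattern this commit adopts: merge the server's stderr into stdout, capture both through a pipe instead of spooling them to a temporary file, and surface the output only when the context exits with an exception. The class shape, the health_url parameter, the plain print logging, and the body of _wait_for_server_ready (collapsed in the diff above) are illustrative assumptions, not the repository's actual code.

    import shlex
    import subprocess
    import time
    from typing import List, Optional

    import requests


    class ServerContext:
        """Run a server subprocess, capturing its output directly via a pipe."""

        def __init__(self, cmd: List[str], health_url: str,
                     max_ready_wait: int = 600):
            self.cmd = cmd
            self.health_url = health_url  # assumed readiness endpoint
            self.max_ready_wait = max_ready_wait
            self.proc: Optional[subprocess.Popen] = None

        def __enter__(self):
            print(f"server startup command: {shlex.join(self.cmd)}")
            # Merge stderr into stdout and capture both through one pipe,
            # instead of writing them to a TemporaryFile on disk.
            self.proc = subprocess.Popen(self.cmd,
                                         stderr=subprocess.STDOUT,
                                         stdout=subprocess.PIPE)
            self._wait_for_server_ready()
            return self

        def __exit__(self, exc_type, exc_value, exc_traceback):
            if self.proc and self.proc.poll() is None:
                self.proc.kill()

            if exc_type is None:
                return  # only log output if an exception occurred

            # A piped stdout is a bytes stream: read and decode before logging.
            output = self.proc.stdout.read().decode(errors="replace")
            print(f"server output:\n{output}")

        def _wait_for_server_ready(self):
            # Poll a health endpoint until it answers or the wait expires.
            deadline = time.time() + self.max_ready_wait
            while time.time() < deadline:
                if self.proc.poll() is not None:
                    raise RuntimeError("server exited before becoming ready")
                try:
                    if requests.get(self.health_url, timeout=5).status_code == 200:
                        return
                except requests.ConnectionError:
                    pass
                time.sleep(1)
            raise TimeoutError("server did not become ready in time")


    # Hypothetical usage:
    # with ServerContext(["python", "-m", "http.server", "8000"],
    #                    health_url="http://localhost:8000/"):
    #     ...  # run the benchmark client here

One general caveat with subprocess.PIPE: if the child writes more output than the OS pipe buffer holds before the parent reads it, the child blocks, so long-running, chatty servers usually warrant a drain thread or communicate(). Note also that the sketch reads and decodes the pipe before logging, since a piped stdout is a bytes stream rather than text.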

1 comment on commit 8869ec6

@github-actions

bigger_is_better

Benchmark suite      Current: 8869ec6               Previous: a6b9443   Ratio
request_throughput   3.8397533322013477 prompts/s   -                   -
token_throughput     1474.4652795653176 tokens/s    -                   -

Both rows report the same benchmark configuration:
  description      VLLM Engine throughput - synthetic
  model            NousResearch/Llama-2-7b-chat-hf
  max_model_len    4096
  benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000
  gpu_description  NVIDIA A10G x 1
  vllm_version     0.3.0
  python_version   3.10.12 (main, May 10 2024, 13:42:25) [GCC 9.4.0]
  torch_version    2.3.0+cu121

This comment was automatically generated by workflow using github-action-benchmark.
