diff --git a/docker/llm/inference/xpu/docker/Dockerfile b/docker/llm/inference/xpu/docker/Dockerfile index 7a812482db7..ce629ad2b69 100644 --- a/docker/llm/inference/xpu/docker/Dockerfile +++ b/docker/llm/inference/xpu/docker/Dockerfile @@ -61,7 +61,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO cp -r ./ipex-llm/python/llm/example/GPU/vLLM-Serving/ ./vLLM-Serving && \ # Download pp_serving mkdir -p /llm/pp_serving && \ - cp ./ipex-llm/python/llm/example/GPU/Pipeline-Parallel-FastAPI/*.py /llm/pp_serving/ && \ + cp ./ipex-llm/python/llm/example/GPU/Pipeline-Parallel-Serving/*.py /llm/pp_serving/ && \ # Install related library of benchmarking pip install pandas omegaconf && \ chmod +x /llm/benchmark.sh && \ diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/README.md b/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/README.md rename to python/llm/example/GPU/Pipeline-Parallel-Serving/README.md diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/benchmark.py similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py rename to python/llm/example/GPU/Pipeline-Parallel-Serving/benchmark.py diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/gradio_webui.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/gradio_webui.py similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/gradio_webui.py rename to python/llm/example/GPU/Pipeline-Parallel-Serving/gradio_webui.py diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/pipeline_serving.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/pipeline_serving.py similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/pipeline_serving.py rename to python/llm/example/GPU/Pipeline-Parallel-Serving/pipeline_serving.py diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/1024.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/1024.txt similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/1024.txt rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/1024.txt diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/128.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/128.txt similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/128.txt rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/128.txt diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/2048.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/2048.txt similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/2048.txt rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/2048.txt diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/32.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/32.txt similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/32.txt rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/32.txt diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/run.sh b/python/llm/example/GPU/Pipeline-Parallel-Serving/run.sh similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/run.sh rename to python/llm/example/GPU/Pipeline-Parallel-Serving/run.sh diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/serving.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/serving.py similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/serving.py rename to python/llm/example/GPU/Pipeline-Parallel-Serving/serving.py diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/wrk_script_1024.lua b/python/llm/example/GPU/Pipeline-Parallel-Serving/wrk_script_1024.lua similarity index 100% rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/wrk_script_1024.lua rename to python/llm/example/GPU/Pipeline-Parallel-Serving/wrk_script_1024.lua diff --git a/python/llm/src/ipex_llm/serving/api/__init__.py b/python/llm/src/ipex_llm/serving/api/__init__.py index 20e93ad6b3c..79ddfd9dcba 100644 --- a/python/llm/src/ipex_llm/serving/api/__init__.py +++ b/python/llm/src/ipex_llm/serving/api/__init__.py @@ -14,5 +14,5 @@ # limitations under the License. # -from .api import FastApp +from .api_server import FastApp from .model_worker import ModelWorker \ No newline at end of file diff --git a/python/llm/src/ipex_llm/serving/api/api.py b/python/llm/src/ipex_llm/serving/api/api_server.py similarity index 100% rename from python/llm/src/ipex_llm/serving/api/api.py rename to python/llm/src/ipex_llm/serving/api/api_server.py