diff --git a/docker/llm/inference/xpu/docker/Dockerfile b/docker/llm/inference/xpu/docker/Dockerfile
index 7a812482db7..ce629ad2b69 100644
--- a/docker/llm/inference/xpu/docker/Dockerfile
+++ b/docker/llm/inference/xpu/docker/Dockerfile
@@ -61,7 +61,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     cp -r ./ipex-llm/python/llm/example/GPU/vLLM-Serving/ ./vLLM-Serving && \
     # Download pp_serving
     mkdir -p /llm/pp_serving && \
-    cp ./ipex-llm/python/llm/example/GPU/Pipeline-Parallel-FastAPI/*.py /llm/pp_serving/ && \
+    cp ./ipex-llm/python/llm/example/GPU/Pipeline-Parallel-Serving/*.py /llm/pp_serving/ && \
     # Install related library of benchmarking
     pip install pandas omegaconf && \
     chmod +x /llm/benchmark.sh && \
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/README.md b/python/llm/example/GPU/Pipeline-Parallel-Serving/README.md
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/README.md
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/README.md
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/benchmark.py
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/benchmark.py
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/benchmark.py
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/gradio_webui.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/gradio_webui.py
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/gradio_webui.py
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/gradio_webui.py
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/pipeline_serving.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/pipeline_serving.py
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/pipeline_serving.py
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/pipeline_serving.py
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/1024.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/1024.txt
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/1024.txt
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/1024.txt
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/128.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/128.txt
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/128.txt
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/128.txt
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/2048.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/2048.txt
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/2048.txt
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/2048.txt
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/32.txt b/python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/32.txt
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/prompt/32.txt
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/prompt/32.txt
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/run.sh b/python/llm/example/GPU/Pipeline-Parallel-Serving/run.sh
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/run.sh
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/run.sh
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/serving.py b/python/llm/example/GPU/Pipeline-Parallel-Serving/serving.py
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/serving.py
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/serving.py
diff --git a/python/llm/example/GPU/Pipeline-Parallel-FastAPI/wrk_script_1024.lua b/python/llm/example/GPU/Pipeline-Parallel-Serving/wrk_script_1024.lua
similarity index 100%
rename from python/llm/example/GPU/Pipeline-Parallel-FastAPI/wrk_script_1024.lua
rename to python/llm/example/GPU/Pipeline-Parallel-Serving/wrk_script_1024.lua
diff --git a/python/llm/src/ipex_llm/serving/api/__init__.py b/python/llm/src/ipex_llm/serving/api/__init__.py
index 20e93ad6b3c..79ddfd9dcba 100644
--- a/python/llm/src/ipex_llm/serving/api/__init__.py
+++ b/python/llm/src/ipex_llm/serving/api/__init__.py
@@ -14,5 +14,5 @@
 # limitations under the License.
 #
 
-from .api import FastApp
+from .api_server import FastApp
 from .model_worker import ModelWorker
\ No newline at end of file
diff --git a/python/llm/src/ipex_llm/serving/api/api.py b/python/llm/src/ipex_llm/serving/api/api_server.py
similarity index 100%
rename from python/llm/src/ipex_llm/serving/api/api.py
rename to python/llm/src/ipex_llm/serving/api/api_server.py