Merge pull request #253 from MichaelClifford/fix-224
update all recipes to use MODEL_ENDPOINT
sallyom authored Apr 12, 2024
2 parents 4a080c1 + 59867d1 commit 198fa39
Showing 20 changed files with 38 additions and 33 deletions.
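
The pattern applied across every recipe below: `MODEL_ENDPOINT` now carries only the base URL of the model service (scheme, host, port), and each application appends its own API path (`/v1`, `/inference`, `/detection`) in code. A minimal sketch of the convention, assuming a llama.cpp-style OpenAI-compatible server; the names here are illustrative, not taken from any one recipe:

```python
import os

# MODEL_ENDPOINT holds only the base URL; the default mirrors the recipes.
endpoint = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")

# Each app appends the path its service expects, rather than baking it
# into the environment variable.
api_url = f"{endpoint}/v1"
print(api_url)  # http://localhost:8001/v1 when MODEL_ENDPOINT is unset
```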
3 changes: 2 additions & 1 deletion recipes/audio/audio_to_text/app/whisper_client.py
@@ -6,7 +6,8 @@
 st.set_page_config(page_title="Whisper Speech Recognition", page_icon=":studio_microphone:")
 st.title(":studio_microphone: Speech Recognition")
 st.markdown("Upload an audio file you wish to have translated")
-endpoint = os.getenv("MODEL_ENDPOINT", default="http://0.0.0.0:8001/inference")
+endpoint = os.getenv("MODEL_ENDPOINT", default="http://0.0.0.0:8001")
+endpoint = f"{endpoint}/inference"
 audio = st.file_uploader("", type=["wav"], accept_multiple_files=False)
 # read audio file
 if audio:
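
With the split default, the whisper client now posts to `{base}/inference`. A hedged sketch of what a request against that route could look like; the multipart field name `file` follows whisper.cpp's example server and is an assumption, not part of this diff:

```python
import os
import requests

endpoint = os.getenv("MODEL_ENDPOINT", "http://0.0.0.0:8001")

# Send a local WAV file to the transcription route; "file" is the field
# name whisper.cpp's example server expects (assumed, not shown above).
with open("sample.wav", "rb") as f:
    resp = requests.post(f"{endpoint}/inference", files={"file": f})
print(resp.text)
```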
4 changes: 2 additions & 2 deletions recipes/audio/audio_to_text/quadlet/audio-text.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/inference
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: quay.io/redhat-et/locallm-audio-to-text:latest
     name: audio-to-text
     ports:
2 changes: 1 addition & 1 deletion recipes/common/Makefile.common
@@ -153,7 +153,7 @@ quadlet:
 
 .PHONY: run
 run:
-	podman run -it -p $(PORT):$(PORT) -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 ${APP_IMAGE}
+	podman run -it -p $(PORT):$(PORT) -e MODEL_ENDPOINT=http://10.88.0.1:8001 ${APP_IMAGE}
 
 .PHONY: clean
 clean:
@@ -6,7 +6,7 @@
 import io
 
 st.title("🕵️‍♀️ Object Detection")
-endpoint =os.getenv("MODEL_ENDPOINT", default = "http://0.0.0.0:8000/detection")
+endpoint =os.getenv("MODEL_ENDPOINT", default = "http://0.0.0.0:8000")
 headers = {"accept": "application/json",
            "Content-Type": "application/json"}
 image = st.file_uploader("Upload Image")
@@ -27,7 +27,7 @@
     img_bytes = bytes_io.getvalue()
    b64_image = base64.b64encode(img_bytes).decode('utf-8')
    data = {'image': b64_image}
-    response = requests.post(endpoint, headers=headers,json=data, verify=False)
+    response = requests.post(f'{endpoint}/detection', headers=headers,json=data, verify=False)
    # parse response and display outputs
    response_json = response.json()
    image = response_json["image"]
2 changes: 1 addition & 1 deletion recipes/multimodal/image_understanding/README.md
@@ -30,7 +30,7 @@ podman run -it -p 8001:8001 -v <LOCAL_PATH>/locallm/models:/locallm/models:Z -e
 ### Run AI Application Image Locally
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 image_understanding
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 image_understanding
 ```
 
 Interact with the application from your local browser at `localhost:8501`. You can upload an image file from your host machine and the app will provide a natural language description of the image.
@@ -3,15 +3,15 @@
 import base64
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          default="http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          default="http://localhost:8001")
 
 st.title("📷 Image Analysis")
 image = st.file_uploader("Upload Image:",)
 top_container = st.container(border=True)
 if image is not None:
     b64_image = base64.b64encode(image.read()).decode("utf-8")
-    client = OpenAI(base_url=model_service,
+    client = OpenAI(base_url=f'{model_service}/v1',
                     api_key="sk-xxx")
     with st.spinner("Analyzing Image..."):
         st.image(image)
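
The image-analysis client builds its OpenAI-compatible base URL by appending `/v1` to `MODEL_ENDPOINT`. As a hedged illustration of how such a client might then be used for a vision request, with the message shape taken from the OpenAI chat API and the model name purely hypothetical:

```python
import os
from openai import OpenAI

endpoint = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
client = OpenAI(base_url=f"{endpoint}/v1", api_key="sk-xxx")  # key ignored by local servers

b64_image = "..."  # base64-encoded upload, as produced in the diff above

# Hypothetical vision call; "llava" stands in for whatever model is served.
response = client.chat.completions.create(
    model="llava",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{b64_image}"}},
        ],
    }],
)
print(response.choices[0].message.content)
```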
2 changes: 1 addition & 1 deletion recipes/natural_language_processing/chatbot/README.md
@@ -88,7 +88,7 @@ If you wish to run this as a codesnippet instead of a make command checkout the
 
 ### Build and Deploy the AI Application
 
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To build and deploy the AI application use the following:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. To build and deploy the AI application use the following:
 
 ```bash
 # Run this from the current directory (path recipes/natural_language_processing/chatbot from repo containers/ai-lab-recipes)
5 changes: 3 additions & 2 deletions recipes/natural_language_processing/chatbot/app/chatbot_ui.py
@@ -9,8 +9,9 @@
 import json
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
@@ -57,4 +57,4 @@
     ports:
     - 8501:8501
     env:
-      MODEL_SERVICE_ENDPOINT: http://10.88.0.1:8001/v1
+      MODEL_ENDPOINT: http://10.88.0.1:8001
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: chatbot-inference
     ports:
@@ -42,7 +42,7 @@
 CB = pytest_container.Container(
     url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
     extra_environment_variables={
-        "MODEL_SERVICE_ENDPOINT": "http://10.88.0.1:8001/v1"
+        "MODEL_ENDPOINT": "http://10.88.0.1:8001"
     },
     forwarded_ports=[
         pytest_container.PortForwarding(
4 changes: 2 additions & 2 deletions recipes/natural_language_processing/codegen/README.md
@@ -82,10 +82,10 @@ podman build -t codegen app
 ```
 ### Deploy the AI Application
 
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 codegen
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 codegen
 ```
 
 ### Interact with the AI Application
@@ -8,7 +8,8 @@
 import requests
 import time
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT", "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: codegen-inference
     ports:
4 changes: 2 additions & 2 deletions recipes/natural_language_processing/rag/README.md
@@ -133,15 +133,15 @@ make APP_IMAGE=rag build
 
 ### Deploy the AI Application
 
-Make sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
+Make sure the Model Service and the Vector Database are up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The same goes for the Vector Database. Make sure the `VECTORDB_HOST` is correctly set to `10.88.0.1` for communication within the Podman virtual machine.
 
 There also needs to be a volume mount into the `models/` directory so that the application can access the embedding model as well as a volume mount into the `data/` directory where it can pull documents from to populate the Vector Database.
 
 The following Podman command can be used to run your AI Application:
 
 ```bash
 podman run --rm -it -p 8501:8501 \
-    -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 \
+    -e MODEL_ENDPOINT=http://10.88.0.1:8001 \
     -e VECTORDB_HOST=10.88.0.1 \
     -v Local/path/to/locallm/models/:/rag/models \
     rag
3 changes: 2 additions & 1 deletion recipes/natural_language_processing/rag/app/rag_app.py
@@ -18,7 +18,8 @@
 import argparse
 import pathlib
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT","http://0.0.0.0:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT","http://0.0.0.0:8001/v1")
+model_service = f"{model_service}/v1"
 chunk_size = os.getenv("CHUNK_SIZE", 150)
 embedding_model = os.getenv("EMBEDDING_MODEL","BAAI/bge-base-en-v1.5")
 vdb_host = os.getenv("VECTORDB_HOST", "0.0.0.0")
4 changes: 2 additions & 2 deletions recipes/natural_language_processing/rag/quadlet/rag.yaml
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: rag-inference
     ports:
4 changes: 2 additions & 2 deletions recipes/natural_language_processing/summarizer/README.md
@@ -110,10 +110,10 @@ make APP_IMAGE=summarizer build
 
 ### Deploy the AI Application
 
-Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_SERVICE_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
+Make sure the Model Service is up and running before starting this container image. When starting the AI Application container image we need to direct it to the correct `MODEL_ENDPOINT`. This could be any appropriately hosted Model Service (running locally or in the cloud) using an OpenAI compatible API. In our case the Model Service is running inside the Podman machine so we need to provide it with the appropriate address `10.88.0.1`. The following Podman command can be used to run your AI Application:
 
 ```bash
-podman run --rm -it -p 8501:8501 -e MODEL_SERVICE_ENDPOINT=http://10.88.0.1:8001/v1 summarizer
+podman run --rm -it -p 8501:8501 -e MODEL_ENDPOINT=http://10.88.0.1:8001 summarizer
 ```
 
 ### Interact with the AI Application
@@ -7,8 +7,9 @@
 import time
 import os
 
-model_service = os.getenv("MODEL_SERVICE_ENDPOINT",
-                          "http://localhost:8001/v1")
+model_service = os.getenv("MODEL_ENDPOINT",
+                          "http://localhost:8001")
+model_service = f"{model_service}/v1"
 
 @st.cache_resource(show_spinner=False)
 def checking_model_service():
@@ -14,8 +14,8 @@ spec:
       mountPath: /shared
   containers:
   - env:
-    - name: MODEL_SERVICE_ENDPOINT
-      value: http://0.0.0.0:8001/v1
+    - name: MODEL_ENDPOINT
+      value: http://0.0.0.0:8001
     image: APP_IMAGE
     name: summarizer-inference
     ports:
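
Several of the touched apps gate startup on a cached `checking_model_service()` helper whose body is outside this diff; under the new convention such a check needs only the base `MODEL_ENDPOINT`. A hypothetical sketch, where the `/v1/models` probe and the retry loop are assumptions rather than code from this commit:

```python
import os
import time
import requests

def wait_for_model_service(timeout: int = 60) -> str:
    """Poll the model service until it responds, then return its base URL."""
    endpoint = os.getenv("MODEL_ENDPOINT", "http://localhost:8001")
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # OpenAI-compatible servers typically expose /v1/models (assumed).
            if requests.get(f"{endpoint}/v1/models", timeout=2).ok:
                return endpoint
        except requests.exceptions.RequestException:
            pass  # service not up yet; retry until the deadline
        time.sleep(1)
    raise TimeoutError(f"Model service at {endpoint} did not become ready")
```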
