diff --git a/examples/notebooks/beam-ml/run_inference_vllm.ipynb b/examples/notebooks/beam-ml/run_inference_vllm.ipynb
index c4803eccebff..eaa72a613bfc 100644
--- a/examples/notebooks/beam-ml/run_inference_vllm.ipynb
+++ b/examples/notebooks/beam-ml/run_inference_vllm.ipynb
@@ -100,7 +100,9 @@
       "source": [
         "!pip install openai>=1.52.2\n",
         "!pip install vllm>=0.6.3\n",
-        "!pip install apache-beam[gcp]==2.60.0\n",
+        "!pip install 'triton>=3.1.0'\n",
+        "!pip install apache-beam[gcp]==2.61.0\n",
+        "!pip install nest_asyncio # only needed in Colab\n",
         "!pip check"
       ]
     },
@@ -109,6 +111,30 @@
       "metadata": {
         "id": "3xz8zuA7vcS4"
       },
+      "source": [
+        "## Colab only: allow nested asyncio\n",
+        "\n",
+        "The vLLM model handler logic below uses asyncio to feed vLLM records. This only works if we are not already in an asyncio event loop. Most of the time, this is fine, but Colab already operates in an event loop. To work around this, we can use nest_asyncio to make things work smoothly in Colab. Do not include this step outside of Colab."
+      ],
+      "metadata": {
+        "id": "3xz8zuA7vcS3"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# This should not be necessary outside of Colab.\n",
+        "import nest_asyncio\n",
+        "nest_asyncio.apply()\n"
+      ],
+      "metadata": {
+        "id": "sUqjOzw3wpI3"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
       "source": [
         "## Run locally without Apache Beam\n",
         "\n",
@@ -315,7 +341,7 @@
         "COPY --from=apache/beam_python3.10_sdk:2.60.0 /opt/apache/beam /opt/apache/beam\n",
         "\n",
         "RUN pip install --no-cache-dir -vvv apache-beam[gcp]==2.60.0\n",
-        "RUN pip install openai>=1.52.2 vllm>=0.6.3\n",
+        "RUN pip install 'openai>=1.52.2' 'vllm>=0.6.3' 'triton>=3.1.0'\n",
         "\n",
         "RUN apt install libcairo2-dev pkg-config python3-dev -y\n",
         "RUN pip install pycairo\n",