mosecorg · lkevinzc · Feb 26, 2024 · Feb 23, 2024
diff --git a/README.md b/README.md
@@ -189,6 +189,7 @@ More ready-to-use examples can be found in the [Example](https://mosecorg.github
 - [Request validation](https://mosecorg.github.io/mosec/examples/validate.html): validate the request with type annotation.
 - [Multiple route](https://mosecorg.github.io/mosec/examples/multi_route.html): serve multiple models in one service
 - [Embedding service](https://mosecorg.github.io/mosec/examples/embedding.html): OpenAI compatible embedding service
+- [Reranking service](https://mosecorg.github.io/mosec/examples/rerank.html): rerank a list of passages based on a query
 - [Shared memory IPC](https://mosecorg.github.io/mosec/examples/ipc.html): inter-process communication with shared memory.
 - [Customized GPU allocation](https://mosecorg.github.io/mosec/examples/env.html): deploy multiple replicas, each using different GPUs.
 - [Customized metrics](https://mosecorg.github.io/mosec/examples/metric.html): record your own metrics for monitoring.

diff --git a/docs/source/examples/index.md b/docs/source/examples/index.md
@@ -13,6 +13,7 @@ ipc
 metric
 multi_route
 pytorch
+rerank
 stable_diffusion
 validate
 ```

diff --git a/docs/source/examples/rerank.md b/docs/source/examples/rerank.md
@@ -0,0 +1,24 @@
+# Cross-Encoder model for reranking
+
+This example shows how to use a cross-encoder model to rerank a list of passages based on a query. This is useful for hybrid search, where the candidates returned by multiple retrieval methods need to be merged into a single ranking.
+
+
+## Server
+
+```bash
+python examples/rerank/server.py
+```
+
+```{include} ../../../examples/rerank/server.py
+:code: python
+```
+
+## Client
+
+```bash
+python examples/rerank/client.py
+```
+
+```{include} ../../../examples/rerank/client.py
+:code: python
+```
diff --git a/examples/rerank/client.py b/examples/rerank/client.py
@@ -0,0 +1,35 @@
+# Copyright 2024 MOSEC Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from http import HTTPStatus
+
+import httpx
+import msgspec
+
+# Example payload: one query plus the documents to be scored against it.
+req = {
+    "query": "talk is cheap, show me the code",
+    "docs": [
+        "what a nice day",
+        "life is short, use python",
+        "early bird catches the worm",
+    ],
+}
+
+# Send the msgpack-encoded payload to the inference endpoint of a local server.
+resp = httpx.post(
+    "http://127.0.0.1:8000/inference", content=msgspec.msgpack.encode(req)
+)
+# On 200 OK decode the msgpack body and print it; otherwise report the status
+# code together with the raw response text.
+if resp.status_code == HTTPStatus.OK:
+    print(f"OK: {msgspec.msgpack.decode(resp.content)}")
+else:
+    print(f"err[{resp.status_code}] {resp.text}")
diff --git a/examples/rerank/server.py b/examples/rerank/server.py
@@ -0,0 +1,51 @@
+# Copyright 2024 MOSEC Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from os import environ
+
+from msgspec import Struct
+from sentence_transformers import CrossEncoder
+
+from mosec import Server, Worker
+from mosec.mixin import TypedMsgPackMixin
+
+# Model name used when the MODEL_NAME environment variable is not set.
+DEFAULT_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+# Value passed as `num` when the Encoder worker is registered; read from the
+# WORKER_NUM environment variable, defaulting to 1.
+WORKER_NUM = int(environ.get("WORKER_NUM", 1))
+
+
+class Request(Struct, kw_only=True):
+    """Rerank request: a query and the documents to score against it."""
+
+    query: str
+    docs: list[str]
+
+
+class Response(Struct, kw_only=True):
+    """Rerank response carrying the list of computed scores."""
+
+    scores: list[float]
+
+
+class Encoder(TypedMsgPackMixin, Worker):
+    """Worker that scores query/document pairs with a cross-encoder model."""
+
+    def __init__(self):
+        """Load the cross-encoder named by the ``MODEL_NAME`` environment variable.
+
+        Falls back to ``DEFAULT_MODEL`` when the variable is not set.
+        """
+        self.model_name = environ.get("MODEL_NAME", DEFAULT_MODEL)
+        self.model = CrossEncoder(self.model_name)
+
+    def forward(self, data: Request) -> Response:
+        """Score every document in ``data.docs`` against ``data.query``.
+
+        Returns a ``Response`` with one score per document, in the order the
+        documents were given; an empty ``docs`` list yields empty ``scores``.
+        """
+        # NOTE: `CrossEncoder.predict` inspects the first input pair (at least
+        # in sentence-transformers 2.x), so an empty list would raise instead
+        # of returning no scores; answer that case without calling the model.
+        if not data.docs:
+            return Response(scores=[])
+        scores = self.model.predict([[data.query, doc] for doc in data.docs])
+        return Response(scores=scores.tolist())
+
+
+if __name__ == "__main__":
+    # Register the Encoder worker with `num=WORKER_NUM`, then run the server.
+    server = Server()
+    server.append_worker(Encoder, num=WORKER_NUM)
+    server.run()
diff --git a/examples/type_validation/client.py b/examples/type_validation/client.py
@@ -15,15 +15,17 @@
 from http import HTTPStatus
 
 import httpx
-import msgpack  # type: ignore
+import msgspec
 
 req = {
     "bin": b"hello mosec",
     "name": "type check",
 }
 
-resp = httpx.post("http://127.0.0.1:8000/inference", content=msgpack.packb(req))
+resp = httpx.post(
+    "http://127.0.0.1:8000/inference", content=msgspec.msgpack.encode(req)
+)
 if resp.status_code == HTTPStatus.OK:
-    print(f"OK: {msgpack.unpackb(resp.content)}")
+    print(f"OK: {msgspec.msgpack.decode(resp.content)}")
 else:
     print(f"err[{resp.status_code}] {resp.text}")
diff --git a/pyproject.toml b/pyproject.toml
@@ -62,6 +62,7 @@ pretty = true
 module = [
     "torch.*",
     "transformers",
+    "sentence_transformers",
     "llmspec",
     "openai",
 ]
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,6 +13,7 @@ ipc @@
     metric
     multi_route
     pytorch
+    rerank
     stable_diffusion
     validate
     ```
@@ Expand Down @@