From b90bc400ecf31a944ac8d46bf25158d521306a35 Mon Sep 17 00:00:00 2001 From: Julien Bouquillon Date: Thu, 12 Dec 2024 12:26:51 +0100 Subject: [PATCH] fix --- package.json | 1 + src/lib/albert.ts | 156 ++++++++++++++++++++++ src/pages/a-propos.mdx | 42 +----- src/pages/collection/[id].tsx | 245 +++++++++++++++++----------------- src/pages/index.tsx | 119 +++++------------ yarn.lock | 12 ++ 6 files changed, 334 insertions(+), 241 deletions(-) create mode 100644 src/lib/albert.ts diff --git a/package.json b/package.json index 36a9337..0b888b4 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "is-ci": "^3.0.1", "next": "15.1.0", "nuqs": "^2.2.3", + "p-all": "^5.0.0", "react": "18.2.0", "react-dom": "18.2.0", "react-dropzone": "^14.3.5", diff --git a/src/lib/albert.ts b/src/lib/albert.ts new file mode 100644 index 0000000..a72f13e --- /dev/null +++ b/src/lib/albert.ts @@ -0,0 +1,156 @@ +import { useEffect, useState } from "react"; + +const ALBERT_API_KEY = process.env.ALBERT_API_KEY; +const API_URL = "/api/albert"; //https://albert.api.etalab.gouv.fr"; +const LANGUAGE_MODEL = "AgentPublic/llama3-instruct-8b"; // see https://albert.api.etalab.gouv.fr/v1/models +const EMBEDDING_MODEL = "BAAI/bge-m3"; + +export const albertApi = ({ + path, + method = "POST", + body, +}: { + path: string; + method?: "POST" | "GET"; + body?: string; +}) => + fetch(`${API_URL}/v1${path}`, { + method, + headers: { + // Authorization: `Bearer ${ALBERT_API_KEY}`, + "Content-Type": "application/json", + }, + body, + }).then((r) => r.json()); + +type AlbertCollection = { + id: string; + name: string; + type: "public" | "private"; + model: "string"; // "BAAI/bge-m3"; + user: string; + description: string; + created_at: number; + documents: null | number; +}; + +export const useAlbertCollections = () => { + const [collections, setCollections] = useState([]); + const loadCollections = async () => { + const collections = await albertApi({ + path: "/collections", + method: "GET", + }); + return collections; + }; + useEffect(() => { + reloadCollections(); + }, []); + + const reloadCollections = () => { + loadCollections().then((res) => { + setCollections(res.data); + }); + }; + return { collections, reloadCollections }; +}; + +export const createCollection = ({ name, model = EMBEDDING_MODEL }) => + fetch(`${API_URL}/v1/collections`, { + method: "POST", + headers: { + Authorization: `Bearer ${ALBERT_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ name, model }), + }) + .then((r) => r.json()) + .then((d) => { + console.log(d); + return d; + }) + .then((d) => d.id); + +export const addFileToCollection = async ({ file, fileName, collectionId }) => { + const formData = new FormData(); + formData.append("file", file, fileName); + formData.append("request", JSON.stringify({ collection: collectionId })); + return fetch(`${API_URL}/v1/files`, { + method: "POST", + headers: { + Authorization: `Bearer ${ALBERT_API_KEY}`, + //"Content-Type": "multipart/form-data", + }, + body: formData, + }).then(async (r) => { + //console.log(r); + if (r.status !== 200) { + console.log("Cannot upload document", r.statusText); + return { + detail: r.statusText, + }; + } + if (r.statusText === "OK") { + let json = {}; + try { + json = await r.json(); + } catch (e) {} + if (json && json.detail) { + console.log("Cannot upload document", json.detail); + return { + detail: json.detail, + }; + } + return json; + } + return { + detail: "plop", + }; + }); +}; + +export const getSearch = ({ + collections, + query, +}: { + collections: string[]; + query: string; +}) => { + console.log({ url: `${API_URL}/v1/search`, query }); + return fetch(`${API_URL}/v1/search`, { + cache: "no-cache", + method: "POST", + headers: { + Authorization: `Bearer ${ALBERT_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ collections, k: 6, prompt: query }), + }) + .then((r) => { + console.log(r); + return r.json(); + }) + .catch((r) => { + console.error(r); + throw r; + }); +}; + +export const getPromptWithRagResults = ({ + results, + input, +}) => `Réponds à la question suivante au format markdown sans mettre de titre et en te basant sur le contexte fourni uniquement. + + ## Question: ${input} + + ## Contexte + + ${results.data + .map( + (hit) => `${hit.chunk.metadata.title} ${hit.chunk.metadata.document_name} + + ${hit.chunk.content} + ` + ) + .join("\n")} + `; diff --git a/src/pages/a-propos.mdx b/src/pages/a-propos.mdx index d1a1e17..4cd8e75 100644 --- a/src/pages/a-propos.mdx +++ b/src/pages/a-propos.mdx @@ -2,44 +2,14 @@ import Head from "next/head"; import { Alert } from "@codegouvfr/react-dsfr/Alert"; - Titre article | template + Démo albert-docs - À propos -# Page au format Markdown(X) +# albert-docs demo -C'est du texte, que l'on peut éditer facilement et enrichir avec des composants. [voir la source de cette page](https://github.com/betagouv/template/blob/main/src/pages/article.mdx?plain=1) +Cette application permet d'utiliser l'[API Albert](https://github.com/etalab-ia/albert-api) pour uploader des document et effectuer des recherches dessus. -## Du texte +## Références -🚀 Ensemble, nous pouvons rendre le web meilleur pour tous et toutes ! - -## Un composant du DSFR - -[Voir tous les composants de @codegouv/react-dsfr](https://components.react-dsfr.codegouv.studio/) - - - -## Un tableau - -| Tables | Are | Cool | -| -------- | --- | ------- | -| col 1 is | 👌 | $1600 | -| col 2 is | 🆗 | $12 | -| col 3 is | 🙀 | **$42** | - -## Une image - -![artwork](https://beta.gouv.fr/assets/home/design.svg) - -## Citation - -> exemple de citation - -## Liens - -- [Lien interne](/dsfr) -- [Lien externe](https://beta.gouv.fr) +- repo Albert: https://github.com/etalab-ia/albert-api +- doc API Albert: https://albert.api.etalab.gouv.fr/documentation diff --git a/src/pages/collection/[id].tsx b/src/pages/collection/[id].tsx index fe21753..114c193 100644 --- a/src/pages/collection/[id].tsx +++ b/src/pages/collection/[id].tsx @@ -11,6 +11,13 @@ import { useDropzone } from "react-dropzone"; import Markdown from "react-markdown"; import remarkGfm from "remark-gfm"; +import { + useAlbertCollections, + getSearch, + addFileToCollection, + getPromptWithRagResults, +} from "../../lib/albert"; + const ALBERT_API_KEY = process.env.ALBERT_API_KEY; const API_URL = "/api/albert"; //https://albert.api.etalab.gouv.fr"; const LANGUAGE_MODEL = "AgentPublic/llama3-instruct-8b"; // see https://albert.api.etalab.gouv.fr/v1/models @@ -18,6 +25,7 @@ const EMBEDDING_MODEL = "BAAI/bge-m3"; import { mdxComponents } from "../../../mdx-components"; import { cp } from "fs"; +import pAll from "p-all"; // const albertApi = ({ // path, @@ -91,28 +99,25 @@ function MyDropzone({ children, onDrop }) { ); } -export function Chat({ messages, handleSubmit, handleInputChange, input }) { +export function Chat({ + messages, + handleSubmit, + handleInputChange, + input, + isLoading, + hintText, +}) { return ( -
+
{messages.map((m) => (
{m.role === "user" ? ( - <> - - + ) : ( - <> - - + )}
))} + {isLoading &&
...
}
- fetch(`${API_URL}/v1/collections`, { - method: "POST", - headers: { - Authorization: `Bearer ${ALBERT_API_KEY}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ name, model }), - }) - .then((r) => r.json()) - .then((d) => { - console.log(d); - return d; - }) - .then((d) => d.id); - -const addFileToCollection = async ({ file, fileName, collectionId }) => { - const formData = new FormData(); - formData.append("file", file, fileName); - formData.append("request", JSON.stringify({ collection: collectionId })); - return fetch(`${API_URL}/v1/files`, { - method: "POST", - headers: { - Authorization: `Bearer ${ALBERT_API_KEY}`, - //"Content-Type": "multipart/form-data", - }, - body: formData, - }).then(async (r) => { - console.log(r); - return r.text(); - }); -}; - -const getSearch = ({ - collections, - query, -}: { - collections: string[]; - query: string; -}) => { - console.log({ url: `${API_URL}/v1/search`, query }); - return fetch(`${API_URL}/v1/search`, { - cache: "no-cache", - method: "POST", - headers: { - Authorization: `Bearer ${ALBERT_API_KEY}`, - "Content-Type": "application/json", - }, - body: JSON.stringify({ collections, k: 6, prompt: query }), - }) - .then((r) => { - console.log(r); - return r.json(); - }) - .catch((r) => { - console.error(r); - throw r; - }); -}; - const CollectionPage: NextPage<{ collectionId: string }> = ({ collectionId, }) => { const { query } = useRouter(); - //const [collections] = useAlbertCollections(); - //const [currentCollectionId, setCurrentCollectionId] = useQueryState("name"); - //console.log("Page", collections); + + // store message overrides to update messages status + const [messagesOverrides, setMessagesOverrides] = useState< + Record + >({}); + + const { collections, reloadCollections } = useAlbertCollections(); + const collection = collections.find((c) => c.id === collectionId); + + //console.log("collection", collection); + + const overrideMessage = (id: string, data: any) => { + setMessagesOverrides((o) => ({ + ...o, + [id]: data, + })); + }; console.log("router", query, collectionId); const uuid = query.id; const onDrop = async (acceptedFiles: File[]) => { @@ -221,43 +182,68 @@ const CollectionPage: NextPage<{ collectionId: string }> = ({ console.log("collectionId", collectionId); - setMessages((messages) => [ - ...messages, - { - role: "assistant", - id: "upload-" + Math.random(), - content: `Je traite les fichiers : ${acceptedFiles.map( - (f) => f.name - )}...`, - }, - ]); + await pAll( + acceptedFiles.map((file) => async () => { + const uploadId = "upload-" + Math.random(); + setMessages((messages) => [ + ...messages, + { + role: "assistant", + id: uploadId, + content: `Je traite le fichier : ${file.name}... ⏳`, + }, + ]); + const uploaded = await addFileToCollection({ + file, + fileName: file.name, + collectionId, + }); + console.log("uploaded", uploaded); - //addFileToCollection - acceptedFiles.forEach(async (file) => { - await addFileToCollection({ file, fileName: file.name, collectionId }); - }); - - setMessages((messages) => [ - ...messages, - { - role: "assistant", - id: "upload-" + Math.random(), - content: `C'est tout bon, je suis prêt :)`, - }, - ]); + if (uploaded.detail) { + overrideMessage(uploadId, { + content: `Souci avec le fichier : ${file.name}: ${uploaded.detail} ❌`, + }); + } else { + overrideMessage(uploadId, { + content: `J'ai traité le fichier : ${file.name}... ✅`, + }); + } + }) + ); + reloadCollections(); }; const myHandleSubmit = async (event) => { + event.preventDefault(); console.log("myHandleSubmit", event, input); - //getSearch; // get relevant RAG informations - const data = undefined; const searchResults = await getSearch({ collections: [collectionId], query: input, }); console.log("searchResults", searchResults); - handleSubmit(event); + + const prompt = getPromptWithRagResults({ input, results: searchResults }); + + console.log("prompt", prompt); + + const ragId = "rag-" + Math.random(); + + // we need to override the displayed message so the user dont see the real prompt + overrideMessage(ragId, { + content: input, + }); + + setTimeout(() => { + // TODO: hack to prevent non overriden message to show up + append({ + id: ragId, + role: "user", + content: prompt, + }); + setInput(""); + }); }; const { @@ -266,7 +252,9 @@ const CollectionPage: NextPage<{ collectionId: string }> = ({ handleInputChange, handleSubmit, setMessages, + setInput, isLoading, + append, } = useChat({ api: `${API_URL}/v1/chat/completions`, headers: { @@ -281,10 +269,11 @@ const CollectionPage: NextPage<{ collectionId: string }> = ({ role: "assistant", id: "initial", content: - "Bonjour, déposez des fichiers dans cette fenêtre et j'essaierai de répondre à vos questions", + "Déposez des fichiers PDF, Markdown, HTML ou JSON et j'essaierai de répondre à vos questions.", }, ], onResponse: async (message) => { + console.log("onResponse", message); const m = await message.json(); setMessages((messages) => [ @@ -297,21 +286,35 @@ const CollectionPage: NextPage<{ collectionId: string }> = ({ ]); }, }); + console.log({ + fixed: messages.map((m) => ({ + ...m, + ...(messagesOverrides[m.id]?.data || {}), + })), + messages, + messagesOverrides, + }); return ( - <> -
- -
- -
-
-
- +
+ +
+ ({ + ...m, + ...(messagesOverrides[m.id] || {}), + }))} + input={input} + handleInputChange={handleInputChange} + handleSubmit={myHandleSubmit} + hintText={ + collection && + `Albert cherchera parmi les ${collection.documents} documents de votre collection "${collection.name}"` + } + /> +
+
+
); }; diff --git a/src/pages/index.tsx b/src/pages/index.tsx index 52a7883..f4fda18 100644 --- a/src/pages/index.tsx +++ b/src/pages/index.tsx @@ -1,77 +1,20 @@ import * as React from "react"; -import Head from "next/head"; import { NextPage } from "next"; -import Stack from "@mui/material/Stack"; -import Link from "next/link"; -import { push as matomoPush } from "@socialgouv/matomo-next"; -import { Accordion } from "@codegouvfr/react-dsfr/Accordion"; -import { Alert } from "@codegouvfr/react-dsfr/Alert"; -import { Button } from "@codegouvfr/react-dsfr/Button"; import { fr } from "@codegouvfr/react-dsfr"; -import { useEffect, useState } from "react"; import Card from "@codegouvfr/react-dsfr/Card"; -const ALBERT_API_KEY = process.env.ALBERT_API_KEY; -const API_URL = "/api/albert"; //https://albert.api.etalab.gouv.fr"; -const LANGUAGE_MODEL = "AgentPublic/llama3-instruct-8b"; // see https://albert.api.etalab.gouv.fr/v1/models - -const albertApi = ({ - path, - method = "POST", - body, -}: { - path: string; - method?: "POST" | "GET"; - body?: string; -}) => - fetch(`${API_URL}/v1${path}`, { - method, - headers: { - // Authorization: `Bearer ${ALBERT_API_KEY}`, - "Content-Type": "application/json", - }, - body, - }).then((r) => r.json()); - -type AlbertCollection = { - id: string; - name: string; - type: "public" | "private"; - model: "string"; // "BAAI/bge-m3"; - user: string; - description: string; - created_at: number; - documents: null | number; -}; - -const useAlbertCollections = () => { - const [collections, setCollections] = useState([]); - const loadCollections = async () => { - const collections = await albertApi({ - path: "/collections", - method: "GET", - }); - return collections; - }; - useEffect(() => { - loadCollections().then((res) => { - setCollections(res.data); - }); - }, []); - return [collections]; -}; +import { useAlbertCollections, createCollection } from "../lib/albert"; +import { useRouter } from "next/router"; const Home: NextPage = () => { - const onClick1 = () => { - throw new Error("Hello, sentry"); - }; - const [collections] = useAlbertCollections(); + const router = useRouter(); + const { collections } = useAlbertCollections(); return ( <>
-
+

albert-docs

Intérroger rapidement des documents avec Albert
@@ -81,14 +24,14 @@ const Home: NextPage = () => { enlargeLink className={fr.cx("fr-col-4")} background - border desc={`Ajouter des fichiers et les intérroger`} linkProps={{ - href: `#`, - onClick: () => { + href: `#not`, + onClick: async () => { const name = prompt("Nom de la collection à créer ?"); if (name) { - // create collection + const collectionId = await createCollection({ name }); + router.push(`/collection/${collectionId}`); } }, }} @@ -96,24 +39,32 @@ const Home: NextPage = () => { title={"Nouveau"} titleAs="h3" /> - {collections.map((coll) => ( - - ))} + {collections + .filter((coll) => coll.type === "private") + .map((coll) => ( + + {coll.documents + ? `${coll.documents} documents` + : "Aucun document"} + + {coll.description} + + + } + linkProps={{ + href: `/collection/${coll.id}`, + }} + size="small" + title={coll.name} + titleAs="h3" + /> + ))}
); diff --git a/yarn.lock b/yarn.lock index 93a40d2..64b1946 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11348,6 +11348,13 @@ oxc-resolver@^1.10.2: "@oxc-resolver/binding-win32-arm64-msvc" "1.12.0" "@oxc-resolver/binding-win32-x64-msvc" "1.12.0" +p-all@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/p-all/-/p-all-5.0.0.tgz#3fcbdf28177a09442fc7f4ec9e252e0eed5ecfc5" + integrity sha512-pofqu/1FhCVa+78xNAptCGc9V45exFz2pvBRyIvgXkNM0Rh18Py7j8pQuSjA+zpabI46v9hRjNWmL9EAFcEbpw== + dependencies: + p-map "^6.0.0" + p-limit@^2.0.0, p-limit@^2.2.0: version "2.3.0" resolved "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz" @@ -11411,6 +11418,11 @@ p-map@^4.0.0: dependencies: aggregate-error "^3.0.0" +p-map@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/p-map/-/p-map-6.0.0.tgz#4d9c40d3171632f86c47601b709f4b4acd70fed4" + integrity sha512-T8BatKGY+k5rU+Q/GTYgrEf2r4xRMevAN5mtXc2aPc4rS1j3s+vWTaO2Wag94neXuCAUAs8cxBL9EeB5EA6diw== + p-try@^2.0.0: version "2.2.0" resolved "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz"