Merge pull request containers#508 from MichaelClifford/promptfoo

initial promptfoo commit
mhdawson · May 25, 2024 · 0dad0b5 · 0dad0b5
2 parents 96f9181 + d8b4669
commit 0dad0b5
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 0 deletions.
diff --git a/eval/promptfoo/README.md b/eval/promptfoo/README.md
@@ -0,0 +1,15 @@
+# LLM Evaluation with Promptfoo
+
+We use the [Promptfoo.dev](https://www.promptfoo.dev/) project to evaluate LLMs. Build the container image from the repository root:
+
+```
+podman build -t promptfoo eval/promptfoo/base
+```
+
+Make sure you are running an LLM before starting the promptfoo container. 
+
+```
+podman run -it -p 15500:15500 -v <LOCAL/PATH/TO/>/locallm/eval/promptfoo/evals/:/promptfoo/evals:ro promptfoo
+```
+
+Go to `http://localhost:15500/setup/` to set up your tests.
diff --git a/eval/promptfoo/base/Containerfile b/eval/promptfoo/base/Containerfile
@@ -0,0 +1,8 @@
+# Container image that serves the promptfoo web viewer used for LLM evaluation.
+FROM registry.access.redhat.com/ubi9/nodejs-20-minimal:1-47.1715773198
+WORKDIR /promptfoo
+# Install promptfoo into /promptfoo/node_modules at build time so the container
+# does not have to download it on start.
+# NOTE(review): the package version is unpinned; consider pinning it for reproducible builds.
+RUN npm install promptfoo
+# Opt out of promptfoo's usage telemetry.
+ENV PROMPTFOO_DISABLE_TELEMETRY=1
+# Directory for evaluation runs; the README bind-mounts a host directory here.
+RUN mkdir evals
+ENV PROMPTFOO_CONFIG_DIR=/promptfoo/evals
+# Default example configuration shipped with the image.
+COPY promptfooconfig.yaml /promptfoo
+# Run the promptfoo installed above. Requesting "promptfoo@latest" here would make
+# npx resolve the tag from the npm registry on every start, which needs network
+# access and can run a different version than the one baked into the image.
+ENTRYPOINT [ "npx", "promptfoo", "view", "--yes" ]
diff --git a/eval/promptfoo/base/promptfooconfig.yaml b/eval/promptfoo/base/promptfooconfig.yaml
@@ -0,0 +1,31 @@
+# This configuration compares LLM output of 2 prompts x 2 GPT models across 3 test cases.
+# Learn more: https://promptfoo.dev/docs/configuration/guide
+description: 'My first eval'

+# Prompt templates; {{topic}} is filled in from each test case's `vars`.
+prompts:
+  - "Write a tweet about {{topic}}"
+  - "Write a very concise, funny tweet about {{topic}}"

+# Models under evaluation; each prompt above is run against each provider.
+providers:
+  - openai:gpt-3.5-turbo-0613
+  - openai:gpt-4

+# Test cases: each one supplies a value for {{topic}} and, optionally, assertions on the output.
+tests:
+  # This case defines no assertions.
+  - vars:
+      topic: bananas

+  - vars:
+      topic: avocado toast
+    assert:
+      # For more information on assertions, see https://promptfoo.dev/docs/configuration/expected-outputs
+      # icontains: case-insensitive check that the output contains `value`.
+      - type: icontains
+        value: avocado
+      # javascript: `value` is an expression evaluated with the model output available as `output`.
+      - type: javascript
+        value: 1 / (output.length + 1)  # prefer shorter outputs

+  - vars:
+      topic: new york city
+    assert:
+      # For more information on model-graded evals, see https://promptfoo.dev/docs/configuration/expected-outputs/model-graded
+      # llm-rubric: the output is graded against the rubric text in `value`.
+      - type: llm-rubric
+        value: ensure that the output is funny
diff --git a/eval/promptfoo/evals/README.md b/eval/promptfoo/evals/README.md
@@ -0,0 +1 @@
+Directory to store evaluation runs locally