Add Mistral Pixtral multimodal support #1260

Open: wants to merge 13 commits into base: main
7 changes: 7 additions & 0 deletions .env.tests
@@ -0,0 +1,7 @@
# Mistral API Configuration
MISTRAL_API_KEY=your_mistral_api_key_here
MISTRAL_BASE_URL=https://api.mistral.ai/v1

# Other API keys for reference
OPENAI_API_KEY=your_openai_api_key_here
ANTHROPIC_API_KEY=your_anthropic_api_key_here
2 changes: 2 additions & 0 deletions .ruff.toml
@@ -46,6 +46,8 @@ ignore = [
# mutable defaults
"B006",
"B018",
# ignore union syntax warnings for Python 3.9 compatibility
"UP007",
]

unfixable = [
64 changes: 64 additions & 0 deletions README.md
@@ -326,6 +326,70 @@ assert resp.name == "Jason"
assert resp.age == 25
```

### Using Mistral Models with Multimodal Support

Make sure to install `mistralai` and set the `MISTRAL_API_KEY` environment variable with `export MISTRAL_API_KEY=<YOUR_MISTRAL_API_KEY>`.

```bash
pip install mistralai
```

```python
import instructor
from mistralai import Mistral
from instructor.multimodal import Image
from pydantic import BaseModel, Field


class ImageAnalysis(BaseModel):
    description: str = Field(..., description="A detailed description of the image")
    objects: list[str] = Field(..., description="List of objects identified in the image")
    colors: list[str] = Field(..., description="List of dominant colors in the image")


# Initialize the Mistral client with Instructor
client = instructor.from_mistral(
    Mistral(api_key="your-api-key"),
    mode=instructor.Mode.MISTRAL_JSON,
)

# Analyze an image using the Pixtral model
analysis = client.chat.completions.create(
    model="pixtral-12b-2409",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image? List the objects and colors."},
                Image.from_url("https://example.com/image.jpg"),  # You can also use Image.from_path()
            ],
        }
    ],
    response_model=ImageAnalysis,
)

print(f"Description: {analysis.description}")
print(f"Objects: {', '.join(analysis.objects)}")
print(f"Colors: {', '.join(analysis.colors)}")

# Example with multiple images
images = [
    Image.from_url("https://example.com/image1.jpg"),
    Image.from_url("https://example.com/image2.jpg"),
]

analysis = client.chat.completions.create(
    model="pixtral-12b-2409",
    messages=[
        {
            "role": "user",
            "content": ["Describe these images"] + images,
        }
    ],
    response_model=ImageAnalysis,
)
```

## Types are inferred correctly

This was the dream of Instructor, but due to the patching of OpenAI it wasn't possible to get typing to work well. Now, with the new client, typing works as expected. We've also added a few `create_*` methods to make it easier to create iterables and partials, and to access the original completion.
12 changes: 12 additions & 0 deletions conftest.py
@@ -0,0 +1,12 @@
import pytest # noqa: F401
from _pytest.config import Config

def pytest_configure(config: Config) -> None:
    config.addinivalue_line(
        "markers",
        "requires_openai: mark test as requiring OpenAI API credentials",
    )
    config.addinivalue_line(
        "markers",
        "requires_mistral: mark test as requiring Mistral API credentials",
    )
79 changes: 79 additions & 0 deletions docs/examples/bulk_classification.md
@@ -268,6 +268,85 @@ async def tag_request(request: TagRequest) -> TagResponse:
predictions=predictions,
)

## Working with DataFrames

When working with large datasets, it's often convenient to use pandas DataFrames. Here's how you can integrate this classification system with pandas:

```python
import pandas as pd

async def classify_dataframe(df: pd.DataFrame, text_column: str, tags: List[TagWithInstructions]) -> pd.DataFrame:
    request = TagRequest(
        texts=df[text_column].tolist(),
        tags=tags,
    )
    response = await tag_request(request)
    df['predicted_tag'] = [pred.name for pred in response.predictions]
    return df
```

## Streaming Responses

For real-time processing, you can stream responses as they become available:

```python
async def stream_classifications(texts: List[str], tags: List[TagWithInstructions]):
    async def process_single(text: str):
        prediction = await tag_single_request(text, tags)
        return {"text": text, "prediction": prediction}

    tasks = [process_single(text) for text in texts]
    for completed in asyncio.as_completed(tasks):
        yield await completed
```

## Single-Label Classification

For simple classification tasks where each text belongs to exactly one category:

```python
async def classify_single_label(text: str, tags: List[TagWithInstructions]) -> Tag:
    return await tag_single_request(text, tags)
```

## Multi-Label Classification

For cases where texts might belong to multiple categories:

```python
class MultiLabelTag(BaseModel):
    tags: List[Tag]

    @model_validator(mode="after")
    def validate_tags(self, info: ValidationInfo):
        context = info.context
        if context and context.get("tags"):
            valid_tags = context["tags"]
            valid_ids = {t.id for t in valid_tags}
            valid_names = {t.name for t in valid_tags}
            for tag in self.tags:
                assert tag.id in valid_ids, f"Tag ID {tag.id} not found"
                assert tag.name in valid_names, f"Tag name {tag.name} not found"
        return self


async def classify_multi_label(text: str, tags: List[TagWithInstructions]) -> List[Tag]:
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a multi-label classification system."},
            {"role": "user", "content": f"Classify this text into multiple categories: {text}"},
            {"role": "user", "content": f"Available categories: {', '.join(t.name for t in tags)}"},
        ],
        response_model=MultiLabelTag,
        validation_context={"tags": tags},
    )
    return response.tags
```

## Example Usage

```python
# PLACEHOLDER: existing example code
```


# <%hide%>
tags = [
2 changes: 1 addition & 1 deletion docs/examples/index.md
@@ -37,7 +37,7 @@ Welcome to our collection of cookbooks showcasing the power of structured output
26. [Action Items Extraction](action_items.md): Extract structured action items and tasks from text content.
27. [Batch Classification with LangSmith](batch_classification_langsmith.md): Efficiently classify content in batches using LangSmith integration.
28. [Contact Information Extraction](extract_contact_info.md): Extract structured contact details from unstructured text.
29. [Knowledge Graph Building](building_knowledge_graph.md): Create and manipulate knowledge graphs from textual data.
29. [Knowledge Graph Building](building_knowledge_graphs.md): Create and manipulate knowledge graphs from textual data.
30. [Multiple Classification Tasks](multiple_classification.md): Handle multiple classification categories simultaneously.
31. [Pandas DataFrame Integration](pandas_df.md): Work with structured data using Pandas DataFrames.
32. [Partial Response Streaming](partial_streaming.md): Stream partial results for real-time processing.
172 changes: 168 additions & 4 deletions docs/integrations/mistral.md
@@ -2,21 +2,24 @@
draft: False
date: 2024-02-26
title: "Structured outputs with Mistral, a complete guide w/ instructor"
description: "Complete guide to using Instructor with Mistral. Learn how to generate structured, type-safe outputs with Mistral."
description: "Complete guide to using Instructor with Mistral. Learn how to generate structured, type-safe outputs with Mistral, including multimodal support with Pixtral."
slug: mistral
tags:
- patching
- multimodal
authors:
- shanktt
---

# Structured outputs with Mistral, a complete guide w/ instructor

This guide demonstrates how to use Mistral with Instructor to generate structured outputs. You'll learn how to use function calling with Mistral Large to create type-safe responses.
This guide demonstrates how to use Mistral with Instructor to generate structured outputs. You'll learn how to use function calling with Mistral Large to create type-safe responses, including support for multimodal inputs with Pixtral.

Mistral Large is the flagship model from Mistral AI, supporting 32k context windows and functional calling abilities. Mistral Large's addition of [function calling](https://docs.mistral.ai/guides/function-calling/) makes it possible to obtain structured outputs using JSON schema.
Mistral Large is the flagship model from Mistral AI, supporting 32k context windows and function calling abilities. Mistral Large's addition of [function calling](https://docs.mistral.ai/guides/function-calling/) makes it possible to obtain structured outputs using JSON schema. With Pixtral, you can now also process images alongside text inputs.

By the end of this blog post, you will learn how to effectively utilize Instructor with Mistral Large.
By the end of this blog post, you will learn how to effectively utilize Instructor with Mistral Large and Pixtral for both text and image processing tasks.

## Text Processing with Mistral Large

```python
import os
@@ -47,5 +50,166 @@ resp = instructor_client.messages.create(
)

print(resp)
```

## Multimodal Processing with Pixtral

```python
import os

from mistralai import Mistral
from pydantic import BaseModel

from instructor import from_mistral, Mode
from instructor.multimodal import Image


class ImageDescription(BaseModel):
    description: str
    objects: list[str]
    colors: list[str]


# Initialize the client with the Pixtral model
client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))
instructor_client = from_mistral(
    client=client,
    model="pixtral-12b-2409",  # Use Pixtral for multimodal capabilities
    mode=Mode.MISTRAL_JSON,
    max_tokens=1000,
)

# Load and process an image
image = Image.from_path("path/to/your/image.jpg")
resp = instructor_client.messages.create(
    response_model=ImageDescription,
    messages=[
        {
            "role": "user",
            "content": [
                "Describe this image in detail, including the main objects and colors present.",
                image,
            ],
        }
    ],
    temperature=0,
)

print(resp)
```

## Image Requirements and Validation

When working with images in Pixtral:
- Supported formats: JPEG, PNG, GIF, WEBP
- Maximum image size: 20MB
- Images larger than the size limit will be automatically resized
- Base64 and file paths are supported input formats

The `Image` class handles all validation and preprocessing automatically, ensuring your images meet Mistral's requirements.
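If you want to fail fast before sending a request, a local pre-check against these limits can be sketched with the standard library. This is a minimal sketch mirroring the list above; `precheck_image` and the constants are illustrative helpers, not part of Instructor:

```python
from pathlib import Path

# Limits taken from the documented requirements above
SUPPORTED_SUFFIXES = {".jpeg", ".jpg", ".png", ".gif", ".webp"}
MAX_IMAGE_BYTES = 20 * 1024 * 1024  # 20MB

def precheck_image(path: str) -> None:
    """Raise ValueError if the file clearly violates the documented limits."""
    p = Path(path)
    if p.suffix.lower() not in SUPPORTED_SUFFIXES:
        raise ValueError(f"Unsupported image format: {p.suffix}")
    if p.stat().st_size > MAX_IMAGE_BYTES:
        raise ValueError(f"Image exceeds {MAX_IMAGE_BYTES} bytes")
```

Run this before `Image.from_path(...)` to surface obvious problems without a round trip to the API.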

## Async Implementation

```python
import asyncio
import os

from mistralai import Mistral
from pydantic import BaseModel

from instructor import from_mistral, Mode


class UserDetails(BaseModel):
    name: str
    age: int


# Initialize the client in async mode
client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))
instructor_client = from_mistral(
    client=client,
    model="mistral-large-latest",
    mode=Mode.MISTRAL_TOOLS,
    max_tokens=1000,
    use_async=True,
)


async def get_user_details(text: str) -> UserDetails:
    return await instructor_client.messages.create(
        response_model=UserDetails,
        messages=[{"role": "user", "content": text}],
        temperature=0,
    )


# Usage
user = asyncio.run(get_user_details("Jason is 10"))
print(user)
```

## Streaming Support

Mistral supports streaming responses, which can be useful for real-time processing:

```python
from typing import AsyncIterator

from pydantic import BaseModel


class PartialResponse(BaseModel):
    partial_text: str


async def stream_response(text: str) -> AsyncIterator[PartialResponse]:
    # create_partial yields progressively more complete versions of the model
    async for partial in instructor_client.messages.create_partial(
        response_model=PartialResponse,
        messages=[{"role": "user", "content": text}],
        temperature=0,
    ):
        yield partial


# Usage, from within an async context:
#     async for chunk in stream_response("Describe the weather"):
#         print(chunk.partial_text)
```

## Using Instructor Hooks

Hooks allow you to add custom processing logic:

```python
# Log every raw completion returned by the API
def log_response(response, *args, **kwargs):
    print(f"Model response: {response}")


instructor_client.on("completion:response", log_response)

# The hook now fires automatically on every call
result = instructor_client.messages.create(
    response_model=UserDetails,
    messages=[{"role": "user", "content": "Jason is 10"}],
    temperature=0,
)
```

## Best Practices

When working with Mistral and Instructor:

1. **API Key Management**
- Use environment variables for API keys
- Consider using a .env file for development

2. **Model Selection**
   - Use `mistral-large-latest` for complex tasks
   - Use `mistral-medium` or `mistral-small` for simpler tasks
   - Use Pixtral for multimodal applications

3. **Error Handling**
- Implement proper try-except blocks
- Handle rate limits and token limits
- Use validation_context to prevent hallucinations

4. **Performance Optimization**
- Use async implementations for concurrent requests
- Implement streaming for long responses
- Cache responses when appropriate
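As a concrete illustration of the error-handling advice above, a minimal retry wrapper with exponential backoff might look like this. It is a sketch: `with_retries` is an assumed helper name, and in production you would catch the SDK's specific rate-limit exception rather than bare `Exception`:

```python
import time

def with_retries(fn, max_attempts: int = 3, base_delay: float = 1.0):
    """Call fn(), retrying on failure with exponential backoff."""
    for attempt in range(max_attempts):
        try:
            return fn()
        except Exception:
            if attempt == max_attempts - 1:
                raise  # out of attempts: propagate the last error
            time.sleep(base_delay * (2 ** attempt))  # 1s, 2s, 4s, ...
```

You would then wrap a call as `with_retries(lambda: instructor_client.messages.create(...))`.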

## Related Resources

- [Mistral AI Documentation](https://docs.mistral.ai/)
- [Instructor GitHub Repository](https://github.com/jxnl/instructor/)
- [Pydantic Documentation](https://docs.pydantic.dev/)
- [AsyncIO in Python](https://docs.python.org/3/library/asyncio.html)