This repository has been archived by the owner on Mar 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 736
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
206 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Azure Computer Vision Tool | ||
|
||
This tool connects to an Azure account and allows an Agent to perform a variety of computer vision tasks on image URLs. | ||
|
||
You will need to set up an API key and a Computer Vision instance using Azure. Learn more here: https://azure.microsoft.com/en-ca/products/cognitive-services/computer-vision | ||
|
||
## Usage | ||
|
||
This tool has more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/azure_vision.ipynb) | ||
|
||
Here's an example usage of the AzureCVToolSpec. | ||
|
||
```python | ||
from llama_hub.tools.azure_cv.base import AzureCVToolSpec | ||
from llama_index.agent import OpenAIAgent | ||
|
||
tool_spec = AzureCVToolSpec(api_key='your-key', resource='your-resource') | ||
|
||
agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) | ||
|
||
agent.chat('caption this image and tell me what tags are in it https://portal.vision.cognitive.azure.com/dist/assets/ImageCaptioningSample1-bbe41ac5.png') | ||
agent.chat('caption this image and read any text https://portal.vision.cognitive.azure.com/dist/assets/OCR3-4782f088.jpg') | ||
``` | ||
|
||
`process_image`: Send an image for computer vision classification of objects, tags, captioning or OCR. | ||
|
||
This loader is designed to be used as a way to load data as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
## init |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
"""Azure Cognitive Vision tool spec.""" | ||
|
||
from llama_index.tools.tool_spec.base import BaseToolSpec | ||
from typing import Optional, List | ||
import requests | ||
import urllib.parse | ||
|
||
CV_URL_TMPL = "https://{resource}.cognitiveservices.azure.com/computervision/imageanalysis:analyze" | ||
|
||
class AzureCVToolSpec(BaseToolSpec):
    """Azure Cognitive Vision tool spec.

    Wraps the Azure Computer Vision Image Analysis REST endpoint so an
    agent can caption, tag, OCR ("read"), or detect objects in an image
    given its URL.
    """

    spec_functions = ["process_image"]

    def __init__(
        self,
        resource: str,
        api_key: str,
        language: Optional[str] = 'en',
        api_version: Optional[str] = '2023-04-01-preview'
    ) -> None:
        """Initialize with Azure resource details.

        Args:
            resource (str): Name of the Azure Cognitive Services resource;
                interpolated into the endpoint hostname.
            api_key (str): Value sent as the Ocp-Apim-Subscription-Key header.
            language (Optional[str]): Language for returned text. Defaults to 'en'.
            api_version (Optional[str]): REST API version query parameter.
        """
        self.api_key = api_key
        self.cv_url = CV_URL_TMPL.format(resource=resource)
        self.language = language
        self.api_version = api_version

    def process_image(self, url: str, features: List[str]):
        """
        Send an image URL to Azure Computer Vision and return the analysis.

        You can use the features argument to configure what text you want returned.

        Args:
            url (str): The url for the image to process.
            features (List[str]): Instructions on how to process the image.
                Valid keys are tags, objects, read, caption.

        Returns:
            dict: Parsed JSON response from Azure. When 'read' (OCR) is
            requested, the 'readResult' entry is flattened to just the
            recognized text content.
        """
        response = requests.post(
            f'{self.cv_url}?features={",".join(features)}&language={self.language}&api-version={self.api_version}',
            headers={'Ocp-Apim-Subscription-Key': self.api_key},
            json={'url': url},
            # Without a timeout, requests can block indefinitely on an
            # unresponsive endpoint.
            timeout=60,
        )
        response_json = response.json()
        # Flatten the OCR payload to the recognized text. Guard on key
        # presence: Azure error responses carry no 'readResult', and the
        # original unguarded access raised KeyError instead of returning
        # the error body to the agent.
        if 'read' in features and 'readResult' in response_json:
            response_json['readResult'] = response_json['readResult']['content']

        return response_json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "34e66a0e-e41d-48e0-8a1f-b82b5ea18ad1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Setup OpenAI Agent\n", | ||
"import openai\n", | ||
"openai.api_key = 'sk-your-key'\n", | ||
"from llama_index.agent import OpenAIAgent\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "eb11c1a6-1540-4538-8d1a-bb8b265fdb64", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"=== Calling Function ===\n", | ||
"Calling function: process_image with args: {\n", | ||
" \"url\": \"https://portal.vision.cognitive.azure.com/dist/assets/ImageCaptioningSample1-bbe41ac5.png\",\n", | ||
" \"features\": [\"caption\", \"tags\"]\n", | ||
"}\n", | ||
"Got output: {'captionResult': {'text': 'a group of cows grazing in a field', 'confidence': 0.861102819442749}, 'modelVersion': '2023-02-01-preview', 'metadata': {'width': 375, 'height': 250}, 'tagsResult': {'values': [{'name': 'grass', 'confidence': 0.9988070130348206}, {'name': 'outdoor', 'confidence': 0.9931809306144714}, {'name': 'field', 'confidence': 0.9857797622680664}, {'name': 'animal', 'confidence': 0.9708199501037598}, {'name': 'livestock', 'confidence': 0.965355634689331}, {'name': 'cow', 'confidence': 0.954204797744751}, {'name': 'herd', 'confidence': 0.9496941566467285}, {'name': 'ranch', 'confidence': 0.9301772117614746}, {'name': 'mammal', 'confidence': 0.9299676418304443}, {'name': 'dairy cow', 'confidence': 0.9291023015975952}, {'name': 'bovine', 'confidence': 0.9199285507202148}, {'name': 'herding', 'confidence': 0.8967740535736084}, {'name': 'fodder', 'confidence': 0.8817697763442993}, {'name': 'grassland', 'confidence': 0.8811800479888916}, {'name': 'standing', 'confidence': 0.8034635782241821}, {'name': 'pasture', 'confidence': 0.6391813158988953}, {'name': 'grazing', 'confidence': 0.6333702802658081}, {'name': 'farm', 'confidence': 0.6285721063613892}, {'name': 'cattle', 'confidence': 0.5256974697113037}, {'name': 'landscape', 'confidence': 0.4293440878391266}]}}\n", | ||
"========================\n", | ||
"The caption for the image is \"a group of cows grazing in a field\". \n", | ||
"\n", | ||
"The tags in the image include: grass, outdoor, field, animal, livestock, cow, herd, ranch, mammal, dairy cow, bovine, herding, fodder, grassland, standing, pasture, grazing, farm, cattle, and landscape.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from llama_hub.tools.azure_cv.base import AzureCVToolSpec\n", | ||
"\n", | ||
"cv_tool = AzureCVToolSpec(\n", | ||
" api_key='your-key',\n", | ||
" resource='your-resource'\n", | ||
")\n", | ||
"\n", | ||
"agent = OpenAIAgent.from_tools(\n", | ||
" cv_tool.to_tool_list(),\n", | ||
" verbose=True,\n", | ||
")\n", | ||
"\n", | ||
"print(agent.chat('caption this image and tell me what tags are in it https://portal.vision.cognitive.azure.com/dist/assets/ImageCaptioningSample1-bbe41ac5.png'))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "d6be81e1-41a6-48b6-920b-b225c0f16a9b", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"=== Calling Function ===\n", | ||
"Calling function: process_image with args: {\n", | ||
" \"url\": \"https://portal.vision.cognitive.azure.com/dist/assets/OCR3-4782f088.jpg\",\n", | ||
" \"features\": [\"caption\", \"read\"]\n", | ||
"}\n", | ||
"Got output: {'captionResult': {'text': 'close-up of a nutrition label', 'confidence': 0.822258710861206}, 'readResult': 'Nutrition Facts Amount Per Serving\\nServing size: 1 bar (40g)\\nServing Per Package: 4\\nTotal Fat 13g\\nSaturated Fat 1.5g\\nAmount Per Serving\\nTrans Fat 0g\\ncalories 190\\nCholesterol 0mg\\nories from Fat 110\\nSodium 20mg\\nnt Daily Values are based on\\nVitamin A 50\\ncalorie diet', 'modelVersion': '2023-02-01-preview', 'metadata': {'width': 1254, 'height': 704}}\n", | ||
"========================\n", | ||
"The caption for the image is \"close-up of a nutrition label\".\n", | ||
"\n", | ||
"The text from the image is as follows:\n", | ||
"\n", | ||
"\"Nutrition Facts Amount Per Serving\n", | ||
"Serving size: 1 bar (40g)\n", | ||
"Serving Per Package: 4\n", | ||
"Total Fat 13g\n", | ||
"Saturated Fat 1.5g\n", | ||
"Amount Per Serving\n", | ||
"Trans Fat 0g\n", | ||
"calories 190\n", | ||
"Cholesterol 0mg\n", | ||
"ories from Fat 110\n", | ||
"Sodium 20mg\n", | ||
"nt Daily Values are based on\n", | ||
"Vitamin A 50\n", | ||
"calorie diet\"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(agent.chat('caption this image and read any text https://portal.vision.cognitive.azure.com/dist/assets/OCR3-4782f088.jpg'))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "708cc1e0-199b-48a6-a88b-17af19b3f518", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |