Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Adding MultiOn browsing tool #481

Merged
merged 3 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 9 additions & 18 deletions llama_hub/tools/gmail/base.py
Original file line number Diff line number Diff line change
def load_data(self) -> List[Document]:
    """Load emails from the user's account.

    Returns:
        The Documents produced by ``search_messages`` for the query stored
        on this instance (``self.query``).
    """
    self._cache_service()

    # search_messages takes the query as a required argument, so the
    # instance's configured query must be forwarded explicitly; calling it
    # with no arguments raises TypeError.
    return self.search_messages(query=self.query)

def _get_credentials(self) -> Any:
"""Get valid user credentials from storage.
Expand Down Expand Up @@ -87,10 +79,9 @@ def _get_credentials(self) -> Any:

return creds

def search_messages(self):
query = self.query

max_results = self.max_results
def search_messages(self, query: str, max_results: Optional[int] = None):
if not max_results:
max_results = self.max_results

self._cache_service()

Expand All @@ -102,17 +93,17 @@ def search_messages(self):
.get("messages", [])
)

result = []
results = []
try:
for message in messages:
message_data = self.get_message_data(message)
if not message_data:
continue
result.append(message_data)
text = message_data.pop("body")
extra_info = message_data
results.append(Document(text=text, extra_info=extra_info))
except Exception as e:
raise Exception("Can't get message data" + str(e))

return result
return results

def get_message_data(self, message):
message_id = message["id"]
Expand Down
4 changes: 4 additions & 0 deletions llama_hub/tools/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@
"id": "tools/metaphor",
"author": "ajhofmann"
},
"MultionToolSpec": {
"id": "tools/multion",
"author": "ajhofmann"
},
"NotionToolSpec": {
"id": "tools/notion",
"author": "jerryjliu"
Expand Down
29 changes: 29 additions & 0 deletions llama_hub/tools/multion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# MultiOn Tool

This tool connects to [MultiOn](https://www.multion.ai/) to enable your agent to easily
connect to the internet through your Chrome Web browser and act on your behalf

You will need to have the MultiOn chrome extension installed and a MultiOn account
to use this integration

## Usage

This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/multion.ipynb)

Here's an example usage of the MultionToolSpec.

```python
from llama_hub.tools.multion.base import MultionToolSpec
from llama_index.agent import OpenAIAgent

multion_tool = MultionToolSpec()

agent = OpenAIAgent.from_tools(multion_tool.to_tool_list())

agent.chat('Can you read the latest tweets from my followers')
agent.chat('Whats the next thing on my google calendar?')
```

`browse`: The core function that takes natural language instructions to pass to the web browser to execute

This loader is designed to be used as a way to load data as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
1 change: 1 addition & 0 deletions llama_hub/tools/multion/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
## init
57 changes: 57 additions & 0 deletions llama_hub/tools/multion/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Multion tool spec."""

from llama_index.tools.tool_spec.base import BaseToolSpec
from typing import Optional, List
from llama_index.readers.schema.base import Document
import base64
from io import BytesIO

class MultionToolSpec(BaseToolSpec):
    """Tool spec that lets an agent drive a web browser through MultiOn.

    Exposes a single tool function, ``browse``. The first ``browse`` call opens
    a new MultiOn session; later calls update that same session, identified by
    the tab id remembered in ``self.last_tab``.
    """

    spec_functions = ["browse"]

    def __init__(self, token_file: Optional[str] = "multion_token.txt") -> None:
        """Log in to MultiOn and start with no active browser tab.

        Args:
            token_file: Kept for interface compatibility. It is not currently
                forwarded to ``multion.login()``, which is called with no
                arguments.
        """
        # Imported here rather than at module level, so importing this module
        # does not itself require the multion package.
        import multion

        multion.login()
        # Tab id of the active MultiOn session; None until the first browse.
        self.last_tab = None

    def browse(self, instruction: str):
        """
        Browse the web using Multion
        Multion gives the ability for LLMs to control web browsers using natural language instructions

        You may have to repeat the instruction through multiple steps or update your instruction to get to
        the final desired state. If the status is 'CONTINUE', reissue the same instruction to continue execution

        args:
            instruction (str): The detailed and specific natural language instruction for web browsing
        """
        # NOTE: the docstring above is deliberately left in its original
        # wording (typo aside) — presumably it is surfaced to the agent as the
        # tool description; confirm before rewording it.
        import multion

        if self.last_tab:
            # Continue in the tab opened by an earlier call.
            session = multion.update_session(self.last_tab, {"input": instruction})
        else:
            session = multion.new_session(
                {"input": instruction, "url": "https://google.com"}
            )
            # Remember the tab so follow-up instructions reuse it.
            self.last_tab = session["tabId"]

        return {
            "url": session["url"],
            "status": session["status"],
            "action_completed": session["message"],
            # The page content is recovered by running OCR on the screenshot.
            "content": self._read_screenshot(session["screenshot"]),
        }

    def _read_screenshot(self, screenshot) -> str:
        """Return the text recognised by OCR in a base64-encoded screenshot.

        Args:
            screenshot: Base64 image data, optionally prefixed with a
                ``data:<mime>;base64,`` data-URI header.

        Returns:
            The text extracted by ``pytesseract``; an empty string when no
            screenshot data was supplied.
        """
        import pytesseract
        from PIL import Image

        if not screenshot:
            # Nothing to decode; avoid failing inside Image.open on empty data.
            return ""

        # Drop any data-URI header, whatever its MIME type. Base64 payloads
        # never contain a comma, so splitting on the first one is safe.
        if screenshot.startswith("data:"):
            screenshot = screenshot.partition(",")[2]

        # Close the image once OCR is done instead of leaving it to the GC.
        with Image.open(self._bytes_to_image(screenshot)) as image:
            return pytesseract.image_to_string(image)

    def _bytes_to_image(self, img_bytes):
        """Decode base64 data into an in-memory binary stream (``BytesIO``)."""
        return BytesIO(base64.b64decode(img_bytes))
3 changes: 3 additions & 0 deletions llama_hub/tools/multion/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
multion
pytesseract
Pillow
Loading