From 1cc0f8084185d96bce96c2783c62764c21e7ca74 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Sun, 20 Aug 2023 21:49:31 -0700 Subject: [PATCH] cr --- llama_hub/tools/metaphor/base.py | 7 +- llama_hub/tools/notebooks/metaphor.ipynb | 99 ++++++++++++++---------- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/llama_hub/tools/metaphor/base.py b/llama_hub/tools/metaphor/base.py index 8243c81750..a45dc82516 100644 --- a/llama_hub/tools/metaphor/base.py +++ b/llama_hub/tools/metaphor/base.py @@ -19,11 +19,12 @@ class MetaphorToolSpec(BaseToolSpec): "current_date" ] - def __init__(self, api_key: str) -> None: + def __init__(self, api_key: str, verbose: bool = True) -> None: """Initialize with parameters.""" from metaphor_python import Metaphor self.client = Metaphor(api_key=api_key) + self._verbose = verbose def search( self, @@ -54,6 +55,8 @@ def search( end_published_date=end_published_date, use_autoprompt=True ) + if self._verbose: + print(f"[Metaphor Tool] Autoprompt: {response.autoprompt_string}") return [ {"title": result.title, "url": result.url, "id": result.id} for result in response.results @@ -126,6 +129,8 @@ def search_and_retrieve_documents( end_published_date=end_published_date, use_autoprompt=True ) + if self._verbose: + print(f"[Metaphor Tool] Autoprompt: {response.autoprompt_string}") ids = [result.id for result in response.results] documents = self.client.get_contents(ids) return [Document(text=document.extract) for document in documents.contents] diff --git a/llama_hub/tools/notebooks/metaphor.ipynb b/llama_hub/tools/notebooks/metaphor.ipynb index 08b01c2727..48c0cf3d59 100644 --- a/llama_hub/tools/notebooks/metaphor.ipynb +++ b/llama_hub/tools/notebooks/metaphor.ipynb @@ -67,18 +67,25 @@ "id": "e64da618-b4ab-42d7-903d-f4eeb624f43c", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Metaphor Tool] Autoprompt: Here's a great resource for learning about machine learning transformers:\n" + ] + }, { "data": { "text/plain": [ - "[{'title': 'On the potential of Transformers in Reinforcement Learning',\n", - " 'url': 'https://lorenzopieri.com/rl_transformers/',\n", - " 'id': 'ysJlYSgeGW3l4zyOBoSGcg'},\n", - " {'title': 'Transformers: Attention in Disguise',\n", - " 'url': 'https://www.mihaileric.com/posts/transformers-attention-in-disguise/',\n", - " 'id': 'iEYMai5rS9k0hN5_BH0VZg'},\n", - " {'title': 'Transformers in Computer Vision: Farewell Convolutions!',\n", - " 'url': 'https://towardsdatascience.com/transformers-in-computer-vision-farewell-convolutions-f083da6ef8ab?gi=a1d0a9a2896c',\n", - " 'id': 'kX1Z89DdjSvBrH1S1XLvwg'}]" + "[{'title': 'Natural Language Processing with Transformers Book',\n", + " 'url': 'https://transformersbook.com/',\n", + " 'id': 'm0fnocgD1zPLxtDZuGt5JQ'},\n", + " {'title': 'Transformers',\n", + " 'url': 'https://www.nlpdemystified.org/course/transformers',\n", + " 'id': 'jPHVA37dax24EwEan9jj0g'},\n", + " {'title': 'transformersbook (Natural Language Processing with Transformers)',\n", + " 'url': 'https://huggingface.co/transformersbook',\n", + " 'id': 'lcsZRBBHevoB4wAn1SFMtA'}]" ] }, "execution_count": 2, @@ -99,7 +106,7 @@ { "data": { "text/plain": [ - "[Document(id_='f95ec92c-c733-4ec5-9d9e-e1f06e5de8da', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='bbd2ad7f78cf02100add6077decdf885ffebfe37743bfc13e80e61b5b833e46c', text='
In this post, we will be describing a class of sequence processing models known as Transformers (…robots in disguise). Jokes aside, Transformers came out on the scene not too long ago and have rocked the natural language processing community because of their pitch: state-of-the-art and efficient sequence processing without recurrent units or convolution.

“No recurrent units or convolution?! What are these models even made of?!”, you may be exclaiming to unsuspecting strangers on the streets.

Not much, it turns out, other than a bunch of attention and feedforward operations.

While the individual components that make up the Transformer model are not particularly novel, this is still a pretty dense paper with a lot of moving parts. So our aim in this post will be to distill the model to its key contributions, without getting too stuck in the details.

But first, the TLDR for the paper:

  1. Transformers demonstrate that recurrence and convolution are not essential for building high-performance natural language models
  2. They achieve state-of-the-art machine translation results using a self-attention operation
  3. Attention is a highly efficient operation due to its parallelizability and runtime characteristics

If that sounds exciting, read onward!

How Transformers Work

While the Transformer does not use traditional recurrent units or convolutions, it still takes inspiration from sequence-to-sequence architectures where we encode some input and iteratively decode a desired output.

How does this play out in practice? Let’s focus on the encoder first. There are quite a few elements to the process, so don’t get too lost in the details. All we are doing is encoding some inputs 🙂.

Assume we start with a certain phrase that we would like to translate from Spanish to English. The Transformer begins by embedding the tokens of the Spanish phrase into a conventional embedding matrix:

\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n

\\n

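To make the embedding step concrete, here is a minimal sketch (the post illustrates this with a diagram rather than code; PyTorch, the vocabulary size, and the token ids below are all assumptions chosen only for illustration):

import torch
import torch.nn as nn

# Sizes are made up for illustration; the paper's base model uses d_model = 512.
vocab_size, d_model = 32000, 512

embed = nn.Embedding(vocab_size, d_model)      # learned lookup table: token id -> d_model vector

# A toy batch of token ids standing in for the tokenized Spanish phrase.
token_ids = torch.tensor([[17, 942, 6, 2051, 88, 3, 1]])   # shape (batch=1, seq_len=7)
x = embed(token_ids)                                        # shape (1, 7, d_model)
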
Because the model makes no use of recurrence, we need some way to represent position-based information in the model. Hence we add a positional encoding to this embedding matrix, whose exact form we will describe in the next section:

\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n

\\n

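The exact form is deferred to the next section; as a hedged sketch, the sinusoidal encoding from the original "Attention Is All You Need" paper looks like this (again assuming PyTorch and an even d_model):

import torch

def positional_encoding(seq_len: int, d_model: int) -> torch.Tensor:
    """Sinusoidal positional encodings, shape (seq_len, d_model)."""
    pos = torch.arange(seq_len, dtype=torch.float).unsqueeze(1)    # (seq_len, 1)
    two_i = torch.arange(0, d_model, 2, dtype=torch.float)         # even dimension indices 0, 2, 4, ...
    angles = pos / torch.pow(10000.0, two_i / d_model)
    pe = torch.zeros(seq_len, d_model)
    pe[:, 0::2] = torch.sin(angles)   # sine on even dimensions
    pe[:, 1::2] = torch.cos(angles)   # cosine on odd dimensions
    return pe

# The encoding is added to (not concatenated with) the embeddings from the previous sketch:
# x = x + positional_encoding(x.size(1), x.size(2))
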
Our modified input is fed into the first layer of the Transformer encoder. Within each encoder layer, we perform a series of operations on the inputs.

First off, we feed the input through a multi-head attention operation:

\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n

\\n

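As an illustrative sketch of this multi-head attention step, using PyTorch's built-in module for brevity (the post describes the operation itself, not any particular API; sizes are assumed):

import torch
import torch.nn as nn

d_model, n_heads = 512, 8               # assumed sizes, matching the paper's base model
x = torch.randn(1, 7, d_model)          # stand-in for the position-encoded embeddings above

mha = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads, batch_first=True)

# Encoder self-attention: queries, keys and values all come from the same input sequence.
attn_out, attn_weights = mha(x, x, x)   # attn_out: (1, 7, d_model)
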
To this attention output, we also add a residual connection as well as perform a layer normalization step:

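A minimal sketch of this add-and-normalize step, assuming PyTorch and stand-in tensors:

import torch
import torch.nn as nn

d_model = 512
norm = nn.LayerNorm(d_model)

x = torch.randn(1, 7, d_model)          # sublayer input (stand-in)
attn_out = torch.randn(1, 7, d_model)   # sublayer output, e.g. from the attention sketch above

# Post-norm residual, as in the original Transformer: add the shortcut, then normalize.
x = norm(x + attn_out)

In the full encoder layer, the feed-forward sublayer that follows receives the same residual-plus-normalization treatment.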