From b7974f9d66f45b52eb254e1cef282720f0b6ca56 Mon Sep 17 00:00:00 2001 From: amaan-ai20 Date: Mon, 23 Dec 2024 23:01:47 +0530 Subject: [PATCH 1/4] community: add Dappier retriever --- .../integrations/retrievers/dappier.ipynb | 223 ++++++++++++++++++ .../retrievers/__init__.py | 5 + .../langchain_community/retrievers/dappier.py | 121 ++++++++++ .../unit_tests/retrievers/test_imports.py | 1 + 4 files changed, 350 insertions(+) create mode 100644 docs/docs/integrations/retrievers/dappier.ipynb create mode 100644 libs/community/langchain_community/retrievers/dappier.py diff --git a/docs/docs/integrations/retrievers/dappier.ipynb b/docs/docs/integrations/retrievers/dappier.ipynb new file mode 100644 index 0000000000000..31ac2be5ef811 --- /dev/null +++ b/docs/docs/integrations/retrievers/dappier.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dappier\n", + "\n", + "> [Dappier](https://dappier.com) connects any LLM or your Agentic AI to real-time, rights-cleared, proprietary data from trusted sources, making your AI an expert in anything. Our specialized models include Real-Time Web Search, News, Sports, Financial Stock Market Data, Crypto Data, and exclusive content from premium publishers. Explore a wide range of data models in our marketplace at [marketplace.dappier.com](https://marketplace.dappier.com).\n", + "\n", + "> [Dappier](https://dappier.com) delivers enriched, prompt-ready, and contextually relevant data strings, optimized for seamless integration with LangChain. Whether you're building conversational AI, recommendation engines, or intelligent search, Dappier's LLM-agnostic RAG models ensure your AI has access to verified, up-to-date data—without the complexity of building and managing your own retrieval pipeline.\n", + "\n", + "## Setup\n", + "\n", + "The integration lives in the `langchain-community` package. 
We also need to install the `dappier` package itself.\n", + "\n", + "```bash\n", + "pip install -U langchain-community dappier\n", + "```\n", + "\n", + "You also need to set our Dappier API credentials, which can be generated at the [Dappier site.](https://platform.dappier.com).\n", + "\n", + "You can find the supported data models by heading over to the [Dappier marketplace.](https://platform.dappier.com/marketplace)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"DAPPIER_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(metadata={'title': 'Hannah Hidalgo scores 29 as No. 8 Notre Dame holds off No. 2 UConn', 'author': 'Curt Rallo, Associated Press', 'source_url': 'https://www.wishtv.com/sports/college-basketball/hannah-hidalgo-scores-29-as-no-8-notre-dame-holds-off-no-2-uconn/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/ONLINE-CROP-Notre-Dame-Hannah-Hidalgo-AP24348035473873_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 03:03:56 +0000'}, page_content=\"Hannah Hidalgo led No. 8 Notre Dame to a 79-68 victory over No. 2 UConn, marking the Huskies' first loss of the season. Hidalgo scored 29 points, while Olivia Miles and Liatu King each added 16. 
Notre Dame's strong defense limited UConn to just 3-of-16 from three-point range, contrasting with their own 10-for-18 performance. A pivotal buzzer-beater by Hidalgo at the end of the third quarter helped regain momentum for the Fighting Irish.\\n\\nLiza Karlen made her return for Notre Dame after an ankle injury, while UConn played without Azzi Fudd due to a knee sprain. Despite narrowing a halftime deficit to one point, UConn couldn't close the gap further than six in the final quarter. Both teams are set to continue their seasons with upcoming games against Eastern Michigan and Georgetown, respectively.\"),\n", + " Document(metadata={'title': 'Winchester firefighter dies while fighting fire', 'author': 'Jay Adkins', 'source_url': 'https://www.wishtv.com/news/local-news/winchester-firefighter-dies-while-fighting-fire/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/firetruck-cropped-e1687307890661_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 02:58:53 +0000'}, page_content='Rick Sanders, a firefighter from the Winchester Fire Department, died while fighting a structure fire on Watson Street in Winchester, Indiana. The incident involved multiple fire departments, and during the efforts, Sanders suffered a medical emergency that led to cardiac arrest while being transported to the hospital.\\n\\nDespite receiving medical assistance, he was pronounced dead upon arrival at Winchester Hospital. 
Sanders leaves behind a wife, four children, and four grandchildren, representing a profound loss for his family and the local community.'),\n", + " Document(metadata={'title': '1 killed, 1 injured in 2-vehicle crash on I-74 in Fountain County', 'author': 'Jay Adkins', 'source_url': 'https://www.wishtv.com/news/local-news/1-killed-1-injured-in-2-vehicle-crash-on-i-74-in-fountain-county/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/ambulance2-istock_1522688992036_38887937_ver1.0_.webp?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 02:13:02 +0000'}, page_content=\"A two-vehicle crash on I-74 in Fountain County, Indiana, resulted in one death and one injury. The accident occurred around 4:30 p.m. when a 2004 Ford F-150 lost control, crossed the median, and collided with an eastbound semitruck. The Ford caught fire after the collision, and its driver was pronounced dead at the scene, with their identity pending release by the coroner's office.\\n\\nThe semitruck driver, Thomas Connolly, 51, from Buckeye, Arizona, suffered non-life-threatening injuries and was taken to a local hospital. Firefighters rescued him from his vehicle, which had come to a stop in a ditch. Investigators are still looking into whether weather conditions played a role in the crash.\"),\n", + " Document(metadata={'title': 'Butler hires new head football coach', 'author': 'Angela Moryan', 'source_url': 'https://www.wishtv.com/sports/butler-hires-new-head-football-coach/', 'image_url': 'https://www.wishtv.com/wp-content/uploads/2024/12/13/GettyImages-1236644042-e1734053738310.jpg', 'pubdata': 'Fri, 13 Dec 2024 01:37:50 +0000'}, page_content='Butler University has named Kevin Lynch as the new head football coach for the Bulldogs, succeeding Mike Uremovich. 
Lynch, son of former Bulldogs coach Bill Lynch, previously served as assistant head coach and quarterbacks coach at Ball State University, where he played a crucial role in developing MAC Freshman of the Year, Kadin Semonza. His coaching experience also includes positions at the University of Indianapolis and Indiana University.\\n\\nLynch expressed his excitement for the opportunity, highlighting Butler\\'s unique environment and the potential for student-athletes to compete for championships while receiving a quality education. He aims to uphold \"The Butler Way\" and foster a competitive spirit in the Pioneer Football League. Butler\\'s Director of Athletics, Grant Leiendecker, commended Lynch\\'s passion for the university and his talent for relationship-building and player development. Lynch will be officially introduced at Hinkle Fieldhouse on December 17.'),\n", + " Document(metadata={'title': '73-year-old man acted alone in deadly shooting at laundromat', 'author': 'Curt Rallo, Associated Press', 'source_url': 'https://www.wishtv.com/news/crime-watch-8/lafayette-shooting-laundromat-update/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/Lafayette-Laundry_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 01:30:55 +0000'}, page_content='On December 5, 2023, a shooting at a laundromat in Lafayette, Indiana, resulted in the death of 35-year-old Keith Ford and injuries to two others. The shooter, 73-year-old Louis McGlothlin, acted alone, firing eight rounds before taking his own life. The victims included Renee Martin, who was shot four times and is in stable condition, and Salvador Antonio De La Cruz Reyes, who was treated for a single gunshot wound.\\n\\nInvestigators found no prior connection between McGlothlin and the victims, leaving the motive unclear. The Lafayette Police Department is working with the Lincoln Police Department to investigate the incident further. 
Mental health resources have been made available for those affected by this tragedy, which occurred in a strip mall laundromat on South Street near Sagamore Parkway.'),\n", + " Document(metadata={'title': '‘LONS Unreleased Vol 2.’ event raises money for Boys and Girls Club', 'author': 'Emily Reuben', 'source_url': 'https://www.wishtv.com/news/allindiana/rare-wave-benefits-boys-girls-club/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/Rare.wav_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 00:25:18 +0000'}, page_content='The article highlights the \"LONS Unreleased Vol 2.\" event organized by The Rare Wav, scheduled for December 13 from 7-9 p.m. at The Rare Space in Indianapolis. This event aims to support the Boys and Girls Club and will showcase six immersive experiences centered around the music of local Alternative R&B artist LONS.\\n\\nWhile admission is free, attendees are encouraged to donate $5 or bring toys and warm-weather clothing for the Boys and Girls Club. The Rare Wav is a creative agency focused on enhancing the online presence of Black and Brown businesses. For further details, individuals can visit their website.'),\n", + " Document(metadata={'title': 'Indiana State Police investigating fatal pursuit in Tipton and Hamilton counties', 'author': 'Jay Adkins', 'source_url': 'https://www.wishtv.com/news/crime-watch-8/indiana-state-police-investigating-fatal-pursuit-in-tipton-and-hamilton-counties/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/ISP-care_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 00:17:06 +0000'}, page_content='Indiana State Police are investigating a fatal incident that occurred during a police pursuit in Tipton County, Indiana, early Thursday morning. A deputy attempted to stop a speeding 2014 Ford Fusion, but the driver fled, leading to a chase that ended in Hamilton County when the vehicle rolled over and landed in a ditch. 
One occupant fled on foot, while the other was hospitalized due to injuries.\\n\\nA K-9 unit was deployed to locate the fleeing occupant, who was found collapsed in a nearby field and later pronounced dead at the scene from crash-related injuries. The Hamilton County Coroner’s Office will release the identity of the deceased after notifying family members. The investigation into the incident is ongoing.'),\n", + " Document(metadata={'title': 'Fathers and Families Center’s All-Star Dads program provides dads with more support', 'author': 'Emily Reuben', 'source_url': 'https://www.wishtv.com/news/allindiana/fathers-families-center-indianapolis/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/All-Star-Dads-Program_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 00:08:08 +0000'}, page_content=\"The Fathers and Families Center in Indianapolis has been supporting fathers for over 30 years, focusing on equipping them with skills to positively impact their families and communities. Recently, the center celebrated the graduation of 14 men who earned their GEDs through a partnership with Warren Township. The center emphasizes that engaged fathers can improve their children's academic performance and reduce juvenile justice involvement.\\n\\nTo further support fathers, the center launched the All-Star Dads program, a two-week initiative designed for working fathers, highlighting the importance of being present in their children's lives. This program reinforces that emotional presence is more valuable than financial support. The center also offers workforce development programs in high-demand skilled trades, encouraging active father involvement for healthier child development. For more information, individuals can visit the center's website.\"),\n", + " Document(metadata={'title': 'The Zone Extra | Dec. 
12, 2024', 'author': 'Josh Bode', 'source_url': 'https://www.wishtv.com/sports/high-school-the-zone/the-zone-extra-dec-12-2024/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/ZONE-EXTRA-2409112027_frame_8628_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 00:00:00 +0000'}, page_content='The article from WISH TV covers the shift from high school football to basketball in central Indiana, celebrating Decatur Central\\'s first state championship win. Coaches share insights on this significant achievement, marking a milestone for the team.\\n\\nAdditionally, Zionsville\\'s Maguire Mitchell is named \"Athlete of the Week\" for his outstanding basketball performance, while the \"Play of the Week\" features Blake Gray\\'s thrilling buzzer-beater for University High School. The article invites readers to explore more high school sports coverage via the \"All Indiana Sports\" app.')]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_community.retrievers import DappierRetriever\n", + "\n", + "retriever = DappierRetriever(data_model_id=\"dm_01jagy9nqaeer9hxx8z1sk1jx6\")\n", + "\n", + "retriever.invoke(\"latest tech news\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(metadata={'title': 'Hannah Hidalgo scores 29 as No. 8 Notre Dame holds off No. 2 UConn', 'author': 'Curt Rallo, Associated Press', 'source_url': 'https://www.wishtv.com/sports/college-basketball/hannah-hidalgo-scores-29-as-no-8-notre-dame-holds-off-no-2-uconn/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/ONLINE-CROP-Notre-Dame-Hannah-Hidalgo-AP24348035473873_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 03:03:56 +0000'}, page_content=\"Hannah Hidalgo led No. 8 Notre Dame to a 79-68 victory over No. 2 UConn, marking the Huskies' first loss of the season. 
Hidalgo scored 29 points, while Olivia Miles and Liatu King each added 16. Notre Dame's strong defense limited UConn to just 3-of-16 from three-point range, contrasting with their own 10-for-18 performance. A pivotal buzzer-beater by Hidalgo at the end of the third quarter helped regain momentum for the Fighting Irish.\\n\\nLiza Karlen made her return for Notre Dame after an ankle injury, while UConn played without Azzi Fudd due to a knee sprain. Despite narrowing a halftime deficit to one point, UConn couldn't close the gap further than six in the final quarter. Both teams are set to continue their seasons with upcoming games against Eastern Michigan and Georgetown, respectively.\"),\n", + " Document(metadata={'title': 'Winchester firefighter dies while fighting fire', 'author': 'Jay Adkins', 'source_url': 'https://www.wishtv.com/news/local-news/winchester-firefighter-dies-while-fighting-fire/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/firetruck-cropped-e1687307890661_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 02:58:53 +0000'}, page_content='Rick Sanders, a firefighter from the Winchester Fire Department, died while fighting a structure fire on Watson Street in Winchester, Indiana. The incident involved multiple fire departments, and during the efforts, Sanders suffered a medical emergency that led to cardiac arrest while being transported to the hospital.\\n\\nDespite receiving medical assistance, he was pronounced dead upon arrival at Winchester Hospital. 
Sanders leaves behind a wife, four children, and four grandchildren, representing a profound loss for his family and the local community.'),\n", + " Document(metadata={'title': '1 killed, 1 injured in 2-vehicle crash on I-74 in Fountain County', 'author': 'Jay Adkins', 'source_url': 'https://www.wishtv.com/news/local-news/1-killed-1-injured-in-2-vehicle-crash-on-i-74-in-fountain-county/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/ambulance2-istock_1522688992036_38887937_ver1.0_.webp?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 02:13:02 +0000'}, page_content=\"A two-vehicle crash on I-74 in Fountain County, Indiana, resulted in one death and one injury. The accident occurred around 4:30 p.m. when a 2004 Ford F-150 lost control, crossed the median, and collided with an eastbound semitruck. The Ford caught fire after the collision, and its driver was pronounced dead at the scene, with their identity pending release by the coroner's office.\\n\\nThe semitruck driver, Thomas Connolly, 51, from Buckeye, Arizona, suffered non-life-threatening injuries and was taken to a local hospital. Firefighters rescued him from his vehicle, which had come to a stop in a ditch. Investigators are still looking into whether weather conditions played a role in the crash.\"),\n", + " Document(metadata={'title': 'Butler hires new head football coach', 'author': 'Angela Moryan', 'source_url': 'https://www.wishtv.com/sports/butler-hires-new-head-football-coach/', 'image_url': 'https://www.wishtv.com/wp-content/uploads/2024/12/13/GettyImages-1236644042-e1734053738310.jpg', 'pubdata': 'Fri, 13 Dec 2024 01:37:50 +0000'}, page_content='Butler University has named Kevin Lynch as the new head football coach for the Bulldogs, succeeding Mike Uremovich. 
Lynch, son of former Bulldogs coach Bill Lynch, previously served as assistant head coach and quarterbacks coach at Ball State University, where he played a crucial role in developing MAC Freshman of the Year, Kadin Semonza. His coaching experience also includes positions at the University of Indianapolis and Indiana University.\\n\\nLynch expressed his excitement for the opportunity, highlighting Butler\\'s unique environment and the potential for student-athletes to compete for championships while receiving a quality education. He aims to uphold \"The Butler Way\" and foster a competitive spirit in the Pioneer Football League. Butler\\'s Director of Athletics, Grant Leiendecker, commended Lynch\\'s passion for the university and his talent for relationship-building and player development. Lynch will be officially introduced at Hinkle Fieldhouse on December 17.'),\n", + " Document(metadata={'title': '73-year-old man acted alone in deadly shooting at laundromat', 'author': 'Curt Rallo, Associated Press', 'source_url': 'https://www.wishtv.com/news/crime-watch-8/lafayette-shooting-laundromat-update/', 'image_url': 'https://images.dappier.com/dm_01jagy9nqaeer9hxx8z1sk1jx6/Lafayette-Laundry_.jpg?width=428&height=321', 'pubdata': 'Fri, 13 Dec 2024 01:30:55 +0000'}, page_content='On December 5, 2023, a shooting at a laundromat in Lafayette, Indiana, resulted in the death of 35-year-old Keith Ford and injuries to two others. The shooter, 73-year-old Louis McGlothlin, acted alone, firing eight rounds before taking his own life. The victims included Renee Martin, who was shot four times and is in stable condition, and Salvador Antonio De La Cruz Reyes, who was treated for a single gunshot wound.\\n\\nInvestigators found no prior connection between McGlothlin and the victims, leaving the motive unclear. The Lafayette Police Department is working with the Lincoln Police Department to investigate the incident further. 
Mental health resources have been made available for those affected by this tragedy, which occurred in a strip mall laundromat on South Street near Sagamore Parkway.')]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you have full control on filtering by category, time, pagination, and even the search method you use.\n", + "from datetime import datetime, timedelta\n", + "\n", + "start = (datetime.now() - timedelta(days=7)).timestamp()\n", + "end = datetime.now().timestamp()\n", + "\n", + "retriever = DappierRetriever(\n", + " data_model_id=\"dm_01jagy9nqaeer9hxx8z1sk1jx6\",\n", + " k=5,\n", + " ref=\"techcrunch.com\",\n", + " num_articles_ref=2,\n", + " search_algorithm=\"most_recent\"\n", + ")\n", + "\n", + "retriever.invoke(\"latest tech news\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can easily combine this retriever into a chain." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"The following news articles may come in handy for answering the question:\n", + "\n", + "{context}\n", + "\n", + "Question:\n", + "\n", + "{question}\"\"\"\n", + ")\n", + "chain = (\n", + " RunnablePassthrough.assign(context=(lambda x: x[\"question\"]) | retriever)\n", + " | prompt\n", + " | ChatOpenAI(model=\"gpt-4-1106-preview\")\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"The key highlights and outcomes from the events mentioned in the articles are as follows:\\n\\n1. Hannah Hidalgo's Performance in Notre Dame vs. UConn Basketball Game:\\n - Hannah Hidalgo scored 29 points leading Notre Dame to a 79-68 victory over UConn.\\n - This was UConn's first loss of the season.\\n - Olivia Miles and Liatu King each contributed 16 points to Notre Dame's win.\\n - Notre Dame displayed strong defense, limiting UConn to 3-of-16 from three-point range.\\n - Hidalgo scored a pivotal buzzer-beater at the end of the third quarter.\\n - Notre Dame's Liza Karlen returned from an ankle injury.\\n - UConn played without Azzi Fudd due to a knee sprain.\\n - Both teams have upcoming games, with Notre Dame playing against Eastern Michigan and UConn against Georgetown.\\n\\n2. Death of Winchester Firefighter Rick Sanders:\\n - Rick Sanders died while fighting a fire on Watson Street in Winchester, Indiana.\\n - Sanders suffered a medical emergency that led to cardiac arrest during transportation to the hospital.\\n - He was pronounced dead upon arrival at Winchester Hospital.\\n - Sanders is survived by a wife, four children, and four grandchildren.\\n\\n3. Fatal Two-Vehicle Crash on I-74 in Fountain County:\\n - A crash involving a Ford F-150 and a semitruck resulted in one death and one injury.\\n - The driver of the Ford F-150 was pronounced dead at the scene.\\n - Semitruck driver Thomas Connolly suffered non-life-threatening injuries.\\n - The cause of the crash is under investigation, including potential weather conditions.\\n\\n4. 
Butler University's New Head Football Coach:\\n - Kevin Lynch was named the head football coach for Butler University's Bulldogs.\\n - Lynch previously served as assistant head coach and quarterbacks coach at Ball State University.\\n - He is the son of former Bulldogs coach Bill Lynch.\\n - Lynch is set to be officially introduced at Hinkle Fieldhouse on December 17.\\n\\n5. Shooting at Lafayette Laundromat:\\n - A shooting at a Lafayette laundromat resulted in the death of 35-year-old Keith Ford.\\n - Two other victims were injured: Renee Martin, in stable condition, and Salvador Antonio De La Cruz Reyes.\\n - The shooter, 73-year-old Louis McGlothlin, acted alone and took his own life after the shooting.\\n - No prior connection between McGlothlin and the victims was found.\\n - The motive for the shooting remains unclear.\\n - Mental health resources are available for those affected by the incident.\"" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke({\"question\": \"What are the key highlights and outcomes from the latest events covered in the article?\"})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libs/community/langchain_community/retrievers/__init__.py b/libs/community/langchain_community/retrievers/__init__.py index ce4ac731bde28..08fbe9af0f290 100644 --- a/libs/community/langchain_community/retrievers/__init__.py +++ b/libs/community/langchain_community/retrievers/__init__.py @@ -53,6 +53,9 @@ from langchain_community.retrievers.cohere_rag_retriever import ( CohereRagRetriever, 
) + from langchain_community.retrievers.dappier import ( + DappierRetriever, + ) from langchain_community.retrievers.docarray import ( DocArrayRetriever, ) @@ -159,6 +162,7 @@ "ChaindeskRetriever": "langchain_community.retrievers.chaindesk", "ChatGPTPluginRetriever": "langchain_community.retrievers.chatgpt_plugin_retriever", "CohereRagRetriever": "langchain_community.retrievers.cohere_rag_retriever", + "DappierRetriever": "langchain_community.retrievers.dappier", "DocArrayRetriever": "langchain_community.retrievers.docarray", "DriaRetriever": "langchain_community.retrievers.dria_index", "ElasticSearchBM25Retriever": "langchain_community.retrievers.elastic_search_bm25", @@ -216,6 +220,7 @@ def __getattr__(name: str) -> Any: "ChaindeskRetriever", "ChatGPTPluginRetriever", "CohereRagRetriever", + "DappierRetriever", "DocArrayRetriever", "DriaRetriever", "ElasticSearchBM25Retriever", diff --git a/libs/community/langchain_community/retrievers/dappier.py b/libs/community/langchain_community/retrievers/dappier.py new file mode 100644 index 0000000000000..0a2bd43f84cbf --- /dev/null +++ b/libs/community/langchain_community/retrievers/dappier.py @@ -0,0 +1,121 @@ +import os +from typing import Any, List, Literal, Optional + +from langchain_core.callbacks.manager import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) +from langchain_core.documents import Document +from langchain_core.retrievers import BaseRetriever + + +class DappierRetriever(BaseRetriever): + """Retriever that returns Dappier AI-recommendation results as Documents.""" + + data_model_id: str + """Data model ID, starting with dm_.""" + k: int = 9 + """Number of documents to return.""" + ref: Optional[str] = None + """Site domain where AI recommendations are displayed.""" + num_articles_ref: int = 0 + """Minimum number of articles from the ref domain specified. 
+ The rest will come from other sites within the RAG model.""" + search_algorithm: Literal[ + "most_recent", "most_recent_semantic", "semantic", "trending" + ] = "most_recent" + """Search algorithm for retrieving articles.""" + api_key: Optional[str] = None + """The API key used to interact with the Dappier APIs.""" + + def _get_relevant_documents( + self, query: str, *, run_manager: CallbackManagerForRetrieverRun + ) -> List[Document]: + """Get documents relevant to a query. + + Args: + query: String to find relevant documents for + run_manager: The callbacks handler to use + Returns: + List of relevant documents + """ + try: + from dappier import Dappier + except ImportError: + raise ImportError( + "Dappier python package not found. " + "Please install it with `pip install dappier`." + ) + try: + if not self.data_model_id: + raise ValueError("Data model id is not initialized.") + dp_client = Dappier(api_key=self.api_key or os.environ["DAPPIER_API_KEY"]) + response = dp_client.get_ai_recommendations( + query=query, + data_model_id=self.data_model_id, + similarity_top_k=self.k, + ref=self.ref, + num_articles_ref=self.num_articles_ref, + search_algorithm=self.search_algorithm, + ) + return self._extract_documents(response=response) + except Exception as e: + raise ValueError(f"Error while retrieving documents: {e}") from e + + async def _aget_relevant_documents( + self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun + ) -> List[Document]: + """Asynchronously get documents relevant to a query. + Args: + query: String to find relevant documents for + run_manager: The callbacks handler to use + Returns: + List of relevant documents + """ + try: + from dappier import DappierAsync + except ImportError: + raise ImportError( + "Dappier python package not found. " + "Please install it with `pip install dappier`." 
+ ) + try: + dp_client = DappierAsync( + api_key=self.api_key or os.environ["DAPPIER_API_KEY"] + ) + async with dp_client as client: + response = await client.get_ai_recommendations_async( + query=query, + data_model_id=self.data_model_id, + similarity_top_k=self.k, + ref=self.ref, + num_articles_ref=self.num_articles_ref, + search_algorithm=self.search_algorithm, + ) + return self._extract_documents(response=response) + except Exception as e: + raise ValueError(f"Error while retrieving documents: {e}") from e + + def _extract_documents(self, response: Any) -> List[Document]: + """Convert a Dappier recommendations response into Documents.""" + + from dappier.types import AIRecommendationsResponse + + docs: List[Document] = [] + rec_response: AIRecommendationsResponse = response + if rec_response.response is None or rec_response.response.results is None: + return docs + for doc in rec_response.response.results: + docs.append( + Document( + page_content=doc.summary, + metadata={ + "title": doc.title, + "author": doc.author, + "source_url": doc.source_url, + "image_url": doc.image_url, + "pubdata": doc.pubdate, + }, + ) + ) + return docs diff --git a/libs/community/tests/unit_tests/retrievers/test_imports.py b/libs/community/tests/unit_tests/retrievers/test_imports.py index dde08e2f81700..86d0f9289c220 100644 --- a/libs/community/tests/unit_tests/retrievers/test_imports.py +++ b/libs/community/tests/unit_tests/retrievers/test_imports.py @@ -12,6 +12,7 @@ "ChatGPTPluginRetriever", "ChaindeskRetriever", "CohereRagRetriever", + "DappierRetriever", "DriaRetriever", "ElasticSearchBM25Retriever", "EmbedchainRetriever", From 5218b2fe1f4e65c285be6bd9ae137ad63af01962 Mon Sep 17 00:00:00 2001 From: amaan-ai20 Date: Mon, 23 Dec 2024 23:08:25 +0530 Subject: [PATCH 2/4] chore: ruff lint and format fixes --- docs/docs/integrations/retrievers/dappier.ipynb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/docs/integrations/retrievers/dappier.ipynb 
b/docs/docs/integrations/retrievers/dappier.ipynb index 31ac2be5ef811..7def11c35a66b 100644 --- a/docs/docs/integrations/retrievers/dappier.ipynb +++ b/docs/docs/integrations/retrievers/dappier.ipynb @@ -123,7 +123,7 @@ " k=5,\n", " ref=\"techcrunch.com\",\n", " num_articles_ref=2,\n", - " search_algorithm=\"most_recent\"\n", + " search_algorithm=\"most_recent\",\n", ")\n", "\n", "retriever.invoke(\"latest tech news\")" @@ -195,7 +195,11 @@ } ], "source": [ - "chain.invoke({\"question\": \"What are the key highlights and outcomes from the latest events covered in the article?\"})" + "chain.invoke(\n", + " {\n", + " \"question\": \"What are the key highlights and outcomes from the latest events covered in the article?\"\n", + " }\n", + ")" ] } ], From 177914f3d052488e028c1b152bcc063e8599a688 Mon Sep 17 00:00:00 2001 From: amaan-ai20 Date: Mon, 23 Dec 2024 23:29:14 +0530 Subject: [PATCH 3/4] chore: update a comment inside dappier retriever notebook --- docs/docs/integrations/retrievers/dappier.ipynb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/docs/integrations/retrievers/dappier.ipynb b/docs/docs/integrations/retrievers/dappier.ipynb index 7def11c35a66b..bc156bee8cc8d 100644 --- a/docs/docs/integrations/retrievers/dappier.ipynb +++ b/docs/docs/integrations/retrievers/dappier.ipynb @@ -112,11 +112,7 @@ } ], "source": [ - "# you have full control on filtering by category, time, pagination, and even the search method you use.\n", - "from datetime import datetime, timedelta\n", - "\n", - "start = (datetime.now() - timedelta(days=7)).timestamp()\n", - "end = datetime.now().timestamp()\n", + "# You have full control over selecting a data model, number of documents to return, site domain reference, minimum articles from the reference domain, and the search algorithm.\n", "\n", "retriever = DappierRetriever(\n", " data_model_id=\"dm_01jagy9nqaeer9hxx8z1sk1jx6\",\n", From c4a313c7068e7005c9d1d0bed945a31baddb0289 Mon Sep 17 00:00:00 2001 From: 
amaan-ai20 Date: Mon, 23 Dec 2024 23:44:15 +0530 Subject: [PATCH 4/4] chore: improve dappier retriever notebook documentation --- docs/docs/integrations/retrievers/dappier.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/integrations/retrievers/dappier.ipynb b/docs/docs/integrations/retrievers/dappier.ipynb index bc156bee8cc8d..bbde2f9cd4492 100644 --- a/docs/docs/integrations/retrievers/dappier.ipynb +++ b/docs/docs/integrations/retrievers/dappier.ipynb @@ -18,9 +18,9 @@ "pip install -U langchain-community dappier\n", "```\n", "\n", - "You also need to set our Dappier API credentials, which can be generated at the [Dappier site.](https://platform.dappier.com).\n", + "We also need to set our Dappier API credentials, which can be generated at the [Dappier site](https://platform.dappier.com).\n", "\n", - "You can find the supported data models by heading over to the [Dappier marketplace.](https://platform.dappier.com/marketplace)" + "We can find the supported data models by heading over to the [Dappier marketplace](https://platform.dappier.com/marketplace)." ] }, { @@ -112,7 +112,7 @@ } ], "source": [ - "# You have full control over selecting a data model, number of documents to return, site domain reference, minimum articles from the reference domain, and the search algorithm.\n", + "# Full control over selecting a data model, number of documents to return, site domain reference, minimum articles from the reference domain, and the search algorithm.\n", "\n", "retriever = DappierRetriever(\n", " data_model_id=\"dm_01jagy9nqaeer9hxx8z1sk1jx6\",\n",