Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix duckduckgo_search news search #13670

Merged
merged 7 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/docs/integrations/tools/ddg.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"metadata": {},
"outputs": [],
"source": [
"# !pip install duckduckgo-search"
"# !pip install -U duckduckgo-search"
]
},
{
Expand Down Expand Up @@ -125,7 +125,7 @@
"metadata": {},
"outputs": [],
"source": [
"search = DuckDuckGoSearchResults(backend=\"news\")"
"search = DuckDuckGoSearchResults(source=\"news\")"
]
},
{
Expand Down Expand Up @@ -176,7 +176,7 @@
"metadata": {},
"outputs": [],
"source": [
"search = DuckDuckGoSearchResults(api_wrapper=wrapper, backend=\"news\")"
"search = DuckDuckGoSearchResults(api_wrapper=wrapper, source=\"news\")"
]
},
{
Expand Down
10 changes: 7 additions & 3 deletions libs/langchain/langchain/tools/ddg_search/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ class DuckDuckGoSearchResults(BaseTool):
"Useful for when you need to answer questions about current events. "
"Input should be a search query. Output is a JSON array of the query results"
)
num_results: int = 4
max_results: int = 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do this in a backwards-compatible way?

api_wrapper: DuckDuckGoSearchAPIWrapper = Field(
default_factory=DuckDuckGoSearchAPIWrapper
)
backend: str = "api"
backend: str = "api" # which backend to use in DDGS.text() (api, html, lite)
source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news())
args_schema: Type[BaseModel] = DDGInput

def _run(
Expand All @@ -59,7 +60,10 @@ def _run(
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
"""Use the tool."""
res = self.api_wrapper.results(query, self.num_results, backend=self.backend)
assert self.backend in ["api", "html", "lite"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we validate this upon instantiation of the object?

self.api_wrapper.backend = self.backend
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a bit wrong to mutate the underlying object.

self.api_wrapper.source = self.source
res = self.api_wrapper.results(query, self.max_results)
res_strs = [", ".join([f"{k}: {v}" for k, v in d.items()]) for d in res]
return ", ".join([f"[{rs}]" for rs in res_strs])

Expand Down
109 changes: 54 additions & 55 deletions libs/langchain/langchain/utilities/duckduckgo_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
safesearch: str = "moderate"
time: Optional[str] = "y"
max_results: int = 5
backend: str = "api" # which backend to use in DDGS.text() (api, html, lite)
source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news())

class Config:
"""Configuration for this pydantic object."""
Expand All @@ -32,82 +34,79 @@ def validate_environment(cls, values: Dict) -> Dict:
except ImportError:
raise ImportError(
"Could not import duckduckgo-search python package. "
"Please install it with `pip install duckduckgo-search`."
"Please install it with `pip install -U duckduckgo-search`."
)
return values

def get_snippets(self, query: str) -> List[str]:
"""Run query through DuckDuckGo and return concatenated results."""
def _ddgs_text(self, query: str) -> List[Dict[str, str]]:
"""Run query through DuckDuckGo text search and return results."""
from duckduckgo_search import DDGS

with DDGS() as ddgs:
results = ddgs.text(
ddgs_gen = ddgs.text(
query,
region=self.region,
safesearch=self.safesearch,
timelimit=self.time,
max_results=self.max_results,
backend=self.backend,
)
if results is None:
return ["No good DuckDuckGo Search Result was found"]
snippets = []
for i, res in enumerate(results, 1):
if res is not None:
snippets.append(res["body"])
if len(snippets) == self.max_results:
break
return snippets
if ddgs_gen:
return [r for r in ddgs_gen]

def run(self, query: str) -> str:
snippets = self.get_snippets(query)
return " ".join(snippets)
def _ddgs_news(self, query: str) -> List[Dict[str, str]]:
"""Run query through DuckDuckGo news search and return results."""
from duckduckgo_search import DDGS

with DDGS() as ddgs:
ddgs_gen = ddgs.news(
query,
region=self.region,
safesearch=self.safesearch,
timelimit=self.time,
max_results=self.max_results,
)
if ddgs_gen:
return [r for r in ddgs_gen]

def results(
self, query: str, num_results: int, backend: str = "api"
) -> List[Dict[str, str]]:
def run(self, query: str) -> str:
"""Run query through DuckDuckGo and return concatenated results."""
if self.source == "text":
results = self._ddgs_text(query)
elif self.source == "news":
results = self._ddgs_news(query)
else:
results = []

if not results:
return "No good DuckDuckGo Search Result was found"
return " ".join(r["body"] for r in results)

def results(self, query: str, max_results: int) -> List[Dict[str, str]]:
"""Run query through DuckDuckGo and return metadata.

Args:
query: The query to search for.
num_results: The number of results to return.
max_results: The number of results to return.

Returns:
A list of dictionaries with the following keys:
snippet - The description of the result.
title - The title of the result.
link - The link to the result.
"""
from duckduckgo_search import DDGS

with DDGS() as ddgs:
results = ddgs.text(
query,
region=self.region,
safesearch=self.safesearch,
timelimit=self.time,
backend=backend,
)
if results is None:
return [{"Result": "No good DuckDuckGo Search Result was found"}]

def to_metadata(result: Dict) -> Dict[str, str]:
if backend == "news":
return {
"date": result["date"],
"title": result["title"],
"snippet": result["body"],
"source": result["source"],
"link": result["url"],
}
return {
"snippet": result["body"],
"title": result["title"],
"link": result["href"],
}

formatted_results = []
for i, res in enumerate(results, 1):
if res is not None:
formatted_results.append(to_metadata(res))
if len(formatted_results) == num_results:
break
return formatted_results
if self.source == "text":
return [
{"snippet": r["body"], "title": r["title"], "link": r["href"]}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why get rid of date and source? Are those not guaranteed to be in the results?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. I added 'date' and 'source' to the results.

for r in self._ddgs_text(query)
]
elif self.source == "news":
return [
{"snippet": r["body"], "title": r["title"], "link": r["url"]}
for r in self._ddgs_news(query)
]
else:
results = []

if results is None:
return [{"Result": "No good DuckDuckGo Search Result was found"}]
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import pytest

from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain.tools.ddg_search.tool import DuckDuckGoSearchResults, DuckDuckGoSearchRun


def ddg_installed() -> bool:
try:
from duckduckgo_search import ddg # noqa: F401
from duckduckgo_search import DDGS # noqa: F401

return True
except Exception as e:
Expand All @@ -20,3 +20,12 @@ def test_ddg_search_tool() -> None:
result = tool(keywords)
print(result)
assert len(result.split()) > 20


@pytest.mark.skipif(not ddg_installed(), reason="requires duckduckgo-search package")
def test_ddg_search_news_tool() -> None:
keywords = "Tesla"
tool = DuckDuckGoSearchResults(source="news")
result = tool(keywords)
print(result)
assert len(result.split()) > 20
Loading