-
Notifications
You must be signed in to change notification settings - Fork 15.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bugfix duckduckgo_search news search #13670
Changes from 2 commits
5bfc4ad
f693bb3
3e5976e
3692396
96229cd
86959a1
fe377aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,11 +46,12 @@ class DuckDuckGoSearchResults(BaseTool): | |
"Useful for when you need to answer questions about current events. " | ||
"Input should be a search query. Output is a JSON array of the query results" | ||
) | ||
num_results: int = 4 | ||
max_results: int = 4 | ||
api_wrapper: DuckDuckGoSearchAPIWrapper = Field( | ||
default_factory=DuckDuckGoSearchAPIWrapper | ||
) | ||
backend: str = "api" | ||
backend: str = "api" # which backend to use in DDGS.text() (api, html, lite) | ||
source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news()) | ||
args_schema: Type[BaseModel] = DDGInput | ||
|
||
def _run( | ||
|
@@ -59,7 +60,10 @@ def _run( | |
run_manager: Optional[CallbackManagerForToolRun] = None, | ||
) -> str: | ||
"""Use the tool.""" | ||
res = self.api_wrapper.results(query, self.num_results, backend=self.backend) | ||
assert self.backend in ["api", "html", "lite"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we validate this upon instantiation of the object? |
||
self.api_wrapper.backend = self.backend | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems a bit wrong to mutate the underlying object here. |
||
self.api_wrapper.source = self.source | ||
res = self.api_wrapper.results(query, self.max_results) | ||
res_strs = [", ".join([f"{k}: {v}" for k, v in d.items()]) for d in res] | ||
return ", ".join([f"[{rs}]" for rs in res_strs]) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,8 @@ class DuckDuckGoSearchAPIWrapper(BaseModel): | |
safesearch: str = "moderate" | ||
time: Optional[str] = "y" | ||
max_results: int = 5 | ||
backend: str = "api" # which backend to use in DDGS.text() (api, html, lite) | ||
source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news()) | ||
|
||
class Config: | ||
"""Configuration for this pydantic object.""" | ||
|
@@ -32,82 +34,79 @@ def validate_environment(cls, values: Dict) -> Dict: | |
except ImportError: | ||
raise ImportError( | ||
"Could not import duckduckgo-search python package. " | ||
"Please install it with `pip install duckduckgo-search`." | ||
"Please install it with `pip install -U duckduckgo-search`." | ||
) | ||
return values | ||
|
||
def get_snippets(self, query: str) -> List[str]: | ||
"""Run query through DuckDuckGo and return concatenated results.""" | ||
def _ddgs_text(self, query: str) -> List[Dict[str, str]]: | ||
"""Run query through DuckDuckGo text search and return results.""" | ||
from duckduckgo_search import DDGS | ||
|
||
with DDGS() as ddgs: | ||
results = ddgs.text( | ||
ddgs_gen = ddgs.text( | ||
query, | ||
region=self.region, | ||
safesearch=self.safesearch, | ||
timelimit=self.time, | ||
max_results=self.max_results, | ||
backend=self.backend, | ||
) | ||
if results is None: | ||
return ["No good DuckDuckGo Search Result was found"] | ||
snippets = [] | ||
for i, res in enumerate(results, 1): | ||
if res is not None: | ||
snippets.append(res["body"]) | ||
if len(snippets) == self.max_results: | ||
break | ||
return snippets | ||
if ddgs_gen: | ||
return [r for r in ddgs_gen] | ||
|
||
def run(self, query: str) -> str: | ||
snippets = self.get_snippets(query) | ||
return " ".join(snippets) | ||
def _ddgs_news(self, query: str) -> List[Dict[str, str]]: | ||
"""Run query through DuckDuckGo news search and return results.""" | ||
from duckduckgo_search import DDGS | ||
|
||
with DDGS() as ddgs: | ||
ddgs_gen = ddgs.news( | ||
query, | ||
region=self.region, | ||
safesearch=self.safesearch, | ||
timelimit=self.time, | ||
max_results=self.max_results, | ||
) | ||
if ddgs_gen: | ||
return [r for r in ddgs_gen] | ||
|
||
def results( | ||
self, query: str, num_results: int, backend: str = "api" | ||
) -> List[Dict[str, str]]: | ||
def run(self, query: str) -> str: | ||
"""Run query through DuckDuckGo and return concatenated results.""" | ||
if self.source == "text": | ||
results = self._ddgs_text(query) | ||
elif self.source == "news": | ||
results = self._ddgs_news(query) | ||
else: | ||
results = [] | ||
|
||
if not results: | ||
return "No good DuckDuckGo Search Result was found" | ||
return " ".join(r["body"] for r in results) | ||
|
||
def results(self, query: str, max_results: int) -> List[Dict[str, str]]: | ||
"""Run query through DuckDuckGo and return metadata. | ||
|
||
Args: | ||
query: The query to search for. | ||
num_results: The number of results to return. | ||
max_results: The number of results to return. | ||
|
||
Returns: | ||
A list of dictionaries with the following keys: | ||
snippet - The description of the result. | ||
title - The title of the result. | ||
link - The link to the result. | ||
""" | ||
from duckduckgo_search import DDGS | ||
|
||
with DDGS() as ddgs: | ||
results = ddgs.text( | ||
query, | ||
region=self.region, | ||
safesearch=self.safesearch, | ||
timelimit=self.time, | ||
backend=backend, | ||
) | ||
if results is None: | ||
return [{"Result": "No good DuckDuckGo Search Result was found"}] | ||
|
||
def to_metadata(result: Dict) -> Dict[str, str]: | ||
if backend == "news": | ||
return { | ||
"date": result["date"], | ||
"title": result["title"], | ||
"snippet": result["body"], | ||
"source": result["source"], | ||
"link": result["url"], | ||
} | ||
return { | ||
"snippet": result["body"], | ||
"title": result["title"], | ||
"link": result["href"], | ||
} | ||
|
||
formatted_results = [] | ||
for i, res in enumerate(results, 1): | ||
if res is not None: | ||
formatted_results.append(to_metadata(res)) | ||
if len(formatted_results) == num_results: | ||
break | ||
return formatted_results | ||
if self.source == "text": | ||
return [ | ||
{"snippet": r["body"], "title": r["title"], "link": r["href"]} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why get rid of date and source? Are those not guaranteed to be in the results? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're right. I added 'date' and 'source' to the results. |
||
for r in self._ddgs_text(query) | ||
] | ||
elif self.source == "news": | ||
return [ | ||
{"snippet": r["body"], "title": r["title"], "link": r["url"]} | ||
for r in self._ddgs_news(query) | ||
] | ||
else: | ||
results = [] | ||
|
||
if results is None: | ||
return [{"Result": "No good DuckDuckGo Search Result was found"}] |
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Can we do this in a backwards-compatible way?