diff --git a/apps/url_summary/README.md b/apps/url_summary/README.md
new file mode 100644
index 0000000000..96bff80b9b
--- /dev/null
+++ b/apps/url_summary/README.md
@@ -0,0 +1,22 @@
+# URL Summarization
+
+## Overview
+The URL Summarization System leverages the capabilities of OpenAI's language model and EvaDB to automatically generate concise and informative summaries of web content linked by URLs. This system aims to provide users with quick insights into the content of a webpage without having to read through the entire page.
+
+## Dependencies
+
+This app is powered by EvaDB's Python API and ChatGPT UDF.
+
+## Setup
+Ensure that the local Python version is >= 3.8. Install the required libraries:
+
+```bat
+pip install -r requirements.txt
+```
+
+## Usage
+Run script:
+```bat
+python url_summary.py
+```
+
diff --git a/apps/url_summary/url_summary.py b/apps/url_summary/url_summary.py
new file mode 100644
index 0000000000..903b584258
--- /dev/null
+++ b/apps/url_summary/url_summary.py
@@ -0,0 +1,88 @@
+# coding=utf-8
+# Copyright 2018-2023 EvaDB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import shutil
+
+import pandas as pd
+import validators
+from langchain.document_loaders import UnstructuredURLLoader
+
+import evadb
+
+DEFAULT_URL = "https://alphasec.io/what-are-passkeys/"
+# Temporary artifacts written during a run; both are removed by cleanup().
+SUMMARY_CSV = "summary.csv"
+EVADB_DATA_DIR = "evadb_data"
+
+
+def cleanup():
+    """Remove the temporary CSV file and EvaDB data directory, if present."""
+    if os.path.exists(SUMMARY_CSV):
+        os.remove(SUMMARY_CSV)
+    if os.path.exists(EVADB_DATA_DIR):
+        shutil.rmtree(EVADB_DATA_DIR)
+
+
+if __name__ == "__main__":
+    print("🔮 Welcome to EvaDB! This app lets you summarize the content of any URL.\n")
+
+    # Ask for the OpenAI key only when OPENAI_KEY is not already set.
+    if not os.environ.get("OPENAI_KEY"):
+        os.environ["OPENAI_KEY"] = input("🔑 Enter your OpenAI API key: ").strip()
+
+    try:
+        # Blank (or whitespace-only) input selects the default URL.
+        url_link = input("🔗 Enter the URL (press Enter to use our default URL) : ")
+        url_link = url_link.strip() or DEFAULT_URL
+
+        if not validators.url(url_link):
+            raise ValueError("Please enter a valid URL.")
+
+        print("\n⏳ Loading URL data\n")
+        url_data = UnstructuredURLLoader(urls=[url_link]).load()
+
+        # NOTE(review): this stores the repr of the loaded document list in a
+        # single cell rather than the page text itself -- confirm it is intended.
+        df = pd.DataFrame({"text": [url_data]})
+        df.to_csv(SUMMARY_CSV)
+        print(df)
+
+        print("📶 Establishing evadb api cursor connection.")
+        cursor = evadb.connect().cursor()
+
+        # Load summary into table
+        cursor.drop_table("URL_Summary", if_exists=True).execute()
+        cursor.query(
+            """CREATE TABLE IF NOT EXISTS URL_Summary (text TEXT(4096));"""
+        ).execute()
+        cursor.load(SUMMARY_CSV, "URL_Summary", "csv").execute()
+
+        # Generate summary with chatgpt udf
+        print("⏳ Generating Summary (may take a while)... \n")
+        query = "Create a summary of the provided content in 250-300 words."
+        generate_chatgpt_response_rel = cursor.table("URL_Summary").select(
+            f"ChatGPT('{query}', text)"
+        )
+        responses = generate_chatgpt_response_rel.df()["chatgpt.response"]
+        print(responses[0], "\n")
+
+        print("✅ Session ended.")
+    except Exception as e:
+        # Top-level boundary: report the failure instead of a raw traceback.
+        print("❗️ Session ended with error : ", e)
+    finally:
+        # Remove temporary artifacts on success and on failure alike.
+        cleanup()
+        print("===========================================")
diff --git a/test/app_tests/test_url_summary.py b/test/app_tests/test_url_summary.py
new file mode 100644
index 0000000000..7606b81438
--- /dev/null
+++ b/test/app_tests/test_url_summary.py
@@ -0,0 +1,69 @@
+# coding=utf-8
+# Copyright 2018-2023 EvaDB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import subprocess
+import sys
+import unittest
+from pathlib import Path
+from test.util import get_evadb_for_testing, shutdown_ray
+
+
+class URLSummaryTest(unittest.TestCase):
+    """End-to-end smoke test that runs the URL summary app as a subprocess."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.evadb = get_evadb_for_testing()
+        cls.evadb.catalog().reset()
+        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def setUp(self):
+        pass
+
+    def tearDown(self) -> None:
+        shutdown_ray()
+
+    def test_should_run_url_summary_app(self):
+        """Summarize the default URL and check the output for a known keyword."""
+        app_path = Path("apps", "url_summary", "url_summary.py")
+        # Empty lines on stdin make the app summarize its default URL.
+        # Assuming that OPENAI_KEY is already set as an environment variable
+        inputs = "\n\n"
+        # Run with the current interpreter so the app sees the same packages.
+        command = [sys.executable, str(app_path)]
+
+        process = subprocess.Popen(
+            command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        stdout, stderr = process.communicate(inputs.encode())
+
+        decoded_stdout = stdout.decode()
+        # Print before asserting so the output is visible when the test fails.
+        print(decoded_stdout)
+        print(stderr.decode())
+
+        # `"a" or "b" in text` is always truthy, so test each keyword explicitly.
+        expected_keywords = ("Passkeys", "AliExpress", "Rate limit")
+        self.assertTrue(
+            any(keyword in decoded_stdout for keyword in expected_keywords),
+            f"none of {expected_keywords} found in app output",
+        )