Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat : Url summary tool #948

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions apps/url_summary/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# URL Summarization

## Overview
The URL Summarization System leverages the capabilities of OpenAI's language model and EvaDB to automatically generate concise and informative summaries of web content linked by URLs. This system aims to provide users with quick insights into the content of a webpage without having to read through the entire page.

## Dependencies

This app is powered by EvaDB's Python API and ChatGPT UDF.

## Setup
Ensure that the local Python version is >= 3.8. Install the required libraries:

```bat
pip install -r requirements.txt
```

## Usage
Run script:
```bat
python url_summary.py
```

88 changes: 88 additions & 0 deletions apps/url_summary/url_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil

import pandas as pd
import validators
from langchain.document_loaders import UnstructuredURLLoader

import evadb

# URL summarized when the user presses Enter without typing one.
DEFAULT_URL = "https://alphasec.io/what-are-passkeys/"


def cleanup():
    """Remove the temporary file / directory left in the working directory.

    Deletes the ``summary.csv`` file and the ``evadb_data`` directory when
    they are present. A path that is already missing is ignored, so the
    function is safe to call more than once; any other ``OSError``
    propagates to the caller.
    """
    # Attempt the removal directly rather than testing ``os.path.exists``
    # first: the check-then-act form can still fail if the path disappears
    # between the check and the removal.
    try:
        os.remove("summary.csv")
    except FileNotFoundError:
        pass

    try:
        shutil.rmtree("evadb_data")
    except FileNotFoundError:
        pass


def main():
    """Run one interactive URL-summary session on the console.

    Steps:
      1. Read the OpenAI key from the ``OPENAI_KEY`` environment variable,
         prompting for it (and exporting it) when it is not set.
      2. Prompt for a URL, falling back to ``DEFAULT_URL`` on empty input,
         and validate it.
      3. Load the page with ``UnstructuredURLLoader`` and write it to
         ``summary.csv``.
      4. Load that CSV into the ``URL_Summary`` EvaDB table and run the
         ``ChatGPT`` function over it to produce the summary.
      5. Print the first response.

    Any exception raised from step 2 onwards is reported on stdout rather
    than propagated. Temporary artifacts are removed via ``cleanup()`` on
    both the success and the failure path.
    """
    print("🔮 Welcome to EvaDB! This app lets you summarize the content of any URL.\n")

    # Get OpenAI key if needed
    try:
        os.environ["OPENAI_KEY"]
    except KeyError:
        os.environ["OPENAI_KEY"] = input("🔑 Enter your OpenAI API key: ")

    try:
        # Get the url; blank (or whitespace-only) input selects the default.
        url_link = input(
            "🔗 Enter the URL (press Enter to use our default URL) : "
        ).strip()
        if not url_link:
            url_link = DEFAULT_URL

        # validators.url returns a falsy failure object for a malformed URL.
        if not validators.url(url_link):
            raise ValueError("Please enter a valid URL.")

        print("\n⏳ Loading URL data\n")
        url_data = UnstructuredURLLoader(urls=[url_link]).load()

        # The loaded documents are stored as a single row in a one-column
        # frame and handed to EvaDB through a CSV file.
        df = pd.DataFrame({"text": [url_data]})
        df.to_csv("summary.csv")
        print(df)

        print("📶 Establishing evadb api cursor connection.")
        cursor = evadb.connect().cursor()

        # Load summary into table (dropped first so every run starts fresh)
        cursor.drop_table("URL_Summary", if_exists=True).execute()
        cursor.query(
            """CREATE TABLE IF NOT EXISTS URL_Summary (text TEXT(4096));"""
        ).execute()
        cursor.load("summary.csv", "URL_Summary", "csv").execute()

        # Generate summary with chatgpt udf
        print("⏳ Generating Summary (may take a while)... \n")
        query = "Create a summary of the provided content in 250-300 words."
        generate_chatgpt_response_rel = cursor.table("URL_Summary").select(
            f"ChatGPT('{query}', text)"
        )
        responses = generate_chatgpt_response_rel.df()["chatgpt.response"]
        print(responses[0], "\n")
    except Exception as e:
        # Top-level boundary of an interactive script: report the error
        # once instead of dumping a traceback.
        print("❗️ Session ended with error : ", e)
    else:
        print("✅ Session ended.")
    finally:
        # Single cleanup point shared by the success and failure paths.
        cleanup()
        print("===========================================")


if __name__ == "__main__":
    main()
57 changes: 57 additions & 0 deletions test/app_tests/test_url_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import subprocess
import unittest
from pathlib import Path
from test.util import get_evadb_for_testing, shutdown_ray


class URLSummaryTest(unittest.TestCase):
    """End-to-end smoke test that runs the URL summary app as a subprocess."""

    @classmethod
    def setUpClass(cls):
        # Start from a clean catalog and export the configured "ray" setting
        # through the environment.
        cls.evadb = get_evadb_for_testing()
        cls.evadb.catalog().reset()
        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        pass

    def tearDown(self) -> None:
        shutdown_ray()

    def test_should_run_url_summary_app(self):
        """Run the app on its default URL and check the output for a keyword."""
        app_path = Path("apps", "url_summary", "url_summary.py")
        input1 = "\n\n"  # Summarize the default url.
        # Assuming that OPENAI_KEY is already set as an environment variable
        inputs = input1
        command = ["python", app_path]

        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = process.communicate(inputs.encode())

        decoded_stdout = stdout.decode()
        # Print before asserting so a failing run still shows the app output.
        print(decoded_stdout)
        print(stderr.decode())

        # Each keyword needs its own membership test: a chain such as
        # `"a" or "b" in text` evaluates to the truthy literal "a" and can
        # never fail.
        expected_keywords = ("Passkeys", "AliExpress", "Rate limit")
        self.assertTrue(
            any(keyword in decoded_stdout for keyword in expected_keywords),
            f"none of {expected_keywords} found in app output",
        )