-
Notifications
You must be signed in to change notification settings - Fork 0
/
vidgenie.py
431 lines (354 loc) · 16.9 KB
/
vidgenie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
import os
import json
from typing import List, Union, Optional
import dspy
import pandas as pd
import pm4py
from pydantic import BaseModel
from twelvelabs import TwelveLabs
from groq import Groq
import chromadb
import requests
from pprint import pprint
import logging
import time
from twelvelabs.exceptions import InternalServerError
# Set up API keys and environment variables
os.environ['API_URL'] = 'https://api.twelvelabs.io/v1.2'
os.environ['TWELVE_LABS_API_KEY'] = 'tlk_2N42Q7B0R5JHV029BKVH51WXV26H' # It's better to set this outside the script
os.environ['GROQ_API_KEY'] = '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' # It's better to set this outside the script
TWELVE_LABS_API_KEY = os.getenv("TWELVE_LABS_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Check if API keys are set
if not TWELVE_LABS_API_KEY:
raise ValueError("TWELVE_LABS_API_KEY is not set or is empty. Please set this environment variable.")
if not GROQ_API_KEY:
raise ValueError("GROQ_API_KEY is not set or is empty. Please set this environment variable.")
# Initialize clients
twelve_labs_client = TwelveLabs(api_key=TWELVE_LABS_API_KEY)
groq_client = Groq(api_key=GROQ_API_KEY)
chroma_client = chromadb.Client()
class Slide(BaseModel):
title: str
content: Union[str, List[str]]
image_path: Optional[str] = None
class Presentation(BaseModel):
title: str
subtitle: str
slides: List[Slide]
file_path: Optional[str] = None
def insert_slide_at(self, slide: Slide, position: int) -> None:
self.slides.insert(position, slide)
def to_ppt(self, file_path: str = None):
if file_path:
self.file_path = file_path
from pptx import Presentation as PPTPresentation
from pptx.util import Inches
from PIL import Image
prs = PPTPresentation()
# Title Slide
title_slide_layout = prs.slide_layouts[0]
slide = prs.slides.add_slide(title_slide_layout)
title = slide.shapes.title
subtitle = slide.placeholders[1]
title.text = self.title
subtitle.text = self.subtitle
# Add Slides
for slide_data in self.slides:
slide_layout = prs.slide_layouts[1]
slide = prs.slides.add_slide(slide_layout)
title = slide.shapes.title
content = slide.placeholders[1]
title.text = slide_data.title
if isinstance(slide_data.content, list):
content.text = "\n".join(slide_data.content)
else:
content.text = slide_data.content
if slide_data.image_path:
img = Image.open(slide_data.image_path)
width, height = img.size
aspect_ratio = width / height
max_width = Inches(6)
max_height = Inches(4.5)
if aspect_ratio > 1:
if width > max_width:
width = max_width
height = width / aspect_ratio
else:
if height > max_height:
height = max_height
width = height * aspect_ratio
left = (prs.slide_width - width) / 2
top = (prs.slide_height - height) / 2
slide.shapes.add_picture(slide_data.image_path, left, top, width, height)
prs.save(file_path)
print(f"Presentation saved as '{file_path}'")
class VideoAnalysis(BaseModel):
summary: str
topics: List[str]
chapters: List[dict]
transcript: str
class VidGenieContent(BaseModel):
title: str
content_type: str
sections: List[dict]
class GenerateGlossarySlide(dspy.Signature):
"""
Generates a glossary slide for the provided presentation JSON string.
The format should be:
- {TERM}: {DEFINITION}
"""
presentation_json_str = dspy.InputField(desc="Presentation JSON string.")
glossary_slide = dspy.OutputField(desc="Generated glossary slide content. NO FORMATING OR MARKDOWN. PLAINTEXT ONLY", prefix="```plaintext\nGlossary Slide:\n\n")
class GlossaryModule(dspy.Module):
def __init__(self, **forward_args):
super().__init__()
self.forward_args = forward_args
self.output = None
def forward(self, presentation_json_str):
pred = dspy.Predict(GenerateGlossarySlide)
self.output = pred(presentation_json_str=presentation_json_str).glossary_slide
return self.output
class VidGenieGenerator:
def __init__(self):
self.twelve_labs_client = twelve_labs_client # Use the global client
self.index_id = "66aa6336c0cd130695ae109c" # Make sure this is the correct index ID
self.api_key = TWELVE_LABS_API_KEY
def list_indexes(self):
try:
indexes = self.twelve_labs_client.index.list()
print(f"Type of indexes: {type(indexes)}")
print(f"Structure of indexes: {indexes}")
for index_obj in indexes.root:
print(f"Index ID: {index_obj.id}, Name: {index_obj.name}, Created At: {index_obj.created_at}, Updated At: {index_obj.updated_at}")
for engine in index_obj.engines.root:
print(f"Engine Name: {engine.name}, Options: {engine.options}")
except Exception as e:
print(f"Error retrieving indexes: {e}")
def analyze_video(self, video_file: str, is_youtube_url: bool = False, max_retries: int = 3, retry_delay: int = 5) -> Optional[VideoAnalysis]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info(f"Analyzing video: {video_file}, is_youtube_url: {is_youtube_url}")
headers = {
"Accept": "application/json",
"x-api-key": self.twelve_labs_client.api_key
}
for attempt in range(max_retries):
try:
if is_youtube_url:
logger.info("Processing YouTube URL")
payload = {
"index_id": self.index_id,
"url": video_file
}
response = requests.post("https://api.twelvelabs.io/v1.2/tasks", json=payload, headers=headers)
else:
logger.info("Processing local file")
if not os.path.exists(video_file):
logger.error(f"Error: The file '{video_file}' does not exist.")
return None
file_size = os.path.getsize(video_file)
logger.info(f"File size: {file_size} bytes")
if file_size > 2000000000: # 2GB limit
logger.error("Error: File size exceeds 2GB limit.")
return None
with open(video_file, 'rb') as file:
logger.info(f"File opened successfully: {video_file}")
files = {'file': (os.path.basename(video_file), file)}
data = {'index_id': self.index_id}
response = requests.post("https://api.twelvelabs.io/v1.2/tasks", files=files, data=data, headers=headers)
response.raise_for_status()
task = response.json()
logger.info(f"Task created successfully: {task['id']}")
break # If successful, break out of the retry loop
except requests.exceptions.RequestException as e:
logger.error(f"Attempt {attempt + 1} failed: {e}")
logger.error(f"Response status code: {response.status_code}")
logger.error(f"Response headers: {response.headers}")
logger.error(f"Response content: {response.text}")
try:
error_details = response.json()
logger.error(f"Error details: {json.dumps(error_details, indent=2)}")
except json.JSONDecodeError:
logger.error("Could not parse error response as JSON")
if attempt < max_retries - 1:
logger.info(f"Retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
else:
logger.error("Max retries reached. Unable to process the video.")
return None
try:
# Poll for task completion
while True:
response = requests.get(f"https://api.twelvelabs.io/v1.2/tasks/{task['id']}", headers=headers)
response.raise_for_status()
task_status = response.json()
if task_status['status'] == 'ready':
break
time.sleep(2)
logger.info(f"Embedding done: {task_status['status']}")
# Retrieve video information
response = requests.get(f"https://api.twelvelabs.io/v1.2/indexes/{self.index_id}/videos/{task_status['video_id']}", headers=headers)
response.raise_for_status()
video_info = response.json()
# Generate summary
summary_response = requests.post(f"https://api.twelvelabs.io/v1.2/summarize",
json={"video_id": task_status['video_id'], "type": "summary"},
headers=headers)
summary_response.raise_for_status()
summary = summary_response.json()['summary']
# Generate gist (topics and chapters)
gist_response = requests.post(f"https://api.twelvelabs.io/v1.2/generate",
json={"video_id": task_status['video_id'], "types": ["topic", "chapter"]},
headers=headers)
gist_response.raise_for_status()
gist = gist_response.json()
# Generate transcript
transcript_response = requests.post(f"https://api.twelvelabs.io/v1.2/generate",
json={"video_id": task_status['video_id'], "prompt": "Provide a full transcript of the video"},
headers=headers)
transcript_response.raise_for_status()
transcript = transcript_response.json()['data']
return VideoAnalysis(
summary=summary,
topics=gist['topics'],
chapters=gist['chapters'],
transcript=transcript
)
except requests.exceptions.RequestException as e:
logger.error(f"An error occurred while processing the task: {e}")
import traceback
traceback.print_exc()
return None
def generate_content(self, video_analysis: VideoAnalysis, content_type: str) -> VidGenieContent:
prompt = f"""
Based on the following video analysis, generate a {content_type} for an educational course:
Summary: {video_analysis.summary}
Topics: {', '.join(video_analysis.topics)}
Chapters: {video_analysis.chapters}
Transcript: {video_analysis.transcript[:1000]}... # Truncated for brevity
Please format the {content_type} as a JSON structure with the following format:
{{
"title": "Overall title for the {content_type}",
"sections": [
{{
"title": "Section title",
"content": "Section content or bullet points"
}},
...
]
}}
"""
response = groq_client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model="mixtral-8x7b-32768",
)
generated_content = response.choices[0].message.content
parsed_content = json.loads(generated_content)
return VidGenieContent(
title=parsed_content["title"],
content_type=content_type,
sections=parsed_content["sections"]
)
def create_syllabus(self, content: VidGenieContent) -> str:
syllabus = f"# {content.title}\n\n"
for section in content.sections:
syllabus += f"## {section['title']}\n{section['content']}\n\n"
return syllabus
def create_study_guide(self, content: VidGenieContent) -> str:
study_guide = f"# {content.title}\n\n"
for section in content.sections:
study_guide += f"## {section['title']}\n{section['content']}\n\n"
return study_guide
def create_presentation(self, content: VidGenieContent) -> Presentation:
slides = [Slide(title=section['title'], content=section['content']) for section in content.sections]
presentation = Presentation(
title=content.title,
subtitle="Generated by VidGenie",
slides=slides
)
glossary_slide = self.generate_glossary_slide(presentation)
presentation.insert_slide_at(glossary_slide, position=-1)
return presentation
def generate_glossary_slide(self, presentation: Presentation) -> Slide:
presentation_json_str = presentation.json()
glossary = GlossaryModule()
glossary_slide_content = glossary.forward(presentation_json_str=presentation_json_str)
return Slide(title="Glossary of Terms", content=glossary_slide_content)
def query_video_content(self, query: str) -> List[dict]:
collection = chroma_client.get_or_create_collection("video_embeddings")
embedding = self.twelve_labs_client.embed.create(
engine_name="Marengo-retrieval-2.6",
text=query,
text_truncate="start",
)
query_result = collection.query(
query_embeddings=[embedding.text_embedding.float],
n_results=2,
)
return query_result['metadatas'][0]
def generate_response(self, video_id: str, prompt: str) -> str:
GENERATE_URL = "https://api.twelvelabs.io/v1.2/generate"
headers = {"x-api-key": TWELVE_LABS_API_KEY}
data = {"video_id": video_id, "prompt": prompt}
response = requests.post(GENERATE_URL, headers=headers, json=data)
return response.json()
def process_video(video_file: str, content_types: List[str], is_youtube_url: bool = False):
print(f"Processing video: {video_file}, content_types: {content_types}, is_youtube_url: {is_youtube_url}")
generator = VidGenieGenerator()
print("Analyzing video")
video_analysis = generator.analyze_video(video_file, is_youtube_url)
if video_analysis is None:
print("Video analysis failed.")
return None
print("Video analysis completed successfully")
results = {}
for content_type in content_types:
print(f"Generating content for type: {content_type}")
content = generator.generate_content(video_analysis, content_type)
if content_type == "syllabus":
results[content_type] = generator.create_syllabus(content)
elif content_type == "study_guide":
results[content_type] = generator.create_study_guide(content)
elif content_type == "presentation":
presentation = generator.create_presentation(content)
results[content_type] = presentation.to_dict()
print(f"Content generation completed. Results: {results}")
return results
def main():
print("Starting main function")
from dspygen.utils.dspy_tools import init_ol
init_ol()
print("Creating VidGenieGenerator instance")
generator = VidGenieGenerator()
print("Listing indexes")
generator.list_indexes()
# Example usage
video_input = "Building Custom GPTs with GPT Crawler.mp4"
is_youtube_url = False
content_types = ["summary", "transcript"]
print(f"Starting video processing with input: {video_input}")
results = process_video(video_input, content_types, is_youtube_url)
if results is None:
print("Video processing failed. Exiting.")
return
print("Video processing completed successfully")
print(json.dumps(results, indent=2))
# Create PowerPoint presentation if requested
if "presentation" in results:
print("Creating PowerPoint presentation")
presentation = Presentation(**results["presentation"])
presentation.to_ppt(file_path="generated_presentation.pptx")
# Example of querying video content
print("Querying video content")
query_results = generator.query_video_content("custom GPTs")
print("Query results:", query_results)
# Example of generating a response based on video content
if query_results:
print("Generating response based on video content")
video_id = query_results[0]['video_id']
response = generator.generate_response(video_id, "What are the main steps to create a custom GPT?")
print("Generated response:", response)
print("Main function completed")
if __name__ == '__main__':
main()