Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/#47/develop summary fragmentation logic(swm 405) #48

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Sroom-AI-Deploy
name: Sroom-AI-Deploy-Main

on:
workflow_dispatch:
Expand Down Expand Up @@ -33,22 +33,3 @@ jobs:

tmux send-keys -t server "python3 main.py server" C-m
tmux send-keys -t celery "celery -A celery_app worker --concurrency=10 -l info" C-m

- name: Deploy Test Server
uses: appleboy/[email protected]
with:
host: ${{ secrets.AWS_SSH_TEST_HOST }}
username: ubuntu
key: ${{ secrets.SSH_SECRET_KEY }}
script_stop: true
script: |
cd sroom-ai/
git pull
pip3 install -r requirements.txt

tmux send-keys -t celery "^C" C-m
tmux send-keys -t server "^C" C-m

tmux send-keys -t server "python3 main.py server" C-m
tmux send-keys -t celery "celery -A celery_app worker --concurrency=2 -l info" C-m

39 changes: 39 additions & 0 deletions .github/workflows/deploy-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Deploys a branch to the test server over SSH and restarts the API server
# and the Celery worker inside their existing tmux sessions.
#
# Triggers:
#   - pull_request against main -> deploys the pull request's head branch
#   - workflow_dispatch         -> deploys the branch the run was started on
name: Sroom-AI-Deploy-Test

on:
  workflow_dispatch:
  pull_request:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY }}
          aws-region: ap-northeast-2

      - name: Deploy Test Server
        uses: appleboy/[email protected]
        env:
          # The branch name is attacker-controllable text. It is handed to the
          # remote shell as an environment variable instead of being expanded
          # into the script body, so it can never be parsed as shell syntax.
          # github.head_ref is only set for pull_request events; fall back to
          # github.ref_name so manual (workflow_dispatch) runs still work.
          DEPLOY_REF: ${{ github.head_ref || github.ref_name }}
        with:
          host: ${{ secrets.AWS_SSH_TEST_HOST }}
          username: ubuntu
          key: ${{ secrets.SSH_SECRET_KEY }}
          script_stop: true
          # Forward DEPLOY_REF from the runner to the remote script.
          envs: DEPLOY_REF
          script: |
            cd sroom-ai/

            git fetch
            git checkout "$DEPLOY_REF"
            git pull

            pip3 install -r requirements.txt

            # Stop the running processes before starting the new revision.
            tmux send-keys -t celery "^C" C-m
            tmux send-keys -t server "^C" C-m

            tmux send-keys -t server "python3 main.py server" C-m
            tmux send-keys -t celery "celery -A celery_app worker --concurrency=2 -l info" C-m
10 changes: 6 additions & 4 deletions app/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from app.script import script, scriptService
from main import constants
from app.summary import summary
from app.quiz import quiz, quizv2
from app.summary import summaryv2
from app.quiz import quiz
from app.quiz import quizv2


class ResponseModel:
Expand All @@ -21,7 +23,7 @@ def to_dict(self):
'is_valid': self.is_valid,
'summary': self.summary,
'quizzes': self.quizzes,
'tokens' : self.tokens
'tokens': self.tokens
}


Expand All @@ -38,8 +40,8 @@ def index(video_id: str = '', video_title: str = ''):
if youtube_script.is_valid:
response.is_valid = 1

summary_result = loop.run_until_complete(summary.generate_summary(youtube_script.text, video_title))
quizzes_result = loop.run_until_complete(quizv2.generate_quizzes(summary_result, youtube_script.token_count))
summary_result, summaries = loop.run_until_complete(summaryv2.generate_summary(youtube_script.raw_script, video_title))
quizzes_result = loop.run_until_complete(quizv2.generate_quizzes(summaries))

response.summary = summary_result
response.quizzes = quizzes_result
Expand Down
40 changes: 15 additions & 25 deletions app/quiz/quiz.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,34 @@
import json

from app.gpt import gpt
from main import constants
from app.gpt import gpt

MAX_TRY_COUNT = 3


async def generate_quiz(summary: str):

quiz_prompt = constants['prompt']['quiz']['kr']
prompt = summary + quiz_prompt
async def generate_quizzes(summary: str, script_tokens: int):
quiz_count = set_quiz_count(script_tokens)
prompt = summary + constants['prompt']['multiple_choice_quiz']['kr'] + str(quiz_count)
quiz_json = {}
system_message = constants['prompt']['multiple_choice_quiz']['system_message']

for count in range(MAX_TRY_COUNT):
gpt_response = await gpt.request_gpt(prompt)
gpt_response = await gpt.request_gpt(prompt, system_message)
quiz_json, is_valid = _reformat_quiz(gpt_response)
if is_valid:
break

quizzes = []
for quiz in quiz_json['quizzes']:
# quiz = await validation_quiz(json.dumps(quiz))
quizzes.append(quiz)

return quizzes


async def validation_quiz(raw_quiz: str):
return quiz_json

quiz_validation_prompt = constants['prompt']['quiz_validation']['kr']
prompt = raw_quiz + quiz_validation_prompt
quiz_json = {}

for count in range(MAX_TRY_COUNT):
gpt_response = await gpt.request_gpt(prompt)
quiz_json, is_valid = _reformat_quiz(gpt_response)
if is_valid:
break
def set_quiz_count(script_tokens: int):
quiz_count = 3
if script_tokens > 5000:
quiz_count += int((script_tokens - 5000) / 2500) + 1
if quiz_count > 15:
quiz_count = 15

return quiz_json
return quiz_count


def _reformat_quiz(quiz_json: str):
Expand All @@ -49,6 +39,6 @@ def _reformat_quiz(quiz_json: str):
quiz_json = json.loads(quiz_json)
except json.decoder.JSONDecodeError as e:
print("JSON Decode Error : retry generate quiz")
return {'quizzes': []}, False
return [{'quiz_type': 1, 'quiz_question': 'ERROR!', 'quiz_select_options': ['ํ€ด์ฆˆ ์ƒ์„ฑ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ใ… ใ… '], 'answer': 1}], False

return quiz_json, True
30 changes: 18 additions & 12 deletions app/quiz/quizv2.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import json

from main import constants
Expand All @@ -6,8 +7,23 @@
MAX_TRY_COUNT = 3


async def generate_quizzes(summary: str, script_tokens: int):
quiz_count = set_quiz_count(script_tokens)
async def generate_quizzes(summaries: list):
    """Generate quizzes for every summary chunk concurrently and flatten them.

    A single summary gets 3 quizzes; otherwise each summary gets 2.

    Args:
        summaries: Summary texts, one per chunk.

    Returns:
        One flat list holding the items of every per-chunk result, in the
        same order as ``summaries``.
    """
    per_chunk_count = 3 if len(summaries) == 1 else 2

    # gather() preserves input order, so the quizzes stay aligned with the
    # order of the summaries they were generated from.
    chunk_results = await asyncio.gather(
        *[generate_quizzes_chunk(text, per_chunk_count) for text in summaries]
    )

    return [quiz for chunk_result in chunk_results for quiz in chunk_result]


async def generate_quizzes_chunk(summary: str, quiz_count: int):
prompt = summary + constants['prompt']['multiple_choice_quiz']['kr'] + str(quiz_count)
quiz_json = {}
system_message = constants['prompt']['multiple_choice_quiz']['system_message']
Expand All @@ -21,16 +37,6 @@ async def generate_quizzes(summary: str, script_tokens: int):
return quiz_json


def set_quiz_count(script_tokens: int):
quiz_count = 3
if script_tokens > 5000:
quiz_count += int((script_tokens - 5000) / 2500) + 1
if quiz_count > 15:
quiz_count = 15

return quiz_count


def _reformat_quiz(quiz_json: str):
quiz_json = quiz_json.replace("\n", "")
quiz_json = quiz_json.replace("\"", '"')
Expand Down
62 changes: 62 additions & 0 deletions app/summary/summaryv2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import asyncio
import datetime
import re

from main import constants
from app.gpt import gpt


async def generate_summary(scripts: dict, video_title: str):
    """Summarize a transcript chunk by chunk and stitch the results together.

    The transcript is split with ``divide_chunk``; every chunk is sent to
    ``gpt.request_gpt`` concurrently. Each returned summary is then prefixed
    with an HTML ``<button>`` showing the chunk's start time as ``H:MM:SS``.

    Args:
        scripts: Transcript entries, passed straight to ``divide_chunk``.
        video_title: Currently unused by this function.

    Returns:
        A tuple ``(final_summary, summaries)``: the combined text after
        ``reformat_summary``, and the raw per-chunk summaries.
    """
    start_times, chunks = divide_chunk(scripts)
    prompts = constants['prompt']['final_summary']
    instruction = prompts['kr']

    summaries = await asyncio.gather(*[
        gpt.request_gpt(instruction + "\n script : " + chunk, prompts['system_message'])
        for chunk in chunks
    ])

    button_style = (
        'color:#FA5B3E;font-size: 1.125rem;line-height: 1.75rem;text-decoration-line:none;'
        'display:inline-block;background-color:rgba(250, 91, 62, 0.2);border-radius:0.25rem;'
        'padding:0.125rem 0.25rem;'
    )

    sections = []
    for start, summary in zip(start_times, summaries):
        label = str(datetime.timedelta(seconds=int(start)))
        # The element id is the time label with the colons removed.
        button_id = label.replace(":", "")
        button = f'<button id="{button_id}" class="timestamp" style="{button_style}">{label}</button>\n'
        sections.append(button + summary + '\n \n ')

    return reformat_summary(''.join(sections)), summaries


# Bracketed non-speech markers ("[music]" / "[applause]" in Korean) that are
# removed from every chunk. Presumably emitted by auto-generated captions.
_CAPTION_NOISE_MARKERS = ("[음악]", "[박수]")


def _strip_caption_noise(text: str) -> str:
    """Return *text* with every caption noise marker removed."""
    for marker in _CAPTION_NOISE_MARKERS:
        text = text.replace(marker, "")
    return text


def divide_chunk(scripts: list, max_chunk_chars: int = 3000, min_tail_chars: int = 1000):
    """Group transcript entries into text chunks with their start times.

    Entries are concatenated until the running text exceeds
    ``max_chunk_chars``; the next entry then starts a new chunk. A trailing
    remainder shorter than ``min_tail_chars`` is merged into the previous
    chunk instead of becoming a chunk of its own.

    Args:
        scripts: Iterable of transcript entries, each a mapping with a
            ``'text'`` and a ``'start'`` key.
        max_chunk_chars: Length above which the current chunk is closed.
        min_tail_chars: Minimum length for the remainder to stand alone.

    Returns:
        A tuple ``(time_stamps, chunks)`` of equal length. ``time_stamps[i]``
        is the ``'start'`` of the first entry of ``chunks[i]``; the first
        chunk always gets ``0``. Both lists are empty for an empty transcript.
    """
    time_stamps = []
    chunks = []

    chunk_text = ''
    time_stamp = 0
    for script in scripts:
        if len(chunk_text) > max_chunk_chars:
            # str.replace returns a new string, so the cleaned text must be
            # the value that is stored.
            chunks.append(_strip_caption_noise(chunk_text))
            time_stamps.append(time_stamp)
            time_stamp = script['start']
            chunk_text = script['text'] + ' '
        else:
            # NOTE(review): entries are joined without a separator here,
            # unlike the first entry of a new chunk - confirm this is intended.
            chunk_text += script['text']

    if chunks and len(chunk_text) < min_tail_chars:
        # Short remainder: fold it into the previous chunk.
        chunks[-1] += _strip_caption_noise(chunk_text)
    elif chunk_text:
        # Either a long enough remainder, or a transcript so short that no
        # chunk was closed yet (there is no previous chunk to merge into).
        time_stamps.append(time_stamp)
        chunks.append(_strip_caption_noise(chunk_text))

    return time_stamps, chunks


def reformat_summary(summary: str):
    """Clean up markdown artifacts in a generated summary.

    Unescapes ``\\#`` to ``#`` and removes triple-backtick code fences.

    Args:
        summary: The combined summary text.

    Returns:
        The cleaned summary text.
    """
    # str.replace returns a new string; the result has to be kept.
    summary = summary.replace("\\#", "#")
    return summary.replace("```", "")
27 changes: 4 additions & 23 deletions constants.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,14 @@ model_parameter :
prompt :
system_message : {"role": "system", "content": "You are an assistant that generates quizzes and summaries"}
final_summary:
system_message : {"role": "system", "content": "write the given text in the given markdown format, where applicable : ## {Title} \n ### {section name} \n {content}"}
en : "\n\n Please summarize the article in make sure to write the summary in Korean. Please write in Korean but English terms in English. And please make the summary in the form of a markdown raw code with escape sequence. And please exclude inline codes such as ```."
kr : "\n\n ์œ„ ๊ธ€์„ ํ•œ๊ตญ์–ด๋กœ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ๋งŒ๋“ค์–ด์ค˜. ์ฝ”๋“œ๋ธ”๋ก์€ ๋นผ๊ณ  ์ž‘์„ฑํ•ด์ค˜.\n
Heading2๋กœ ์ œ๋ชฉ์„ ๋‹ฌ์•„์ฃผ๊ณ , ๊ฐ ํ‚ค์›Œ๋“œ๋‚˜ ํ•ต์‹ฌ ๋‚ด์šฉ๋“ค์„ ๋‚˜๋ˆ ์„œ ์ž‘์„ฑํ•ด์ค˜.\n\n"
system_message : {"role": "system", "content": "write the given text in the given markdown format, where applicable : ### {section name} \n {content}"}
en : "\n\n Please summarize the article in make sure to write the summary in Korean. Please write in Korean but English terms in English. And please make the summary in the form of a markdown raw code with escape sequence."
kr : "\n\n ์•„๋ž˜๋Š” ์œ ํŠœ๋ธŒ ์Šคํฌ๋ฆฝํŠธ์•ผ. ํ•ด๋‹น ์Šคํฌ๋ฆฝํŠธ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ ์š”์•ฝํ•ด์ค˜. ํ‚ค์›Œ๋“œ๋‚˜ ํ•ต์‹ฌ ๋‚ด์šฉ๋“ค์„ ๋‚˜๋ˆ ์„œ ์ž‘์„ฑํ•ด์ค˜."

summary :
system_message: { "role": "system", "content": "summarize the given text in korean so that it contains everything information as much as possible." }
en : "\n\n Please summarize the above script so that everything is reflected as much as possible. Please write a summary as if the student is writing down the contents of the class in a notebook. Please make the summary in Korean."
kr : "\n\n ์•„๋ž˜๋Š” ์œ ํŠœ๋ธŒ ์˜์ƒ ์ œ๋ชฉ๊ณผ ๊ทธ ์˜์ƒ์˜ ์Šคํฌ๋ฆฝํŠธ์•ผ. ์ œ๋ชฉ๊ณผ ์Šคํฌ๋ฆฝํŠธ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ์š”์•ฝ์„ ์ง„ํ–‰ํ•ด์ค˜. ์ตœ๋Œ€ํ•œ ๋””ํ…Œ์ผํ•˜๊ฒŒ ๋‚ด์šฉ์„ ๋‹ด์•„์ค˜. ์š”์•ฝ์€ ํ•œ๊ตญ์–ด๋กœ ์ง„ํ–‰ํ•ด์ค˜."
quiz :
en : "\n\n Based on the summary above, please give me 1 multiple choice question, 1 short answer question, and 1 True or False question. \n
Please answer in JSON format and follow the format below. \n
quizzes: [{\"quiz_type\":\"\" ,\"quiz_question\": \"\" \"quiz_select_options\": [], \"answer\":\"\"}] \n
quiz_type is assigned to 1 for multiple choice, 2 for short answer, and 3 for True or False \n
quiz_question allows questions to go in\"
Please put multiple choice options in the quiz_select_option list. The number of optional list elements must be 5. In multiple choice, answer is to put a number in the option list with the correct answer among the options (starting from 1)\n
For short answer and TF problems, please return the option list as an empty list. Please unify the blank list format to []\n
Please return the TF question to 1 if true and 0 if false. All answer must be filled out. If the answer is too long, please give me a short answer. Make sure to give one question for each type. Please write the quiz questions, answers, and options in Korean"
kr : "\n\n ์œ„ ์š”์•ฝ๋ณธ์„ ๋ฐ”ํƒ•์œผ๋กœ ํ€ด์ฆˆ๋ฅผ ๋งŒ๋“ค์–ด์ค˜. ํ€ด์ฆˆ๋Š” ๊ฐ๊ด€์‹, ์ฃผ๊ด€์‹, true or false ๋ฌธ์ œ ๊ฐ๊ฐ 1๋ฌธ์ œ์”ฉ ๋งŒ๋“ค์–ด์ค˜.\n
ํ€ด์ฆˆ๋Š” json ํ˜•์‹์œผ๋กœ ๋งŒ๋“ค์–ด์ฃผ๊ณ , ๊ตฌ์ฒด์ ์ธ ํ˜•์‹์€ ์•„๋ž˜์™€ ๊ฐ™์•„. \n
quizzes: [{\"quiz_type\":\"\" ,\"quiz_question\": \"\" \"quiz_select_options\": [], \"answer\":\"\"}] \n
quiz_type์€ ๊ฐ๊ด€์‹์€ 1, ์ฃผ๊ด€์‹์€ 2, true or false ๋ฌธ์ œ๋Š” 3์œผ๋กœ ํ• ๋‹นํ•ด์ค˜.\n
quiz_question์—๋Š” ํ€ด์ฆˆ ๋ฌธ์ œ๊ฐ€ ๋“ค์–ด๊ฐ€๊ฒŒ ํ•ด์ค˜.\n
quiz_select_options์—๋Š” ๊ฐ๊ด€์‹์—์„œ ์„ ํƒํ•  ์ˆ˜ ์žˆ๋Š” ์„ ํƒ ์˜ต์…˜๋“ค์ด stringํ˜•์‹์œผ๋กœ ๋“ค์–ด๊ฐ„ ๋ฆฌ์ŠคํŠธ๋กœ ๋งŒ๋“ค์–ด์ค˜. ์ฃผ๊ด€์‹, true or false ๊ฐ™์€ ๊ฒฝ์šฐ์—๋Š” ๋นˆ ๋ฆฌ์ŠคํŠธ์ธ [] ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜ํ•ด์ค˜.\n
answer๋Š” ๊ฐ๊ด€์‹์˜ ๊ฒฝ์šฐ์—๋Š” quiz_select_options์—์„œ ์ •๋‹ต์ธ ์š”์†Œ๊ฐ€ ๋“ค์–ด๊ฐ„ index๋ฒˆํ˜ธ๋ฅผ ์ฃผ๋ฉด ๋˜ (์ธ๋ฑ์Šค ์‹œ์ž‘๋ฒˆํ˜ธ๋Š” 1์ด์•ผ).\n
์ฃผ๊ด€์‹์€ ์ฃผ๊ด€์‹์˜ ๋‹ต์ด stringํ˜•์‹์œผ๋กœ ๋“ค์–ด๊ฐ€๊ณ  ๋ฆฌ์ŠคํŠธ์™€ ๊ฐ™์€ ๋‹ค๋ฅธ ํ˜•์‹์€ ํ—ˆ์šฉ๋˜์ง€ ์•Š์•„.
true or false ๋ฌธ์ œ๋Š” ๋‹ต์ด true์ผ ๊ฒฝ์šฐ 1, ๊ฑฐ์ง“์ผ ๊ฒฝ์šฐ 0์ด ๋“ค์–ด๊ฐ€๋ฉด๋˜.
๋ชจ๋“  answerํ•ญ๋ชฉ์€ ๋ฐ˜๋“œ์‹œ ๋‹ต์ด ์ฑ„์›Œ์ ธ ์žˆ์–ด์•ผํ•ด. ๋นˆ์นธ์ด๋ฉด ์•ˆ๋˜."

multiple_choice_quiz:
system_message: { "role": "system", "content": "Make a multiple choice quiz with the given text" }
Expand Down
Loading