Skip to content

Commit

Permalink
Merge pull request #48 from 4m9d/feat/#47/develop-summary-fragmentati…
Browse files Browse the repository at this point in the history
…on-logic(swm-405)

Feat/#47/develop summary fragmentation logic(swm 405)
  • Loading branch information
Son-GyeongSik authored Nov 11, 2023
2 parents 4a14bcc + 63b571f commit b9ea851
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 84 deletions.
21 changes: 1 addition & 20 deletions .github/workflows/main.yml → .github/workflows/deploy-main.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Sroom-AI-Deploy
name: Sroom-AI-Deploy-Main

on:
workflow_dispatch:
Expand Down Expand Up @@ -33,22 +33,3 @@ jobs:
tmux send-keys -t server "python3 main.py server" C-m
tmux send-keys -t celery "celery -A celery_app worker --concurrency=10 -l info" C-m
- name: Deploy Test Server
uses: appleboy/[email protected]
with:
host: ${{ secrets.AWS_SSH_TEST_HOST }}
username: ubuntu
key: ${{ secrets.SSH_SECRET_KEY }}
script_stop: true
script: |
cd sroom-ai/
git pull
pip3 install -r requirements.txt
tmux send-keys -t celery "^C" C-m
tmux send-keys -t server "^C" C-m
tmux send-keys -t server "python3 main.py server" C-m
tmux send-keys -t celery "celery -A celery_app worker --concurrency=2 -l info" C-m
39 changes: 39 additions & 0 deletions .github/workflows/deploy-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Workflow: check out a pull request's head branch on the test host over SSH
# and restart the `server` and `celery` tmux sessions there.
name: Sroom-AI-Deploy-Test

# Runs on manual dispatch and on pull requests that target `main`.
on:
workflow_dispatch:
pull_request:
branches:
- main
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY }}
aws-region: ap-northeast-2

- name: Deploy Test Server
# NOTE(review): the action reference below looks mangled by e-mail
# obfuscation in this capture; presumably `appleboy/ssh-action@<version>` -
# restore it from the repository before using this file.
uses: appleboy/[email protected]
with:
host: ${{ secrets.AWS_SSH_TEST_HOST }}
username: ubuntu
key: ${{ secrets.SSH_SECRET_KEY }}
script_stop: true
# NOTE(review): `github.event.pull_request.head.ref` is interpolated straight
# into the remote shell script. A branch name is user-controlled text, so
# consider validating it or passing it through an environment variable.
# NOTE(review): on `workflow_dispatch` there is no pull_request payload, so
# the expression presumably expands to an empty string and `git checkout ''`
# fails - confirm and add a fallback ref if manual runs are needed.
script: |
cd sroom-ai/
git fetch
git checkout '${{ github.event.pull_request.head.ref }}'
git pull
pip3 install -r requirements.txt
tmux send-keys -t celery "^C" C-m
tmux send-keys -t server "^C" C-m
tmux send-keys -t server "python3 main.py server" C-m
tmux send-keys -t celery "celery -A celery_app worker --concurrency=2 -l info" C-m
10 changes: 6 additions & 4 deletions app/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from app.script import script, scriptService
from main import constants
from app.summary import summary
from app.quiz import quiz, quizv2
from app.summary import summaryv2
from app.quiz import quiz
from app.quiz import quizv2


class ResponseModel:
Expand All @@ -21,7 +23,7 @@ def to_dict(self):
'is_valid': self.is_valid,
'summary': self.summary,
'quizzes': self.quizzes,
'tokens' : self.tokens
'tokens': self.tokens
}


Expand All @@ -38,8 +40,8 @@ def index(video_id: str = '', video_title: str = ''):
if youtube_script.is_valid:
response.is_valid = 1

summary_result = loop.run_until_complete(summary.generate_summary(youtube_script.text, video_title))
quizzes_result = loop.run_until_complete(quizv2.generate_quizzes(summary_result, youtube_script.token_count))
summary_result, summaries = loop.run_until_complete(summaryv2.generate_summary(youtube_script.raw_script, video_title))
quizzes_result = loop.run_until_complete(quizv2.generate_quizzes(summaries))

response.summary = summary_result
response.quizzes = quizzes_result
Expand Down
40 changes: 15 additions & 25 deletions app/quiz/quiz.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,34 @@
import json

from app.gpt import gpt
from main import constants
from app.gpt import gpt

MAX_TRY_COUNT = 3


async def generate_quiz(summary: str):

quiz_prompt = constants['prompt']['quiz']['kr']
prompt = summary + quiz_prompt
async def generate_quizzes(summary: str, script_tokens: int):
quiz_count = set_quiz_count(script_tokens)
prompt = summary + constants['prompt']['multiple_choice_quiz']['kr'] + str(quiz_count)
quiz_json = {}
system_message = constants['prompt']['multiple_choice_quiz']['system_message']

for count in range(MAX_TRY_COUNT):
gpt_response = await gpt.request_gpt(prompt)
gpt_response = await gpt.request_gpt(prompt, system_message)
quiz_json, is_valid = _reformat_quiz(gpt_response)
if is_valid:
break

quizzes = []
for quiz in quiz_json['quizzes']:
# quiz = await validation_quiz(json.dumps(quiz))
quizzes.append(quiz)

return quizzes


async def validation_quiz(raw_quiz: str):
return quiz_json

quiz_validation_prompt = constants['prompt']['quiz_validation']['kr']
prompt = raw_quiz + quiz_validation_prompt
quiz_json = {}

for count in range(MAX_TRY_COUNT):
gpt_response = await gpt.request_gpt(prompt)
quiz_json, is_valid = _reformat_quiz(gpt_response)
if is_valid:
break
def set_quiz_count(script_tokens: int):
quiz_count = 3
if script_tokens > 5000:
quiz_count += int((script_tokens - 5000) / 2500) + 1
if quiz_count > 15:
quiz_count = 15

return quiz_json
return quiz_count


def _reformat_quiz(quiz_json: str):
Expand All @@ -49,6 +39,6 @@ def _reformat_quiz(quiz_json: str):
quiz_json = json.loads(quiz_json)
except json.decoder.JSONDecodeError as e:
print("JSON Decode Error : retry generate quiz")
return {'quizzes': []}, False
return [{'quiz_type': 1, 'quiz_question': 'ERROR!', 'quiz_select_options': ['퀴즈 생성중 오류가 발생했습니다. ㅠㅠ'], 'answer': 1}], False

return quiz_json, True
30 changes: 18 additions & 12 deletions app/quiz/quizv2.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import json

from main import constants
Expand All @@ -6,8 +7,23 @@
MAX_TRY_COUNT = 3


async def generate_quizzes(summary: str, script_tokens: int):
quiz_count = set_quiz_count(script_tokens)
async def generate_quizzes(summaries: list):
    """Generate quizzes for every summary chunk and return them as one flat list.

    A single summary gets three quizzes; when there are several summaries each
    one gets two. All chunks are requested concurrently through
    ``generate_quizzes_chunk``.

    Args:
        summaries: Per-chunk summary texts.

    Returns:
        A list holding the items of every chunk result, in chunk order.
    """
    quiz_count = 3 if len(summaries) == 1 else 2

    # gather() preserves the order of the awaitables it is given.
    quiz_chunks = await asyncio.gather(
        *[generate_quizzes_chunk(summary, quiz_count) for summary in summaries]
    )

    return [quiz for quiz_chunk in quiz_chunks for quiz in quiz_chunk]


async def generate_quizzes_chunk(summary: str, quiz_count: int):
prompt = summary + constants['prompt']['multiple_choice_quiz']['kr'] + str(quiz_count)
quiz_json = {}
system_message = constants['prompt']['multiple_choice_quiz']['system_message']
Expand All @@ -21,16 +37,6 @@ async def generate_quizzes(summary: str, script_tokens: int):
return quiz_json


def set_quiz_count(script_tokens: int):
    """Return how many quizzes to request for a script of the given token count.

    Scripts of up to 5000 tokens get 3 quizzes. Above that, one extra quiz is
    added immediately and another for every further 2500 tokens, capped at 15.

    Args:
        script_tokens: Token count of the script.

    Returns:
        The quiz count, between 3 and 15.
    """
    if not script_tokens > 5000:
        return 3

    extra = int((script_tokens - 5000) / 2500) + 1
    return min(3 + extra, 15)


def _reformat_quiz(quiz_json: str):
quiz_json = quiz_json.replace("\n", "")
quiz_json = quiz_json.replace("\"", '"')
Expand Down
62 changes: 62 additions & 0 deletions app/summary/summaryv2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import asyncio
import datetime
import re

from main import constants
from app.gpt import gpt


async def generate_summary(scripts: dict, video_title: str):
    """Summarise a transcript chunk by chunk and stitch the results together.

    The transcript is split with ``divide_chunk``. Each chunk is sent to
    ``gpt.request_gpt`` concurrently, together with the ``final_summary``
    prompt and system message. Every chunk summary is then preceded by an HTML
    timestamp button showing that chunk's start time.

    Args:
        scripts: Transcript entries, passed straight to ``divide_chunk``.
        video_title: Accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(final_summary, summaries)``: the combined text after
        ``reformat_summary`` and the per-chunk results from ``asyncio.gather``.
    """
    start_times, chunks = divide_chunk(scripts)
    final_prompts = constants['prompt']['final_summary']
    user_prompt = final_prompts['kr']

    # Fire all chunk requests at once; gather() keeps results in chunk order.
    requests = [
        gpt.request_gpt(user_prompt + "\n script : " + chunk, final_prompts['system_message'])
        for chunk in chunks
    ]
    summaries = await asyncio.gather(*requests)

    button_style = ('color:#FA5B3E;font-size: 1.125rem;line-height: 1.75rem;text-decoration-line:none;'
                    'display:inline-block;background-color:rgba(250, 91, 62, 0.2);'
                    'border-radius:0.25rem;padding:0.125rem 0.25rem;')

    pieces = []
    for position, summary in enumerate(summaries):
        # Render the chunk's start offset as H:MM:SS; the button id is the
        # same label with the colons removed.
        label = str(datetime.timedelta(seconds=int(start_times[position])))
        button = ('<button id="' + label.replace(":", "") + '" class="timestamp" style="'
                  + button_style + '">' + label + '</button>' + '\n')
        pieces.append(button)
        pieces.append(summary + '\n \n ')

    return reformat_summary(''.join(pieces)), summaries


def divide_chunk(scripts: dict):

chunk_text = ''
time_stamp = 0

time_stamps = []
chunks = []
for script in scripts:
if len(chunk_text) > 3000:
chunk_text.replace("[음악]", "")
chunk_text.replace("[박수]", "")
chunks.append(chunk_text)
time_stamps.append(time_stamp)
time_stamp = script['start']
chunk_text = script['text'] + ' '
else:
chunk_text += script['text']

if len(chunk_text) < 1000:
chunks[-1] += chunk_text
else:
time_stamps.append(time_stamp)
chunks.append(chunk_text)

return time_stamps, chunks


def reformat_summary(summary: str):
    """Clean up model-generated markdown before it is returned.

    Unescapes ``\\#`` to ``#`` and removes every triple-backtick code fence
    marker.

    Args:
        summary: Raw summary text.

    Returns:
        The cleaned summary string.
    """
    # Strings are immutable: the result of replace() has to be assigned back.
    summary = summary.replace(r"\#", "#")
    summary = summary.replace("```", "")
    return summary
27 changes: 4 additions & 23 deletions constants.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,14 @@ model_parameter :
prompt :
system_message : {"role": "system", "content": "You are an assistant that generates quizzes and summaries"}
final_summary:
system_message : {"role": "system", "content": "write the given text in the given markdown format, where applicable : ## {Title} \n ### {section name} \n {content}"}
en : "\n\n Please summarize the article in make sure to write the summary in Korean. Please write in Korean but English terms in English. And please make the summary in the form of a markdown raw code with escape sequence. And please exclude inline codes such as ```."
kr : "\n\n 위 글을 한국어로 마크다운 형식으로 만들어줘. 코드블록은 빼고 작성해줘.\n
Heading2로 제목을 달아주고, 각 키워드나 핵심 내용들을 나눠서 작성해줘.\n\n"
system_message : {"role": "system", "content": "write the given text in the given markdown format, where applicable : ### {section name} \n {content}"}
en : "\n\n Please summarize the article in make sure to write the summary in Korean. Please write in Korean but English terms in English. And please make the summary in the form of a markdown raw code with escape sequence."
kr : "\n\n 아래는 유튜브 스크립트야. 해당 스크립트를 한국어로 마크다운 형식으로 요약해줘. 키워드나 핵심 내용들을 나눠서 작성해줘."

summary :
system_message: { "role": "system", "content": "summarize the given text in korean so that it contains everything information as much as possible." }
en : "\n\n Please summarize the above script so that everything is reflected as much as possible. Please write a summary as if the student is writing down the contents of the class in a notebook. Please make the summary in Korean."
kr : "\n\n 아래는 유튜브 영상 제목과 그 영상의 스크립트야. 제목과 스크립트 내용을 바탕으로 요약을 진행해줘. 최대한 디테일하게 내용을 담아줘. 요약은 한국어로 진행해줘."
quiz :
en : "\n\n Based on the summary above, please give me 1 multiple choice question, 1 short answer question, and 1 True or False question. \n
Please answer in JSON format and follow the format below. \n
quizzes: [{\"quiz_type\":\"\" ,\"quiz_question\": \"\" \"quiz_select_options\": [], \"answer\":\"\"}] \n
quiz_type is assigned to 1 for multiple choice, 2 for short answer, and 3 for True or False \n
quiz_question allows questions to go in\"
Please put multiple choice options in the quiz_select_option list. The number of optional list elements must be 5. In multiple choice, answer is to put a number in the option list with the correct answer among the options (starting from 1)\n
For short answer and TF problems, please return the option list as an empty list. Please unify the blank list format to []\n
Please return the TF question to 1 if true and 0 if false. All answer must be filled out. If the answer is too long, please give me a short answer. Make sure to give one question for each type. Please write the quiz questions, answers, and options in Korean"
kr : "\n\n 위 요약본을 바탕으로 퀴즈를 만들어줘. 퀴즈는 객관식, 주관식, true or false 문제 각각 1문제씩 만들어줘.\n
퀴즈는 json 형식으로 만들어주고, 구체적인 형식은 아래와 같아. \n
quizzes: [{\"quiz_type\":\"\" ,\"quiz_question\": \"\" \"quiz_select_options\": [], \"answer\":\"\"}] \n
quiz_type은 객관식은 1, 주관식은 2, true or false 문제는 3으로 할당해줘.\n
quiz_question에는 퀴즈 문제가 들어가게 해줘.\n
quiz_select_options에는 객관식에서 선택할 수 있는 선택 옵션들이 string형식으로 들어간 리스트로 만들어줘. 주관식, true or false 같은 경우에는 빈 리스트인 [] 형식으로 반환해줘.\n
answer는 객관식의 경우에는 quiz_select_options에서 정답인 요소가 들어간 index번호를 주면 되 (인덱스 시작번호는 1이야).\n
주관식은 주관식의 답이 string형식으로 들어가고 리스트와 같은 다른 형식은 허용되지 않아.
true or false 문제는 답이 true일 경우 1, 거짓일 경우 0이 들어가면되.
모든 answer항목은 반드시 답이 채워져 있어야해. 빈칸이면 안되."

multiple_choice_quiz:
system_message: { "role": "system", "content": "Make a multiple choice quiz with the given text" }
Expand Down

0 comments on commit b9ea851

Please sign in to comment.