Merge pull request #48 from 4m9d/feat/#47/develop-summary-fragmentation-logic(swm-405)

Feat/#47/develop summary fragmentation logic(swm 405)
4m9d · Nov 11, 2023 · b9ea851 · b9ea851
2 parents 4a14bcc + 63b571f
commit b9ea851
Show file tree

Hide file tree

Showing 7 changed files with 145 additions and 84 deletions.
diff --git a/.github/workflows/main.yml → .github/workflows/deploy-main.yml b/.github/workflows/main.yml → .github/workflows/deploy-main.yml
@@ -1,4 +1,4 @@
-name: Sroom-AI-Deploy
+name: Sroom-AI-Deploy-Main
 
 on:
   workflow_dispatch:
@@ -33,22 +33,3 @@ jobs:
 
             tmux send-keys -t server "python3 main.py server" C-m
             tmux send-keys -t celery "celery -A celery_app worker --concurrency=10 -l info" C-m
-
-      - name: Deploy Test Server
-        uses: appleboy/[email protected]
-        with:
-          host: ${{ secrets.AWS_SSH_TEST_HOST }}
-          username: ubuntu
-          key: ${{ secrets.SSH_SECRET_KEY }}
-          script_stop: true
-          script: |
-            cd sroom-ai/
-            git pull
-            pip3 install -r requirements.txt
-
-            tmux send-keys -t celery "^C" C-m
-            tmux send-keys -t server "^C" C-m
-
-            tmux send-keys -t server "python3 main.py server" C-m
-            tmux send-keys -t celery "celery -A celery_app worker --concurrency=2 -l info" C-m
-
diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml
@@ -0,0 +1,39 @@
+# Deploys a branch to the test server: the pull request's head branch on
+# pull_request events, or the selected ref on manual (workflow_dispatch) runs.
+name: Sroom-AI-Deploy-Test
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY }}
+          aws-region: ap-northeast-2
+
+      - name: Deploy Test Server
+        uses: appleboy/[email protected]
+        env:
+          # The branch name is attacker-controllable text on pull requests, so it
+          # is handed to the remote script as an environment variable instead of
+          # being interpolated into the shell source. head.ref is empty on
+          # workflow_dispatch, hence the fallback to the ref the run was started on.
+          DEPLOY_BRANCH: ${{ github.event.pull_request.head.ref || github.ref_name }}
+        with:
+          host: ${{ secrets.AWS_SSH_TEST_HOST }}
+          username: ubuntu
+          key: ${{ secrets.SSH_SECRET_KEY }}
+          envs: DEPLOY_BRANCH
+          script_stop: true
+          script: |
+            cd sroom-ai/
+
+            git fetch
+            git checkout "$DEPLOY_BRANCH"
+            git pull
+
+            pip3 install -r requirements.txt
+
+            tmux send-keys -t celery "^C" C-m
+            tmux send-keys -t server "^C" C-m
+
+            tmux send-keys -t server "python3 main.py server" C-m
+            tmux send-keys -t celery "celery -A celery_app worker --concurrency=2 -l info" C-m
diff --git a/app/index.py b/app/index.py
@@ -4,7 +4,9 @@
 from app.script import script, scriptService
 from main import constants
 from app.summary import summary
-from app.quiz import quiz, quizv2
+from app.summary import summaryv2
+from app.quiz import quiz
+from app.quiz import quizv2
 
 
 class ResponseModel:
@@ -21,7 +23,7 @@ def to_dict(self):
             'is_valid': self.is_valid,
             'summary': self.summary,
             'quizzes': self.quizzes,
-            'tokens' : self.tokens
+            'tokens': self.tokens
         }
 
 
@@ -38,8 +40,8 @@ def index(video_id: str = '', video_title: str = ''):
     if youtube_script.is_valid:
         response.is_valid = 1
 
-        summary_result = loop.run_until_complete(summary.generate_summary(youtube_script.text, video_title))
-        quizzes_result = loop.run_until_complete(quizv2.generate_quizzes(summary_result, youtube_script.token_count))
+        summary_result, summaries = loop.run_until_complete(summaryv2.generate_summary(youtube_script.raw_script, video_title))
+        quizzes_result = loop.run_until_complete(quizv2.generate_quizzes(summaries))
 
         response.summary = summary_result
         response.quizzes = quizzes_result

diff --git a/app/quiz/quiz.py b/app/quiz/quiz.py
@@ -1,44 +1,34 @@
 import json
 
-from app.gpt import gpt
 from main import constants
+from app.gpt import gpt
 
 MAX_TRY_COUNT = 3
 
 
-async def generate_quiz(summary: str):
-
-    quiz_prompt = constants['prompt']['quiz']['kr']
-    prompt = summary + quiz_prompt
+async def generate_quizzes(summary: str, script_tokens: int):
+    quiz_count = set_quiz_count(script_tokens)
+    prompt = summary + constants['prompt']['multiple_choice_quiz']['kr'] + str(quiz_count)
     quiz_json = {}
+    system_message = constants['prompt']['multiple_choice_quiz']['system_message']
 
     for count in range(MAX_TRY_COUNT):
-        gpt_response = await gpt.request_gpt(prompt)
+        gpt_response = await gpt.request_gpt(prompt, system_message)
         quiz_json, is_valid = _reformat_quiz(gpt_response)
         if is_valid:
             break
 
-    quizzes = []
-    for quiz in quiz_json['quizzes']:
-        # quiz = await validation_quiz(json.dumps(quiz))
-        quizzes.append(quiz)
-
-    return quizzes
-
-
-async def validation_quiz(raw_quiz: str):
+    return quiz_json
 
-    quiz_validation_prompt = constants['prompt']['quiz_validation']['kr']
-    prompt = raw_quiz + quiz_validation_prompt
-    quiz_json = {}
 
-    for count in range(MAX_TRY_COUNT):
-        gpt_response = await gpt.request_gpt(prompt)
-        quiz_json, is_valid = _reformat_quiz(gpt_response)
-        if is_valid:
-            break
+def set_quiz_count(script_tokens: int):
+    quiz_count = 3
+    if script_tokens > 5000:
+        quiz_count += int((script_tokens - 5000) / 2500) + 1
+        if quiz_count > 15:
+            quiz_count = 15
 
-    return quiz_json
+    return quiz_count
 
 
 def _reformat_quiz(quiz_json: str):
@@ -49,6 +39,6 @@ def _reformat_quiz(quiz_json: str):
         quiz_json = json.loads(quiz_json)
     except json.decoder.JSONDecodeError as e:
         print("JSON Decode Error : retry generate quiz")
-        return {'quizzes': []}, False
+        return [{'quiz_type': 1, 'quiz_question': 'ERROR!', 'quiz_select_options': ['퀴즈 생성중 오류가 발생했습니다. ㅠㅠ'], 'answer': 1}], False
 
     return quiz_json, True
diff --git a/app/quiz/quizv2.py b/app/quiz/quizv2.py
@@ -1,3 +1,4 @@
+import asyncio
 import json
 
 from main import constants
@@ -6,8 +7,23 @@
 MAX_TRY_COUNT = 3
 
 
-async def generate_quizzes(summary: str, script_tokens: int):
-    quiz_count = set_quiz_count(script_tokens)
+async def generate_quizzes(summaries: list):
+    """Build quizzes for every summary and return them as one flat list.
+
+    Three quizzes are requested when there is exactly one summary; otherwise
+    two are requested per summary. All summaries are processed concurrently
+    through ``generate_quizzes_chunk`` and the per-summary results are
+    concatenated in the order of ``summaries``.
+    """
+    quiz_count = 3 if len(summaries) == 1 else 2
+
+    quiz_chunk_list = await asyncio.gather(
+        *[generate_quizzes_chunk(summary, quiz_count) for summary in summaries]
+    )
+
+    # Flatten the list of per-summary results into a single list.
+    return [quiz for quiz_chunk in quiz_chunk_list for quiz in quiz_chunk]
+
+
+async def generate_quizzes_chunk(summary: str, quiz_count: int):
     prompt = summary + constants['prompt']['multiple_choice_quiz']['kr'] + str(quiz_count)
     quiz_json = {}
     system_message = constants['prompt']['multiple_choice_quiz']['system_message']
@@ -21,16 +37,6 @@ async def generate_quizzes(summary: str, script_tokens: int):
     return quiz_json
 
 
-def set_quiz_count(script_tokens: int):
-    quiz_count = 3
-    if script_tokens > 5000:
-        quiz_count += int((script_tokens - 5000) / 2500) + 1
-        if quiz_count > 15:
-            quiz_count = 15
-
-    return quiz_count
-
-
 def _reformat_quiz(quiz_json: str):
     quiz_json = quiz_json.replace("\n", "")
     quiz_json = quiz_json.replace("\"", '"')

diff --git a/app/summary/summaryv2.py b/app/summary/summaryv2.py
@@ -0,0 +1,62 @@
+import asyncio
+import datetime
+import re
+
+from main import constants
+from app.gpt import gpt
+
+
+async def generate_summary(scripts: dict, video_title: str):
+    """Summarize a transcript chunk by chunk and stitch the parts together.
+
+    The transcript is split with ``divide_chunk``; every chunk is sent to
+    ``gpt.request_gpt`` concurrently. Each returned summary is preceded by an
+    HTML timestamp button derived from the start time of its chunk, and the
+    combined text is passed through ``reformat_summary``.
+
+    ``video_title`` is accepted but not used by this function.
+
+    Returns:
+        A ``(final_summary, summaries)`` tuple: the combined text and the
+        per-chunk summaries as returned by ``asyncio.gather``.
+    """
+    start_times, chunks = divide_chunk(scripts)
+    prompt_config = constants['prompt']['final_summary']
+    summary_prompt = prompt_config['kr']
+
+    summaries = await asyncio.gather(*[
+        gpt.request_gpt(summary_prompt + "\n script : " + chunk, prompt_config['system_message'])
+        for chunk in chunks
+    ])
+
+    parts = []
+    for start, summary in zip(start_times, summaries):
+        # Rendered as H:MM:SS; the button id is the same value without colons.
+        time_format = str(datetime.timedelta(seconds=int(start)))
+        button = ('<button id=\"' + time_format.replace(":", "") + '\" class=\"timestamp\" style=\"'
+                  'color:#FA5B3E;font-size: 1.125rem;line-height: 1.75rem;text-decoration-line:none;'
+                  'display:inline-block;background-color:rgba(250, 91, 62, 0.2);border-radius:0.25rem;padding:0.125rem 0.25rem;\">' +
+                  time_format + '</button>' + '\n')
+        parts.append(button)
+        parts.append(summary + '\n \n ')
+
+    final_summary = reformat_summary(''.join(parts))
+    return final_summary, summaries
+
+
+def divide_chunk(scripts: dict, max_chunk_len: int = 3000, min_tail_len: int = 1000):
+    """Group transcript segments into text chunks with their start times.
+
+    Args:
+        scripts: Iterable of segments; each segment is indexed with
+            ``'text'`` and ``'start'``.
+        max_chunk_len: Once the accumulated text is longer than this, the
+            chunk is closed and the next segment starts a new one.
+        min_tail_len: A trailing chunk shorter than this is appended to the
+            previous chunk instead of being emitted on its own.
+
+    Returns:
+        A ``(time_stamps, chunks)`` tuple of equal-length lists. The first
+        chunk is stamped ``0``; later chunks carry the ``'start'`` of their
+        first segment. Both lists are empty when ``scripts`` is empty.
+    """
+    noise_tags = ("[음악]", "[박수]")  # caption markers for music / applause
+
+    def strip_noise(text):
+        # str.replace returns a new string, so the result must be kept.
+        for tag in noise_tags:
+            text = text.replace(tag, "")
+        return text
+
+    time_stamps = []
+    chunks = []
+    chunk_text = ''
+    time_stamp = 0
+    for script in scripts:
+        if len(chunk_text) > max_chunk_len:
+            chunks.append(strip_noise(chunk_text))
+            time_stamps.append(time_stamp)
+            time_stamp = script['start']
+            chunk_text = ''
+        # Always separate segments with a space so words are not glued together.
+        chunk_text += script['text'] + ' '
+
+    if not chunk_text:
+        return time_stamps, chunks
+
+    if chunks and len(chunk_text) < min_tail_len:
+        # Short tail: fold it into the previous chunk (only if one exists).
+        chunks[-1] += strip_noise(chunk_text)
+    else:
+        time_stamps.append(time_stamp)
+        chunks.append(strip_noise(chunk_text))
+
+    return time_stamps, chunks
+
+
+def reformat_summary(summary: str):
+    """Clean markdown artifacts out of a generated summary.
+
+    Turns a backslash-escaped heading marker (a backslash followed by ``#``)
+    back into ``#`` and removes triple-backtick code fences.
+
+    Returns:
+        The cleaned summary string.
+    """
+    # str.replace does not modify in place; the result has to be reassigned.
+    summary = summary.replace("\\#", "#")
+    summary = summary.replace("```", "")
+    return summary
diff --git a/constants.yaml b/constants.yaml
@@ -24,33 +24,14 @@ model_parameter :
 prompt :
   system_message : {"role": "system", "content": "You are an assistant that generates quizzes and summaries"}
   final_summary:
-    system_message : {"role": "system", "content": "write the given text in the given markdown format, where applicable :  ## {Title} \n ### {section name} \n {content}"}
-    en : "\n\n Please summarize the article in make sure to write the summary in Korean. Please write in Korean but English terms in English. And please make the summary in the form of a markdown raw code with escape sequence. And please exclude inline codes such as ```."
-    kr : "\n\n 위 글을 한국어로 마크다운 형식으로 만들어줘. 코드블록은 빼고 작성해줘.\n
-          Heading2로 제목을 달아주고, 각 키워드나 핵심 내용들을 나눠서 작성해줘.\n\n"
+    system_message : {"role": "system", "content": "write the given text in the given markdown format, where applicable :  ### {section name} \n {content}"}
+    en : "\n\n Please summarize the article in make sure to write the summary in Korean. Please write in Korean but English terms in English. And please make the summary in the form of a markdown raw code with escape sequence."
+    kr : "\n\n 아래는 유튜브 스크립트야. 해당 스크립트를 한국어로 마크다운 형식으로 요약해줘. 키워드나 핵심 내용들을 나눠서 작성해줘."
+
   summary :
     system_message: { "role": "system", "content": "summarize the given text in korean so that it contains everything information as much as possible." }
     en : "\n\n Please summarize the above script so that everything is reflected as much as possible. Please write a summary as if the student is writing down the contents of the class in a notebook. Please make the summary in Korean."
     kr : "\n\n 아래는 유튜브 영상 제목과 그 영상의 스크립트야. 제목과 스크립트 내용을 바탕으로 요약을 진행해줘. 최대한 디테일하게 내용을 담아줘. 요약은 한국어로 진행해줘."
-  quiz :
-    en : "\n\n Based on the summary above, please give me 1 multiple choice question, 1 short answer question, and 1 True or False question. \n
-          Please answer in JSON format and follow the format below. \n
-          quizzes: [{\"quiz_type\":\"\" ,\"quiz_question\": \"\" \"quiz_select_options\": [], \"answer\":\"\"}] \n
-          quiz_type is assigned to 1 for multiple choice, 2 for short answer, and 3 for True or False \n
-          quiz_question allows questions to go in\"
-          Please put multiple choice options in the quiz_select_option list. The number of optional list elements must be 5. In multiple choice, answer is to put a number in the option list with the correct answer among the options (starting from 1)\n
-          For short answer and TF problems, please return the option list as an empty list. Please unify the blank list format to []\n
-          Please return the TF question to 1 if true and 0 if false. All answer must be filled out. If the answer is too long, please give me a short answer. Make sure to give one question for each type. Please write the quiz questions, answers, and options in Korean"
-    kr : "\n\n 위 요약본을 바탕으로 퀴즈를 만들어줘. 퀴즈는 객관식, 주관식, true or false 문제 각각 1문제씩 만들어줘.\n
-          퀴즈는 json 형식으로 만들어주고, 구체적인 형식은 아래와 같아. \n
-          quizzes: [{\"quiz_type\":\"\" ,\"quiz_question\": \"\" \"quiz_select_options\": [], \"answer\":\"\"}] \n
-          quiz_type은 객관식은 1, 주관식은 2, true or false 문제는 3으로 할당해줘.\n
-          quiz_question에는 퀴즈 문제가 들어가게 해줘.\n
-          quiz_select_options에는 객관식에서 선택할 수 있는 선택 옵션들이 string형식으로 들어간 리스트로 만들어줘. 주관식, true or false 같은 경우에는 빈 리스트인 [] 형식으로 반환해줘.\n
-          answer는 객관식의 경우에는 quiz_select_options에서 정답인 요소가 들어간 index번호를 주면 되 (인덱스 시작번호는 1이야).\n
-          주관식은 주관식의 답이 string형식으로 들어가고 리스트와 같은 다른 형식은 허용되지 않아.
-          true or false 문제는 답이 true일 경우 1, 거짓일 경우 0이 들어가면되.
-          모든 answer항목은 반드시 답이 채워져 있어야해. 빈칸이면 안되."
 
   multiple_choice_quiz:
     system_message: { "role": "system", "content": "Make a multiple choice quiz with the given text" }