Skip to content

Commit

Permalink
Score catboost 모델 추가 (#68)
Browse files Browse the repository at this point in the history
* feat: score/catboost 브랜치 생성 및 dataframe 생성
[#65]

* feat: create_score 함수 추가
[#65]

* feat: catboost model upload
[#65]

* feat: get_score router로 user score 추가
[#65]

* style: Refactor된 코드로 변환

[#65]

* feat: get_score router 수정

[#65]

* feat: 마지막 질문일 경우 get_score에 post 요청 보내기

[#65]

* fix: 사용하지 않는 함수 삭제

* fix: 오류 수정

- CreateScoreRequest로 수정
- 컬럼명 수정
- 오탈자 수정
related #65

* fix: request를 보내고 페이지 전환하기

related: #65

* style: collect_tests_scores 함수 추가
[#65]

* style: black + isort 적용

[#65]

---------

Co-authored-by: C7C4FF <[email protected]>
  • Loading branch information
valofosho and C7C4FF authored Apr 1, 2024
1 parent 1296d8b commit a9218d1
Show file tree
Hide file tree
Showing 12 changed files with 118 additions and 21 deletions.
4 changes: 0 additions & 4 deletions Backend/database/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ def undone(self) -> "User":
self.is_done = False
return self

def streakupdate(self) -> "User":
    """Increase this user's streak counter by one and return the user.

    Has the same effect as ``addstreak``; returning ``self`` allows the
    call to be chained with other mutators.
    """
    self.streak = self.streak + 1
    return self

def addstreak(self) -> "User":
    """Add one to the user's streak counter.

    Returns:
        The same instance, so the call can be chained.
    """
    self.streak = self.streak + 1
    return self
Expand Down
7 changes: 7 additions & 0 deletions Backend/database/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ def create_test(session: Session, test: Test) -> Test:
return test


def create_score(session: Session, score: Score) -> Score:
    """Persist a score row and return it in its refreshed state.

    Args:
        session: Open database session used for the write.
        score: Score instance to store.

    Returns:
        The same ``score`` object after it has been added, committed and
        refreshed through ``session``.
    """
    session.add(score)
    session.commit()
    # Reload the instance after the commit before handing it back.
    session.refresh(score)
    return score


def get_questions_by_date(session: Session, date: date) -> Question | None:
    """Look up the ``Question`` row whose ``date`` column equals ``date``.

    Args:
        session: Open database session used for the query.
        date: Value compared against ``Question.date``.

    Returns:
        Whatever ``session.scalar`` yields for the query; annotated as a
        ``Question`` or ``None``.
    """
    query = select(Question).where(Question.date == date)
    return session.scalar(query)

Expand Down
1 change: 1 addition & 0 deletions Backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
app.include_router(auth_router, prefix="/api")
app.include_router(test_router, prefix="/api")


# load config.yaml
def load_config(filename):
with open(filename, "r") as config_file:
Expand Down
8 changes: 6 additions & 2 deletions Backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def load_config(filename):
client_2 = OpenAI(api_key=api_key)
client_2.fine_tuning.jobs.retrieve(model_config.get("gpt_model_ft2"))


# 1.send_wav_to_STT
async def whisperx(file_path, server_url):
print("Run whisperx")
Expand Down Expand Up @@ -85,7 +86,10 @@ def check_coherence(json_data, question):
response = client_1.chat.completions.create(
model="ft:gpt-3.5-turbo-0125:personal::8yvBw03H",
messages=[
{"role": "system", "content": "질문에 대한 대답 스크립트가 입력되었을 때 문맥이 적합한지 평가"},
{
"role": "system",
"content": "질문에 대한 대답 스크립트가 입력되었을 때 문맥이 적합한지 평가",
},
{"role": "assistant", "content": "{높음, 중간, 낮음} 중 하나로 평가"},
# {"role": "user", "content": "{How has your interest in plays changed over the last few years? What kind of play did you like in the past? What about now?}, {Okay, Lets talk about My taste in concerts... Actually, I have seen a lot of concerts. Right. Nowadays, I love k-pop concerts such as BTS concerts, Aespa concerts, Blackpink concerts, and whatever. K-pop concerts are a trend these days. And there are a lot of k-pop concerts in Korea. Those concerts are so fun and spectacular. But in the past, Um... yeah, I liked piano concerts. Because the first concert I have seen in my life is called Classic. That concert was a piano concert. It was so impressive and touched me. But I like k-pop concerts now. You know, it makes me feel like Im a k-pop star. What about you?}" }
{"role": "user", "content": content},
Expand Down Expand Up @@ -134,7 +138,7 @@ def check_complexity(json_data):
# 비동기 병렬처리
async def process_responses(
# file: UploadFile = File(...)
data: dict
data: dict,
):

# 이후에 유저 인풋 wav_binary로 교체
Expand Down
66 changes: 62 additions & 4 deletions Backend/test_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
from typing import List

import librosa
import pandas as pd
import requests
import soundfile as sf
from auth_router import get_authorized_user
from catboost import CatBoostClassifier
from database.connection import get_db
from database.orm import Question, Score, Test, User
from database.repository import (create_test, create_update_user,
from database.repository import (create_score, create_test, create_update_user,
get_questions_by_date, get_result,
get_result_by_q_num)
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from schema.request import CreateTestRequest
from schema.request import CreateScoreRequest, CreateTestRequest
from schema.response import QuestionSchema, ScoreSchema, TestSchema
from sqlalchemy.orm import Session

Expand Down Expand Up @@ -53,7 +55,9 @@ async def run_inference(path: str, question: str):

# 오늘 날짜로 문제 받아오기
@router.get("/test", status_code=200)
def get_question_handler(session: Session = Depends(get_db),) -> QuestionSchema:
def get_question_handler(
session: Session = Depends(get_db),
) -> QuestionSchema:
today: datetime.date = datetime.today()
questions: Question | None = get_questions_by_date(
session=session, date=today.strftime("%Y-%m-%d")
Expand Down Expand Up @@ -92,11 +96,65 @@ async def upload_test(
if q_num == 3:
user.addstreak()
user.done()
create_update_user()
create_update_user(session=session, user=user)

return TestSchema.from_orm(test)


def collect_tests_scores(session, date, user) -> pd.DataFrame:
    """Build the feature table for one user's tests on a given date.

    One row is produced per question number (1 to 3) with the columns
    ``WPM``, ``MLR``, ``Pause``, ``Grammar``, ``PR`` and ``Coherence``.

    Args:
        session: Database session passed through to ``get_result_by_q_num``.
        date: Date of the tests to collect, passed through unchanged.
        user: User whose test results are collected.

    Returns:
        A DataFrame with one row per question, in question order.

    Raises:
        HTTPException: 404 when the result for a question is missing.
    """
    columns = ["WPM", "MLR", "Pause", "Grammar", "PR", "Coherence"]
    rows = []
    # Only three questions exist today; the loop is kept so the range can be
    # widened for a future full-length version of the test.
    for q_num in range(1, 4):
        test = get_result_by_q_num(session=session, date=date, user=user, q_num=q_num)
        # NOTE(review): presumably the lookup yields None when no result is
        # stored -- confirm against the repository. Fail with a clear error
        # instead of an AttributeError on the attribute reads below.
        if test is None:
            raise HTTPException(
                status_code=404,
                detail=f"Test result for question {q_num} not found",
            )
        rows.append(
            [
                test.wpm,
                test.mlr,
                test.pause,
                test.grammar["phase_2"]["score"],
                test.mpr,
                test.coherence,
            ]
        )
    return pd.DataFrame(rows, columns=columns)


# Process-wide cache for the CatBoost classifier; filled on first use so the
# model file is not re-read from disk on every request.
_score_model = None


def _load_score_model():
    """Return the CatBoost classifier, loading it from disk only once."""
    global _score_model
    if _score_model is None:
        model = CatBoostClassifier()
        model.load_model("../Models/catboost/catboost_model.bin")
        # Assign only after a successful load so a failed attempt is retried.
        _score_model = model
    return _score_model


@router.post("/get_score")
async def get_score_handler(
    user: User = Depends(get_authorized_user), session: Session = Depends(get_db)
):
    """Predict today's overall grade for the authorized user and store it.

    Collects the user's test features for today, encodes the coherence
    labels as integers, runs the CatBoost classifier on every row, averages
    the predicted class indices, maps the rounded average back to a grade
    label and persists it as a ``Score``.

    Args:
        user: The authorized user, resolved by ``get_authorized_user``.
        session: Database session, resolved by ``get_db``.

    Returns:
        The ``Score`` returned by ``create_score``.
    """
    # Named ``today`` so the imported ``date`` type is not shadowed.
    today = datetime.now().strftime("%Y-%m-%d")

    # Feature table with one row per question.
    user_scores = collect_tests_scores(session=session, date=today, user=user)

    # Encode the coherence labels (low / medium / high) as integers.
    coherence_mapping = {"낮음": 0, "중간": 1, "높음": 2}
    user_scores["Coherence"] = user_scores["Coherence"].map(coherence_mapping)

    # Grade label -> class index, plus the inverse used to decode predictions.
    class_mapping = {"NH": 0, "IL": 1, "IM": 2, "IH": 3, "AL": 4}
    label_by_index = {index: label for label, index in class_mapping.items()}

    predictions = _load_score_model().predict(user_scores)
    # Average the per-question class indices and round to the nearest class.
    average_prediction = int(round(predictions.mean()))
    predicted_class = label_by_index[average_prediction]

    score_request = CreateScoreRequest(
        user_id=user.id, date=today, score=predicted_class
    )
    score = Score.create(request=score_request)
    return create_score(session=session, score=score)


@router.get("/me/result/{date}")
async def get_result_by_date(
date: date,
Expand Down
16 changes: 12 additions & 4 deletions Frontend/pages/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,29 @@ def make_layout(question_data):
with by_text:
grammar = question_data["grammar"]["phase_2"]
st.subheader("grammar")
st.markdown(f"전체 발화 문장 중 올바른 문법 사용 비율은 **{grammar['score']}%** 입니다.")
st.markdown(
f"전체 발화 문장 중 올바른 문법 사용 비율은 **{grammar['score']}%** 입니다."
)

with st.container(height=300):
st.markdown(grammar["original_passage"])

fix_sentence(grammar["tag_grammar_info"])

st.subheader("coherence")
st.markdown(f"- 고객님의 질문에 대한 답변의 주제 적합성은 **{question_data['coherence']}**입니다")
st.markdown(
f"- 고객님의 질문에 대한 답변의 주제 적합성은 **{question_data['coherence']}**입니다"
)
st.subheader("complexity")
st.markdown(f"{question_data['complexity']}")

with by_speaking:
st.markdown(f"전체 발화 중 잘못된 발음 없이 명확하게 발음한 비율은 **{question_data['mpr']}%** 입니다.")
st.markdown(f"연속으로 발화한 평균 단어 수는 **{question_data['mlr']}개** 입니다.")
st.markdown(
f"전체 발화 중 잘못된 발음 없이 명확하게 발음한 비율은 **{question_data['mpr']}%** 입니다."
)
st.markdown(
f"연속으로 발화한 평균 단어 수는 **{question_data['mlr']}개** 입니다."
)
st.markdown(f"전체 발화 중 pause 비율은 **{question_data['pause']}%** 입니다.")


Expand Down
8 changes: 6 additions & 2 deletions Frontend/pages/pretest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@
)


st.write("시험은 하루에 한 번만 볼 수 있습니다. 중도 이탈 시 데이터는 저장되지 않습니다.")
st.write(
"시험은 하루에 한 번만 볼 수 있습니다. 중도 이탈 시 데이터는 저장되지 않습니다."
)
st.write("문제 음성은 총 두 번 들려드립니다.")
st.write("조용한 환경에서 응시해주세요. 보다 정확한 결과가 나옵니다.")
st.write("마이크를 허용해주시고, 아래 버튼으로 녹음하여 녹음이 제대로 되는지 확인하세요.")
st.write(
"마이크를 허용해주시고, 아래 버튼으로 녹음하여 녹음이 제대로 되는지 확인하세요."
)

voicecheck = audiorecorder("check your voice")
st.session_state.my_recorder_output = None
Expand Down
12 changes: 11 additions & 1 deletion Frontend/pages/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@
# Define the endpoint URL of the server where you want to save the recording
test = "https://mopic.today/api/test"

# Define the endpoint URL of the server where you want to inference the score
score = "https://mopic.today/api/get_score"

st.title("Daily Test")
st.image("AVA.png", caption="문제를 두 번 들려드린 후 바로 녹음을 시작해주세요.", width=300)
st.image(
"AVA.png", caption="문제를 두 번 들려드린 후 바로 녹음을 시작해주세요.", width=300
)


# remove image expansion
Expand All @@ -38,6 +42,7 @@
unsafe_allow_html=True,
)


# When "listen" button is pressed, Convert .wav->html tag to autoplay
def autoplay_audio(file_path: str):
with open(file_path, "rb") as audio_file:
Expand All @@ -56,7 +61,12 @@ def save_recording(audio_data, question_num):
headers={"Access-Token": st.session_state["token"]["access_token"]},
)
if question_num == 3:
response_score = requests.post(
url=score,
headers={"Access-Token": st.session_state["token"]["access_token"]},
)
st.switch_page("./pages/finish.py")

# print(response.text)
if response.status_code == 200:
st.success("The recording was successfully saved.")
Expand Down
Binary file added Models/catboost/catboost_model.bin
Binary file not shown.
4 changes: 3 additions & 1 deletion Models/grammar_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@


@app.post("/upload/")
async def upload_json(text: Annotated[str, Form()],):
async def upload_json(
text: Annotated[str, Form()],
):
try:

gector_path = "./gector"
Expand Down
1 change: 1 addition & 0 deletions Models/phoneme_rec_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
# "fileb_content_type": fileb.content_type,
# }


# 1. text to phoneme(model1)
def phonemize( # pylint: disable=too-many-arguments
text,
Expand Down
12 changes: 9 additions & 3 deletions Models/utils/gram_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from .gram_out_json import get_cleaned_token_list, get_scrs_tok


def get_error_count(checker_data: Dict,):
def get_error_count(
checker_data: Dict,
):
metric_data = {"main": {}}
ctl = get_cleaned_token_list()
for og_sent, inner_dict in checker_data.items():
Expand All @@ -26,15 +28,19 @@ def get_error_count(checker_data: Dict,):
return error_count


def get_error_rate_sen(
    checker_data: Dict,
):
    """Return the number of errors per sentence, rounded to two decimals.

    Args:
        checker_data: Mapping with one entry per sentence (it appears to be
            keyed by the original sentence text -- confirm with the caller).

    Returns:
        ``get_error_count(checker_data)`` divided by the number of entries,
        rounded to 2 decimal places; ``0.0`` when ``checker_data`` is empty,
        since no sentences means there is nothing to divide by.
    """
    sentence_count = len(checker_data)
    # Guard the empty case, which previously raised ZeroDivisionError.
    if sentence_count == 0:
        return 0.0
    error_count = get_error_count(checker_data=checker_data)
    return round(error_count / sentence_count, 2)


def get_error_rate_word(checker_data: Dict,):
def get_error_rate_word(
checker_data: Dict,
):
og_list = list(checker_data.keys())
error_count = get_error_count(checker_data=checker_data)

Expand Down

0 comments on commit a9218d1

Please sign in to comment.