From c8454b97c849cfde55576d360ba972bc7b98374b Mon Sep 17 00:00:00 2001 From: 9ooDa Date: Mon, 25 Mar 2024 02:22:15 +0900 Subject: [PATCH] =?UTF-8?q?style:=20json=EC=97=90=20=EA=B5=90=EC=A0=95?= =?UTF-8?q?=EB=AC=B8=EC=9E=A5=20=EC=B6=94=EA=B0=80,=20score=20=ED=8F=AC?= =?UTF-8?q?=EB=A7=B7=20=EC=88=98=EC=A0=95=20[#12]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/utils/gram_metrics.py | 10 ++++++---- models/utils/gram_out_json.py | 6 +++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/models/utils/gram_metrics.py b/models/utils/gram_metrics.py index a63193e8..cfda67e0 100644 --- a/models/utils/gram_metrics.py +++ b/models/utils/gram_metrics.py @@ -1,6 +1,6 @@ import string -from typing import List, Dict, Literal, get_args -from .gram_out_json import get_cleaned_token_list, get_scrs_tok +from typing import Dict, Literal, get_args +from out_json import get_cleaned_token_list, get_scrs_tok def get_error_count( @@ -33,7 +33,7 @@ def get_error_rate_sen( error_count = get_error_count(checker_data=checker_data) sentence_count = len(og_list) - return error_count / sentence_count + return round(error_count / sentence_count, 2) def get_error_rate_word( @@ -47,12 +47,14 @@ def get_error_rate_word( # remove punctuations new_sen = sen.translate(str.maketrans('', '', string.punctuation)) word_count += len(new_sen.split(" ")) + result = (1 - (error_count / word_count)) * 100 - return 1 - (error_count / word_count) + return round(result, 2) # ec = error count, psc = per sentence count, pwc = per word count _TYPES = Literal["ec", "psc", "pwc"] + def get_score( checker_data: Dict, score_type: _TYPES = "pwc", diff --git a/models/utils/gram_out_json.py b/models/utils/gram_out_json.py index 3d54e1c2..bfb19b88 100644 --- a/models/utils/gram_out_json.py +++ b/models/utils/gram_out_json.py @@ -205,6 +205,7 @@ def get_phase_1_data( def get_phase_2_inner_data( sid: int, sent: str, + corr_sent: str, edited: bool, ref_word_list: List, tag_list: List, @@ -213,6 +214,7 @@ def get_phase_2_inner_data( inner = { "sid": int, "sentence": str, + "corrected_sentence": str, "edited": False, "ref_word": [], "category": [], @@ -221,6 +223,7 @@ def get_phase_2_inner_data( } inner["sid"] = sid inner["sentence"] = sent + inner["corrected_sentence"] = corr_sent inner["edited"] = edited inner["ref_word"] = ref_word_list inner["tag"] = tag_list @@ -251,11 +254,12 @@ def get_phase_2_data( gector_dict = get_scrs_tok(inner_dict, ctl) sid = sentence_list.index(og_sent) sent = og_sent + corr_sent = inner_dict["fin_sentence"] edited = inner_dict["edited"] ref_word_list = gector_dict["og_word"] tag_list = gector_dict["full_tag"] if edited == True: - inner = get_phase_2_inner_data(sid=sid, sent=sent, edited=edited, ref_word_list=ref_word_list, + inner = get_phase_2_inner_data(sid=sid, sent=sent, corr_sent=corr_sent, edited=edited, ref_word_list=ref_word_list, tag_list=tag_list, tag_grammar=tag_grammar) tag_gram_dict["tag_grammar_info"].append(inner)