From 823e1c8c6aa9e6c267bc487d074fb34cb5fa53cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=A0=95=EC=98=88=EC=9D=80?=
 <79676210+Yeeun-Jeong@users.noreply.github.com>
Date: Wed, 4 Dec 2024 12:28:46 +0900
Subject: [PATCH 1/3] =?UTF-8?q?#37=20Feat:=20=ED=82=A4=EC=9B=8C=EB=93=9C?=
 =?UTF-8?q?=20=EC=B6=94=EC=B6=9C=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94=EA=B0=80?=
 =?UTF-8?q?=20-=20=ED=94=84=EB=A1=AC=ED=94=84=ED=8A=B8=20=EC=9E=91?=
 =?UTF-8?q?=EC=84=B1=20(#38)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 prompt/extract_keyword_prompt.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 prompt/extract_keyword_prompt.py

diff --git a/prompt/extract_keyword_prompt.py b/prompt/extract_keyword_prompt.py
new file mode 100644
index 0000000..9c70c88
--- /dev/null
+++ b/prompt/extract_keyword_prompt.py
@@ -0,0 +1,29 @@
+EXTRACT_KEYWORD_PROMPT = (
+    """
+    # Instruction
+    Read {user_message} and extract keywords that summarize the subject.
+    
+    # Constraints
+    - If the original text is in Korean:
+      - Extract keywords as they appear in the original text, without translation.
+      - Proper nouns should also remain unchanged.
+    - If the original text is in English:
+      - Proper nouns (e.g., names of models, tools, libraries) must be in English as they appear in the text.
+      - Other general terms should be translated into Korean.
+    - The number of keywords must be between 3 to 10.
+    
+    # Examples
+    - For Korean input:
+      User input: 업무 효율화를 위한 카카오 사내봇 개발기...
+      Keywords: 업무 효율화, 카카오 사내봇, 데이터, AI
+    - For English input:
+      User input: LoRA is a parameter-efficient method for fine-tuning large language models...
+      Keywords: LoRA, 파라미터 효율적, 미세 조정, 대형 언어 모델
+    - Mixed output:
+      User input: LoRA fine-tunes RoBERTa models using PyTorch...
+      Keywords: LoRA, 미세 조정, RoBERTa, PyTorch
+    
+    # Output Format
+    - Provide the keywords as a string separated by commas, similar to the examples above.
+    """
+)
\ No newline at end of file

From 9b253b69f4ededd747ddf781054ac8ab34e4fcd2 Mon Sep 17 00:00:00 2001
From: wrathlion <taejin7824@gmail.com>
Date: Wed, 4 Dec 2024 15:17:33 +0900
Subject: [PATCH 2/3] =?UTF-8?q?#37=20Feat:=20=ED=82=A4=EC=9B=8C=EB=93=9C?=
 =?UTF-8?q?=20=EC=B6=94=EC=B6=9C=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94=EA=B0=80?=
 =?UTF-8?q?=20(#39)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* CI/CD: Dockerfile && Jenkinsfile 추가

#23

* Hotfix: DTO linkId Type error 수정 (#26)

#24

* Fix: 중괄호 누락 수정

#23

* Hotfix: 프롬프트 수정 (#28)

* FIX: Jenkinsfile 수정

#29

* Hotfix: 특정 사이트에서 발생하는  IndexError 수정

#31

* Hotfix: robots.txt의 주석을 무시하도록 수정 (#34)

#33

* #35 Feat: URL read 간 로그 출력 (#36)

* Feat: get_logger 함수 작성

#35

* Feat: content_read 간 오류 발생시 로그 출력

#35

* Refactor: 프롬프트 모듈화, metadata 부여 이름 변경

#37

* Feat: keyword 추출 기능 추가

+ Refactor: 프롬프트 출력 형식 일차

#37

* Docs: 주석 수정

#37

* Style: 기능 변경에 따른 이름 변경

Category -> Extract

#37

* Style: 불필요 모듈 제거

#37

---------

Co-authored-by: 정예은 <79676210+Yeeun-Jeong@users.noreply.github.com>
Co-authored-by: All_right_Cool <ehrud3220@naver.com>
Co-authored-by: Dokyeong_lee <35948496+DvaCode@users.noreply.github.com>
---
 Dockerfile                                    |  28 +++++
 Jenkinsfile                                   | 112 ++++++++++++++++++
 config/__init__.py                            |   0
 config/log.py                                 |  16 +++
 entity/link_info.py                           |   7 +-
 entity/request_dto.py                         |   2 +-
 entity/response_dto.py                        |   2 +-
 main.py                                       |  20 ++--
 prompt/__init__.py                            |   0
 .../{main_category => extract_tag_prompt.py}  |   7 +-
 service/categorize_service.py                 |  63 ----------
 service/crawlability_checker.py               |  13 +-
 service/metadata_extractor.py                 |  69 +++++++++++
 13 files changed, 258 insertions(+), 81 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 Jenkinsfile
 create mode 100644 config/__init__.py
 create mode 100644 config/log.py
 create mode 100644 prompt/__init__.py
 rename prompt/{main_category => extract_tag_prompt.py} (89%)
 delete mode 100644 service/categorize_service.py
 create mode 100644 service/metadata_extractor.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0d2f92a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+# Use the official Python image as the base
+FROM python:3.11-slim
+
+# Set the working directory inside the container
+WORKDIR /app
+
+# Copy the requirements file into the container
+COPY requirement.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirement.txt
+
+# Copy the contents of the current directory into the container's /app directory
+COPY . .
+
+# Set PYTHONPATH so that it includes the current directory
+ENV PYTHONPATH="./"
+
+# Expose the port used by the application
+EXPOSE 80
+
+# Declare environment variables for the API keys (values are injected by Kubernetes)
+ENV OPENAI_API_KEY=""
+ENV GOOGLE_SEARCH_API_KEY=""
+ENV GOOGLE_SEARCH_CX=""
+
+# Run the FastAPI application with Uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000..03bfba5
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,112 @@
+pipeline {
+    agent any
+    environment {
+        DOCKER_REPO = "ghcr.io/animal-squad/project-s-ai"
+        GIT_BRANCH = "${env.BRANCH_NAME}"
+        DEPLOYMENT_NAMESPACE = "${params.DEPLOYMENT_NAMESPACE}"
+        DEPLOYMENT_NAME = "${params.DEPLOYMENT_NAME}"
+        DEPLOYMENT_CONTAINER_NAME = "${params.DEPLOYMENT_CONTAINER_NAME}"
+        KANIKO_JOB_YAML = '/var/jenkins_home/kaniko/job-kaniko-ai.yaml' // Path to the Kaniko Job YAML file
+        KANIKO_JOB_NAME = 'kaniko-ai' // Value to configure per project
+        JENKINS_NAMESPACE = 'devops' // Namespace in which the Kaniko Job is created
+    }
+    parameters {
+        string(name: 'DEPLOYMENT_NAMESPACE', defaultValue: 'linket', description: '배포할 Kubernetes 네임스페이스') // configurable
+        string(name: 'DEPLOYMENT_NAME', defaultValue: 'ai-deployment', description: '배포할 Deployment 이름') // configurable
+        string(name: 'DEPLOYMENT_CONTAINER_NAME', defaultValue: 'ai-container', description: 'Deployment 내 컨테이너 이름') // configurable
+    }
+    stages {
+        stage('Checkout Source Code') {
+            steps {
+                checkout scm
+                script {
+                    env.GIT_COMMIT_SHORT = sh(script: 'git rev-parse --short HEAD', returnStdout: true).trim()
+                    env.GIT_COMMIT_MESSAGE = sh(script: 'git log -1 --pretty=%B', returnStdout: true).trim()
+                    echo "Current Git Commit Short: ${env.GIT_COMMIT_SHORT}"
+                    echo "Git Commit Message: ${env.GIT_COMMIT_MESSAGE}"
+                }
+            }
+        }
+        // stage('Unit Tests') {
+        //     steps {
+        //         sh 'make test' // Unit test stage; no unit tests exist yet
+        //     }
+        // }
+        stage('Update Kaniko YAML') {
+            steps {
+                script {
+                    // Rewrite the --destination image tag in the Kaniko YAML file to the current commit
+                    sh """
+                    sed -i 's|--destination=.*|--destination=${DOCKER_REPO}:${GIT_COMMIT_SHORT}",|' ${KANIKO_JOB_YAML}
+                    """
+                }
+            }
+        }
+        stage('Deploy Kaniko Job') {
+            steps {
+                script {
+                    // Delete the existing Kaniko Job (if any), then create a new one from the YAML
+                    sh """
+                    kubectl delete job ${KANIKO_JOB_NAME} -n ${JENKINS_NAMESPACE} --ignore-not-found
+                    kubectl create -f ${KANIKO_JOB_YAML} -n ${JENKINS_NAMESPACE}
+                    """
+                }
+            }
+        }
+        stage('Wait for Kaniko Build') {
+            steps {
+                script {
+                    // Wait until the Kaniko Job reports a success or a failure (15 minute limit)
+                    timeout(time: 15, unit: 'MINUTES') {
+                        waitUntil {
+                            def succeeded = sh(script: "kubectl get job ${KANIKO_JOB_NAME} -n ${JENKINS_NAMESPACE} -o jsonpath='{.status.succeeded}'", returnStdout: true).trim()
+                            def failed = sh(script: "kubectl get job ${KANIKO_JOB_NAME} -n ${JENKINS_NAMESPACE} -o jsonpath='{.status.failed}'", returnStdout: true).trim()
+                            // An empty string (field not present in the output) is treated as a count of 0
+                            def succeededCount = succeeded ? succeeded.toInteger() : 0
+                            def failedCount = failed ? failed.toInteger() : 0
+
+                            echo "Kaniko Job Succeeded: ${succeededCount}, Failed: ${failedCount}"
+                            return (succeededCount >= 1) || (failedCount >= 1)
+                        }
+                    }
+                    // Check the final status. NOTE(review): finalFailed is read but never used below - confirm whether it is needed
+                    def finalStatus = sh(script: "kubectl get job ${KANIKO_JOB_NAME} -n ${JENKINS_NAMESPACE} -o jsonpath='{.status.conditions[?(@.type==\"Complete\")].status}'", returnStdout: true).trim()
+                    def finalFailed = sh(script: "kubectl get job ${KANIKO_JOB_NAME} -n ${JENKINS_NAMESPACE} -o jsonpath='{.status.conditions[?(@.type==\"Failed\")].status}'", returnStdout: true).trim()
+                    if (finalStatus != 'True') {
+                        error "Kaniko build failed."
+                    }
+                }
+            }
+        }
+        stage('Deploy to Kubernetes') {
+            steps {
+                script {
+                    // Point the Deployment's container at the new image and wait for the rollout
+                    sh """
+                    kubectl set image deployment/${DEPLOYMENT_NAME} \
+                    -n ${DEPLOYMENT_NAMESPACE} ${DEPLOYMENT_CONTAINER_NAME}=${DOCKER_REPO}:${GIT_COMMIT_SHORT}
+                    kubectl rollout status deployment/${DEPLOYMENT_NAME} -n ${DEPLOYMENT_NAMESPACE}
+                    """
+                    }
+                }
+            }
+        }
+    post {
+        always {
+            script {
+                currentBuild.result = currentBuild.result ?: 'SUCCESS'
+            }
+            echo "Build Result: ${currentBuild.result}"
+            withCredentials([string(credentialsId: 'Discord-Webhook', variable: 'DISCORD')]) {
+                discordSend title: "빌드 결과: ${env.JOB_NAME}",
+                            description: """
+                            **커밋 메시지**: `${env.GIT_COMMIT_MESSAGE}`
+                            **커밋 ID**: `${env.GIT_COMMIT_SHORT}`
+                            **빌드 번호**: `#${env.BUILD_NUMBER}`
+                            **상태**: ${currentBuild.result == 'SUCCESS' ? '🟢 **성공**' : '❌ **실패**'}
+                            """,
+                            webhookURL: DISCORD
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/config/__init__.py b/config/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/config/log.py b/config/log.py
new file mode 100644
index 0000000..8ea5e5e
--- /dev/null
+++ b/config/log.py
@@ -0,0 +1,16 @@
+# Logger 설정
+import logging
+
+def get_logger(logger_name: str) -> logging.Logger:
+    """Return the DEBUG-level logger named ``logger_name`` with a single console handler."""
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(logging.DEBUG)
+    # logging.getLogger returns the same object for the same name, so attach the
+    # handler only once; otherwise every repeated call duplicates each record.
+    if not logger.handlers:
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.DEBUG)
+        formatter = logging.Formatter('%(levelname)s - %(asctime)s - %(name)s - %(message)s')
+        console_handler.setFormatter(formatter)
+        logger.addHandler(console_handler)
+    return logger
diff --git a/entity/link_info.py b/entity/link_info.py
index ec2656c..51779b2 100644
--- a/entity/link_info.py
+++ b/entity/link_info.py
@@ -1,11 +1,12 @@
 from pydantic import BaseModel
 
 class LinkInfo(BaseModel):
-    linkId: int
+    linkId: str
     URL: str
     content: str | None
 
 class LinkWithTags(BaseModel):
-    linkId: int
+    linkId: str
     title: str | None
-    tags: list[str]
\ No newline at end of file
+    tags: list[str]
+    keywords: list[str | None]
diff --git a/entity/request_dto.py b/entity/request_dto.py
index 1a2e573..2924562 100644
--- a/entity/request_dto.py
+++ b/entity/request_dto.py
@@ -3,5 +3,5 @@
 from entity.link_info import LinkInfo
 
 
-class CategorizeRequest(BaseModel):
+class ExtractRequest(BaseModel):
     links: list[LinkInfo]
diff --git a/entity/response_dto.py b/entity/response_dto.py
index 9355e01..f55e878 100644
--- a/entity/response_dto.py
+++ b/entity/response_dto.py
@@ -3,5 +3,5 @@
 from entity.link_info import LinkWithTags
 
 
-class CategorizeResponse(BaseModel):
+class ExtractResponse(BaseModel):
     links: list[LinkWithTags]
diff --git a/main.py b/main.py
index af88567..521e0df 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,10 @@
 from fastapi import FastAPI
 
-from entity.request_dto import CategorizeRequest
-from entity.response_dto import CategorizeResponse
+from config.log import get_logger
+from entity.request_dto import ExtractRequest
+from entity.response_dto import ExtractResponse
 from model.gpt_model import GPTModel
-from service.categorize_service import CategorizeService
+from service.metadata_extractor import MetadataExtractor
 from service.content_extractor import ContentExtractor
 from service.content_reader import ContentReader
 from service.crawlability_checker import CrawlabilityChecker
@@ -16,15 +17,16 @@
 content_extractor = ContentExtractor()
 content_reader = ContentReader(crawlability_checker, content_extractor)
 
-categorize_service = CategorizeService(gpt_model, content_reader)
+extractor_logger = get_logger("ExtractorLogger")
+metadata_extractor = MetadataExtractor(gpt_model, content_reader, extractor_logger)
 
-@app.post("/ai/categorize")
-async def classify_main(req: CategorizeRequest):
+@app.post("/ai/extract")
+async def classify_main(req: ExtractRequest):
     """
-    메인 카테고리 분류 엔드포인트
+    메타 데이터 추출 엔드포인트
     """
     data = {
-        "links": categorize_service.categorize_contents(req.links),
+        "links": metadata_extractor.extract_metadata_batch(req.links),
     }
 
-    return CategorizeResponse(**data)
+    return ExtractResponse(**data)
diff --git a/prompt/__init__.py b/prompt/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/prompt/main_category b/prompt/extract_tag_prompt.py
similarity index 89%
rename from prompt/main_category
rename to prompt/extract_tag_prompt.py
index a957d1e..23bcf8a 100644
--- a/prompt/main_category
+++ b/prompt/extract_tag_prompt.py
@@ -1,3 +1,4 @@
+EXTRACT_TAG_PROMPT = """
 # Persona
 You are a developer working at a Korean IT company with expertise in various computer science topics.
 
@@ -61,10 +62,10 @@
 # Constraints
 - Classify into 1, 2, or 3 categories maximum
 - If the category is clear, a single category is sufficient
-- If the user's input doesn't fit into any of the main categories, create and add an appropriate new category
-- However, do not add a new category if the content can be classified into one or more existing categories
+- Use only the provided Categories listed above for tagging: do not create or suggest any new categories under any circumstances
 
 # Output Format
 - If there's only one category, output it as a string
-- If there are 2 or 3 categories, output them as strings in a list ['category1', 'category2', 'category3']
+- If there are 2 or 3 categories, output them as a comma-separated string, e.g., “category1, category2, category3”. Do not use brackets or lists in the output.
 - While unclassified entries should be minimized, output None if classification is not possible
+"""
\ No newline at end of file
diff --git a/service/categorize_service.py b/service/categorize_service.py
deleted file mode 100644
index 361ddce..0000000
--- a/service/categorize_service.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from entity.link_info import LinkWithTags, LinkInfo
-from model.gpt_model import GPTModel
-from service.content_reader import ContentReader
-
-
-def get_tags(result: str) -> list[str]:
-    result = result.split("'")[1:-1]
-    tags = []
-    for tag in result:
-        if tag.find(",") == -1:
-            tags.append(tag)
-
-    return tags
-
-
-class CategorizeService:
-    """
-    주어진 텍스트의 카테고리를 분류하는 서비스
-    :param gpt_model: 사용하려는 GPT 모델을 주입
-    """
-    def __init__(self, gpt_model: GPTModel, content_reader: ContentReader):
-        self.content_reader = content_reader
-        self.gpt_model = gpt_model
-        with open("prompt/main_category", "r") as f:
-            self.main_category_prompt = f.read()
-
-    def categorize_contents(self, contents: list[LinkInfo]) -> list[LinkWithTags]:
-        """
-        링크들의 태그를 부여
-        :param contents: 분류하려는 링크들의 정보
-        :return: 각 링크들의 분류된 태그들, linkId, title
-        """
-        results = []
-        for link_info in contents:
-            data = {
-                "linkId": link_info.linkId,
-            }
-            content_info = self.content_reader.read_content(link_info.URL, link_info.content)
-            if content_info:
-                data["title"] = content_info["title"]
-                data["tags"] = self.categorize_main(content_info["title"], content_info["content"])
-            else:
-                data["title"] = None
-                data["tags"] = []
-
-            if not data["tags"]:
-                data["tags"].append("기타")
-
-            results.append(LinkWithTags(**data))
-
-        return results
-
-    def categorize_main(self, title: str, content: str) -> list[str]:
-        """
-        내용의 태그를 부여
-        :param title: 분류하려는 텍스트의 제목
-        :param content: 분류하려는 텍스트
-        :return: 분류된 여러개의 태그
-        """
-        query = f"{title if title else ''}\n\n{content if content else ''}"
-        category = self.gpt_model.generate_response(self.main_category_prompt, query)
-
-        return get_tags(category)
diff --git a/service/crawlability_checker.py b/service/crawlability_checker.py
index 9123a7c..a1521ad 100644
--- a/service/crawlability_checker.py
+++ b/service/crawlability_checker.py
@@ -25,6 +25,9 @@ def robots_txt_parser(self, url: str) -> dict[str, dict[str, list[str]]] | None:
 
         recent_agent = ""
         for info in texts:
+            if len(info) and info[0] == '#':
+                continue
+
             if info.find(":") != -1:
                 key, value = info.split(":")[:2]
                 key = key.lower()
@@ -91,8 +94,16 @@ def can_crawl(self, url: str) -> bool:
         target_path_list = parsed_url["path"].split("/")
         for disallow in all_agent["Disallow"]:
             disallow_path_list = disallow.split("/")
-
             is_disallow = True  # 주어진 URL이 현재 검사하려는 Disallow path와 같은지
+
+            # When disallow_path has more segments than target_path, the index lookup below would raise IndexError, e.g.:
+            # target_path = /a/b/c
+            # disallow_path = /*/*/*/d
+            # disallow_path = /a/b/c/d
+            # Neither rule matches the target. NOTE(review): `break` also skips the remaining Disallow rules - `continue` may be intended; confirm.
+            if len(target_path_list) < len(disallow_path_list):
+                break
+
             for idx, path in enumerate(disallow_path_list):
                 # disallow path와 URL의 path가 다른 경우
                 if path != "*" and target_path_list[idx] != path:
diff --git a/service/metadata_extractor.py b/service/metadata_extractor.py
new file mode 100644
index 0000000..50ef8bb
--- /dev/null
+++ b/service/metadata_extractor.py
@@ -0,0 +1,69 @@
+import logging
+
+from entity.link_info import LinkWithTags, LinkInfo
+from model.gpt_model import GPTModel
+from prompt.extract_keyword_prompt import EXTRACT_KEYWORD_PROMPT
+from prompt.extract_tag_prompt import EXTRACT_TAG_PROMPT
+from service.content_reader import ContentReader
+
+
+class MetadataExtractor:
+    """Extract tags and keywords for links by sending their content to a GPT model."""
+
+    def __init__(self, gpt_model: GPTModel, content_reader: ContentReader, logger: logging.Logger):
+        self.content_reader = content_reader
+        self.gpt_model = gpt_model
+        self.logger = logger
+
+    @staticmethod
+    def _parse_response(response: str) -> list[str]:
+        """Split a comma-separated model reply into trimmed, non-empty items.
+
+        Blank items and the literal ``None`` (which the tag prompt asks the model
+        to emit when nothing fits) are dropped so callers get ``[]`` for "no result".
+        """
+        items = (item.strip() for item in (response or "").split(","))
+        return [item for item in items if item and item.lower() != "none"]
+
+    def extract_metadata_batch(self, contents: list[LinkInfo]) -> list[LinkWithTags]:
+        """
+        Extract metadata for several links.
+        :param contents: information about the links to extract data from
+        :return: tags, keywords, linkId and title for each link
+        """
+        results = []
+        for link_info in contents:
+            try:
+                content_info = self.content_reader.read_content(link_info.URL, link_info.content)
+            except Exception as e:  # best effort: one unreadable link must not fail the batch
+                content_info = None
+                self.logger.error(
+                    f"Failed to read content for URL: {link_info.URL}.\n"
+                    f"Error Type: {type(e).__name__}, Message: {e}"
+                )
+
+            data = {"linkId": link_info.linkId, "title": None, "tags": [], "keywords": []}
+            if content_info:
+                title, content = content_info["title"], content_info["content"]
+                data["title"] = title
+                data["tags"] = self.extract_metadata(title, content, EXTRACT_TAG_PROMPT)
+                data["keywords"] = self.extract_metadata(title, content, EXTRACT_KEYWORD_PROMPT)
+
+            if not data["tags"]:
+                data["tags"].append("기타")  # fallback tag used when no tag was extracted
+
+            results.append(LinkWithTags(**data))
+
+        return results
+
+    def extract_metadata(self, title: str, content: str, prompt: str) -> list[str]:
+        """
+        Extract metadata from the given text through the GPT model.
+        :param title: title of the article
+        :param content: body of the article
+        :param prompt: prompt used to extract the metadata
+        :return: the parsed items of the model reply (tags or keywords)
+        """
+        query = f"{title if title else ''}\n\n{content if content else ''}"
+        response = self.gpt_model.generate_response(prompt, query)
+        return self._parse_response(response)

From 89deeada88562ff6eeb91fe3d6f1371c5a262e69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=A0=95=EC=98=88=EC=9D=80?=
 <79676210+Yeeun-Jeong@users.noreply.github.com>
Date: Wed, 4 Dec 2024 15:45:18 +0900
Subject: [PATCH 3/3] =?UTF-8?q?Refactor:=20=ED=82=A4=EC=9B=8C=EB=93=9C=20?=
 =?UTF-8?q?=EC=B6=9C=EB=A0=A5=20=ED=98=95=EC=8B=9D=20=EB=B3=80=EA=B2=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 prompt/extract_keyword_prompt.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/prompt/extract_keyword_prompt.py b/prompt/extract_keyword_prompt.py
index 9c70c88..5435404 100644
--- a/prompt/extract_keyword_prompt.py
+++ b/prompt/extract_keyword_prompt.py
@@ -5,25 +5,22 @@
     
     # Constraints
     - If the original text is in Korean:
-      - Extract keywords as they appear in the original text, without translation.
+      - Extract keywords as they appear in the input, without translation.
       - Proper nouns should also remain unchanged.
-    - If the original text is in English:
-      - Proper nouns (e.g., names of models, tools, libraries) must be in English as they appear in the text.
+    - If the original text is in foreign language:
+      - Proper nouns (e.g., names of models, tools, libraries) must be output as they appear in the input, without translation.
       - Other general terms should be translated into Korean.
-    - The number of keywords must be between 3 to 10.
+    - Ensure the number of keywords extracted is between 3 and 10.
     
     # Examples
     - For Korean input:
-      User input: 업무 효율화를 위한 카카오 사내봇 개발기...
-      Keywords: 업무 효율화, 카카오 사내봇, 데이터, AI
-    - For English input:
-      User input: LoRA is a parameter-efficient method for fine-tuning large language models...
-      Keywords: LoRA, 파라미터 효율적, 미세 조정, 대형 언어 모델
-    - Mixed output:
-      User input: LoRA fine-tunes RoBERTa models using PyTorch...
-      Keywords: LoRA, 미세 조정, RoBERTa, PyTorch
+      User Message: 업무 효율화를 위한 카카오 사내봇 개발기...
+      Output: 업무 효율화, 카카오 사내봇, 데이터, AI, LLM, RAG
+    - For foreign language input:
+      User Message: LoRA is a parameter-efficient method for fine-tuning large language models. LoRA fine-tunes RoBERTa models using PyTorch...
+      Output: LoRA, 파라미터 효율적, 미세 조정, 대형 언어 모델, PyTorch, RoBERTa
     
     # Output Format
-    - Provide the keywords as a string separated by commas, similar to the examples above.
+    - Output keywords as a comma-separated string, e.g., “keyword1, keyword2”. Do not use brackets or lists in the output.
     """
-)
\ No newline at end of file
+)