Skip to content

Commit

Permalink
#35 Feat: URL read 간 로그 출력 (#41)
Browse files (browse the repository at this point in the history)
* Style: 불필요 모듈 제거

#35

* Feat: Crawlability 체크 로그 추가

#35

* Feat: 토큰, 시간 로그 추가

#35
  • Loading branch information
Taejin1221 authored Dec 6, 2024
1 parent 5d18897 commit a6e86c8
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 5 deletions.
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@

app = FastAPI()

- gpt_model = GPTModel()
+ gpt_model = GPTModel(get_logger("GPTLogger"))

crawlability_checker = CrawlabilityChecker()
content_extractor = ContentExtractor()
- content_reader = ContentReader(crawlability_checker, content_extractor)
+ content_reader = ContentReader(crawlability_checker, content_extractor, get_logger("CrawlabilityLogger"))

extractor_logger = get_logger("ExtractorLogger")
metadata_extractor = MetadataExtractor(gpt_model, content_reader, extractor_logger)
Expand Down
9 changes: 8 additions & 1 deletion model/gpt_model.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
+ import logging
+ import time

from openai import OpenAI


class GPTModel:
- def __init__(self):
+ def __init__(self, logger: logging.Logger):
self.client = OpenAI()
+ self.logger = logger

def generate_response(self, prompt: str, user_message: str) -> str:
"""
Expand All @@ -17,9 +21,12 @@ def generate_response(self, prompt: str, user_message: str) -> str:
{"role": "user", "content": user_message}
]

+ before = time.time()
response = self.client.chat.completions.create(
model="gpt-4o-mini",
messages=messages,
)
+ self.logger.info(f"[TOKEN] {response.usage.total_tokens}")
+ self.logger.info(f"[TIME] {time.time() - before: .2f}s")

return response.choices[0].message.content
7 changes: 6 additions & 1 deletion service/content_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
+ import logging
import os

import requests
Expand All @@ -8,11 +9,12 @@


class ContentReader:
- def __init__(self, crawlability_checker: CrawlabilityChecker, content_extractor: ContentExtractor):
+ def __init__(self, crawlability_checker: CrawlabilityChecker, content_extractor: ContentExtractor, logger: logging.Logger):
self.GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
self.GOOGLE_SEARCH_CX = os.getenv("GOOGLE_SEARCH_CX")
self.crawlability_checker = crawlability_checker
self.content_extractor = content_extractor
+ self.logger = logger

def fetch_html_content(self, url: str) -> str:
"""
Expand Down Expand Up @@ -73,7 +75,10 @@ def read_content(self, url: str, content: str | None) -> dict[str, str] | None:
"""
if not content:
if self.crawlability_checker.can_crawl(url):
+ self.logger.info(f" OK {url}")
content = self.fetch_html_content(url)
+ else:
+ self.logger.info(f"[NO] {url}")

if content:
return self.extract(url, content)
Expand Down
1 change: 0 additions & 1 deletion service/crawlability_checker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from urllib.parse import urlparse, parse_qs

import requests
- from requests.packages import target


class CrawlabilityChecker:
Expand Down

0 comments on commit a6e86c8

Please sign in to comment.