Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] #53 - 비즈니스 트렌드 크롤링 API 1차 구현 #54

Merged
merged 6 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ dependencies {
// Swagger
implementation group: 'org.springdoc', name: 'springdoc-openapi-starter-webmvc-ui', version: '2.2.0'

// crawling
// implementation 'org.jsoup:jsoup:1.15.4'
implementation 'org.seleniumhq.selenium:selenium-java:4.13.0'
// WebDriverManager
implementation 'io.github.bonigarcia:webdrivermanager:5.5.3'

// Test dependencies
testImplementation 'org.springframework.boot:spring-boot-starter-test'
testImplementation 'org.springframework.security:spring-security-test'
Expand Down
29 changes: 0 additions & 29 deletions src/main/java/dgu/sw/domain/business/entity/Business.java

This file was deleted.

22 changes: 0 additions & 22 deletions src/main/java/dgu/sw/domain/business/entity/BusinessImage.java

This file was deleted.

41 changes: 41 additions & 0 deletions src/main/java/dgu/sw/domain/trend/controller/TrendController.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package dgu.sw.domain.trend.controller;

import dgu.sw.domain.trend.dto.TrendDTO;
import dgu.sw.domain.trend.service.TrendService;
import dgu.sw.global.ApiResponse;
import dgu.sw.global.status.SuccessStatus;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.List;

/**
 * REST endpoints under {@code /api/trends} that expose the trend articles
 * provided by {@link TrendService}.
 */
@RestController
@RequestMapping("/api/trends")
public class TrendController {

    private final TrendService trendService;

    public TrendController(TrendService trendService) {
        this.trendService = trendService;
    }

    /**
     * Returns the list of trend articles.
     *
     * @return a success response wrapping the list, or a {@code TREND404}
     *         failure response when the service yields an empty list
     */
    @GetMapping
    public ApiResponse<List<TrendDTO>> getTrends() {
        final List<TrendDTO> latest = trendService.fetchTrends();
        if (!latest.isEmpty()) {
            return ApiResponse.of(SuccessStatus._OK, latest);
        }
        return ApiResponse.onFailure("TREND404", "트렌드 데이터를 가져올 수 없습니다.", null);
    }

    /**
     * Returns the detail of a single trend article.
     *
     * @param trendId identifier of the article, taken from the request path
     * @return a success response wrapping the article, or a
     *         {@code TREND_DETAIL404} failure response when the service
     *         returns {@code null}
     */
    @GetMapping("/{trendId}")
    public ApiResponse<TrendDTO> getTrendDetail(@PathVariable String trendId) {
        final TrendDTO detail = trendService.fetchTrendDetail(trendId);
        if (detail != null) {
            return ApiResponse.of(SuccessStatus._OK, detail);
        }
        return ApiResponse.onFailure("TREND_DETAIL404", "트렌드 상세 데이터를 가져올 수 없습니다.", null);
    }
}
21 changes: 21 additions & 0 deletions src/main/java/dgu/sw/domain/trend/dto/TrendDTO.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package dgu.sw.domain.trend.dto;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;

/**
 * Data carrier for one crawled trend article, used both for list entries and
 * for the article detail view.
 *
 * <p>All fields are plain strings holding the text or attribute values as they
 * were scraped; no parsing or normalization is applied in this class.
 */
@Getter
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TrendDTO {
// Article identifier; TrendService derives it from the last path segment of the article link.
private String id;
// Category label; TrendService uses a fixed label for list entries and scraped text for details.
private String category;
// Article headline text.
private String title;
// Article text: a short excerpt for list entries, the main content for details.
private String content;
// Publication date exactly as displayed on the page (unparsed text).
private String date;
// Populated by TrendService with the displayed author name.
private String source;
// URL taken from the "src" attribute of the article image.
private String imageUrl;
// URL of the author's profile image; TrendService sets it only for detail results.
private String authorProfile;
}
151 changes: 151 additions & 0 deletions src/main/java/dgu/sw/domain/trend/service/TrendService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package dgu.sw.domain.trend.service;

import dgu.sw.domain.trend.dto.TrendDTO;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.stereotype.Service;

import java.time.Duration;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Crawls trend articles from newneek.co with a headless Chrome browser and
 * keeps the article list in an in-memory, once-per-day cache.
 *
 * <p>{@link #fetchTrends()} is synchronized because this bean's cache fields are
 * shared mutable state; {@link #fetchTrendDetail(String)} touches no shared
 * state and always crawls on demand.
 */
@Service
public class TrendService {

    private static final Logger LOGGER = Logger.getLogger(TrendService.class.getName());

    private static final String TREND_LIST_URL = "https://newneek.co/@newneek/series/89";
    private static final String ARTICLE_URL_PREFIX = "https://newneek.co/@newneek/article/";

    /** Maximum time to wait for the expected element to appear after navigation. */
    private static final Duration PAGE_LOAD_TIMEOUT = Duration.ofSeconds(10);

    /** Upper bound on the number of list entries returned by one crawl. */
    private static final int MAX_TRENDS = 10;

    /** Minimum pause between list crawls after one came back empty. */
    private static final Duration FAILED_CRAWL_RETRY_INTERVAL = Duration.ofMinutes(1);

    /** Anchor elements on the list page whose {@code href} points at an article. */
    private static final By ARTICLE_LINK_SELECTOR =
            By.cssSelector("a.block.mb-4.border-b.border-gray-100.pb-4");

    private List<TrendDTO> cachedTrends = new ArrayList<>();
    private LocalDateTime lastFetchedTime = null;
    private LocalDateTime lastFailedAttemptTime = null;

    /**
     * Returns the latest article list, re-crawling when nothing has been cached
     * yet or the cached data is from an earlier calendar day.
     *
     * <p>An empty crawl result is treated as a failure: it is NOT stored, so the
     * previously cached list (possibly empty) keeps being served and the crawl is
     * retried once {@link #FAILED_CRAWL_RETRY_INTERVAL} has elapsed. Without
     * this, one failed crawl would leave the cache empty until the next day.
     *
     * <p>The method is synchronized so that concurrent requests neither race on
     * the cache fields nor start several browser sessions for the same refresh.
     *
     * @return the cached list of trends; never {@code null}, possibly empty
     */
    public synchronized List<TrendDTO> fetchTrends() {
        LocalDateTime now = LocalDateTime.now();
        boolean cacheStale = lastFetchedTime == null
                || lastFetchedTime.toLocalDate().isBefore(now.toLocalDate());
        boolean coolingDown = lastFailedAttemptTime != null
                && now.isBefore(lastFailedAttemptTime.plus(FAILED_CRAWL_RETRY_INTERVAL));

        if (cacheStale && !coolingDown) {
            List<TrendDTO> fresh = fetchTrendsFromWeb();
            if (fresh.isEmpty()) {
                // Remember the failure time only; keep whatever was cached before.
                lastFailedAttemptTime = LocalDateTime.now();
            } else {
                cachedTrends = fresh;
                lastFetchedTime = LocalDateTime.now();
                lastFailedAttemptTime = null;
            }
        }
        return cachedTrends;
    }

    /**
     * Crawls the detail page of a single article.
     *
     * @param trendId article identifier appended to the article URL prefix
     * @return the populated DTO, or {@code null} when {@code trendId} is
     *         {@code null}/blank or the page could not be crawled
     */
    public TrendDTO fetchTrendDetail(String trendId) {
        // Do not start a browser for an ID that cannot address an article.
        if (trendId == null || trendId.trim().isEmpty()) {
            return null;
        }

        final String targetUrl = ARTICLE_URL_PREFIX + trendId;
        WebDriver driver = createHeadlessDriver();

        try {
            driver.get(targetUrl);

            // Wait until the article container is present before reading elements.
            new WebDriverWait(driver, PAGE_LOAD_TIMEOUT)
                    .until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("article")));

            String imageUrl = driver.findElement(By.cssSelector("div.relative img")).getAttribute("src");
            String title = driver.findElement(By.cssSelector("h1.mb-4.break-words.text-2xl.font-bold.text-gray-900")).getText();
            String content = driver.findElement(By.cssSelector("main.content")).getText();
            String author = driver.findElement(By.cssSelector("strong.line-clamp-1.text-sm.font-bold")).getText();
            String date = driver.findElement(By.cssSelector("div.flex.items-center.gap-1.text-xs.text-gray-500 time")).getText();
            String authorProfileUrl = driver.findElement(By.cssSelector("div.items-center img")).getAttribute("src");
            String category = driver.findElement(By.cssSelector("a.h-7.rounded-full.bg-gray-50")).getText();

            return TrendDTO.builder()
                    .id(trendId)
                    .category(category)
                    .title(title)
                    .content(content)
                    .date(date)
                    .source(author)
                    .imageUrl(imageUrl)
                    .authorProfile(authorProfileUrl)
                    .build();
        } catch (Exception e) {
            // trendId comes from the request path; strip line breaks before logging it.
            String safeId = trendId.replaceAll("[\\r\\n]", "_");
            LOGGER.log(Level.WARNING, "Failed to crawl trend detail for id " + safeId, e);
            return null;
        } finally {
            driver.quit();
        }
    }

    /**
     * Crawls the article list page and builds up to {@link #MAX_TRENDS} entries.
     *
     * <p>Links and article elements are collected separately and paired by
     * index, so the i-th link is assumed to belong to the i-th article.
     *
     * @return the entries collected before any failure; empty when the page
     *         could not be read at all
     */
    private List<TrendDTO> fetchTrendsFromWeb() {
        List<TrendDTO> trends = new ArrayList<>();
        WebDriver driver = createHeadlessDriver();

        try {
            driver.get(TREND_LIST_URL);

            new WebDriverWait(driver, PAGE_LOAD_TIMEOUT)
                    .until(ExpectedConditions.presenceOfElementLocated(ARTICLE_LINK_SELECTOR));

            // 1. Collect the href of every article link; the ID is derived from it below.
            List<String> hrefs = new ArrayList<>();
            for (WebElement link : driver.findElements(ARTICLE_LINK_SELECTOR)) {
                hrefs.add(link.getAttribute("href"));
            }

            // 2. Read the remaining fields from the <article> elements.
            List<WebElement> articles = driver.findElements(By.cssSelector("article.flex.flex-col"));
            int count = Math.min(MAX_TRENDS, Math.min(articles.size(), hrefs.size()));
            for (int i = 0; i < count; i++) {
                WebElement article = articles.get(i);

                String imageUrl = article.findElement(By.cssSelector("img")).getAttribute("src");
                String title = article.findElement(By.cssSelector("h2.break-words.text-xl.font-bold.text-gray-900")).getText();
                String content = article.findElement(By.cssSelector("p.line-clamp-2.break-all.text-gray-500")).getText();
                String author = article.findElement(By.cssSelector("strong.text-sm.font-bold.text-gray-700")).getText();
                String date = article.findElement(By.cssSelector("time")).getText();

                // 3. The article ID is the last path segment of the link.
                String href = hrefs.get(i);
                String id = href.substring(href.lastIndexOf("/") + 1);

                // 4. Build the list entry.
                trends.add(TrendDTO.builder()
                        .id(id)
                        .category("뉴닉")
                        .title(title)
                        .content(content)
                        .date(date)
                        .source(author)
                        .imageUrl(imageUrl)
                        .build());
            }
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Failed to crawl trend list from " + TREND_LIST_URL, e);
        } finally {
            driver.quit();
        }

        return trends;
    }

    /**
     * Resolves a ChromeDriver binary via WebDriverManager and starts a headless
     * Chrome session. The caller is responsible for calling {@code quit()}.
     */
    private WebDriver createHeadlessDriver() {
        WebDriverManager.chromedriver().setup();
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless", "--no-sandbox", "--disable-dev-shm-usage");
        return new ChromeDriver(options);
    }

}