Skip to content

Commit

Permalink
feat: 무신사, 29cm 크롤링 로직 추가 & 공통 크롤링 로직 리팩토링 #59
Browse files Browse the repository at this point in the history
  • Loading branch information
bongsh0112 committed Oct 10, 2023
1 parent 07f7cfb commit 444b1a3
Show file tree
Hide file tree
Showing 9 changed files with 175 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ public void oliveYoungCrawl() {
crawlingUseCase.executeForOliveYoung();
}

/**
 * Triggers the Musinsa crawl.
 *
 * <p>Mapped to HTTP PATCH on {@code /musinsa} (relative to whatever class-level mapping the
 * controller declares, which is not visible here). The OpenAPI summary reads: "Fetches product
 * names and product images through Musinsa crawling." The method simply delegates to
 * {@code crawlingUseCase.executeForMusinsa()} and returns no body.
 */
@Operation(summary = "무신사 크롤링을 통해 상품명과 상품 이미지를 가져옵니다.")
@PatchMapping("/musinsa")
public void musinsaCrawl() {
crawlingUseCase.executeForMusinsa();
}

/**
 * Triggers the 29cm crawl.
 *
 * <p>Mapped to HTTP PATCH on {@code /twentynine} (relative to whatever class-level mapping the
 * controller declares, which is not visible here). The OpenAPI summary reads: "Fetches product
 * names and product images through 29cm crawling." The method simply delegates to
 * {@code crawlingUseCase.executeForTwentyNine()} and returns no body.
 */
@Operation(summary = "29cm 크롤링을 통해 상품명과 상품 이미지를 가져옵니다.")
@PatchMapping("/twentynine")
public void twentynineCrawl() {
crawlingUseCase.executeForTwentyNine();
}

@Operation(summary = "키워드를 이용하여 관련된 상품을 검색합니다.")
@GetMapping()
public SliceResponse<ProductRetrieveDTO> productSearch(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,52 @@
import lombok.RequiredArgsConstructor;
import tify.server.core.annotation.UseCase;
import tify.server.domain.domains.product.adaptor.ProductAdaptor;
import tify.server.domain.domains.product.domain.Site;
import tify.server.domain.domains.product.dto.ProductCrawlingDto;
import tify.server.infrastructure.outer.crawling.MusinsaCrawl;
import tify.server.infrastructure.outer.crawling.OliveYoungCrawl;
import tify.server.infrastructure.outer.crawling.TwentyNineCrawl;

@UseCase
@RequiredArgsConstructor
public class CrawlingUseCase {

private final OliveYoungCrawl oliveYoungCrawl;
private final MusinsaCrawl musinsaCrawl;
private final TwentyNineCrawl twentyNineCrawl;
private final ProductAdaptor productAdaptor;

/**
 * Crawls an image URL for each Olive Young product and passes it to
 * {@link #updateImageUrl(String, String)} together with the product name.
 *
 * <p>For every {@code ProductCrawlingDto} returned by the adaptor, the product's crawl URL is
 * handed to {@code oliveYoungCrawl.process(...)} and the resulting string is forwarded as the
 * image source.
 */
@Transactional
public void executeForOliveYoung() {
// NOTE(review): the two consecutive declarations below look like the removed/added pair of a
// diff whose +/- markers were lost; presumably only the searchByCompany(...) line is meant to
// remain — confirm against the repository.
List<ProductCrawlingDto> productCrawlingDtos = productAdaptor.searchByName();
List<ProductCrawlingDto> productCrawlingDtos = productAdaptor.searchByCompany(Site.OLIVE_YOUNG);
productCrawlingDtos.forEach(
dto -> {
// One crawler invocation per product; the result is passed on as-is.
String imgSrc = oliveYoungCrawl.process(dto.getCrawlUrl());
updateImageUrl(dto.getName(), imgSrc);
});
}

/**
 * Crawls an image URL for each Musinsa product and passes it to
 * {@link #updateImageUrl(String, String)} together with the product name.
 *
 * <p>For every {@code ProductCrawlingDto} returned by
 * {@code productAdaptor.searchByCompany(Site.MUSINSA)}, the product's crawl URL is handed to
 * {@code musinsaCrawl.process(...)} and the resulting string is forwarded as the image source.
 * Any exception thrown by the crawler propagates to the caller.
 */
@Transactional
public void executeForMusinsa() {
    List<ProductCrawlingDto> productCrawlingDtos = productAdaptor.searchByCompany(Site.MUSINSA);
    productCrawlingDtos.forEach(
            dto -> {
                // The stdout debug print of name + URL was removed: the sibling
                // executeFor* methods do not print, and production code should not
                // write to System.out.
                String imgSrc = musinsaCrawl.process(dto.getCrawlUrl());
                updateImageUrl(dto.getName(), imgSrc);
            });
}

/**
 * Crawls an image URL for each 29cm product and passes it to
 * {@link #updateImageUrl(String, String)} together with the product name.
 *
 * <p>Products are looked up with {@code productAdaptor.searchByCompany(Site.CM)}; each crawl
 * URL is processed by {@code twentyNineCrawl} and the returned value is forwarded unchanged.
 */
@Transactional
public void executeForTwentyNine() {
    for (ProductCrawlingDto target : productAdaptor.searchByCompany(Site.CM)) {
        String crawledImage = twentyNineCrawl.process(target.getCrawlUrl());
        updateImageUrl(target.getName(), crawledImage);
    }
}

@Transactional
public void updateImageUrl(String name, String imgSrc) {
productAdaptor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public List<ProductCrawlingDto> searchByName() {
return productRepository.search();
}

/**
 * Returns the crawling DTOs for the given site.
 *
 * <p>Pure delegation to {@code productRepository.searchByCompany(site)}; no filtering or
 * mapping happens in the adaptor.
 *
 * @param site the shopping site whose products should be looked up
 * @return whatever list the repository query produces for {@code site}
 */
public List<ProductCrawlingDto> searchByCompany(Site site) {
return productRepository.searchByCompany(site);
}

/**
 * Returns the products for the given site.
 *
 * <p>Pure delegation to {@code productRepository.findAllBySite(site)}.
 *
 * @param site the shopping site to look products up by
 * @return the list returned by the repository for {@code site}
 */
public List<Product> queryAllBySite(Site site) {
return productRepository.findAllBySite(site);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
@Getter
@AllArgsConstructor
public enum Site {
OLIVE_YOUNG("올리브영"),
MUSINSA("무신사"),
OLIVE_YOUNG("oliveyoung"),
MUSINSA("musinsa"),
CM("29cm"),
;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import java.util.List;
import org.springframework.data.domain.Slice;
import tify.server.domain.domains.product.domain.Product;
import tify.server.domain.domains.product.domain.Site;
import tify.server.domain.domains.product.dto.ProductCondition;
import tify.server.domain.domains.product.dto.ProductCrawlingDto;
import tify.server.domain.domains.product.dto.ProductRetrieveDTO;

public interface ProductCustomRepository {
List<ProductCrawlingDto> search();

List<ProductCrawlingDto> searchByCompany(Site site);

List<Product> searchAllToRecommendation(String categoryName, String answer);

Slice<ProductRetrieveDTO> searchByKeyword(ProductCondition productCondition);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.springframework.data.domain.Slice;
import tify.server.domain.common.util.SliceUtil;
import tify.server.domain.domains.product.domain.Product;
import tify.server.domain.domains.product.domain.Site;
import tify.server.domain.domains.product.dto.ProductCondition;
import tify.server.domain.domains.product.dto.ProductCrawlingDto;
import tify.server.domain.domains.product.dto.ProductRetrieveDTO;
Expand All @@ -29,6 +30,17 @@ public List<ProductCrawlingDto> search() {
.fetch();
}

/**
 * Selects (name, crawlUrl) pairs, projected into {@code ProductCrawlingDto}, for products whose
 * crawl URL contains the given site's value, grouped by product name.
 *
 * <p>NOTE(review): {@code crawlUrl} is selected but is neither aggregated nor part of the
 * {@code groupBy}; databases that enforce strict grouping (for example MySQL with
 * ONLY_FULL_GROUP_BY) may reject such a query — confirm against the target database.
 *
 * @param site the site whose {@code getValue()} string is matched as a substring of the crawl URL
 * @return the fetched projection rows
 */
@Override
public List<ProductCrawlingDto> searchByCompany(Site site) {
return queryFactory
.select(new QProductCrawlingDto(product.name, product.crawlUrl))
.from(product)
// Site membership is decided by a substring match on the stored crawl URL.
.where(product.crawlUrl.contains(site.getValue()))
.groupBy(product.name)
.fetch();
}


@Override
public List<Product> searchAllToRecommendation(String categoryName, String answer) {
return queryFactory
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package tify.server.infrastructure.outer.crawling;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.stereotype.Component;
import tify.server.infrastructure.exception.FeignException;

/**
 * Selenium-based crawler that reads the main product image URL from a Musinsa product page.
 *
 * <p>Every call to {@link #process(String)} starts its own Chrome session and always shuts it
 * down again, so this singleton component keeps no browser state between (or across
 * concurrent) calls.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class MusinsaCrawl {

    /** System property Selenium reads to locate the chromedriver binary. */
    private static final String CHROME_DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location, used only when the property is not already set. */
    private static final String DEFAULT_CHROME_DRIVER_PATH =
            "/Users/sehwan/Downloads/chromedriver-mac-arm64/chromedriver";

    /** Fixed pause after navigation before the DOM is queried. */
    private static final long PAGE_LOAD_WAIT_MILLIS = 1000L;

    /**
     * Opens {@code url} in a fresh Chrome session and returns the {@code src} of the main
     * product image.
     *
     * @param url product detail page to crawl
     * @return the image {@code src} attribute, or an empty string when the page raised an
     *     unhandled alert (treated as an invalid URL)
     * @throws FeignException if the thread is interrupted while waiting for the page; the
     *     interrupt flag is restored before throwing
     * @throws RuntimeException any other Selenium failure (for example a missing element)
     *     propagates to the caller; the browser is still shut down
     */
    public String process(String url) {
        // Respect an externally configured driver path (-Dwebdriver.chrome.driver=...);
        // fall back to the historical hard-coded location otherwise.
        if (System.getProperty(CHROME_DRIVER_PROPERTY) == null) {
            System.setProperty(CHROME_DRIVER_PROPERTY, DEFAULT_CHROME_DRIVER_PATH);
        }

        ChromeOptions options = new ChromeOptions();
        options.addArguments("--remote-allow-origins=*");

        // Local, per-call driver: a shared field would be overwritten by concurrent requests.
        WebDriver driver = new ChromeDriver(options);
        try {
            return getDataList(driver, url);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw FeignException.EXCEPTION;
        } catch (UnhandledAlertException e) {
            log.info("유효하지 않은 url : {}", url);
            return "";
        } finally {
            // quit() closes every window and ends the session; running it in finally
            // prevents leaking a browser process when the crawl throws.
            driver.quit();
        }
    }

    /**
     * Navigates to {@code url}, waits briefly, and reads the main image's {@code src}.
     *
     * @param driver the browser session to use
     * @param url product detail page to open
     * @return the {@code src} attribute of the element with id {@code bigimg}
     * @throws InterruptedException if the fixed wait is interrupted
     */
    private String getDataList(WebDriver driver, String url) throws InterruptedException {
        driver.get(url);
        Thread.sleep(PAGE_LOAD_WAIT_MILLIS);
        WebElement element =
                driver.findElement(By.id("detail_bigimg"))
                        .findElement(By.className("product-img"))
                        .findElement(By.id("bigimg"));
        log.debug("musinsa image element : {}", element);
        return element.getAttribute("src");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@


import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
Expand All @@ -12,6 +14,7 @@

@Component
@RequiredArgsConstructor
@Slf4j
public class OliveYoungCrawl {

private WebDriver driver;
Expand All @@ -32,6 +35,8 @@ public String process(String url) {
imgSrc = getDataList(url);
} catch (InterruptedException e) {
throw FeignException.EXCEPTION;
} catch (UnhandledAlertException e) {
log.info("유효하지 않은 url : {}", url);
}

driver.close();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package tify.server.infrastructure.outer.crawling;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.stereotype.Component;
import tify.server.infrastructure.exception.FeignException;

/**
 * Selenium-based crawler that reads a product image URL from a 29cm product page.
 *
 * <p>Crawling is best-effort: failures are logged and a fallback value is returned instead of
 * an exception. Every call to {@link #process(String)} uses its own Chrome session and always
 * shuts it down, so this singleton component keeps no browser state between calls.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class TwentyNineCrawl {

    /** System property Selenium reads to locate the chromedriver binary. */
    private static final String CHROME_DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location, used only when the property is not already set. */
    private static final String DEFAULT_CHROME_DRIVER_PATH =
            "/Users/sehwan/Downloads/chromedriver-mac-arm64/chromedriver";

    /** CSS selector of the product image element on the detail page. */
    private static final String IMAGE_SELECTOR = ".css-12qah06.ewptmlp5";

    /** Fixed pause after navigation before the DOM is queried. */
    private static final long PAGE_LOAD_WAIT_MILLIS = 1000L;

    /**
     * Opens {@code url} in a fresh Chrome session and returns the {@code src} of the first
     * element matching the product image selector.
     *
     * @param url product detail page to crawl
     * @return the image {@code src}; {@code null} when no matching element (or no {@code src}
     *     attribute) is found; an empty string when the crawl failed or was interrupted
     */
    public String process(String url) {
        // Respect an externally configured driver path (-Dwebdriver.chrome.driver=...);
        // fall back to the historical hard-coded location otherwise.
        if (System.getProperty(CHROME_DRIVER_PROPERTY) == null) {
            System.setProperty(CHROME_DRIVER_PROPERTY, DEFAULT_CHROME_DRIVER_PATH);
        }

        ChromeOptions options = new ChromeOptions();
        options.addArguments("--remote-allow-origins=*");

        // Local, per-call driver: a shared field would be overwritten by concurrent requests.
        WebDriver driver = new ChromeDriver(options);

        String imgSrc = "";
        try {
            imgSrc = getDataList(driver, url);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            log.info("에러 발생 ㅠㅠ");
        } catch (UnhandledAlertException e) {
            log.info("유효하지 않은 url : {}", url);
        } catch (RuntimeException e) {
            // The previous `return` inside `finally` silently discarded every failure.
            // Keep the best-effort contract, but make the failure visible in the log.
            log.warn("29cm crawling failed for url : {}", url, e);
        } finally {
            // quit() closes every window and ends the session.
            driver.quit();
        }
        // Returning after (not inside) finally lets Errors and quit() failures propagate.
        return imgSrc;
    }

    /**
     * Navigates to {@code url}, waits briefly, and reads the first matching image's {@code src}.
     *
     * @param driver the browser session to use
     * @param url product detail page to open
     * @return the {@code src} attribute, or {@code null} when nothing matches the selector
     * @throws InterruptedException if the fixed wait is interrupted
     */
    private String getDataList(WebDriver driver, String url) throws InterruptedException {
        driver.get(url);
        Thread.sleep(PAGE_LOAD_WAIT_MILLIS);
        // Single lookup: querying twice could observe a different DOM on the second call.
        return driver.findElements(By.cssSelector(IMAGE_SELECTOR)).stream()
                .findFirst()
                .map(element -> element.getAttribute("src"))
                .orElse(null);
    }
}

0 comments on commit 444b1a3

Please sign in to comment.