Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] 무신사, 29cm 크롤링 로직 추가 & 공통 크롤링 로직 리팩토링 #60

Merged
merged 2 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ public void oliveYoungCrawl() {
crawlingUseCase.executeForOliveYoung();
}

/**
 * Triggers the Musinsa crawl.
 *
 * <p>Handles PATCH requests on the {@code /musinsa} sub-path and delegates directly to
 * {@code crawlingUseCase.executeForMusinsa()}. Nothing is returned to the caller.
 */
@Operation(summary = "무신사 크롤링을 통해 상품명과 상품 이미지를 가져옵니다.")
@PatchMapping("/musinsa")
public void musinsaCrawl() {
crawlingUseCase.executeForMusinsa();
}

/**
 * Triggers the 29cm crawl.
 *
 * <p>Handles PATCH requests on the {@code /twentynine} sub-path and delegates directly to
 * {@code crawlingUseCase.executeForTwentyNine()}. Nothing is returned to the caller.
 */
@Operation(summary = "29cm 크롤링을 통해 상품명과 상품 이미지를 가져옵니다.")
@PatchMapping("/twentynine")
public void twentynineCrawl() {
crawlingUseCase.executeForTwentyNine();
}

@Operation(summary = "키워드를 이용하여 관련된 상품을 검색합니다.")
@GetMapping()
public SliceResponse<ProductRetrieveDTO> productSearch(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,53 @@
import lombok.RequiredArgsConstructor;
import tify.server.core.annotation.UseCase;
import tify.server.domain.domains.product.adaptor.ProductAdaptor;
import tify.server.domain.domains.product.domain.Site;
import tify.server.domain.domains.product.dto.ProductCrawlingDto;
import tify.server.infrastructure.outer.crawling.MusinsaCrawl;
import tify.server.infrastructure.outer.crawling.OliveYoungCrawl;
import tify.server.infrastructure.outer.crawling.TwentyNineCrawl;

@UseCase
@RequiredArgsConstructor
public class CrawlingUseCase {

private final OliveYoungCrawl oliveYoungCrawl;
private final MusinsaCrawl musinsaCrawl;
private final TwentyNineCrawl twentyNineCrawl;
private final ProductAdaptor productAdaptor;

/**
 * Crawls every Olive Young product and stores the image URL that was found.
 *
 * <p>Products are selected with {@code productAdaptor.searchByCompany(Site.OLIVE_YOUNG)}; each
 * product's crawl URL is handed to {@code oliveYoungCrawl.process(...)} and the result is written
 * back through {@code updateImageUrl}. Products for which the crawler produced no image URL
 * ({@code null} or empty) are skipped so a failed crawl does not overwrite stored data.
 *
 * <p>NOTE(review): the surrounding transaction stays open for the whole crawl loop — confirm that
 * this is acceptable for the expected number of products.
 */
@Transactional
public void executeForOliveYoung() {
    // Only the site-filtered lookup is kept; the earlier searchByName() declaration of the same
    // variable was a leftover duplicate.
    List<ProductCrawlingDto> productCrawlingDtos =
            productAdaptor.searchByCompany(Site.OLIVE_YOUNG);
    productCrawlingDtos.forEach(
            dto -> {
                String imgSrc = oliveYoungCrawl.process(dto.getCrawlUrl());
                if (imgSrc == null || imgSrc.isEmpty()) {
                    // Nothing was crawled for this product; keep whatever is stored.
                    return;
                }
                updateImageUrl(dto.getName(), imgSrc);
            });
}

/**
 * Crawls every Musinsa product and stores the image URL that was found.
 *
 * <p>Products are selected with {@code productAdaptor.searchByCompany(Site.MUSINSA)}; each
 * product's crawl URL is handed to {@code musinsaCrawl.process(...)} and the result is written
 * back through {@code updateImageUrl}. Products for which the crawler produced no image URL
 * ({@code null} or empty, e.g. an invalid URL) are skipped so a failed crawl does not overwrite
 * stored data.
 *
 * <p>NOTE(review): the surrounding transaction stays open while a browser session is started for
 * every product — confirm that this is acceptable for the expected number of products.
 */
@Transactional
public void executeForMusinsa() {
    List<ProductCrawlingDto> productCrawlingDtos = productAdaptor.searchByCompany(Site.MUSINSA);
    productCrawlingDtos.forEach(
            dto -> {
                String imgSrc = musinsaCrawl.process(dto.getCrawlUrl());
                if (imgSrc == null || imgSrc.isEmpty()) {
                    // Nothing was crawled for this product; keep whatever is stored.
                    return;
                }
                updateImageUrl(dto.getName(), imgSrc);
            });
}

/**
 * Crawls every 29cm product and stores the image URL that was found.
 *
 * <p>Products are selected with {@code productAdaptor.searchByCompany(Site.CM)}; each product's
 * crawl URL is handed to {@code twentyNineCrawl.process(...)} and the result is written back
 * through {@code updateImageUrl}. The crawler returns {@code null} when the image element is
 * missing and an empty string when the crawl fails, so both are skipped instead of being stored.
 *
 * <p>NOTE(review): the surrounding transaction stays open while a browser session is started for
 * every product — confirm that this is acceptable for the expected number of products.
 */
@Transactional
public void executeForTwentyNine() {
    List<ProductCrawlingDto> productCrawlingDtos = productAdaptor.searchByCompany(Site.CM);
    productCrawlingDtos.forEach(
            dto -> {
                String imgSrc = twentyNineCrawl.process(dto.getCrawlUrl());
                if (imgSrc == null || imgSrc.isEmpty()) {
                    // Nothing was crawled for this product; keep whatever is stored.
                    return;
                }
                updateImageUrl(dto.getName(), imgSrc);
            });
}

@Transactional
public void updateImageUrl(String name, String imgSrc) {
productAdaptor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public List<ProductCrawlingDto> searchByName() {
return productRepository.search();
}

/**
 * Returns the crawling DTOs for the given site.
 *
 * <p>Pure delegation to {@code productRepository.searchByCompany(site)}; no filtering or mapping
 * happens here.
 *
 * @param site the site whose products should be looked up
 * @return whatever the repository query returns for {@code site}
 */
public List<ProductCrawlingDto> searchByCompany(Site site) {
return productRepository.searchByCompany(site);
}

/**
 * Returns all products of the given site.
 *
 * <p>Pure delegation to {@code productRepository.findAllBySite(site)}.
 *
 * @param site the site to filter by
 * @return the products the repository returns for {@code site}
 */
public List<Product> queryAllBySite(Site site) {
return productRepository.findAllBySite(site);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
@Getter
@AllArgsConstructor
public enum Site {
OLIVE_YOUNG("올리브영"),
MUSINSA("무신사"),
OLIVE_YOUNG("oliveyoung"),
MUSINSA("musinsa"),
CM("29cm"),
;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import java.util.List;
import org.springframework.data.domain.Slice;
import tify.server.domain.domains.product.domain.Product;
import tify.server.domain.domains.product.domain.Site;
import tify.server.domain.domains.product.dto.ProductCondition;
import tify.server.domain.domains.product.dto.ProductCrawlingDto;
import tify.server.domain.domains.product.dto.ProductRetrieveDTO;

public interface ProductCustomRepository {
List<ProductCrawlingDto> search();

/**
 * Looks up the name / crawl-URL pairs of the products that belong to the given site.
 *
 * @param site the site to search for
 * @return one {@link ProductCrawlingDto} per matching row of the query
 */
List<ProductCrawlingDto> searchByCompany(Site site);

List<Product> searchAllToRecommendation(String categoryName, String answer);

Slice<ProductRetrieveDTO> searchByKeyword(ProductCondition productCondition);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.springframework.data.domain.Slice;
import tify.server.domain.common.util.SliceUtil;
import tify.server.domain.domains.product.domain.Product;
import tify.server.domain.domains.product.domain.Site;
import tify.server.domain.domains.product.dto.ProductCondition;
import tify.server.domain.domains.product.dto.ProductCrawlingDto;
import tify.server.domain.domains.product.dto.ProductRetrieveDTO;
Expand All @@ -29,6 +30,16 @@ public List<ProductCrawlingDto> search() {
.fetch();
}

/**
 * Selects the name and crawl URL of products whose crawl URL contains the site's value.
 *
 * <p>Despite the method name there is no comparison against a company or site column: a product
 * matches when {@code product.crawlUrl} contains {@code site.getValue()} as a substring. Rows are
 * grouped by product name.
 *
 * <p>NOTE(review): {@code crawlUrl} is selected but is not part of the {@code groupBy}; some
 * database modes reject such queries or pick an arbitrary URL per name — confirm this is intended.
 *
 * @param site the site whose {@code getValue()} is searched for inside the crawl URL
 * @return the projected name / crawl-URL pairs
 */
@Override
public List<ProductCrawlingDto> searchByCompany(Site site) {
return queryFactory
.select(new QProductCrawlingDto(product.name, product.crawlUrl))
.from(product)
// Substring match on the URL, e.g. the site's domain keyword.
.where(product.crawlUrl.contains(site.getValue()))
.groupBy(product.name)
.fetch();
}

@Override
public List<Product> searchAllToRecommendation(String categoryName, String answer) {
return queryFactory
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package tify.server.infrastructure.outer.crawling;


import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.stereotype.Component;
import tify.server.infrastructure.exception.FeignException;

/**
 * Selenium-based crawler that reads the main product image URL from a Musinsa product page.
 *
 * <p>Every call to {@link #process(String)} starts its own Chrome session and always shuts it
 * down again, including when the page lookup fails. The driver is kept in a local variable rather
 * than a field, so calls do not share browser state.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class MusinsaCrawl {

    /**
     * Opens {@code url} in Chrome and returns the {@code src} attribute of the product image.
     *
     * @param url the product page to crawl
     * @return the image URL, or an empty string when the page raised an unhandled alert (treated
     *     as an invalid URL)
     * @throws FeignException when the waiting thread is interrupted; the interrupt flag is
     *     restored before the exception is thrown
     */
    public String process(String url) {
        // NOTE(review): this is a developer-machine path; it should come from configuration.
        System.setProperty(
                "webdriver.chrome.driver",
                "/Users/sehwan/Downloads/chromedriver-mac-arm64/chromedriver");

        ChromeOptions options = new ChromeOptions();
        options.addArguments("--remote-allow-origins=*");

        WebDriver driver = new ChromeDriver(options);
        try {
            return getDataList(driver, url);
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw FeignException.EXCEPTION;
        } catch (UnhandledAlertException e) {
            log.info("유효하지 않은 url : {}", url);
            return "";
        } finally {
            // quit() closes every window and ends the session; running it in finally guarantees
            // the browser process is released even when the lookup throws.
            driver.quit();
        }
    }

    /**
     * Loads the page and locates the image element {@code #detail_bigimg .product-img #bigimg}.
     *
     * @param driver the browser session to use
     * @param url the product page to load
     * @return the {@code src} attribute of the image element
     * @throws InterruptedException if the fixed wait after loading the page is interrupted
     */
    private String getDataList(WebDriver driver, String url) throws InterruptedException {
        driver.get(url);
        // Fixed pause after navigation before the element lookup.
        Thread.sleep(1000);
        WebElement element =
                driver.findElement(By.id("detail_bigimg"))
                        .findElement(By.className("product-img"))
                        .findElement(By.id("bigimg"));
        log.debug("musinsa image element : {}", element);
        return element.getAttribute("src");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@


import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
Expand All @@ -12,6 +14,7 @@

@Component
@RequiredArgsConstructor
@Slf4j
public class OliveYoungCrawl {

private WebDriver driver;
Expand All @@ -32,6 +35,8 @@ public String process(String url) {
imgSrc = getDataList(url);
} catch (InterruptedException e) {
throw FeignException.EXCEPTION;
} catch (UnhandledAlertException e) {
log.info("유효하지 않은 url : {}", url);
}

driver.close();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package tify.server.infrastructure.outer.crawling;


import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.stereotype.Component;

/**
 * Selenium-based crawler that reads the product image URL from a 29cm product page.
 *
 * <p>Crawling is best-effort: failures are logged and reported through the return value instead
 * of being thrown. Every call to {@link #process(String)} starts its own Chrome session and always
 * shuts it down again. The driver is kept in a local variable rather than a field, so calls do not
 * share browser state.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class TwentyNineCrawl {

    /** CSS selector of the product image element on the 29cm detail page. */
    private static final String IMAGE_SELECTOR = ".css-12qah06.ewptmlp5";

    /**
     * Opens {@code url} in Chrome and returns the {@code src} attribute of the product image.
     *
     * @param url the product page to crawl
     * @return the image URL; {@code null} when the page contains no matching image element; an
     *     empty string when the crawl failed (interrupt, unhandled alert or another WebDriver
     *     runtime failure)
     */
    public String process(String url) {
        // NOTE(review): this is a developer-machine path; it should come from configuration.
        System.setProperty(
                "webdriver.chrome.driver",
                "/Users/sehwan/Downloads/chromedriver-mac-arm64/chromedriver");

        ChromeOptions options = new ChromeOptions();
        options.addArguments("--remote-allow-origins=*");

        WebDriver driver = new ChromeDriver(options);

        String imgSrc = "";
        try {
            imgSrc = getDataList(driver, url);
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            log.info("에러 발생 ㅠㅠ");
        } catch (UnhandledAlertException e) {
            log.info("유효하지 않은 url : {}", url);
        } catch (RuntimeException e) {
            // The former return-inside-finally discarded these silently; keep the best-effort
            // contract but make the failure visible in the log.
            log.warn("29cm crawl failed for url : {}", url, e);
        } finally {
            // quit() closes every window and ends the session.
            driver.quit();
        }
        return imgSrc;
    }

    /**
     * Loads the page and looks up the first element matching {@link #IMAGE_SELECTOR}.
     *
     * @param driver the browser session to use
     * @param url the product page to load
     * @return the {@code src} attribute of the first match, or {@code null} when nothing matches
     * @throws InterruptedException if the fixed wait after loading the page is interrupted
     */
    private String getDataList(WebDriver driver, String url) throws InterruptedException {
        driver.get(url);
        // Fixed pause after navigation before the element lookup.
        Thread.sleep(1000);
        // Query once and reuse the result instead of hitting the browser twice.
        java.util.List<WebElement> images = driver.findElements(By.cssSelector(IMAGE_SELECTOR));
        if (images.isEmpty()) {
            return null;
        }
        WebElement element = images.get(0);
        log.debug("29cm image element : {}", element);
        return element.getAttribute("src");
    }
}
Loading