-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* chore: OCR 도메인 생성 도메인 생성 * feat: ID를 사용하여 엔티티 꺼내오는 작업 수행 엔티티 꺼내오는 작업 수행 * rename: PDF 관련 파일 이동 파일 이동 * feat: 문서 저장시 OCR 작업수행하는 기능 OCR 자동 작업 기능 구현 * refactor: PDF 저장 로직 개선 저장 로직 개선 * test: OCR 기능 작동 여부 테스트코드 작성 테스트 코드 작성 * refactor: 상수값 static 으로 따로 관리 상수값 관리하도록 피드백 반영 * feat: AOP 를 활용하여 Get요청시 ok를 바로 보내줄 수 있도록 기능 구현 AOP 활용 래퍼클래스 개발 * remove: 상의 후 도입할지 정해야하기때문에 우선 삭제
- Loading branch information
1 parent
751e5cd
commit 311db97
Showing
11 changed files
with
205 additions
and
106 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package notai.ocr.application; | ||
|
||
import lombok.RequiredArgsConstructor; | ||
import net.sourceforge.tess4j.Tesseract; | ||
import notai.common.exception.type.FileProcessException; | ||
import notai.document.domain.Document; | ||
import notai.ocr.domain.OCR; | ||
import notai.ocr.domain.OCRRepository; | ||
import org.apache.pdfbox.Loader; | ||
import org.apache.pdfbox.pdmodel.PDDocument; | ||
import org.apache.pdfbox.rendering.PDFRenderer; | ||
import org.springframework.scheduling.annotation.Async; | ||
import org.springframework.stereotype.Service; | ||
|
||
import java.awt.image.BufferedImage; | ||
import java.io.File; | ||
|
||
@Service | ||
@RequiredArgsConstructor | ||
public class OCRService { | ||
|
||
private final OCRRepository ocrRepository; | ||
|
||
@Async | ||
public void saveOCR( | ||
Document document, File pdfFile | ||
) { | ||
try { | ||
System.setProperty("jna.library.path", "/usr/local/opt/tesseract/lib/"); | ||
//window, mac -> brew install tesseract, tesseract-lang | ||
Tesseract tesseract = new Tesseract(); | ||
|
||
tesseract.setDatapath("/usr/local/share/tessdata"); | ||
tesseract.setLanguage("kor+eng"); | ||
|
||
PDDocument pdDocument = Loader.loadPDF(pdfFile); | ||
PDFRenderer pdfRenderer = new PDFRenderer(pdDocument); | ||
for (int i = 0; i < pdDocument.getNumberOfPages(); i++) { | ||
BufferedImage image = pdfRenderer.renderImage(i); | ||
String ocrResult = tesseract.doOCR(image); | ||
OCR ocr = new OCR(document, i + 1, ocrResult); | ||
ocrRepository.save(ocr); | ||
} | ||
|
||
pdDocument.close(); | ||
} catch (Exception e) { | ||
throw new FileProcessException("PDF 파일을 통해 OCR 작업을 수행하는데 실패했습니다."); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package notai.ocr.domain; | ||
|
||
import jakarta.persistence.*; | ||
import static jakarta.persistence.GenerationType.IDENTITY; | ||
import jakarta.validation.constraints.NotNull; | ||
import static lombok.AccessLevel.PROTECTED; | ||
import lombok.Getter; | ||
import lombok.NoArgsConstructor; | ||
import notai.common.domain.RootEntity; | ||
import notai.document.domain.Document; | ||
|
||
@Entity | ||
@Table(name = "ocr") | ||
@Getter | ||
@NoArgsConstructor(access = PROTECTED) | ||
public class OCR extends RootEntity<Long> { | ||
|
||
@Id | ||
@GeneratedValue(strategy = IDENTITY) | ||
private Long id; | ||
|
||
@ManyToOne(fetch = FetchType.LAZY) | ||
@JoinColumn(name = "document_id", referencedColumnName = "id") | ||
private Document document; | ||
|
||
@NotNull | ||
@Column(name = "page_number") | ||
private Integer pageNumber; | ||
|
||
@NotNull | ||
@Column(name = "content", length = 255) | ||
private String content; | ||
|
||
public OCR(Document document, Integer pageNumber, String content) { | ||
this.document = document; | ||
this.pageNumber = pageNumber; | ||
this.content = content; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package notai.ocr.domain; | ||
|
||
import notai.common.exception.type.NotFoundException; | ||
import notai.document.domain.Document; | ||
import org.springframework.data.jpa.repository.JpaRepository; | ||
|
||
import java.util.List; | ||
|
||
public interface OCRRepository extends JpaRepository<OCR, Long> { | ||
default OCR getById(Long id) { | ||
return findById(id).orElseThrow(() -> new NotFoundException("OCR 데이터를 찾을 수 없습니다.")); | ||
} | ||
|
||
List<OCR> findAllByDocumentId(Long documentId); | ||
|
||
void deleteAllByDocument(Document document); | ||
} |
3 changes: 1 addition & 2 deletions
3
.../document/presentation/PdfController.java → src/main/java/notai/pdf/PdfController.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package notai.pdf.result; | ||
|
||
import java.io.File; | ||
|
||
/**
 * Outcome of saving a PDF: its file name, the URL path derived from that name,
 * and the saved file itself.
 *
 * @param pdfName file name of the PDF
 * @param pdfUrl  URL path of the PDF, of the form {@code pdf/<pdfName>} when built via {@link #of}
 * @param pdf     the saved PDF file
 */
public record PdfSaveResult(
        String pdfName,
        String pdfUrl,
        File pdf
) {

    /**
     * Builds a result whose URL is derived from the file name.
     *
     * @param pdfName file name of the PDF
     * @param pdf     the saved PDF file
     * @return a result with {@code pdfUrl} set to {@code "pdf/" + pdfName}
     */
    public static PdfSaveResult of(
            String pdfName, File pdf
    ) {
        final String url = toRelativeUrl(pdfName);
        return new PdfSaveResult(pdfName, url, pdf);
    }

    /** Prefixes the file name with the {@code pdf/} path segment. */
    private static String toRelativeUrl(String name) {
        return "pdf/" + name;
    }
}
13 changes: 0 additions & 13 deletions
13
src/test/java/notai/document/application/DocumentServiceTest.java
This file was deleted.
Oops, something went wrong.
73 changes: 0 additions & 73 deletions
73
src/test/java/notai/document/application/PdfServiceTest.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package notai.ocr.application; | ||
|
||
import notai.document.domain.Document; | ||
import notai.ocr.domain.OCR; | ||
import notai.ocr.domain.OCRRepository; | ||
import notai.pdf.result.PdfSaveResult; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import org.mockito.InjectMocks; | ||
import org.mockito.Mock; | ||
import static org.mockito.Mockito.*; | ||
import org.mockito.junit.jupiter.MockitoExtension; | ||
import org.springframework.core.io.ClassPathResource; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
|
||
@ExtendWith(MockitoExtension.class) | ||
class OCRServiceTest { | ||
|
||
@InjectMocks | ||
OCRService ocrService; | ||
@Mock | ||
OCRRepository ocrRepository; | ||
|
||
@Test | ||
void savePdf_success_existsTestPdf() throws IOException { | ||
//given | ||
Document document = mock(Document.class); | ||
OCR ocr = mock(OCR.class); | ||
ClassPathResource existsPdf = new ClassPathResource("pdf/test.pdf"); | ||
PdfSaveResult saveResult = PdfSaveResult.of("test.pdf", existsPdf.getFile()); | ||
when(ocrRepository.save(any(OCR.class))).thenReturn(ocr); | ||
//when | ||
ocrService.saveOCR(document, saveResult.pdf()); | ||
//then | ||
verify(ocrRepository, times(43)).save(any(OCR.class)); | ||
|
||
deleteFile(saveResult.pdf().toPath()); | ||
} | ||
|
||
void deleteFile(Path filePath) throws IOException { | ||
if (Files.exists(filePath)) { | ||
Files.delete(filePath); | ||
} | ||
} | ||
} |