diff --git a/cucumber/exampleFiles/ghost1.pdf b/cucumber/exampleFiles/ghost1.pdf new file mode 100644 index 00000000000..4e497da02ab Binary files /dev/null and b/cucumber/exampleFiles/ghost1.pdf differ diff --git a/cucumber/exampleFiles/ghost2.pdf b/cucumber/exampleFiles/ghost2.pdf new file mode 100644 index 00000000000..6b2f529b8ad Binary files /dev/null and b/cucumber/exampleFiles/ghost2.pdf differ diff --git a/cucumber/exampleFiles/ghost3.pdf b/cucumber/exampleFiles/ghost3.pdf new file mode 100644 index 00000000000..ea0516614c7 Binary files /dev/null and b/cucumber/exampleFiles/ghost3.pdf differ diff --git a/cucumber/exampleFiles/images.pdf b/cucumber/exampleFiles/images.pdf new file mode 100644 index 00000000000..a87bca41448 Binary files /dev/null and b/cucumber/exampleFiles/images.pdf differ diff --git a/cucumber/exampleFiles/pdfa1.pdf b/cucumber/exampleFiles/pdfa1.pdf new file mode 100644 index 00000000000..2873c0d464b Binary files /dev/null and b/cucumber/exampleFiles/pdfa1.pdf differ diff --git a/cucumber/exampleFiles/pdfa2.pdf b/cucumber/exampleFiles/pdfa2.pdf new file mode 100644 index 00000000000..b0622aec9eb Binary files /dev/null and b/cucumber/exampleFiles/pdfa2.pdf differ diff --git a/cucumber/features/environment.py b/cucumber/features/environment.py index ec7f2c9ff3f..ca56b21f196 100644 --- a/cucumber/features/environment.py +++ b/cucumber/features/environment.py @@ -14,3 +14,8 @@ def after_scenario(context, scenario): os.remove('response_file') if hasattr(context, 'file_name') and os.path.exists(context.file_name): os.remove(context.file_name) + + # Remove any temporary files + for temp_file in os.listdir('.'): + if temp_file.startswith('genericNonCustomisableName') or temp_file.startswith('temp_image_'): + os.remove(temp_file) \ No newline at end of file diff --git a/cucumber/features/examples.feature b/cucumber/features/examples.feature index 04e4081fa65..c6477466fd7 100644 --- a/cucumber/features/examples.feature +++ 
b/cucumber/features/examples.feature @@ -1,4 +1,4 @@ -@example +@example @general Feature: API Validation @positive @password diff --git a/cucumber/features/external.feature b/cucumber/features/external.feature index 1d503135de5..c1330f523c3 100644 --- a/cucumber/features/external.feature +++ b/cucumber/features/external.feature @@ -32,7 +32,7 @@ Feature: API Validation @ocr @positive Scenario: Extract Image Scans Given I generate a PDF file as "fileInput" - And the pdf contains 3 images on 2 pages + And the pdf contains 3 images of size 300x300 on 2 pages And the request data includes | parameter | value | | angleThreshold | 5 | @@ -125,8 +125,7 @@ Feature: API Validation @ocr Scenario: PDFA - Given I generate a PDF file as "fileInput" - And the pdf contains 3 pages with random text + Given I use an example file at "exampleFiles/pdfa2.pdf" as parameter "fileInput" And the request data includes | parameter | value | | outputFormat | pdfa | @@ -137,8 +136,7 @@ Feature: API Validation @ocr Scenario: PDFA1 - Given I generate a PDF file as "fileInput" - And the pdf contains 3 pages with random text + Given I use an example file at "exampleFiles/pdfa1.pdf" as parameter "fileInput" And the request data includes | parameter | value | | outputFormat | pdfa-1 | @@ -149,8 +147,7 @@ Feature: API Validation @compress @ghostscript @positive Scenario: Compress - Given I generate a PDF file as "fileInput" - And the pdf contains 3 pages with random text + Given I use an example file at "exampleFiles/ghost3.pdf" as parameter "fileInput" And the request data includes | parameter | value | | optimizeLevel | 4 | @@ -161,8 +158,7 @@ Feature: API Validation @compress @ghostscript @positive Scenario: Compress - Given I generate a PDF file as "fileInput" - And the pdf contains 3 pages with random text + Given I use an example file at "exampleFiles/ghost2.pdf" as parameter "fileInput" And the request data includes | parameter | value | | optimizeLevel | 1 | @@ -175,8 +171,7 @@ Feature: API 
Validation @compress @ghostscript @positive Scenario: Compress - Given I generate a PDF file as "fileInput" - And the pdf contains 3 pages with random text + Given I use an example file at "exampleFiles/ghost1.pdf" as parameter "fileInput" And the request data includes | parameter | value | | optimizeLevel | 1 | diff --git a/cucumber/features/general.feature b/cucumber/features/general.feature index 229d37b50c6..345f59cbc8d 100644 --- a/cucumber/features/general.feature +++ b/cucumber/features/general.feature @@ -94,3 +94,23 @@ Feature: API Validation | 1 | 10 | 2 | 10 | + @extract-images + Scenario Outline: Extract Image Scans + Given I use an example file at "exampleFiles/images.pdf" as parameter "fileInput" + And the request data includes + | parameter | value | + | format | | + When I send the API request to the endpoint "/api/v1/misc/extract-images" + Then the response content type should be "application/octet-stream" + And the response file should have extension ".zip" + And the response ZIP should contain 20 files + And the response file should have size greater than 0 + And the response status code should be 200 + + Examples: + | format | + | png | + | gif | + | jpeg | + + diff --git a/cucumber/features/steps/step_definitions.py b/cucumber/features/steps/step_definitions.py index 60479b6eac3..65a49fda0ad 100644 --- a/cucumber/features/steps/step_definitions.py +++ b/cucumber/features/steps/step_definitions.py @@ -6,11 +6,14 @@ import random import string from reportlab.lib.pagesizes import letter +from reportlab.lib.utils import ImageReader from reportlab.pdfgen import canvas import mimetypes import requests import zipfile import shutil +import re +from PIL import Image, ImageDraw ######### # GIVEN # @@ -43,8 +46,6 @@ def step_use_example_file(context, filePath, fileInput): except FileNotFoundError: raise FileNotFoundError(f"The example file '{filePath}' does not exist.") - - @given('the pdf contains {page_count:d} pages') def 
step_pdf_contains_pages(context, page_count): writer = PdfWriter() @@ -66,8 +67,6 @@ def step_pdf_contains_blank_pages(context, page_count): context.files[context.param_name].close() context.files[context.param_name] = open(context.file_name, 'rb') - - def create_black_box_image(file_name, size): can = canvas.Canvas(file_name, pagesize=size) width, height = size @@ -76,36 +75,75 @@ def create_black_box_image(file_name, size): can.showPage() can.save() -def create_pdf_with_black_boxes(file_name, image_count, page_count): - page_width, page_height = letter - box_size = 72 # 1 inch by 1 inch black box +@given(u'the pdf contains {image_count:d} images of size {width:d}x{height:d} on {page_count:d} pages') +def step_impl(context, image_count, width, height, page_count): + context.param_name = "fileInput" + context.file_name = "genericNonCustomisableName.pdf" + create_pdf_with_images_and_boxes(context.file_name, image_count, page_count, width, height) + if not hasattr(context, 'files'): + context.files = {} + context.files[context.param_name] = open(context.file_name, 'rb') + +def add_black_boxes_to_image(image): + if isinstance(image, str): + image = Image.open(image) + + draw = ImageDraw.Draw(image) + draw.rectangle([(0, 0), image.size], fill=(0, 0, 0)) # Fill image with black + return image + +def create_pdf_with_images_and_boxes(file_name, image_count, page_count, image_width, image_height): + page_width, page_height = max(letter[0], image_width), max(letter[1], image_height) boxes_per_page = image_count // page_count + (1 if image_count % page_count != 0 else 0) - + writer = PdfWriter() box_counter = 0 - + for page in range(page_count): packet = io.BytesIO() - can = canvas.Canvas(packet, pagesize=letter) - + can = canvas.Canvas(packet, pagesize=(page_width, page_height)) + for i in range(boxes_per_page): if box_counter >= image_count: break - x = (i % (page_width // box_size)) * box_size - y = page_height - ((i // (page_width // box_size) + 1) * box_size) - 
can.setFillColorRGB(0, 0, 0) - can.rect(x, y, box_size, box_size, fill=1) + + # Simulating a dynamic image creation (replace this with your actual image creation logic) + # For demonstration, we'll create a simple black image + dummy_image = Image.new('RGB', (image_width, image_height), color='white') # Create a white image + dummy_image = add_black_boxes_to_image(dummy_image) # Add black boxes + + # Convert the PIL Image to bytes to pass to drawImage + image_bytes = io.BytesIO() + dummy_image.save(image_bytes, format='PNG') + image_bytes.seek(0) + + # Check if the image fits in the current page dimensions + x = (i % (page_width // image_width)) * image_width + y = page_height - (((i % (page_height // image_height)) + 1) * image_height) + + if x + image_width > page_width or y < 0: + break + + # Add the image to the PDF + can.drawImage(ImageReader(image_bytes), x, y, width=image_width, height=image_height) box_counter += 1 - + can.showPage() can.save() packet.seek(0) new_pdf = PdfReader(packet) writer.add_page(new_pdf.pages[0]) - + + # Write the PDF to file with open(file_name, 'wb') as f: writer.write(f) + # Clean up temporary image files + for i in range(image_count): + temp_image_path = f"temp_image_{i}.png" + if os.path.exists(temp_image_path): + os.remove(temp_image_path) + @given('the pdf contains {image_count:d} images on {page_count:d} pages') def step_pdf_contains_images(context, image_count, page_count): if not hasattr(context, 'param_name'): @@ -118,7 +156,6 @@ def step_pdf_contains_images(context, image_count, page_count): context.files[context.param_name].close() context.files[context.param_name] = open(context.file_name, 'rb') - @given('the pdf contains {page_count:d} pages with random text') def step_pdf_contains_pages_with_random_text(context, page_count): buffer = io.BytesIO() @@ -186,6 +223,21 @@ def save_generated_pdf(context, filename): # WHEN # ######## +@when('I send a GET request to "{endpoint}"') +def step_send_get_request(context, 
endpoint): + base_url = "http://localhost:8080" + full_url = f"{base_url}{endpoint}" + response = requests.get(full_url) + context.response = response + +@when('I send a GET request to "{endpoint}" with parameters') +def step_send_get_request_with_params(context, endpoint): + base_url = "http://localhost:8080" + params = {row['parameter']: row['value'] for row in context.table} + full_url = f"{base_url}{endpoint}" + response = requests.get(full_url, params=params) + context.response = response + @when('I send the API request to the endpoint "{endpoint}"') def step_send_api_request(context, endpoint): url = f"http://localhost:8080{endpoint}" @@ -278,7 +330,6 @@ def step_save_response_file(context, filename): f.write(context.response.content) print(f"Saved response content to {filename}") - @then('the response PDF should contain {page_count:d} pages') def step_check_response_pdf_page_count(context, page_count): response_file = io.BytesIO(context.response.content) @@ -305,3 +356,26 @@ def step_check_response_zip_doc_page_count(context, doc_count, pages_per_doc): reader = PdfReader(pdf_file) actual_pages_per_doc = len(reader.pages) assert actual_pages_per_doc == pages_per_doc, f"Expected {pages_per_doc} pages per document but got {actual_pages_per_doc} pages in document {file_name}" + +@then('the JSON value of "{key}" should be "{expected_value}"') +def step_check_json_value(context, key, expected_value): + actual_value = context.response.json().get(key) + assert actual_value == expected_value, \ + f"Expected JSON value for '{key}' to be '{expected_value}' but got '{actual_value}'" + +@then('JSON list entry containing "{identifier_key}" as "{identifier_value}" should have "{target_key}" as "{target_value}"') +def step_check_json_list_entry(context, identifier_key, identifier_value, target_key, target_value): + json_response = context.response.json() + for entry in json_response: + if entry.get(identifier_key) == identifier_value: + assert entry.get(target_key) == 
target_value, \ + f"Expected {target_key} to be {target_value} in entry where {identifier_key} is {identifier_value}, but found {entry.get(target_key)}" + break + else: + raise AssertionError(f"No entry with {identifier_key} as {identifier_value} found") + +@then('the response should match the regex "{pattern}"') +def step_response_matches_regex(context, pattern): + response_text = context.response.text + assert re.match(pattern, response_text), \ + f"Response '{response_text}' does not match the expected pattern '{pattern}'" diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java index 12dd625c428..2240dbed229 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/CompressController.java @@ -99,7 +99,7 @@ public ResponseEntity optimizePdf(@ModelAttribute OptimizePdfRequest req List command = new ArrayList<>(); command.add("gs"); command.add("-sDEVICE=pdfwrite"); - command.add("-dCompatibilityLevel=1.4"); + command.add("-dCompatibilityLevel=1.5"); switch (optimizeLevel) { case 1: