diff --git a/borb/io/read/any_object_transformer.py b/borb/io/read/any_object_transformer.py index ab19e10ed..51618964f 100644 --- a/borb/io/read/any_object_transformer.py +++ b/borb/io/read/any_object_transformer.py @@ -31,7 +31,7 @@ from borb.io.read.primitive.string_transformer import StringTransformer from borb.io.read.reference.reference_transformer import ReferenceTransformer from borb.io.read.reference.xref_transformer import XREFTransformer -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/encryption/rc4.py b/borb/io/read/encryption/rc4.py index 570496275..92806434d 100644 --- a/borb/io/read/encryption/rc4.py +++ b/borb/io/read/encryption/rc4.py @@ -32,7 +32,7 @@ def __init__(self): self._p: int = 0 self._q: int = 0 - def set_key(self, key: bytes): + def _set_key(self, key: bytes): self._state = [n for n in range(256)] self._p = 0 self._q = 0 @@ -54,5 +54,12 @@ def _byte_generator(self): return self._state[(self._state[self._p] + self._state[self._q]) % 256] def encrypt(self, key: bytes, input: bytes): - self.set_key(key) + """ + This function encrypts a given byte array with a given key, + returning the encrypted bytes. + :param key: the key to be used for encrypting + :param input: the input byte array to be encrypted + :return: the encrypted bytes + """ + self._set_key(key) return bytes([p ^ self._byte_generator() for p in input]) diff --git a/borb/io/read/encryption/standard_security_handler.py b/borb/io/read/encryption/standard_security_handler.py index 0d8e149b2..9c20c9e26 100644 --- a/borb/io/read/encryption/standard_security_handler.py +++ b/borb/io/read/encryption/standard_security_handler.py @@ -12,14 +12,14 @@ from borb.io.read.encryption.rc4 import RC4 from borb.io.read.types import ( - String, + AnyPDFType, + Boolean, Decimal, HexadecimalString, - Boolean, - AnyPDFType, + Name, Reference, Stream, - Name, + String, ) @@ -59,27 +59,40 @@ def __init__( # used in determining whether to prompt the user for a password and, if so, # whether a valid user or owner password was entered. For more # information, see 7.6.3.4, "Password Algorithms." - self._u: bytes = StandardSecurityHandler._str_to_bytes( - StandardSecurityHandler._unescape_pdf_syntax(encryption_dictionary.get("U")) + self._u: bytes = ( + StandardSecurityHandler._str_to_bytes( + StandardSecurityHandler._unescape_pdf_syntax( + encryption_dictionary.get("U") + ) + ) + or b"" ) + assert self._u is not None assert len(self._u) == 32 # (Required) A 32-byte string, based on both the owner and user passwords, # that shall be used in computing the encryption key and in determining # whether a valid owner password was entered. For more information, see # 7.6.3.3, "Encryption Key Algorithm," and 7.6.3.4, "Password Algorithms." - self._o: bytes = StandardSecurityHandler._str_to_bytes( - StandardSecurityHandler._unescape_pdf_syntax(encryption_dictionary.get("O")) + self._o: bytes = ( + StandardSecurityHandler._str_to_bytes( + StandardSecurityHandler._unescape_pdf_syntax( + encryption_dictionary.get("O") + ) + ) + or b"" ) + assert self._o is not None assert len(self._o) == 32 # /ID - trailer: dict = encryption_dictionary.get_parent() + trailer: dict = encryption_dictionary.get_parent() # type: ignore [attr-defined] if "ID" in trailer: self._document_id: bytes = trailer["ID"][0].get_content_bytes() # (Required) A set of flags specifying which operations shall be permitted # when the document is opened with user access (see Table 22). + assert "P" in encryption_dictionary self._permissions: int = int(encryption_dictionary.get("P")) # (Optional; PDF 1.4; only if V is 2 or 3) The length of the encryption key, in bits. @@ -112,15 +125,19 @@ def __init__( password = bytes(owner_password, encoding="charmap") # calculate encryption_key + assert password is not None self._encryption_key: bytes = self._compute_encryption_key(password) - def _encrypt_data(self, object: AnyPDFType) -> None: + def _encrypt_data(self, object: AnyPDFType) -> AnyPDFType: # a) Obtain the object number and generation number from the object identifier of the string or stream to be # encrypted (see 7.3.10, "Indirect Objects"). If the string is a direct object, use the identifier of the indirect # object containing it. - reference: typing.Optional[Reference] = object.get_reference() + reference: typing.Optional[Reference] = object.get_reference() # type: ignore [union-attr] if reference is None: - reference = object.get_parent().get_reference() + reference = object.get_parent().get_reference() # type: ignore [union-attr] + assert reference is not None + assert reference.object_number is not None + assert reference.generation_number is not None object_number: int = reference.object_number generation_number: int = reference.generation_number @@ -153,24 +170,27 @@ def _encrypt_data(self, object: AnyPDFType) -> None: # The output is the encrypted data to be stored in the PDF file. n_plus_5: int = min(16, n + 5) if isinstance(object, String): - new_content_bytes: bytes = RC4().encrypt( + str_new_content_bytes: bytes = RC4().encrypt( h.digest()[0:n_plus_5], object.get_content_bytes() ) # TODO if isinstance(object, HexadecimalString): - new_content_bytes: bytes = RC4().encrypt( + hex_str_new_content_bytes: bytes = RC4().encrypt( h.digest()[0:n_plus_5], object.get_content_bytes() ) # TODO if isinstance(object, Stream): - new_content_bytes: bytes = RC4().encrypt( + stream_new_content_bytes: bytes = RC4().encrypt( h.digest()[0:n_plus_5], object["DecodedBytes"] ) - object[Name("DecodedBytes")] = new_content_bytes + object[Name("DecodedBytes")] = stream_new_content_bytes object[Name("Bytes")] = zlib.compress(object["DecodedBytes"], 9) return object - def _decrypt_data(self, object: AnyPDFType) -> None: + # default + return object + + def _decrypt_data(self, object: AnyPDFType) -> AnyPDFType: return self._encrypt_data(object) def _compute_encryption_key(self, password: bytes) -> bytes: @@ -211,7 +231,6 @@ def _compute_encryption_key(self, password: bytes) -> bytes: # MD5 hash and pass the first n bytes of the output as input into a new MD5 hash, where n is the number of # bytes of the encryption key as defined by the value of the encryption dictionary’s Length entry. if self._revision >= 3: - # THIS IS WHERE THE ALGORITHM GOES WRONG n: int = int(self._key_length / 8) for _ in range(0, 50): h2 = hashlib.md5() @@ -296,19 +315,22 @@ def _compute_encryption_dictionary_u_value(self, user_password_string: bytes): if self._revision == 2: # a) Create an encryption key based on the user password string, as described in "Algorithm 2: Computing an # encryption key". - key: bytes = self._compute_encryption_key(user_password_string) + key_rev_2: bytes = self._compute_encryption_key(user_password_string) # b) Encrypt the 32-byte padding string shown in step (a) of "Algorithm 2: Computing an encryption key", using # an RC4 encryption function with the encryption key from the preceding step. # c) Store the result of step (b) as the value of the U entry in the encryption dictionary. - self._u = RC4().encrypt(key, StandardSecurityHandler._pad_or_truncate(None)) + self._u = RC4().encrypt( + key_rev_2, StandardSecurityHandler._pad_or_truncate(None) + ) return self._u + if self._revision >= 3: # a) Create an encryption key based on the user password string, as described in "Algorithm 2: Computing an # encryption key". - key: bytes = self._compute_encryption_key(user_password_string) + key_rev_3: bytes = self._compute_encryption_key(user_password_string) # b) Initialize the MD5 hash function and pass the 32-byte padding string shown in step (a) of "Algorithm 2: # Computing an encryption key" as input to this function. @@ -322,7 +344,7 @@ def _compute_encryption_dictionary_u_value(self, user_password_string: bytes): # d) Encrypt the 16-byte result of the hash, using an RC4 encryption function with the encryption key from step # (a). - digest = RC4().encrypt(key, digest) + digest = RC4().encrypt(key_rev_3, digest) # e) Do the following 19 times: Take the output from the previous invocation of the RC4 function and pass it as # input to a new invocation of the function; use an encryption key generated by taking each byte of the @@ -330,7 +352,7 @@ def _compute_encryption_dictionary_u_value(self, user_password_string: bytes): # byte and the single-byte value of the iteration counter (from 1 to 19). if self._revision >= 3: for i in range(1, 20): - key2: bytes = bytes([b ^ i for b in key]) + key2: bytes = bytes([b ^ i for b in key_rev_3]) digest = RC4().encrypt(key2, digest) # f) Append 16 bytes of arbitrary padding to the output from the final invocation of the RC4 function and store diff --git a/borb/io/read/font/font_dictionary_transformer.py b/borb/io/read/font/font_dictionary_transformer.py index 2463e9d30..ff6f1f6e4 100644 --- a/borb/io/read/font/font_dictionary_transformer.py +++ b/borb/io/read/font/font_dictionary_transformer.py @@ -8,7 +8,7 @@ import typing from typing import Any, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Dictionary, Stream from borb.pdf.canvas.event.event_listener import EventListener from borb.pdf.canvas.font.composite_font.cid_font_type_0 import CIDType0Font diff --git a/borb/io/read/function/function_dictionary_transformer.py b/borb/io/read/function/function_dictionary_transformer.py index 7c8b68cfe..97a3274b5 100644 --- a/borb/io/read/function/function_dictionary_transformer.py +++ b/borb/io/read/function/function_dictionary_transformer.py @@ -10,7 +10,7 @@ from typing import Any, Optional, Union from borb.io.filter.stream_decode_util import decode_stream -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Dictionary, Function, Name, Reference, Stream from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/image/ccitt_fax_image_transformer.py b/borb/io/read/image/ccitt_fax_image_transformer.py index 9e8683fab..9095dd405 100644 --- a/borb/io/read/image/ccitt_fax_image_transformer.py +++ b/borb/io/read/image/ccitt_fax_image_transformer.py @@ -11,7 +11,7 @@ from PIL import Image # type: ignore [import] -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Stream, add_base_methods from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/image/compressed_jpeg_image_transformer.py b/borb/io/read/image/compressed_jpeg_image_transformer.py index 35e43a401..de40793e6 100644 --- a/borb/io/read/image/compressed_jpeg_image_transformer.py +++ b/borb/io/read/image/compressed_jpeg_image_transformer.py @@ -12,7 +12,7 @@ from PIL import Image # type: ignore [import] from borb.io.filter.stream_decode_util import decode_stream -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Name, Stream, add_base_methods from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/image/grayscale_image_transformer.py b/borb/io/read/image/grayscale_image_transformer.py index 5b5cd0bcf..4b8d26eb0 100644 --- a/borb/io/read/image/grayscale_image_transformer.py +++ b/borb/io/read/image/grayscale_image_transformer.py @@ -12,7 +12,7 @@ from PIL import Image # type: ignore [import] from borb.io.filter.stream_decode_util import decode_stream -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Reference, Stream, add_base_methods from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/image/jbig2_image_transformer.py b/borb/io/read/image/jbig2_image_transformer.py index f7a0f8a75..c725cdbb0 100644 --- a/borb/io/read/image/jbig2_image_transformer.py +++ b/borb/io/read/image/jbig2_image_transformer.py @@ -11,7 +11,7 @@ from PIL import Image # type: ignore [import] -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Stream, add_base_methods from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/image/jpeg_2000_image_transformer.py b/borb/io/read/image/jpeg_2000_image_transformer.py index e87378b83..0d3244e26 100644 --- a/borb/io/read/image/jpeg_2000_image_transformer.py +++ b/borb/io/read/image/jpeg_2000_image_transformer.py @@ -11,7 +11,7 @@ from PIL import Image # type: ignore [import] -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Stream, add_base_methods from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/image/jpeg_image_transformer.py b/borb/io/read/image/jpeg_image_transformer.py index 3c78d1ffc..705760cbb 100644 --- a/borb/io/read/image/jpeg_image_transformer.py +++ b/borb/io/read/image/jpeg_image_transformer.py @@ -11,7 +11,7 @@ from PIL import Image # type: ignore [import] -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Stream, add_base_methods from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/object/array_transformer.py b/borb/io/read/object/array_transformer.py index 3bd267463..18c7fbb3b 100644 --- a/borb/io/read/object/array_transformer.py +++ b/borb/io/read/object/array_transformer.py @@ -8,7 +8,7 @@ import typing from typing import Any, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, List from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/object/dictionary_transformer.py b/borb/io/read/object/dictionary_transformer.py index 9a1f6f1fc..c3b0d5fba 100644 --- a/borb/io/read/object/dictionary_transformer.py +++ b/borb/io/read/object/dictionary_transformer.py @@ -8,7 +8,7 @@ import typing from typing import Any, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Dictionary from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/object/stream_transformer.py b/borb/io/read/object/stream_transformer.py index fb43942d9..0fe9421ca 100644 --- a/borb/io/read/object/stream_transformer.py +++ b/borb/io/read/object/stream_transformer.py @@ -9,7 +9,7 @@ from typing import Any, Optional, Union from borb.io.filter.stream_decode_util import decode_stream -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Reference, Stream from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/page/page_dictionary_transformer.py b/borb/io/read/page/page_dictionary_transformer.py index a025de7d1..fcc73529a 100644 --- a/borb/io/read/page/page_dictionary_transformer.py +++ b/borb/io/read/page/page_dictionary_transformer.py @@ -9,7 +9,7 @@ import zlib from typing import Any, Dict, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType from borb.io.read.types import Decimal as pDecimal from borb.io.read.types import Dictionary, List, Name, Stream @@ -59,7 +59,9 @@ def transform( # avoid circular reference if k == "Parent": continue - v = self.get_root_transformer().transform(v, page_out, context, []) + v = self.get_root_transformer().transform( + v, page_out, context, event_listeners + ) if v is not None: page_out[k] = v @@ -91,10 +93,13 @@ def transform( # create Canvas canvas = Canvas().set_parent(page_out) # type: ignore [attr-defined] - # create CanvasStreamProcessor - CanvasStreamProcessor(page_out, canvas, []).read( - io.BytesIO(contents["DecodedBytes"]), event_listeners - ) + # If there are no event listeners, processing the page has no effect + # we may as well skip it (cause it is very labour-intensive). + if len(event_listeners) > 0: + # create CanvasStreamProcessor + CanvasStreamProcessor(page_out, canvas, []).read( + io.BytesIO(contents["DecodedBytes"]), event_listeners + ) # send out EndPageEvent for l in event_listeners: diff --git a/borb/io/read/page/root_dictionary_transformer.py b/borb/io/read/page/root_dictionary_transformer.py index bc2f3fbb8..c41711679 100644 --- a/borb/io/read/page/root_dictionary_transformer.py +++ b/borb/io/read/page/root_dictionary_transformer.py @@ -9,7 +9,7 @@ from typing import Any, Dict, List, Optional, Union from borb.io.read.object.dictionary_transformer import DictionaryTransformer -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Decimal, Dictionary from borb.io.read.types import List as pList from borb.io.read.types import Name @@ -34,6 +34,9 @@ def can_be_transformed( and object["Type"] == "Catalog" ) + def _re_order_pages(self): + pass + def transform( self, object_to_transform: Union[io.BufferedIOBase, io.RawIOBase, AnyPDFType], diff --git a/borb/io/read/primitive/number_transformer.py b/borb/io/read/primitive/number_transformer.py index 7bc88eed8..2ef4a7010 100644 --- a/borb/io/read/primitive/number_transformer.py +++ b/borb/io/read/primitive/number_transformer.py @@ -8,7 +8,7 @@ import typing from typing import Any, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Decimal from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/primitive/string_transformer.py b/borb/io/read/primitive/string_transformer.py index 7cf73dee3..b14a55bb0 100644 --- a/borb/io/read/primitive/string_transformer.py +++ b/borb/io/read/primitive/string_transformer.py @@ -8,7 +8,7 @@ import typing from typing import Any, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, HexadecimalString, Name, String from borb.pdf.canvas.event.event_listener import EventListener diff --git a/borb/io/read/reference/reference_transformer.py b/borb/io/read/reference/reference_transformer.py index 60a009a1d..4c081be5b 100644 --- a/borb/io/read/reference/reference_transformer.py +++ b/borb/io/read/reference/reference_transformer.py @@ -10,7 +10,7 @@ import typing from typing import Any, Optional, Union -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Reference from borb.pdf.canvas.event.event_listener import EventListener from borb.pdf.xref.xref import XREF diff --git a/borb/io/read/reference/xref_transformer.py b/borb/io/read/reference/xref_transformer.py index bac552e99..1af181a96 100644 --- a/borb/io/read/reference/xref_transformer.py +++ b/borb/io/read/reference/xref_transformer.py @@ -12,7 +12,7 @@ from borb.io.read.encryption.standard_security_handler import StandardSecurityHandler from borb.io.read.tokenize.high_level_tokenizer import HighLevelTokenizer -from borb.io.read.transformer import Transformer, ReadTransformerState +from borb.io.read.transformer import ReadTransformerState, Transformer from borb.io.read.types import AnyPDFType, Dictionary, Name from borb.pdf.canvas.event.event_listener import Event, EventListener from borb.pdf.document import Document diff --git a/borb/io/write/any_object_transformer.py b/borb/io/write/any_object_transformer.py index 251a36b9f..3ef52044d 100644 --- a/borb/io/write/any_object_transformer.py +++ b/borb/io/write/any_object_transformer.py @@ -26,10 +26,7 @@ from borb.io.write.primitive.string_transformer import StringTransformer from borb.io.write.reference.reference_transformer import ReferenceTransform from borb.io.write.reference.xref_transformer import XREFTransformer -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState from borb.io.write.xmp.xmp_transformer import XMPTransformer diff --git a/borb/io/write/ascii_art/ascii_art_transformer.py b/borb/io/write/ascii_art/ascii_art_transformer.py index d87f26e90..ec1dd65e0 100644 --- a/borb/io/write/ascii_art/ascii_art_transformer.py +++ b/borb/io/write/ascii_art/ascii_art_transformer.py @@ -8,10 +8,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, Stream -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class ASCIIArtTransformer(Transformer): diff --git a/borb/io/write/ascii_art/ascii_logo.txt b/borb/io/write/ascii_art/ascii_logo.txt index 0413cb23e..e93f71ab0 100644 --- a/borb/io/write/ascii_art/ascii_logo.txt +++ b/borb/io/write/ascii_art/ascii_logo.txt @@ -1,2 +1,2 @@ -borb version 2.0.13 +borb version 2.0.14 Joris Schellekens diff --git a/borb/io/write/document/document_transformer.py b/borb/io/write/document/document_transformer.py index ad1beed51..a904a792d 100644 --- a/borb/io/write/document/document_transformer.py +++ b/borb/io/write/document/document_transformer.py @@ -9,17 +9,8 @@ import typing from typing import Any, Optional -from borb.io.read.types import ( - AnyPDFType, - Dictionary, - HexadecimalString, - List, - Name, -) -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.read.types import AnyPDFType, Dictionary, HexadecimalString, List, Name +from borb.io.write.transformer import Transformer, WriteTransformerState from borb.pdf.document import Document logger = logging.getLogger(__name__) diff --git a/borb/io/write/document/information_dictionary_transformer.py b/borb/io/write/document/information_dictionary_transformer.py index f196a334a..f7e4dd959 100644 --- a/borb/io/write/document/information_dictionary_transformer.py +++ b/borb/io/write/document/information_dictionary_transformer.py @@ -4,27 +4,19 @@ """ This implementation of WriteBaseTransformer is responsible for writing \Info Dictionary objects """ -import xml.etree.ElementTree as ET import datetime import logging import random import typing +import xml.etree.ElementTree as ET from typing import Any, Optional -from borb.io.read.types import ( - AnyPDFType, - Dictionary, - Name, - String, - Stream, - Decimal as bDecimal, -) +from borb.io.read.types import AnyPDFType +from borb.io.read.types import Decimal as bDecimal +from borb.io.read.types import Dictionary, Name, Stream, String from borb.io.write.object.dictionary_transformer import DictionaryTransformer from borb.io.write.object.stream_transformer import StreamTransformer -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState from borb.pdf.document import Document from borb.pdf.trailer.document_info import XMPDocumentInfo @@ -37,9 +29,12 @@ class InformationDictionaryTransformer(Transformer): """ def can_be_transformed(self, any: AnyPDFType): + """ + This function returns True if the object to be transformed is an \Info Dictionary + """ if not isinstance(any, Dictionary): return False - parent: AnyPDFType = any.get_parent() + parent: AnyPDFType = any.get_parent() # type: ignore [attr-defined] return ( isinstance(parent, Dictionary) and "Info" in parent @@ -69,25 +64,29 @@ def _consolidate_xmp_and_info_dictionary(self, document: Document) -> Dictionary and isinstance(document["XRef"]["Trailer"]["Root"]["Metadata"], ET.Element) ): xmp_document_info: XMPDocumentInfo = document.get_xmp_document_info() - for k,v in {Name("Title"):xmp_document_info.get_title(), - Name("Author"):xmp_document_info.get_author(), - Name("Subject"):xmp_document_info.get_subject(), - Name("Keywords"):xmp_document_info.get_keywords(), - Name("Creator"):xmp_document_info.get_creator(), - Name("Producer"):xmp_document_info.get_producer(), - Name("CreationDate"):xmp_document_info.get_creation_date(), - Name("ModDate"):xmp_document_info.get_modification_date()}.items(): + for k, v in { + Name("Title"): xmp_document_info.get_title(), + Name("Author"): xmp_document_info.get_author(), + Name("Subject"): xmp_document_info.get_subject(), + Name("Keywords"): xmp_document_info.get_keywords(), + Name("Creator"): xmp_document_info.get_creator(), + Name("Producer"): xmp_document_info.get_producer(), + Name("CreationDate"): xmp_document_info.get_creation_date(), + Name("ModDate"): xmp_document_info.get_modification_date(), + }.items(): if v is None: continue if k in ["CreationDate", "ModDate"]: - v = InformationDictionaryTransformer._convert_xmp_date_format_to_iso_8824_date_format(v) + v = InformationDictionaryTransformer._convert_xmp_date_format_to_iso_8824_date_format( + v + ) new_info_dictionary[k] = String(v) # return return new_info_dictionary @staticmethod - def now_as_iso_8824_date_format() -> str: + def _now_as_iso_8824_date_format() -> str: timestamp_str = "D:" now = datetime.datetime.now() for n in [now.year, now.month, now.day, now.hour, now.minute, now.second]: @@ -100,12 +99,12 @@ def _update_info_dictionary(self, info_dictionary: Dictionary) -> Dictionary: # set CreationDate if "CreationDate" not in info_dictionary: info_dictionary[Name("CreationDate")] = String( - InformationDictionaryTransformer.now_as_iso_8824_date_format() + InformationDictionaryTransformer._now_as_iso_8824_date_format() ) # set ModDate info_dictionary[Name("ModDate")] = String( - InformationDictionaryTransformer.now_as_iso_8824_date_format() + InformationDictionaryTransformer._now_as_iso_8824_date_format() ) # set Producer @@ -223,6 +222,10 @@ def transform( object_to_transform: Any, context: Optional[WriteTransformerState] = None, ): + """ + This method writes an \Info Dictionary to a byte stream + """ + # get Document document: Document = object_to_transform.get_root() assert document is not None @@ -242,7 +245,7 @@ def transform( and "Trailer" in document["XRef"] \ and "Root" in document["XRef"]["Trailer"] \ and "Metadata" in document["XRef"]["Trailer"]["Root"] - needs_xmp_metadata = has_xmp_metadata or context.conformance_level in ["PDF/A-1a", "PDF/A-1b"] + needs_xmp_metadata = has_xmp_metadata or (context is not None and context.conformance_level in ["PDF/A-1a", "PDF/A-1b"]) # fmt: on if needs_xmp_metadata: @@ -251,10 +254,11 @@ def transform( xmp_metadata_stream: Stream = self._write_xmp_metadata_stream( new_info_dictionary ) + assert context is not None document["XRef"]["Trailer"]["Root"][Name("Metadata")] = self.get_reference( xmp_metadata_stream, context ) - xmp_metadata_stream.set_parent(document["XRef"]["Trailer"]["Root"]) + xmp_metadata_stream.set_parent(document["XRef"]["Trailer"]["Root"]) # type: ignore [attr-defined] # delegate XMP \Metadata for h in self.get_root_transformer()._handlers: diff --git a/borb/io/write/image/image_transformer.py b/borb/io/write/image/image_transformer.py index 4b6e40650..8aa192d25 100644 --- a/borb/io/write/image/image_transformer.py +++ b/borb/io/write/image/image_transformer.py @@ -13,10 +13,7 @@ from borb.io.read.types import AnyPDFType from borb.io.read.types import Decimal as pDecimal from borb.io.read.types import Name, Reference, Stream, add_base_methods -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class ImageTransformer(Transformer): diff --git a/borb/io/write/object/array_transformer.py b/borb/io/write/object/array_transformer.py index a23ba296a..8df5f8d27 100644 --- a/borb/io/write/object/array_transformer.py +++ b/borb/io/write/object/array_transformer.py @@ -11,10 +11,7 @@ from PIL.Image import Image # type: ignore [import] from borb.io.read.types import AnyPDFType, Dictionary, List, Reference, Stream -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState logger = logging.getLogger(__name__) diff --git a/borb/io/write/object/dictionary_transformer.py b/borb/io/write/object/dictionary_transformer.py index 97f757655..b31c6a351 100644 --- a/borb/io/write/object/dictionary_transformer.py +++ b/borb/io/write/object/dictionary_transformer.py @@ -11,10 +11,7 @@ from PIL.Image import Image # type: ignore [import] from borb.io.read.types import AnyPDFType, Dictionary, Element, List, Reference, Stream -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState logger = logging.getLogger(__name__) diff --git a/borb/io/write/object/stream_transformer.py b/borb/io/write/object/stream_transformer.py index 4ec46cb82..c3eec0ba2 100644 --- a/borb/io/write/object/stream_transformer.py +++ b/borb/io/write/object/stream_transformer.py @@ -12,10 +12,7 @@ from borb.io.read.types import AnyPDFType from borb.io.read.types import Decimal as pDecimal from borb.io.read.types import Dictionary, List, Name, Reference, Stream -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState logger = logging.getLogger(__name__) diff --git a/borb/io/write/primitive/boolean_transformer.py b/borb/io/write/primitive/boolean_transformer.py index fac9effbf..7bb043cae 100644 --- a/borb/io/write/primitive/boolean_transformer.py +++ b/borb/io/write/primitive/boolean_transformer.py @@ -7,10 +7,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, Boolean -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class BooleanTransformer(Transformer): diff --git a/borb/io/write/primitive/name_transformer.py b/borb/io/write/primitive/name_transformer.py index e75176580..9491794f2 100644 --- a/borb/io/write/primitive/name_transformer.py +++ b/borb/io/write/primitive/name_transformer.py @@ -7,10 +7,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, Name -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class NameTransformer(Transformer): diff --git a/borb/io/write/primitive/number_transformer.py b/borb/io/write/primitive/number_transformer.py index 3571f55d2..ef3a6aacc 100644 --- a/borb/io/write/primitive/number_transformer.py +++ b/borb/io/write/primitive/number_transformer.py @@ -7,10 +7,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, Decimal -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class NumberTransformer(Transformer): diff --git a/borb/io/write/primitive/string_transformer.py b/borb/io/write/primitive/string_transformer.py index 36fd71772..e86fe8448 100644 --- a/borb/io/write/primitive/string_transformer.py +++ b/borb/io/write/primitive/string_transformer.py @@ -7,10 +7,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, HexadecimalString, String -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class StringTransformer(Transformer): diff --git a/borb/io/write/reference/reference_transformer.py b/borb/io/write/reference/reference_transformer.py index 147d44880..4c7000c6c 100644 --- a/borb/io/write/reference/reference_transformer.py +++ b/borb/io/write/reference/reference_transformer.py @@ -7,10 +7,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, Reference -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState class ReferenceTransform(Transformer): diff --git a/borb/io/write/reference/xref_transformer.py b/borb/io/write/reference/xref_transformer.py index 7a7a46e3c..ea1d2bd8d 100644 --- a/borb/io/write/reference/xref_transformer.py +++ b/borb/io/write/reference/xref_transformer.py @@ -8,10 +8,7 @@ from typing import Optional from borb.io.read.types import AnyPDFType, Decimal, Dictionary, Name, Reference -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState from borb.pdf.xref.xref import XREF diff --git a/borb/io/write/xmp/xmp_transformer.py b/borb/io/write/xmp/xmp_transformer.py index 23e47b0fb..32bd22dcc 100644 --- a/borb/io/write/xmp/xmp_transformer.py +++ b/borb/io/write/xmp/xmp_transformer.py @@ -12,10 +12,7 @@ from borb.io.read.types import AnyPDFType from borb.io.read.types import Decimal as pDecimal from borb.io.read.types import Name, Reference, Stream -from borb.io.write.transformer import ( - Transformer, - WriteTransformerState, -) +from borb.io.write.transformer import Transformer, WriteTransformerState logger = logging.getLogger(__name__) diff --git a/borb/pdf/canvas/font/simple_font/true_type_font.py b/borb/pdf/canvas/font/simple_font/true_type_font.py index 521a62f3e..a78f5111a 100644 --- a/borb/pdf/canvas/font/simple_font/true_type_font.py +++ b/borb/pdf/canvas/font/simple_font/true_type_font.py @@ -195,7 +195,7 @@ def _get_base_font(ttf_font_file: TTFont) -> str: return font_name @staticmethod - def _build_custom_cmap(ttf_font_file: TTFont) -> Stream: + def _build_custom_cmap_for_type_0_font(ttf_font_file: TTFont) -> Stream: cmap_prefix: str = """ /CIDInit /ProcSet findresource begin 12 dict begin @@ -252,14 +252,17 @@ def _build_custom_cmap(ttf_font_file: TTFont) -> Stream: return to_unicode_stream @staticmethod - def _build_custom_widths_array(ttf_font_file: TTFont) -> List: + def _build_custom_widths_array_for_type_0_font(ttf_font_file: TTFont) -> List: + units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm) cmap = ttf_font_file.getBestCmap() glyph_set = ttf_font_file.getGlyphSet() widths_array: List = List() for cid, g in enumerate(ttf_font_file.glyphOrder): glyph_width: pDecimal = pDecimal(0) try: - glyph_width = pDecimal(glyph_set[cmap[ord(toUnicode(g))]].width) + glyph_width = pDecimal( + glyph_set[cmap[ord(toUnicode(g))]].width / units_per_em * 1000 + ) except: pass widths_array.append(pDecimal(cid)) @@ -279,7 +282,9 @@ def _type_0_font_from_file(ttf_font_file: TTFont) -> "Type0Font": type_0_font[Name("Encoding")] = Name("Identity-H") # set ToUnicode - type_0_font[Name("ToUnicode")] = TrueTypeFont._build_custom_cmap(ttf_font_file) + type_0_font[ + Name("ToUnicode") + ] = TrueTypeFont._build_custom_cmap_for_type_0_font(ttf_font_file) # build DescendantFont descendant_font: CIDType2Font = CIDType2Font() @@ -292,9 +297,9 @@ def _type_0_font_from_file(ttf_font_file: TTFont) -> "Type0Font": descendant_font[Name("DW")] = pDecimal(250) # build W array - descendant_font[Name("W")] = TrueTypeFont._build_custom_widths_array( - ttf_font_file - ) + descendant_font[ + Name("W") + ] = TrueTypeFont._build_custom_widths_array_for_type_0_font(ttf_font_file) descendant_font[Name("CIDToGIDMap")] = Name("Identity") # build CIDSystemInfo diff --git a/borb/pdf/canvas/layout/forms/check_box.py b/borb/pdf/canvas/layout/forms/check_box.py index 4bda08590..c3e236738 100644 --- a/borb/pdf/canvas/layout/forms/check_box.py +++ b/borb/pdf/canvas/layout/forms/check_box.py @@ -8,16 +8,9 @@ import typing import zlib +from borb.io.read.types import Boolean, Decimal from borb.io.read.types import Decimal as bDecimal -from borb.io.read.types import ( - Dictionary, - Name, - Decimal, - List, - String, - Stream, - Boolean, -) +from borb.io.read.types import Dictionary, List, Name, Stream, String from borb.pdf.canvas.color.color import Color, HexColor, RGBColor from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.geometry.rectangle import Rectangle diff --git a/borb/pdf/canvas/layout/forms/drop_down_list.py b/borb/pdf/canvas/layout/forms/drop_down_list.py index 51ba76453..f31f5cf73 100644 --- a/borb/pdf/canvas/layout/forms/drop_down_list.py +++ b/borb/pdf/canvas/layout/forms/drop_down_list.py @@ -7,16 +7,9 @@ import typing import zlib +from borb.io.read.types import Boolean, Decimal from borb.io.read.types import Decimal as bDecimal -from borb.io.read.types import ( - Dictionary, - Name, - Decimal, - List, - String, - Stream, - Boolean, -) +from borb.io.read.types import Dictionary, List, Name, Stream, String from borb.pdf.canvas.color.color import Color, HexColor, RGBColor from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.geometry.rectangle import Rectangle diff --git a/borb/pdf/canvas/layout/forms/text_area.py b/borb/pdf/canvas/layout/forms/text_area.py index 6d4b6e81b..68c2a6960 100644 --- a/borb/pdf/canvas/layout/forms/text_area.py +++ b/borb/pdf/canvas/layout/forms/text_area.py @@ -7,16 +7,9 @@ import typing import zlib +from borb.io.read.types import Boolean, Decimal from borb.io.read.types import Decimal as bDecimal -from borb.io.read.types import ( - Dictionary, - Name, - Decimal, - List, - String, - Stream, - Boolean, -) +from borb.io.read.types import Dictionary, List, Name, Stream, String from borb.pdf.canvas.color.color import Color, HexColor, RGBColor from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.geometry.rectangle import Rectangle diff --git a/borb/pdf/canvas/layout/forms/text_field.py b/borb/pdf/canvas/layout/forms/text_field.py index 71abe19a8..99ebbc386 100644 --- a/borb/pdf/canvas/layout/forms/text_field.py +++ b/borb/pdf/canvas/layout/forms/text_field.py @@ -7,16 +7,9 @@ import typing import zlib +from borb.io.read.types import Boolean, Decimal from borb.io.read.types import Decimal as bDecimal -from borb.io.read.types import ( - Dictionary, - Name, - Decimal, - List, - String, - Stream, - Boolean, -) +from borb.io.read.types import Dictionary, List, Name, Stream, String from borb.pdf.canvas.color.color import Color, HexColor, RGBColor from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.geometry.rectangle import Rectangle diff --git a/borb/pdf/canvas/layout/image/barcode.py b/borb/pdf/canvas/layout/image/barcode.py index 8e625e5ed..b71adf99c 100644 --- a/borb/pdf/canvas/layout/image/barcode.py +++ b/borb/pdf/canvas/layout/image/barcode.py @@ -10,8 +10,8 @@ import barcode # type: ignore [import] import qrcode # type: ignore [import] -from PIL import Image as PILImage # type: ignore [import] from barcode.writer import ImageWriter as BarcodeImageWriter # type: ignore [import] +from PIL import Image as PILImage # type: ignore [import] from borb.pdf.canvas.color.color import Color, HexColor from borb.pdf.canvas.layout.image.image import Image diff --git a/borb/pdf/canvas/layout/layout_element.py b/borb/pdf/canvas/layout/layout_element.py index c8ff29219..d7db6c9c1 100644 --- a/borb/pdf/canvas/layout/layout_element.py +++ b/borb/pdf/canvas/layout/layout_element.py @@ -10,8 +10,8 @@ from decimal import Decimal from enum import Enum -from borb.io.read.types import Decimal as pDecimal, Dictionary -from borb.io.read.types import Name, Stream +from borb.io.read.types import Decimal as pDecimal +from borb.io.read.types import Dictionary, Name, Stream from borb.pdf.canvas.color.color import Color, HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle diff --git a/borb/pdf/canvas/layout/page_layout/multi_column_layout.py b/borb/pdf/canvas/layout/page_layout/multi_column_layout.py index 26bf56eee..20c5afce0 100644 --- a/borb/pdf/canvas/layout/page_layout/multi_column_layout.py +++ b/borb/pdf/canvas/layout/page_layout/multi_column_layout.py @@ -130,6 +130,8 @@ def add(self, layout_element: LayoutElement) -> "PageLayout": return self # previous element is used to determine the paragraph spacing + assert self._page_height is not None + assert self._page_width is not None previous_element_margin_bottom: Decimal = Decimal(0) previous_element_y = self._page_height - self._vertical_margin inter_paragraph_space: Decimal = Decimal(0) diff --git a/borb/pdf/canvas/operator/path_construction/append_rectangle.py b/borb/pdf/canvas/operator/path_construction/append_rectangle.py index bb50469c9..0c04fc236 100644 --- a/borb/pdf/canvas/operator/path_construction/append_rectangle.py +++ b/borb/pdf/canvas/operator/path_construction/append_rectangle.py @@ -16,6 +16,7 @@ from decimal import Decimal from borb.io.read.types import AnyPDFType +from borb.io.read.types import Decimal as bDecimal from borb.pdf.canvas.operator.canvas_operator import CanvasOperator @@ -45,6 +46,10 @@ def invoke( """ Invoke the s operator """ + assert isinstance(operands[0], Decimal) + assert isinstance(operands[1], Decimal) + assert isinstance(operands[2], Decimal) + assert isinstance(operands[3], Decimal) x: Decimal = operands[0] y: Decimal = operands[1] width: Decimal = operands[2] @@ -53,18 +58,31 @@ def invoke( moveto_op: typing.Optional[ CanvasOperator ] = canvas_stream_processor.get_operator("m") - moveto_op.invoke(canvas_stream_processor, [x, y], event_listeners) + assert moveto_op is not None + moveto_op.invoke( + canvas_stream_processor, [bDecimal(x), bDecimal(y)], event_listeners + ) line_to_op: typing.Optional[ CanvasOperator ] = canvas_stream_processor.get_operator("l") - line_to_op.invoke(canvas_stream_processor, [x + width, y], event_listeners) + assert line_to_op is not None + line_to_op.invoke( + canvas_stream_processor, [bDecimal(x + width), bDecimal(y)], event_listeners + ) + line_to_op.invoke( + canvas_stream_processor, + [bDecimal(x + width), bDecimal(y + height)], + event_listeners, + ) line_to_op.invoke( - canvas_stream_processor, [x + width, y + height], event_listeners + canvas_stream_processor, + [bDecimal(x), bDecimal(y + height)], + event_listeners, ) - line_to_op.invoke(canvas_stream_processor, [x, y + height], event_listeners) close_subpath_op: typing.Optional[ CanvasOperator ] = canvas_stream_processor.get_operator("h") + assert close_subpath_op is not None close_subpath_op.invoke(canvas_stream_processor, [], event_listeners) diff --git a/borb/pdf/trailer/document_info.py b/borb/pdf/trailer/document_info.py index 9b271a169..69b55f137 100644 --- a/borb/pdf/trailer/document_info.py +++ b/borb/pdf/trailer/document_info.py @@ -98,7 +98,7 @@ def get_subject(self) -> Optional[str]: except: return None - def get_keywords(self) -> Optional[List[str]]: + def get_keywords(self) -> Optional[str]: """ (Optional; PDF 1.1) Keywords associated with the document. """ @@ -176,9 +176,20 @@ def get_creation_date(self) -> Optional[str]: readable form (see 7.9.4, “Dates”). """ try: - return next(iter([v for k, v in - self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall('.//{*}Description')[0].attrib.items() if - k.endswith('CreateDate')]), None) + return next( + iter( + [ + v + for k, v in self._document["XRef"]["Trailer"]["Root"][ + "Metadata" + ] + .findall(".//{*}Description")[0] + .attrib.items() + if k.endswith("CreateDate") + ] + ), + None, + ) except: return None @@ -189,31 +200,78 @@ def get_modification_date(self) -> Optional[str]: most recently modified, in human-readable form (see 7.9.4, “Dates”). """ try: - return next(iter([v for k, v in - self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall('.//{*}Description')[0].attrib.items() if - k.endswith('ModifyDate')]), None) + return next( + iter( + [ + v + for k, v in self._document["XRef"]["Trailer"]["Root"][ + "Metadata" + ] + .findall(".//{*}Description")[0] + .attrib.items() + if k.endswith("ModifyDate") + ] + ), + None, + ) except: return None def get_author(self) -> Optional[str]: + """ + (Optional; PDF 1.1) The name of the person who created the document. + """ try: - return self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall('.//{*}creator')[0][0][0].text + return ( + self._document["XRef"]["Trailer"]["Root"]["Metadata"] + .findall(".//{*}creator")[0][0][0] + .text + ) except: return None def get_producer(self) -> Optional[str]: + """ + (Optional) If the document was converted to PDF from another format, + the name of the conforming product that converted it to PDF. + """ try: - return next(iter([v for k, v in - self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall('.//{*}Description')[0].attrib.items() if - k.endswith('Producer')]), None) + return next( + iter( + [ + v + for k, v in self._document["XRef"]["Trailer"]["Root"][ + "Metadata" + ] + .findall(".//{*}Description")[0] + .attrib.items() + if k.endswith("Producer") + ] + ), + None, + ) except: return None def get_keywords(self) -> Optional[str]: + """ + (Optional; PDF 1.1) Keywords associated with the document. + """ try: - return next(iter([v for k, v in - self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall('.//{*}Description')[0].attrib.items() if - k.endswith('Keywords')]), None) + return next( + iter( + [ + v + for k, v in self._document["XRef"]["Trailer"]["Root"][ + "Metadata" + ] + .findall(".//{*}Description")[0] + .attrib.items() + if k.endswith("Keywords") + ] + ), + None, + ) except: return None @@ -222,7 +280,11 @@ def get_title(self) -> Optional[str]: (Optional; PDF 1.1) The document’s title. """ try: - return self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall(".//{*}title")[0][0][0].text + return ( + self._document["XRef"]["Trailer"]["Root"]["Metadata"] + .findall(".//{*}title")[0][0][0] + .text + ) except: return None @@ -233,16 +295,33 @@ def get_creator(self) -> Optional[str]: from which it was converted. """ try: - return next(iter([v for k, v in - self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall('.//{*}Description')[0].attrib.items() if - k.endswith('CreatorTool')]), None) + return next( + iter( + [ + v + for k, v in self._document["XRef"]["Trailer"]["Root"][ + "Metadata" + ] + .findall(".//{*}Description")[0] + .attrib.items() + if k.endswith("CreatorTool") + ] + ), + None, + ) except: return None def get_subject(self) -> Optional[str]: - # TODO + """ + (Optional; PDF 1.1) The subject of the document. + """ try: - return self._document["XRef"]["Trailer"]["Root"]["Metadata"].findall(".//{*}description")[0][0][0].text + return ( + self._document["XRef"]["Trailer"]["Root"]["Metadata"] + .findall(".//{*}description")[0][0][0] + .text + ) except: return None diff --git a/borb/toolkit/export/html_to_pdf/read/any_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/any_tag_transformer.py index 27afe7cbf..0c62867a1 100644 --- a/borb/toolkit/export/html_to_pdf/read/any_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/any_tag_transformer.py @@ -105,9 +105,7 @@ from borb.toolkit.export.html_to_pdf.read.text.p_tag_transformer import PTagTransformer # fmt: off -from borb.toolkit.export.html_to_pdf.read.transformer import \ - Transformer - +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer # fmt: on diff --git a/borb/toolkit/export/html_to_pdf/read/body/body_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/body/body_tag_transformer.py index 550f607a0..8a578949d 100644 --- a/borb/toolkit/export/html_to_pdf/read/body/body_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/body/body_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class BodyTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/head/head_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/head/head_tag_transformer.py index 49fe874e1..277504c61 100644 --- a/borb/toolkit/export/html_to_pdf/read/head/head_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/head/head_tag_transformer.py @@ -9,9 +9,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class HeadTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/head/meta_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/head/meta_tag_transformer.py index 19427457a..68ee55926 100644 --- a/borb/toolkit/export/html_to_pdf/read/head/meta_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/head/meta_tag_transformer.py @@ -12,9 +12,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.document import Document from borb.pdf.xref.plaintext_xref import PlainTextXREF -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class MetaTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/head/title_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/head/title_tag_transformer.py index 684c61e9c..793392041 100644 --- a/borb/toolkit/export/html_to_pdf/read/head/title_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/head/title_tag_transformer.py @@ -12,9 +12,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.document import Document from borb.pdf.xref.plaintext_xref import PlainTextXREF -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class TitleTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/h1_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/h1_tag_transformer.py index e4b31b5c0..fbaf65063 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/h1_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/h1_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.heading import Heading -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class H1TagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/h2_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/h2_tag_transformer.py index 4bad42daf..14ab06742 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/h2_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/h2_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.heading import Heading -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class H2TagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/h3_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/h3_tag_transformer.py index cb9480a01..ec6d18734 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/h3_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/h3_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.heading import Heading -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class H3TagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/h4_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/h4_tag_transformer.py index b36ca844c..b396de9cb 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/h4_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/h4_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.heading import Heading -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class H4TagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/h5_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/h5_tag_transformer.py index e78cb07a8..7ef7b8ed9 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/h5_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/h5_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.heading import Heading -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class H5TagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/h6_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/h6_tag_transformer.py index 685f6f431..2bf3910e9 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/h6_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/h6_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.heading import Heading -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class H6TagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/heading/hr_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/heading/hr_tag_transformer.py index d2b07a1d9..93cc65758 100644 --- a/borb/toolkit/export/html_to_pdf/read/heading/hr_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/heading/hr_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.horizontal_rule import HorizontalRule from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class HrTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/html/html_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/html/html_tag_transformer.py index 5b78d5fa0..59e599998 100644 --- a/borb/toolkit/export/html_to_pdf/read/html/html_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/html/html_tag_transformer.py @@ -9,9 +9,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class HTMLTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/image/img_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/image/img_tag_transformer.py index 772bebbb3..70020fee4 100644 --- a/borb/toolkit/export/html_to_pdf/read/image/img_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/image/img_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.image.image import Image from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class ImgTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/list/li_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/list/li_tag_transformer.py index 53275ad01..848bb2958 100644 --- a/borb/toolkit/export/html_to_pdf/read/list/li_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/list/li_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import Span -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class LiTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/list/ol_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/list/ol_tag_transformer.py index 07e6403fe..b0267de6e 100644 --- a/borb/toolkit/export/html_to_pdf/read/list/ol_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/list/ol_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.list.ordered_list import OrderedList from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class OlTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/list/ul_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/list/ul_tag_transformer.py index bfcffe579..c61c5ae8b 100644 --- a/borb/toolkit/export/html_to_pdf/read/list/ul_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/list/ul_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.list.unordered_list import UnorderedList from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class UlTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/structure/address_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/structure/address_tag_transformer.py index e71565c43..50702b150 100644 --- a/borb/toolkit/export/html_to_pdf/read/structure/address_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/structure/address_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class AddressTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/structure/main_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/structure/main_tag_transformer.py index 3602a7e2e..2718d57e6 100644 --- a/borb/toolkit/export/html_to_pdf/read/structure/main_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/structure/main_tag_transformer.py @@ -9,9 +9,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class MainTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/structure/section_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/structure/section_tag_transformer.py index d9ddcf3d9..a9d32460a 100644 --- a/borb/toolkit/export/html_to_pdf/read/structure/section_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/structure/section_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.paragraph import Paragraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class SectionTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/table/table_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/table/table_tag_transformer.py index d4a43d395..c42c3bf88 100644 --- a/borb/toolkit/export/html_to_pdf/read/table/table_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/table/table_tag_transformer.py @@ -14,9 +14,7 @@ FlexibleColumnWidthTable, ) from borb.pdf.canvas.layout.table.table import Table -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class TableTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/table/tbody_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/table/tbody_tag_transformer.py index 980315046..22d2b5ade 100644 --- a/borb/toolkit/export/html_to_pdf/read/table/tbody_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/table/tbody_tag_transformer.py @@ -9,9 +9,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class TBodyTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/table/td_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/table/td_tag_transformer.py index 583165546..728dce92c 100644 --- a/borb/toolkit/export/html_to_pdf/read/table/td_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/table/td_tag_transformer.py @@ -12,9 +12,7 @@ from borb.pdf.canvas.layout.table.table import TableCell from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class TdTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/table/th_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/table/th_tag_transformer.py index 851eeca84..49a3da79f 100644 --- a/borb/toolkit/export/html_to_pdf/read/table/th_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/table/th_tag_transformer.py @@ -9,9 +9,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class ThTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/table/tr_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/table/tr_tag_transformer.py index 9fd793fd5..a74050cb7 100644 --- a/borb/toolkit/export/html_to_pdf/read/table/tr_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/table/tr_tag_transformer.py @@ -9,9 +9,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class TrTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/a_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/a_tag_transformer.py index 3573d8f8f..acc576add 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/a_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/a_tag_transformer.py @@ -12,9 +12,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class ATagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/abbr_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/abbr_tag_transformer.py index 7a770055d..6bcce2340 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/abbr_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/abbr_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class AbbrTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/bold/b_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/bold/b_tag_transformer.py index 95f37c9aa..1fa66003b 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/bold/b_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/bold/b_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class BTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/bold/strong_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/bold/strong_tag_transformer.py index 40faa5eb8..8dc536265 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/bold/strong_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/bold/strong_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class StrongTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/br_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/br_tag_transformer.py index f67053035..307a70d54 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/br_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/br_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class BrTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/code_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/code_tag_transformer.py index b6f7cf78d..018e4e548 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/code_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/code_tag_transformer.py @@ -13,9 +13,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class CodeTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/italic/em_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/italic/em_tag_transformer.py index b574b4f0f..0355f146d 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/italic/em_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/italic/em_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class EmTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/italic/i_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/italic/i_tag_transformer.py index 968ae3804..6d387b409 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/italic/i_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/italic/i_tag_transformer.py @@ -10,9 +10,7 @@ from borb.pdf.canvas.layout.layout_element import LayoutElement from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class ITagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/mark_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/mark_tag_transformer.py index e17a2a215..5f89da278 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/mark_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/mark_tag_transformer.py @@ -12,9 +12,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class MarkTagTransformer(Transformer): diff --git a/borb/toolkit/export/html_to_pdf/read/text/p_tag_transformer.py b/borb/toolkit/export/html_to_pdf/read/text/p_tag_transformer.py index fd027a785..8b23ae288 100644 --- a/borb/toolkit/export/html_to_pdf/read/text/p_tag_transformer.py +++ b/borb/toolkit/export/html_to_pdf/read/text/p_tag_transformer.py @@ -11,9 +11,7 @@ from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.html_to_pdf.read.transformer import ( - Transformer, -) +from borb.toolkit.export.html_to_pdf.read.transformer import Transformer class PTagTransformer(Transformer): diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_to_pdf.py b/borb/toolkit/export/markdown_to_pdf/markdown_to_pdf.py index 1cd853805..e9763f98d 100644 --- a/borb/toolkit/export/markdown_to_pdf/markdown_to_pdf.py +++ b/borb/toolkit/export/markdown_to_pdf/markdown_to_pdf.py @@ -5,12 +5,10 @@ This class converts Markdown to PDF """ from borb.pdf.document import Document -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.any_markdown_transformer import ( +from borb.toolkit.export.markdown_to_pdf.read.any_markdown_transformer import ( AnyMarkdownTransformer, ) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - MarkdownTransformerState, -) +from borb.toolkit.export.markdown_to_pdf.read.transformer import TransformerState class MarkdownToPDF: @@ -23,6 +21,6 @@ def convert_markdown_to_pdf(markdown: str) -> Document: """ This function converts a Markdown str to a PDF """ - ctx: MarkdownTransformerState = MarkdownTransformerState(markdown) + ctx: TransformerState = TransformerState(markdown) AnyMarkdownTransformer()._transform(ctx) return ctx.get_document() diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/any_markdown_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/any_markdown_transformer.py deleted file mode 100644 index b0b1b32ac..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/any_markdown_transformer.py +++ /dev/null @@ -1,76 +0,0 @@ -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.heading.alternate_syntax_heading_transformer import ( - AlternateSyntaxHeadingTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.heading.heading_transformer import ( - HeadingTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.heading.horizontal_rule_transformer import ( - HorizontalRuleTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.image.image_transformer import ( - ImageTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.list.ordered_list_transformer import ( - OrderedListTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.list.unordered_list_transformer import ( - UnorderedListTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.table.table_transformer import ( - TableTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.text.blockquote_transformer import ( - BlockQuoteTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.text.fenced_code_snippet_transformer import ( - FencedCodeSnippetTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.text.indented_code_snippet_transformer import ( - IndentedCodeSnippetTransformer, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.text.paragraph_transformer import ( - ParagraphTransformer, -) - - -class AnyMarkdownTransformer(BaseMarkdownTransformer): - def __init__(self): - super(AnyMarkdownTransformer, self).__init__() - # fmt: off - self.add_child_transformer(HeadingTransformer()) \ - .add_child_transformer(AlternateSyntaxHeadingTransformer()) \ - .add_child_transformer(HorizontalRuleTransformer()) \ - .add_child_transformer(BlockQuoteTransformer()) \ - .add_child_transformer(IndentedCodeSnippetTransformer()) \ - .add_child_transformer(FencedCodeSnippetTransformer()) \ - .add_child_transformer(UnorderedListTransformer()) \ - .add_child_transformer(OrderedListTransformer()) \ - .add_child_transformer(TableTransformer()) \ - .add_child_transformer(ParagraphTransformer()) \ - .add_child_transformer(ImageTransformer()) - # fmt: on - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - return True - - def _transform(self, context: MarkdownTransformerState) -> None: - input_has_transformed: bool = True - while input_has_transformed and context.tell() < len( - context.get_markdown_string() - ): - # print("remaining input: `%s`" % context.get_markdown_string()[context.tell():context.get_markdown_string().find("\n", context.tell())]) - input_has_transformed = False - for t in self._children: - if t._can_transform(context): - t._transform(context) - input_has_transformed = True - break - - # this part catches any input and advances the tell() by 1 - if not input_has_transformed: - context.seek(context.tell() + 1) - input_has_transformed = True diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/base_markdown_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/base_markdown_transformer.py deleted file mode 100644 index e9098f2e9..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/base_markdown_transformer.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" - This class provides the base for converting a snippet of Markdown - to PDF syntax. -""" -import re -import typing - -from borb.pdf.canvas.layout.layout_element import LayoutElement -from borb.pdf.canvas.layout.page_layout.browser_layout import BrowserLayout -from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.document import Document -from borb.pdf.page.page import Page - - -class MarkdownTransformerState: - """ - This class represents all the meta-information used in the process of converting markdown to PDF - This includes: - - the root object (the Document itself) - - the current position in the markdown str - - etc - """ - - def __init__(self, markdown: str): - self._markdown_string: str = markdown - self._start_index: int = 0 - self._document: Document = Document() - page: Page = Page() - self._document.append_page(page) - layout: PageLayout = BrowserLayout(page) - self._parent_layout_element: typing.Union[ - Document, Page, LayoutElement, PageLayout - ] = layout - - def tell(self) -> int: - """ - This function returns the current str position - """ - return self._start_index - - def seek(self, p: int) -> "MarkdownTransformerState": - """ - This function changes the str position to the given byte offset. - This function returns self. - """ - self._start_index = p - return self - - def get_markdown_string(self) -> str: - """ - This function returns the markdown str being transformed - """ - return self._markdown_string - - def get_document(self) -> Document: - """ - This function returns the Document being built - """ - return self._document - - def get_parent_layout_element( - self, - ) -> typing.Union[Document, Page, LayoutElement, PageLayout]: - return self._parent_layout_element - - -class BaseMarkdownTransformer: - """ - This class provides the base for converting a snippet of Markdown - to PDF syntax. - """ - - def __init__(self): - self._children: typing.List["BaseMarkdownTransformer"] = [] - self._parent: typing.Optional["BaseMarkdownTransformer"] = None - - def add_child_transformer( - self, transformer: "BaseMarkdownTransformer" - ) -> "BaseMarkdownTransformer": - """ - Add a child BaseMarkdownTransformer to this BaseMarkdownTransformer. - Child transformers can be used to encapsulate specific object-creation/transformation logic. - e.g. converting bold text, lists, tables, etc - :param transformer: the BaseMarkdownTransformer implementation to be added - :type transformer: BaseMarkdownTransformer - """ - self._children.append(transformer) - transformer._parent = self - return self - - def get_parent(self) -> typing.Optional["BaseMarkdownTransformer"]: - """ - This function returns the parent BaseMarkdownTransformer. - BaseMarkdownTransformer implementations can delegate the transformation - process to their children (e.g. a paragraph-BaseMarkdownTransformer may delegate - some of its work to a bold-BaseMarkdownTransformer). - """ - return self._parent - - def get_root(self) -> "BaseMarkdownTransformer": - """ - This function returns the root BaseMarkdownTransformer. - BaseMarkdownTransformer implementations can delegate the transformation - process to their children (e.g. a paragraph-BaseMarkdownTransformer may delegate - some of its work to a bold-BaseMarkdownTransformer). - """ - p = self - while p._parent is not None: - p = p._parent - return p - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - return False - - def _transform(self, context: MarkdownTransformerState) -> None: - return None - - def _until_double_newline(self, context: MarkdownTransformerState) -> int: - i: int = context.tell() - while i < len(context.get_markdown_string()): - if ( - context.get_markdown_string()[i] == "\n" - and i + 1 < len(context.get_markdown_string()) - and context.get_markdown_string()[i + 1] == "\n" - ): - return i + 1 - i += 1 - return -1 - - def _as_long_as_input_lines_match( - self, line_regex: str, context: MarkdownTransformerState - ) -> int: - prev_newline_pos: int = context.tell() - 1 - while prev_newline_pos < len(context.get_markdown_string()): - # find next newline - next_newline_pos: int = context.get_markdown_string().find( - "\n", prev_newline_pos + 1 - ) - - # handle end of input - if next_newline_pos == -1: - next_newline_pos = len(context.get_markdown_string()) - - # determine input line - line: str = context.get_markdown_string()[ - prev_newline_pos + 1 : next_newline_pos - ] - if re.match(line_regex, line) is None: - return prev_newline_pos - - # set everything up for the next round - prev_newline_pos = next_newline_pos - - # return - return prev_newline_pos diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/__init__.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/__init__.py deleted file mode 100644 index bcf77caad..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -""" - This file is part of the borb (R) project. - Copyright (c) 2020-2040 borb Group NV - Authors: Joris Schellekens, et al. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License version 3 - as published by the Free Software Foundation with the addition of the - following permission added to Section 15 as permitted in Section 7(a): - FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY - BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT - OF THIRD PARTY RIGHTS - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program; if not, see http://www.gnu.org/licenses or write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA, 02110-1301 USA. - - The interactive user interfaces in modified source and object code versions - of this program must display Appropriate Legal Notices, as required under - Section 5 of the GNU Affero General Public License. - In accordance with Section 7(b) of the GNU Affero General Public License, - a covered work must retain the producer line in every PDF that is created - or manipulated using borb. - - You can be released from the requirements of the license by purchasing - a commercial license. Buying such a license is mandatory as soon as you - develop commercial activities involving the borb software without - disclosing the source code of your own applications. - - These activities include: offering paid services to customers as an ASP, - serving PDFs on the fly in a web application, shipping borb with a closed - source product. - - For more information, please contact borb Software Corp. at this - address: joris.schellekens.1989@gmail.com -""" diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/alternate_syntax_heading_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/alternate_syntax_heading_transformer.py deleted file mode 100644 index 520b40687..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/alternate_syntax_heading_transformer.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles (alternate syntax) headings -""" -import typing -from decimal import Decimal - -from borb.pdf.canvas.layout.layout_element import LayoutElement -from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.canvas.layout.text.heading import Heading -from borb.pdf.document import Document -from borb.pdf.page.page import Page -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class AlternateSyntaxHeadingTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles (alternate syntax) headings - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - # alternate syntax headings should start with an alpha character - if not context.get_markdown_string()[context.tell()].isalpha(): - return False - # headings should end with a - next_newline_pos: int = context.get_markdown_string().find( - "\n", context.tell() + 1 - ) - if next_newline_pos == -1: - return False - # the line under the heading should be all '=' or '-' - next_next_newline_pos: int = context.get_markdown_string().find( - "\n", next_newline_pos + 1 - ) - line_of_dashes: str = context.get_markdown_string()[ - next_newline_pos:next_next_newline_pos - ].strip() - return len(line_of_dashes) > 0 and ( - all([c == "=" for c in line_of_dashes]) - or all([c == "-" for c in line_of_dashes]) - ) - - def _transform(self, context: MarkdownTransformerState) -> None: - - # determine heading text - next_newline_pos: int = context.get_markdown_string().find( - "\n", context.tell() + 1 - ) - next_next_newline_pos: int = context.get_markdown_string().find( - "\n", next_newline_pos + 1 - ) - heading_text: str = context.get_markdown_string()[ - context.tell() : next_newline_pos - ] - - # determine heading level - line_of_dashes: str = context.get_markdown_string()[ - next_newline_pos:next_next_newline_pos - ].strip() - heading_level: int = 0 - if line_of_dashes[0] == "=": - heading_level = 0 - elif line_of_dashes[0] == "-": - heading_level = 1 - - # determine font size - font_size: Decimal = Decimal(27) - if heading_level == 1: - font_size = Decimal(21) - - # add LayoutElement - parent_layout_element: typing.Union[ - Document, Page, PageLayout, LayoutElement - ] = context.get_parent_layout_element() - assert isinstance(parent_layout_element, PageLayout) - parent_layout_element.add( - Heading(heading_text, font_size=font_size, outline_level=heading_level) - ) - - # seek - context.seek(next_next_newline_pos + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/heading_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/heading_transformer.py deleted file mode 100644 index 746301e66..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/heading_transformer.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles headings -""" -import typing -from decimal import Decimal - -from borb.pdf.canvas.layout.layout_element import LayoutElement -from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.canvas.layout.text.heading import Heading -from borb.pdf.document import Document -from borb.pdf.page.page import Page -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class HeadingTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles headings - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - return context.get_markdown_string()[context.tell()] == "#" - - def _transform(self, context: MarkdownTransformerState) -> None: - - # determine heading text and level - heading_text: str = context.get_markdown_string()[ - context.tell() : context.get_markdown_string().find("\n", context.tell()) - ] - heading_level: int = 0 - while heading_text.startswith("#"): - heading_text = heading_text[1:] - heading_level += 1 - heading_level -= 1 - heading_text = heading_text.lstrip() - - # determine style - font_size: Decimal = { - 0: Decimal(27), - 1: Decimal(21), - 2: Decimal(18), - 3: Decimal(15), - 4: Decimal(12), - 5: Decimal(12), - }.get(heading_level, Decimal(12)) - - # add LayoutElement - parent_layout_element: typing.Union[ - Document, Page, PageLayout, LayoutElement - ] = context.get_parent_layout_element() - assert isinstance(parent_layout_element, PageLayout) - parent_layout_element.add( - Heading(heading_text, font_size=font_size, outline_level=heading_level) - ) - - # seek - context.seek(context.get_markdown_string().find("\n", context.tell()) + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/horizontal_rule_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/horizontal_rule_transformer.py deleted file mode 100644 index 5851f212d..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/heading/horizontal_rule_transformer.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles horizontal rules -""" -import typing - -from borb.pdf.canvas.layout.horizontal_rule import HorizontalRule -from borb.pdf.canvas.layout.layout_element import LayoutElement -from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.document import Document -from borb.pdf.page.page import Page -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class HorizontalRuleTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles horizontal rules - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - """ - To create a horizontal rule, use three or more asterisks (***), dashes (---), or underscores (___) on a line by themselves. - """ - if context.get_markdown_string()[context.tell()] != "\n": - return False - markdown_str: str = context.get_markdown_string()[ - context.tell() - + 1 : context.get_markdown_string().find("\n", context.tell() + 1) - ] - return any([x in markdown_str for x in ["---", "***", "___"]]) - - def _transform(self, context: MarkdownTransformerState) -> None: - - # add LayoutElement - parent_layout_element: typing.Union[ - Document, Page, PageLayout, LayoutElement - ] = context.get_parent_layout_element() - assert isinstance(parent_layout_element, PageLayout) - parent_layout_element.add(HorizontalRule()) - - # seek - context.seek(context.get_markdown_string().find("\n", context.tell() + 1) + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/__init__.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/__init__.py deleted file mode 100644 index bcf77caad..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -""" - This file is part of the borb (R) project. - Copyright (c) 2020-2040 borb Group NV - Authors: Joris Schellekens, et al. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License version 3 - as published by the Free Software Foundation with the addition of the - following permission added to Section 15 as permitted in Section 7(a): - FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY - BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT - OF THIRD PARTY RIGHTS - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program; if not, see http://www.gnu.org/licenses or write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA, 02110-1301 USA. - - The interactive user interfaces in modified source and object code versions - of this program must display Appropriate Legal Notices, as required under - Section 5 of the GNU Affero General Public License. - In accordance with Section 7(b) of the GNU Affero General Public License, - a covered work must retain the producer line in every PDF that is created - or manipulated using borb. - - You can be released from the requirements of the license by purchasing - a commercial license. Buying such a license is mandatory as soon as you - develop commercial activities involving the borb software without - disclosing the source code of your own applications. - - These activities include: offering paid services to customers as an ASP, - serving PDFs on the fly in a web application, shipping borb with a closed - source product. - - For more information, please contact borb Software Corp. at this - address: joris.schellekens.1989@gmail.com -""" diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/__init__.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/__init__.py deleted file mode 100644 index bcf77caad..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -""" - This file is part of the borb (R) project. - Copyright (c) 2020-2040 borb Group NV - Authors: Joris Schellekens, et al. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License version 3 - as published by the Free Software Foundation with the addition of the - following permission added to Section 15 as permitted in Section 7(a): - FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY - BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT - OF THIRD PARTY RIGHTS - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program; if not, see http://www.gnu.org/licenses or write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA, 02110-1301 USA. - - The interactive user interfaces in modified source and object code versions - of this program must display Appropriate Legal Notices, as required under - Section 5 of the GNU Affero General Public License. - In accordance with Section 7(b) of the GNU Affero General Public License, - a covered work must retain the producer line in every PDF that is created - or manipulated using borb. - - You can be released from the requirements of the license by purchasing - a commercial license. Buying such a license is mandatory as soon as you - develop commercial activities involving the borb software without - disclosing the source code of your own applications. - - These activities include: offering paid services to customers as an ASP, - serving PDFs on the fly in a web application, shipping borb with a closed - source product. - - For more information, please contact borb Software Corp. at this - address: joris.schellekens.1989@gmail.com -""" diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/ordered_list_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/ordered_list_transformer.py deleted file mode 100644 index c33a66aa4..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/ordered_list_transformer.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles ordered lists -""" -import typing - -from borb.pdf.canvas.layout.list.ordered_list import OrderedList -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class OrderedListTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles ordered lists - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - indent_level: int = 0 - while ( - context.tell() + indent_level < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + indent_level] == " " - ): - indent_level += 1 - return ( - context.get_markdown_string()[context.tell() + indent_level] == "1" - and context.tell() + indent_level + 1 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + indent_level + 1] == "." - and context.tell() + indent_level + 2 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + indent_level + 2] == " " - ) - - def _transform(self, context: MarkdownTransformerState) -> None: - - # continue processing lines until we hit - end_pos: int = self._until_double_newline(context) - if end_pos == -1: - end_pos = len(context.get_markdown_string()) - list_lines_raw: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_pos - 1 - ].split("\n") - - index: int = 0 - prev_indentation_level: int = 0 - while list_lines_raw[0][prev_indentation_level] == " ": - prev_indentation_level += 1 - - list_items_str: typing.List[str] = [] - while index < len(list_lines_raw): - - # determine the indentation level - indentation_level: int = 0 - while ( - indentation_level < len(list_lines_raw[index]) - and list_lines_raw[index][indentation_level] == " " - ): - indentation_level += 1 - - # IF the indentation level changed (+4) AND there is no list_symbol --> continuation of previous item - if indentation_level == prev_indentation_level + 4 and list_lines_raw[ - index - ].strip()[0] not in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]: - list_items_str[-1] += "\n" + list_lines_raw[index][4:] - index += 1 - continue - - # IF the indentation level changed AND there is a list symbol --> grab everything on that indentation level - if indentation_level > prev_indentation_level and list_lines_raw[ - index - ].strip()[0] in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]: - sublist_end_index: int = index - while sublist_end_index < len(list_lines_raw) and list_lines_raw[ - sublist_end_index - ].startswith("".join([" " for _ in range(0, indentation_level)])): - sublist_end_index += 1 - list_items_str.append( - "".join( - [ - list_lines_raw[i][indentation_level:] + "\n" - for i in range(index, sublist_end_index) - ] - ) - ) - index = sublist_end_index - continue - - # IF the indentation level is equal AND there is no list_symbol --> error in markdown - if indentation_level == prev_indentation_level and list_lines_raw[ - index - ].strip()[0] not in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]: - assert ( - False - ), "Invalid markdown: To add another element in a list while preserving the continuity of the list, indent the element four spaces or one tab." - - # IF the indentation level is equal AND there is a list_symbol --> new item - if indentation_level == prev_indentation_level and list_lines_raw[ - index - ].lstrip()[0] in ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]: - l: str = list_lines_raw[index] - while l[0].isdigit() or l[0] == " ": - l = l[1:] - if l.startswith(". "): - l = l[2:] - if l.startswith("."): - l = l[1:] - list_items_str.append(l) - index += 1 - - # build UnorderedList - ul: OrderedList = OrderedList() - for s in list_items_str: - sub_context: MarkdownTransformerState = MarkdownTransformerState(s) - sub_context._document = context._document - sub_context._parent_layout_element = ul - self.get_root()._transform(sub_context) - - # add - context.get_parent_layout_element().add(ul) # type: ignore [union-attr] - - # seek - context.seek(end_pos) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/unordered_list_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/unordered_list_transformer.py deleted file mode 100644 index 8017223ca..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/list/unordered_list_transformer.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles unordered lists -""" -import typing - -from borb.pdf.canvas.layout.list.unordered_list import UnorderedList -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class UnorderedListTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles unordered lists - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - """ - To create an unordered list, add dashes (-), asterisks (*), or plus signs (+) in front of line items. - Indent one or more items to create a nested list. - """ - indent_level: int = 0 - while ( - context.tell() + indent_level < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + indent_level] == " " - ): - indent_level += 1 - return ( - context.get_markdown_string()[context.tell() + indent_level] - in ["*", "-", "+"] - and context.tell() + indent_level + 1 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + indent_level + 1] == " " - ) - - def _transform(self, context: MarkdownTransformerState) -> None: - - # continue processing lines until we hit - end_pos: int = self._until_double_newline(context) - if end_pos == -1: - end_pos = len(context.get_markdown_string()) - list_lines_raw: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_pos - 1 - ].split("\n") - - # determine initial indentation level - index: int = 0 - prev_indentation_level: int = 0 - while list_lines_raw[0][prev_indentation_level] == " ": - prev_indentation_level += 1 - - list_items_str: typing.List[str] = [] - while index < len(list_lines_raw): - - # determine the indentation level - indentation_level: int = 0 - while ( - indentation_level < len(list_lines_raw[index]) - and list_lines_raw[index][indentation_level] == " " - ): - indentation_level += 1 - - # IF the indentation level changed (+4) AND there is no list_symbol --> continuation of previous item - if indentation_level == prev_indentation_level + 4 and list_lines_raw[ - index - ].strip()[0] not in ["+", "-", "*"]: - list_items_str[-1] += "\n" + list_lines_raw[index][4:] - index += 1 - continue - - # IF the indentation level changed AND there is a list symbol --> grab everything on that indentation level - if indentation_level > prev_indentation_level and list_lines_raw[ - index - ].strip()[0] in ["+", "-", "*"]: - sublist_end_index: int = index - while sublist_end_index < len(list_lines_raw) and list_lines_raw[ - sublist_end_index - ].startswith("".join([" " for _ in range(0, indentation_level)])): - sublist_end_index += 1 - list_items_str.append( - "".join( - [ - list_lines_raw[i][indentation_level:] + "\n" - for i in range(index, sublist_end_index) - ] - ) - ) - index = sublist_end_index - continue - - # IF the indentation level is equal AND there is no list_symbol --> error in markdown - if indentation_level == prev_indentation_level and list_lines_raw[ - index - ].strip()[0] not in ["+", "-", "*"]: - assert ( - False - ), "Invalid markdown: To add another element in a list while preserving the continuity of the list, indent the element four spaces or one tab." - - # IF the indentation level is equal AND there is a list_symbol --> new item - if indentation_level == prev_indentation_level and list_lines_raw[ - index - ].lstrip()[0] in ["+", "-", "*"]: - list_items_str.append(list_lines_raw[index].lstrip()[2:]) - index += 1 - - # build UnorderedList - ul: UnorderedList = UnorderedList() - for s in list_items_str: - sub_context: MarkdownTransformerState = MarkdownTransformerState(s) - sub_context._document = context._document - sub_context._parent_layout_element = ul - self.get_root()._transform(sub_context) - - # add - context.get_parent_layout_element().add(ul) # type: ignore [union-attr] - - # seek - context.seek(end_pos + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/table/__init__.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/table/__init__.py deleted file mode 100644 index bcf77caad..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/table/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -""" - This file is part of the borb (R) project. - Copyright (c) 2020-2040 borb Group NV - Authors: Joris Schellekens, et al. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License version 3 - as published by the Free Software Foundation with the addition of the - following permission added to Section 15 as permitted in Section 7(a): - FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY - BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT - OF THIRD PARTY RIGHTS - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program; if not, see http://www.gnu.org/licenses or write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA, 02110-1301 USA. - - The interactive user interfaces in modified source and object code versions - of this program must display Appropriate Legal Notices, as required under - Section 5 of the GNU Affero General Public License. - In accordance with Section 7(b) of the GNU Affero General Public License, - a covered work must retain the producer line in every PDF that is created - or manipulated using borb. - - You can be released from the requirements of the license by purchasing - a commercial license. Buying such a license is mandatory as soon as you - develop commercial activities involving the borb software without - disclosing the source code of your own applications. - - These activities include: offering paid services to customers as an ASP, - serving PDFs on the fly in a web application, shipping borb with a closed - source product. - - For more information, please contact borb Software Corp. at this - address: joris.schellekens.1989@gmail.com -""" diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/table/table_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/table/table_transformer.py deleted file mode 100644 index 929ebd163..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/table/table_transformer.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles tables -""" -import typing -from decimal import Decimal - -from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font -from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.table.flexible_column_width_table import ( - FlexibleColumnWidthTable, -) -from borb.pdf.canvas.layout.table.table import Table -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class TableTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles tables - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - i: int = context.tell() - while ( - i < len(context.get_markdown_string()) - and context.get_markdown_string()[i] == " " - ): - i += 1 - if ( - i >= len(context.get_markdown_string()) - or context.get_markdown_string()[i] != "|" - ): - return False - next_newline_pos: int = context.get_markdown_string().find("\n", i) - if next_newline_pos == -1: - next_newline_pos = len(context.get_markdown_string()) - if context.get_markdown_string()[next_newline_pos - 1] == "|": - return True - return False - - def _is_alignment_td(self, td: str) -> bool: - td_stripped: str = td.strip() - if all([x == "-" for x in td_stripped]): - return True - if td_stripped.startswith(":") and all([x == "-" for x in td_stripped[1:]]): - return True - if td_stripped.endswith(":") and all([x == "-" for x in td_stripped[:-1]]): - return True - if ( - td_stripped.startswith(":") - and td_stripped.endswith(":") - and all([x == "-" for x in td_stripped[1:-1]]) - ): - return True - return False - - def _transform(self, context: MarkdownTransformerState) -> None: - - # continue processing lines until we hit - end_pos: int = self._until_double_newline(context) - if end_pos == -1: - end_pos = len(context.get_markdown_string()) - table_lines_raw: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_pos - 1 - ].split("\n") - - index: int = 0 - number_of_columns: int = len(table_lines_raw[0].split("|")) - 2 - column_alignment: typing.List[Alignment] = [ - Alignment.LEFT for _ in range(0, number_of_columns) - ] - table_items_str: typing.List[typing.List[str]] = [] - while index < len(table_lines_raw): - # process alignment line - if all( - [self._is_alignment_td(x) for x in table_lines_raw[index].split("|")] - ): - for i, a in enumerate(table_lines_raw[index].strip().split("|")[1:-1]): - a = a.strip() - if a.endswith(":") and a.startswith(":"): - column_alignment[i] = Alignment.CENTERED - elif a.endswith(":"): - column_alignment[i] = Alignment.RIGHT - index += 1 - continue - # process normal lines - table_items_str.append(table_lines_raw[index].strip().split("|")[1:-1]) - index += 1 - - # build Table - number_of_rows: int = len(table_items_str) - ul: Table = FlexibleColumnWidthTable( - number_of_columns=number_of_columns, number_of_rows=number_of_rows - ) - for tr in table_items_str: - for td in tr: - sub_context: MarkdownTransformerState = MarkdownTransformerState(td) - sub_context._document = context._document - sub_context._parent_layout_element = ul - self.get_root()._transform(sub_context) - - # set alignment - for i in range(0, number_of_columns): - for td_table_cell in ul._get_cells_at_column(i): - td_table_cell._horizontal_alignment = column_alignment[i] - - # set header row - for td_table_cell in ul._get_cells_at_row(0): - td_table_cell._layout_element._font = StandardType1Font("Helvetica-Bold") # type: ignore [attr-defined] - - # set padding and zebra striping - ul.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5), Decimal(5)) - # ul.even_odd_row_colors(HexColor("ffffff"), HexColor("c3c3c3")) - - # add - context.get_parent_layout_element().add(ul) # type: ignore [union-attr] - - # seek - context.seek(end_pos + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/__init__.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/__init__.py deleted file mode 100644 index bcf77caad..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -""" - This file is part of the borb (R) project. - Copyright (c) 2020-2040 borb Group NV - Authors: Joris Schellekens, et al. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License version 3 - as published by the Free Software Foundation with the addition of the - following permission added to Section 15 as permitted in Section 7(a): - FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY - BORB GROUP. BORB GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT - OF THIRD PARTY RIGHTS - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. - - See the GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program; if not, see http://www.gnu.org/licenses or write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA, 02110-1301 USA. - - The interactive user interfaces in modified source and object code versions - of this program must display Appropriate Legal Notices, as required under - Section 5 of the GNU Affero General Public License. - In accordance with Section 7(b) of the GNU Affero General Public License, - a covered work must retain the producer line in every PDF that is created - or manipulated using borb. - - You can be released from the requirements of the license by purchasing - a commercial license. Buying such a license is mandatory as soon as you - develop commercial activities involving the borb software without - disclosing the source code of your own applications. - - These activities include: offering paid services to customers as an ASP, - serving PDFs on the fly in a web application, shipping borb with a closed - source product. - - For more information, please contact borb Software Corp. at this - address: joris.schellekens.1989@gmail.com -""" diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/blockquote_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/blockquote_transformer.py deleted file mode 100644 index 2e68836c0..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/blockquote_transformer.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles block quotes -""" -import typing -from decimal import Decimal - -from borb.pdf.canvas.color.color import HexColor -from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class BlockQuoteTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles block quotes - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - return context.get_markdown_string()[context.tell()] == ">" and ( - context.tell() == 0 - or context.get_markdown_string()[context.tell() - 1] == "\n" - ) - - def _transform(self, context: MarkdownTransformerState) -> None: - - end_of_input: int = self._as_long_as_input_lines_match(">.*", context) - block_quote_lines: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_of_input - 1 - ].split("\n") - block_quote_lines = [x[2:] for x in block_quote_lines] - - # transform the markdown syntax per line - el: HeterogeneousParagraph = HeterogeneousParagraph( - background_color=HexColor("c3c3c3"), - padding_top=Decimal(5), - padding_right=Decimal(5), - padding_bottom=Decimal(5), - padding_left=Decimal(5), - border_left=True, - border_width=Decimal(3), - ) - for line in block_quote_lines: - sub_context: MarkdownTransformerState = MarkdownTransformerState(line) - sub_context._document = context._document - sub_context._parent_layout_element = el - self.get_root()._transform(sub_context) - - for c in el._chunks_of_text: - c._background_color = HexColor("c3c3c3") - - # add - context.get_parent_layout_element().add(el) # type: ignore [union-attr] - - # seek - context.seek(end_of_input + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/fenced_code_snippet_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/fenced_code_snippet_transformer.py deleted file mode 100644 index 360a61468..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/fenced_code_snippet_transformer.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles (fenced) code snippets -""" -import typing -from decimal import Decimal - -from borb.pdf.canvas.color.color import HexColor -from borb.pdf.canvas.layout.text.chunks_of_text import ( - HeterogeneousParagraph, - LineBreakChunk, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class FencedCodeSnippetTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles (fenced) code snippets - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - return ( - context.get_markdown_string()[context.tell()] == "`" - and context.tell() + 1 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + 1] == "`" - and context.tell() + 2 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + 2] == "`" - and context.tell() + 3 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + 3] == "\n" - ) - - def _transform(self, context: MarkdownTransformerState) -> None: - - end_of_input: int = context.get_markdown_string().find( - "```", context.tell() + 1 - ) - code_snippet_lines: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_of_input - ].split("\n") - code_snippet_lines = code_snippet_lines[1:-1] - - # transform the markdown syntax per line - el: HeterogeneousParagraph = HeterogeneousParagraph( - background_color=HexColor("c3c3c3"), - padding_top=Decimal(5), - padding_right=Decimal(5), - padding_bottom=Decimal(5), - padding_left=Decimal(5), - border_left=True, - border_width=Decimal(3), - ) - - for line in code_snippet_lines: - sub_context: MarkdownTransformerState = MarkdownTransformerState(line) - sub_context._document = context._document - sub_context._parent_layout_element = el - self.get_root()._transform(sub_context) - el.add(LineBreakChunk()) - - for c in el._chunks_of_text: - c._background_color = HexColor("c3c3c3") - - # add - context.get_parent_layout_element().add(el) # type: ignore [union-attr] - - # seek - context.seek(end_of_input + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/indented_code_snippet_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/indented_code_snippet_transformer.py deleted file mode 100644 index b0fbe88de..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/indented_code_snippet_transformer.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles (indented) code snippets -""" -import typing -from decimal import Decimal - -from borb.pdf.canvas.color.color import HexColor -from borb.pdf.canvas.layout.text.chunks_of_text import ( - HeterogeneousParagraph, - LineBreakChunk, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class IndentedCodeSnippetTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles (indented) code snippets - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - return ( - context.get_markdown_string()[context.tell()] == " " - and context.tell() + 1 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + 1] == " " - and context.tell() + 2 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + 2] == " " - and context.tell() + 3 < len(context.get_markdown_string()) - and context.get_markdown_string()[context.tell() + 3] == " " - ) - - def _transform(self, context: MarkdownTransformerState) -> None: - - end_of_input: int = self._as_long_as_input_lines_match(" .*", context) - code_snippet_lines: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_of_input - ].split("\n") - code_snippet_lines = [x[4:] for x in code_snippet_lines] - - # transform the markdown syntax per line - el: HeterogeneousParagraph = HeterogeneousParagraph( - background_color=HexColor("c3c3c3"), - padding_top=Decimal(5), - padding_right=Decimal(5), - padding_bottom=Decimal(5), - padding_left=Decimal(5), - border_left=True, - border_width=Decimal(3), - ) - - for line in code_snippet_lines: - sub_context: MarkdownTransformerState = MarkdownTransformerState(line) - sub_context._document = context._document - sub_context._parent_layout_element = el - self.get_root()._transform(sub_context) - el.add(LineBreakChunk()) - - for c in el._chunks_of_text: - c._background_color = HexColor("c3c3c3") - - # add - context.get_parent_layout_element().add(el) # type: ignore [union-attr] - - # seek - context.seek(end_of_input + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/paragraph_transformer.py b/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/paragraph_transformer.py deleted file mode 100644 index dcabe2a4f..000000000 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/text/paragraph_transformer.py +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -This implementation of BaseMarkdownTransformer handles paragraphs -""" -import typing - -from borb.pdf.canvas.color.color import Color, HexColor -from borb.pdf.canvas.font.font import Font -from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font -from borb.pdf.canvas.layout.emoji.emoji import Emoji, Emojis -from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText -from borb.pdf.canvas.layout.text.chunks_of_text import ( - HeterogeneousParagraph, - LineBreakChunk, -) -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, -) - - -class ParagraphTransformer(BaseMarkdownTransformer): - """ - This implementation of BaseMarkdownTransformer handles paragraphs - """ - - def _can_transform(self, context: MarkdownTransformerState) -> bool: - """ - This function always returns True, anything can be a Paragraph - """ - return context.get_markdown_string()[ - context.tell() - ].isalpha() or context.get_markdown_string()[context.tell()] in [ - "*", - "_", - ":", - "\\", - "`", - ] - - def _get_font(self, is_bold: bool, is_italic: bool, is_monospaced: bool) -> Font: - if is_monospaced: - return StandardType1Font("Courier") - if is_bold and is_italic: - return StandardType1Font("Helvetica-bold-oblique") - elif is_bold: - return StandardType1Font("Helvetica-bold") - elif is_italic: - return StandardType1Font("Helvetica-oblique") - else: - return StandardType1Font("Helvetica") - - def _build_chunks( - self, text: str, is_bold: bool, is_italic: bool, is_monospaced: bool - ) -> typing.List[ChunkOfText]: - out: typing.List[ChunkOfText] = [] - for w in text.split(" "): - background_color: Color = HexColor("ffffff") - if is_monospaced: - background_color = HexColor("c3c3c3") - out.append( - ChunkOfText( - w + " ", - font=self._get_font(is_bold, is_italic, is_monospaced), - background_color=background_color, - ) - ) - return out - - def _transform(self, context: MarkdownTransformerState) -> None: - - # continue processing lines until we hit - end_pos: int = self._until_double_newline(context) - if end_pos == -1: - end_pos = len(context.get_markdown_string()) + 1 - paragraph_lines_raw: typing.List[str] = context.get_markdown_string()[ - context.tell() : end_pos - 1 - ].split("\n") - - # process each line - chunks_of_text: typing.List[typing.Union[ChunkOfText, Emoji]] = [] - is_bold: bool = False - is_italic: bool = False - is_monospaced: bool = False - chunk_text: str = "" - for paragraph_line in paragraph_lines_raw: - i: int = 0 - while i < len(paragraph_line): - # process \< - c: str = paragraph_line[i] - if ( - c == "\\" - and i + 1 < len(paragraph_line) - and paragraph_line[i + 1] in [">", "<", "*", "+", "-", "_", "`"] - ): - chunk_text += paragraph_line[i + 1] - i += 2 - continue - # process :: - if ( - not is_monospaced - and c == ":" - and paragraph_line.find(":", i + 1) >= 0 - and paragraph_line[i + 1 : paragraph_line.find(":", i + 1)].upper() - in [x.name for x in Emojis] - ): - emoji_name: str = paragraph_line[ - i + 1 : paragraph_line.find(":", i + 1) - ] - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunks_of_text.append(Emojis[emoji_name.upper()].value) - chunk_text = "" - i = paragraph_line.find(":", i + 1) + 1 - continue - # process *** - if ( - c == "*" - and i + 1 < len(paragraph_line) - and paragraph_line[i + 1] == "*" - and i + 2 < len(paragraph_line) - and paragraph_line[i + 2] == "*" - ): - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_bold = not is_bold - is_italic = not is_italic - i += 3 - continue - # process ___ - if ( - c == "_" - and i + 1 < len(paragraph_line) - and paragraph_line[i + 1] == "_" - and i + 2 < len(paragraph_line) - and paragraph_line[i + 2] == "_" - ): - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_bold = not is_bold - is_italic = not is_italic - i += 3 - continue - # process ** - if ( - c == "*" - and i + 1 < len(paragraph_line) - and paragraph_line[i + 1] == "*" - ): - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_bold = not is_bold - i += 2 - continue - # process __ - if ( - c == "_" - and i + 1 < len(paragraph_line) - and paragraph_line[i + 1] == "_" - ): - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_bold = not is_bold - i += 2 - continue - # process * - if c == "*": - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_italic = not is_italic - i += 1 - continue - # process _ - if c == "_": - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_italic = not is_italic - i += 1 - continue - # process ` - if c == "`": - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunk_text = "" - is_monospaced = not is_monospaced - i += 1 - continue - # process - if ( - i == len(paragraph_line) - 2 - and c == " " - and paragraph_line[i + 1] == " " - ): - chunks_of_text.extend( - self._build_chunks( - chunk_text, is_bold, is_italic, is_monospaced - ) - ) - chunks_of_text.append(LineBreakChunk()) - chunk_text = "" - i += 2 - continue - # process any character - chunk_text += c - i += 1 - - # append any remaining chunks - if len(chunk_text) > 0: - chunks_of_text.extend( - self._build_chunks(chunk_text, is_bold, is_italic, is_monospaced) - ) - - # append HeterogeneousParagraph - context.get_parent_layout_element().add(HeterogeneousParagraph(chunks_of_text)) # type: ignore [union-attr] - - # seek - context.seek(end_pos) diff --git a/borb/toolkit/export/markdown_to_pdf/read/any_markdown_transformer.py b/borb/toolkit/export/markdown_to_pdf/read/any_markdown_transformer.py index be5e1c191..68980a5b1 100644 --- a/borb/toolkit/export/markdown_to_pdf/read/any_markdown_transformer.py +++ b/borb/toolkit/export/markdown_to_pdf/read/any_markdown_transformer.py @@ -1,3 +1,9 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +This implementation of BaseMarkdownTransformer is responsible for converting a markdown str to PDF +""" from borb.toolkit.export.markdown_to_pdf.read.heading.alternate_syntax_heading_transformer import ( AlternateSyntaxHeadingTransformer, ) @@ -7,6 +13,9 @@ from borb.toolkit.export.markdown_to_pdf.read.heading.horizontal_rule_transformer import ( HorizontalRuleTransformer, ) +from borb.toolkit.export.markdown_to_pdf.read.image.image_transformer import ( + ImageTransformer, +) from borb.toolkit.export.markdown_to_pdf.read.list.ordered_list_transformer import ( OrderedListTransformer, ) @@ -35,6 +44,10 @@ class AnyMarkdownTransformer(Transformer): + """ + This implementation of BaseMarkdownTransformer is responsible for converting a markdown str to PDF + """ + def __init__(self): super(AnyMarkdownTransformer, self).__init__() # fmt: off @@ -47,7 +60,9 @@ def __init__(self): .add_child_transformer(UnorderedListTransformer()) \ .add_child_transformer(OrderedListTransformer()) \ .add_child_transformer(TableTransformer()) \ + .add_child_transformer(ImageTransformer()) \ .add_child_transformer(ParagraphTransformer()) + # fmt: on def _can_transform(self, context: TransformerState) -> bool: diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/__init__.py b/borb/toolkit/export/markdown_to_pdf/read/image/__init__.py similarity index 100% rename from borb/toolkit/export/markdown_to_pdf/markdown_transformer/__init__.py rename to borb/toolkit/export/markdown_to_pdf/read/image/__init__.py diff --git a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/image_transformer.py b/borb/toolkit/export/markdown_to_pdf/read/image/image_transformer.py similarity index 68% rename from borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/image_transformer.py rename to borb/toolkit/export/markdown_to_pdf/read/image/image_transformer.py index 18efe0daa..53ce868d2 100644 --- a/borb/toolkit/export/markdown_to_pdf/markdown_transformer/image/image_transformer.py +++ b/borb/toolkit/export/markdown_to_pdf/read/image/image_transformer.py @@ -13,14 +13,15 @@ from borb.pdf.canvas.layout.image.image import Image from borb.pdf.canvas.layout.page_layout.browser_layout import BrowserLayout +from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout from borb.pdf.page.page import Page -from borb.toolkit.export.markdown_to_pdf.markdown_transformer.base_markdown_transformer import ( - BaseMarkdownTransformer, - MarkdownTransformerState, +from borb.toolkit.export.markdown_to_pdf.read.transformer import ( + Transformer, + TransformerState, ) -class ImageTransformer(BaseMarkdownTransformer): +class ImageTransformer(Transformer): """ This implementation of BaseMarkdownTransformer handles images """ @@ -36,7 +37,7 @@ def _get_image_default_margins(): borb_image.get_margin_left(), ) - def _can_transform(self, context: MarkdownTransformerState) -> bool: + def _can_transform(self, context: TransformerState) -> bool: if context.get_markdown_string()[context.tell()] != "!": return False markdown_str: str = context.get_markdown_string()[ @@ -46,7 +47,7 @@ def _can_transform(self, context: MarkdownTransformerState) -> bool: ] return re.match("!\[[^]]+\]\([^)]+\)", markdown_str) is not None - def _transform(self, context: MarkdownTransformerState) -> None: + def _transform(self, context: TransformerState) -> None: # get markdown string of current char -> next line markdown_str: str = context.get_markdown_string()[ @@ -57,7 +58,9 @@ def _transform(self, context: MarkdownTransformerState) -> None: assert len(markdown_str) > 0 # match against regex - match: re.Match = re.match("!\[[^]]+\]\((?P[^)]+)\)", markdown_str) + match: typing.Optional[re.Match] = re.match( + "!\[[^]]+\]\((?P[^)]+)\)", markdown_str + ) assert match is not None # extract (named group) url @@ -85,26 +88,24 @@ def _transform(self, context: MarkdownTransformerState) -> None: # Page parent_element = context.get_parent_layout_element() + assert isinstance(parent_element, PageLayout) + + # get page width and height + page_width: Decimal = ( + parent_element.get_page().get_page_info().get_width() or Decimal(0) + ) + page_height: Decimal = ( + parent_element.get_page().get_page_info().get_height() or Decimal(0) + ) + if isinstance(parent_element, Page): - W = int(parent_element.get_page_info().get_width() * Decimal(0.8)) - H = int(parent_element.get_page_info().get_height() * Decimal(0.8)) + W = int(page_width * Decimal(0.8)) + H = int(page_height * Decimal(0.8)) # BrowserLayout if isinstance(parent_element, BrowserLayout): - W = ( - int( - parent_element.get_page().get_page_info().get_width() - - parent_element._horizontal_margin * Decimal(2) - ) - - 1 - ) - H = ( - int( - parent_element.get_page().get_page_info().get_height() - - parent_element._vertical_margin * Decimal(2) - ) - - 1 - ) + W = int(page_width - parent_element._horizontal_margin * Decimal(2)) - 1 + H = int(page_height - parent_element._vertical_margin * Decimal(2)) - 1 # TODO: Table @@ -119,15 +120,7 @@ def _transform(self, context: MarkdownTransformerState) -> None: # create and add Image borb_image: Image = Image(image, width=Decimal(w), height=Decimal(h)) - parent_element.add(borb_image) - - # add remote go to annotation - try: - parent_element.get_page().append_remote_go_to_annotation( - borb_image.get_bounding_box(), url - ) - except: - pass + parent_element.add(borb_image) # type: ignore [union-attr] # seek context.seek(context.get_markdown_string().find("\n", context.tell()) + 1) diff --git a/borb/toolkit/export/markdown_to_pdf/read/text/fenced_code_snippet_transformer.py b/borb/toolkit/export/markdown_to_pdf/read/text/fenced_code_snippet_transformer.py index de0bbc25a..9c3b1eaef 100644 --- a/borb/toolkit/export/markdown_to_pdf/read/text/fenced_code_snippet_transformer.py +++ b/borb/toolkit/export/markdown_to_pdf/read/text/fenced_code_snippet_transformer.py @@ -8,6 +8,7 @@ from decimal import Decimal from borb.pdf.canvas.color.color import HexColor +from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.layout.text.chunks_of_text import ( HeterogeneousParagraph, LineBreakChunk, @@ -46,13 +47,11 @@ def _transform(self, context: TransformerState) -> None: # transform the markdown syntax per line el: HeterogeneousParagraph = HeterogeneousParagraph( - background_color=HexColor("c3c3c3"), + background_color=HexColor("f5f7f9"), padding_top=Decimal(5), padding_right=Decimal(5), padding_bottom=Decimal(5), padding_left=Decimal(5), - border_left=True, - border_width=Decimal(3), ) for line in code_snippet_lines: @@ -63,7 +62,8 @@ def _transform(self, context: TransformerState) -> None: el.add(LineBreakChunk()) for c in el._chunks_of_text: - c._background_color = HexColor("c3c3c3") + c._background_color = HexColor("f5f7f9") + c._font = StandardType1Font("Courier") # add context.get_parent_layout_element().add(el) # type: ignore [union-attr] diff --git a/borb/toolkit/export/markdown_to_pdf/read/text/indented_code_snippet_transformer.py b/borb/toolkit/export/markdown_to_pdf/read/text/indented_code_snippet_transformer.py index 6ad641d9b..2c56c3c80 100644 --- a/borb/toolkit/export/markdown_to_pdf/read/text/indented_code_snippet_transformer.py +++ b/borb/toolkit/export/markdown_to_pdf/read/text/indented_code_snippet_transformer.py @@ -8,6 +8,7 @@ from decimal import Decimal from borb.pdf.canvas.color.color import HexColor +from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.layout.text.chunks_of_text import ( HeterogeneousParagraph, LineBreakChunk, @@ -44,13 +45,11 @@ def _transform(self, context: TransformerState) -> None: # transform the markdown syntax per line el: HeterogeneousParagraph = HeterogeneousParagraph( - background_color=HexColor("c3c3c3"), + background_color=HexColor("f5f7f9"), padding_top=Decimal(5), padding_right=Decimal(5), padding_bottom=Decimal(5), padding_left=Decimal(5), - border_left=True, - border_width=Decimal(3), ) for line in code_snippet_lines: @@ -61,7 +60,8 @@ def _transform(self, context: TransformerState) -> None: el.add(LineBreakChunk()) for c in el._chunks_of_text: - c._background_color = HexColor("c3c3c3") + c._background_color = HexColor("f5f7f9") + c._font = StandardType1Font("Courier") # add context.get_parent_layout_element().add(el) # type: ignore [union-attr] diff --git a/borb/toolkit/export/markdown_to_pdf/read/text/paragraph_transformer.py b/borb/toolkit/export/markdown_to_pdf/read/text/paragraph_transformer.py index 59ee6321e..a2fdcb7ea 100644 --- a/borb/toolkit/export/markdown_to_pdf/read/text/paragraph_transformer.py +++ b/borb/toolkit/export/markdown_to_pdf/read/text/paragraph_transformer.py @@ -38,6 +38,8 @@ def _can_transform(self, context: TransformerState) -> bool: ":", "\\", "`", + "<", + ">", ] def _get_font(self, is_bold: bool, is_italic: bool, is_monospaced: bool) -> Font: @@ -59,7 +61,7 @@ def _build_chunks( for w in text.split(" "): background_color: Color = HexColor("ffffff") if is_monospaced: - background_color = HexColor("c3c3c3") + background_color = HexColor("f5f7f9") out.append( ChunkOfText( w + " ", diff --git a/borb/toolkit/text/bigram_part_of_speech_tagger.py b/borb/toolkit/text/bigram_part_of_speech_tagger.py index 0019e5f4a..e599917ea 100644 --- a/borb/toolkit/text/bigram_part_of_speech_tagger.py +++ b/borb/toolkit/text/bigram_part_of_speech_tagger.py @@ -173,7 +173,7 @@ def _tag_by_transition( if first_unknown_tag_index is None: if self._odds_best_tagging is None or p > self._odds_best_tagging: self._odds_best_tagging = p - self._best_tagging = [x for x in tags] + self._best_tagging = [x or "" for x in tags] return # determine possible tags diff --git a/release_notes.md b/release_notes.md index 3b6a5e7d0..c76d48f85 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,21 +1,3 @@ -# :mega: borb release 2.0.13 - -With this release, `borb` is one step closer to being able to write a PDF/A-1b document. -We still need to create an `\OutputIntents` Dictionary in the document to be fully compliant. -This is planned for the next release. - -This release features: - -- Minor bugfix to estimating width of a space character - - Useful in text extraction -- Bugfix in `TrueTypeFont` to build a proper `\Widths` array and `cmap` -- Fixes in `XMPDocumentInfo` class - - Title - - Author - - Creator - - CreatorTool -- Separate logic that writes `\Info` `Dictionary` - - This class now also writes the `XMP` `\Metadata` when needed - - Enables PDF/A-1b - - Added tests for PDF/A-1b (preservation of metadata) +# :mega: borb release 2.0.14 +This is a bugfix release. diff --git a/setup.py b/setup.py index c1fd85667..d0b1010aa 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ setuptools.setup( name="borb", - version="2.0.13", + version="2.0.14", author="Joris Schellekens", author_email="joris.schellekens.1989@gmail.com", description="borb is a library for reading, creating and manipulating PDF files in python.", diff --git a/tests/corpus/test_copy_document_compare_size.py b/tests/corpus/test_copy_document_compare_size.py index 4fe5981ba..456524d3c 100644 --- a/tests/corpus/test_copy_document_compare_size.py +++ b/tests/corpus/test_copy_document_compare_size.py @@ -42,7 +42,7 @@ def __init__(self, methodName="runTest"): self.number_of_fails: int = 0 self.memory_stats_per_document: typing.Dict[str, typing.Tuple[int, int]] = {} - @unittest.skip + #@unittest.skip def test_against_entire_corpus(self): pdf_file_names = os.listdir(self.corpus_dir) pdfs = [ diff --git a/tests/corpus/test_copy_document_resize_images_compare_size.py b/tests/corpus/test_copy_document_resize_images_compare_size.py index 2169acd51..39124c9b7 100644 --- a/tests/corpus/test_copy_document_resize_images_compare_size.py +++ b/tests/corpus/test_copy_document_resize_images_compare_size.py @@ -43,7 +43,7 @@ def __init__(self, methodName="runTest"): self.number_of_fails: int = 0 self.memory_stats_per_document: typing.Dict[str, typing.Tuple[int, int]] = {} - @unittest.skip + #@unittest.skip def test_against_entire_corpus(self): pdf_file_names = os.listdir(self.corpus_dir) pdfs = [ diff --git a/tests/corpus/test_extract_text_expect_ground_truth.py b/tests/corpus/test_extract_text_expect_ground_truth.py index fb8f47c0f..1c34821f4 100644 --- a/tests/corpus/test_extract_text_expect_ground_truth.py +++ b/tests/corpus/test_extract_text_expect_ground_truth.py @@ -46,7 +46,7 @@ def __init__(self, methodName="runTest"): self.time_per_document: typing.Dict[str, float] = {} self.fails_per_document: typing.Dict[str, int] = [] - @unittest.skip + #@unittest.skip def test_against_entire_corpus(self): pdf_file_names = os.listdir(self.corpus_dir) pdfs = [ diff --git a/tests/corpus/test_open_document.py b/tests/corpus/test_open_document.py index 9ff107d44..4e5f12bc1 100644 --- a/tests/corpus/test_open_document.py +++ b/tests/corpus/test_open_document.py @@ -1,4 +1,5 @@ import os +import time import typing import unittest from datetime import datetime @@ -7,6 +8,7 @@ import matplotlib.pyplot as plt from borb.io.read.types import Decimal +from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.layout.image.chart import Chart from borb.pdf.canvas.layout.layout_element import Alignment from borb.pdf.canvas.layout.list.unordered_list import UnorderedList @@ -40,9 +42,10 @@ def __init__(self, methodName="runTest"): self.number_of_documents: int = 0 self.number_of_passes: int = 0 self.number_of_fails: int = 0 + self.time_per_document: typing.Dict[str, float] = {} self.memory_stats_per_document: typing.Dict[str, typing.Tuple[int, int]] = {} - @unittest.skip + #@unittest.skip def test_against_entire_corpus(self): pdf_file_names = os.listdir(self.corpus_dir) pdfs = [ @@ -63,8 +66,11 @@ def _test_list_of_documents(self, documents: typing.List[Path]): for i, doc in enumerate(documents): try: print("processing %s [%d/%d] ..." % (doc.stem, i + 1, len(documents))) + delta: float = time.time() with open(doc, "rb") as pdf_file_handle: - PDF.loads(pdf_file_handle) + pdf = PDF.loads(pdf_file_handle) + delta = time.time() - delta + self.time_per_document[doc.stem] = delta self.number_of_passes += 1 except Exception as e: print("ERROR, document %s, %s" % (doc.name, str(e))) @@ -91,13 +97,60 @@ def _build_document(self): .add(Paragraph("Description", font="Helvetica-Bold")) .add( Paragraph( - "This test attempts to read each PDF in a corpus of roughly 1000 PDF documents." + "This test attempts to open each PDF in a corpus of roughly 1000 PDF documents." ) ) .set_padding_on_all_cells(Decimal(2), Decimal(2), Decimal(2), Decimal(2)) ) + graph_table: Table = Table( + number_of_rows=2, + number_of_columns=2, + margin_top=Decimal(5), + margin_bottom=Decimal(5), + ) + + graph_table.add( + Paragraph( + "Timing Information", + font_color=HexColor("72A276"), + font_size=Decimal(14), + font="Helvetica-Bold", + ) + ) + graph_table.add( + Paragraph( + "Failure Information", + font_color=HexColor("72A276"), + font_size=Decimal(14), + font="Helvetica-Bold", + ) + ) + # graph with timing information + labels = "<1s", "<5s", "<10s", "<30s", ">30s" + sizes = [ + sum([1 for k, v in self.time_per_document.items() if v < 1]), + sum([1 for k, v in self.time_per_document.items() if 1 <= v < 5]), + sum([1 for k, v in self.time_per_document.items() if 5 <= v < 10]), + sum([1 for k, v in self.time_per_document.items() if 10 <= v < 30]), + sum([1 for k, v in self.time_per_document.items() if v >= 30]), + ] + explode = (0.1, 0, 0, 0, 0) # only "explode" the 2nd slice (i.e. '<1s') + fig1, ax1 = plt.subplots() + ax1.pie( + sizes, + explode=explode, + labels=labels, + autopct="%1.1f%%", + shadow=True, + startangle=90, + colors=["#a5ffd6", "#56cbf9", "#0b3954", "#f1cd2e", "#de6449"], + ) + ax1.axis("equal") # Equal aspect ratio ensures that pie is drawn as a circle. + graph_table.add(Chart(plt.gcf(), width=Decimal(200), height=Decimal(200))) + + # graph with pass/fail information labels = ( "pass", "fail", @@ -118,7 +171,7 @@ def _build_document(self): colors=["#a5ffd6", "#56cbf9", "#0b3954", "#f1cd2e", "#de6449"], ) ax1.axis("equal") # Equal aspect ratio ensures that pie is drawn as a circle. - layout.add( + graph_table.add( Chart( plt.gcf(), width=Decimal(200), @@ -127,6 +180,9 @@ def _build_document(self): ) ) + graph_table.no_borders() + layout.add(graph_table) + # raw data ul: UnorderedList = UnorderedList() ul.add( @@ -141,6 +197,15 @@ def _build_document(self): % (self.number_of_fails, self.number_of_passes) ) ) + + avg_processing_time: float = sum( + [x for x in self.time_per_document.values()] + ) / len(self.time_per_document) + min_processing_time: float = min([x for x in self.time_per_document.values()]) + max_processing_time: float = max([x for x in self.time_per_document.values()]) + ul.add(Paragraph("avg. processing time: %f seconds" % avg_processing_time)) + ul.add(Paragraph("max. processing time: %f seconds" % max_processing_time)) + ul.add(Paragraph("min. processing time: %f seconds" % min_processing_time)) layout.add(ul) # write diff --git a/tests/output/test_add_all_rubber_stamp_annotations/output.pdf b/tests/output/test_add_all_rubber_stamp_annotations/output.pdf index b606ea9d7..86ffad4dd 100644 Binary files a/tests/output/test_add_all_rubber_stamp_annotations/output.pdf and b/tests/output/test_add_all_rubber_stamp_annotations/output.pdf differ diff --git a/tests/output/test_add_circle_annotation/output.pdf b/tests/output/test_add_circle_annotation/output.pdf index 0574f72ea..5f55c2756 100644 Binary files a/tests/output/test_add_circle_annotation/output.pdf and b/tests/output/test_add_circle_annotation/output.pdf differ diff --git a/tests/output/test_add_free_text_annotation/output_001.pdf b/tests/output/test_add_free_text_annotation/output_001.pdf index dd945dc9b..c7af05cea 100644 Binary files a/tests/output/test_add_free_text_annotation/output_001.pdf and b/tests/output/test_add_free_text_annotation/output_001.pdf differ diff --git a/tests/output/test_add_free_text_annotation/output_002.pdf b/tests/output/test_add_free_text_annotation/output_002.pdf index c80dfd0ba..b787090de 100644 Binary files a/tests/output/test_add_free_text_annotation/output_002.pdf and b/tests/output/test_add_free_text_annotation/output_002.pdf differ diff --git a/tests/output/test_add_highlight_annotation/output_001.pdf b/tests/output/test_add_highlight_annotation/output_001.pdf index 04fe829c1..16bef02f0 100644 Binary files a/tests/output/test_add_highlight_annotation/output_001.pdf and b/tests/output/test_add_highlight_annotation/output_001.pdf differ diff --git a/tests/output/test_add_highlight_annotation/output_002.pdf b/tests/output/test_add_highlight_annotation/output_002.pdf index 0ca88bbc4..f70debfca 100644 Binary files a/tests/output/test_add_highlight_annotation/output_002.pdf and b/tests/output/test_add_highlight_annotation/output_002.pdf differ diff --git a/tests/output/test_add_line_annotation/output_001.pdf b/tests/output/test_add_line_annotation/output_001.pdf index 9367c762e..df213b5ae 100644 Binary files a/tests/output/test_add_line_annotation/output_001.pdf and b/tests/output/test_add_line_annotation/output_001.pdf differ diff --git a/tests/output/test_add_line_annotation/output_002.pdf b/tests/output/test_add_line_annotation/output_002.pdf index 4fc1a3245..208f9d3c1 100644 Binary files a/tests/output/test_add_line_annotation/output_002.pdf and b/tests/output/test_add_line_annotation/output_002.pdf differ diff --git a/tests/output/test_add_outline/output_001.pdf b/tests/output/test_add_outline/output_001.pdf index b1a533008..0edf0248c 100644 Binary files a/tests/output/test_add_outline/output_001.pdf and b/tests/output/test_add_outline/output_001.pdf differ diff --git a/tests/output/test_add_outline/output_002.pdf b/tests/output/test_add_outline/output_002.pdf index 077847122..5990f595e 100644 Binary files a/tests/output/test_add_outline/output_002.pdf and b/tests/output/test_add_outline/output_002.pdf differ diff --git a/tests/output/test_add_polygon_annotation_using_line_art_factory/output.pdf b/tests/output/test_add_polygon_annotation_using_line_art_factory/output.pdf index 54829ce2b..42a298897 100644 Binary files a/tests/output/test_add_polygon_annotation_using_line_art_factory/output.pdf and b/tests/output/test_add_polygon_annotation_using_line_art_factory/output.pdf differ diff --git a/tests/output/test_add_polyline_annotation_using_line_art_factory/output.pdf b/tests/output/test_add_polyline_annotation_using_line_art_factory/output.pdf index 2a9b910d2..1c3166c34 100644 Binary files a/tests/output/test_add_polyline_annotation_using_line_art_factory/output.pdf and b/tests/output/test_add_polyline_annotation_using_line_art_factory/output.pdf differ diff --git a/tests/output/test_add_redact_annotation/output_001.pdf b/tests/output/test_add_redact_annotation/output_001.pdf index e7d7f730d..fa1389545 100644 Binary files a/tests/output/test_add_redact_annotation/output_001.pdf and b/tests/output/test_add_redact_annotation/output_001.pdf differ diff --git a/tests/output/test_add_redact_annotation/output_002.pdf b/tests/output/test_add_redact_annotation/output_002.pdf index de4c3a17f..d1ad28670 100644 Binary files a/tests/output/test_add_redact_annotation/output_002.pdf and b/tests/output/test_add_redact_annotation/output_002.pdf differ diff --git a/tests/output/test_add_redact_annotation/output_003.pdf b/tests/output/test_add_redact_annotation/output_003.pdf index 5b751ecd3..a08fc86f8 100644 Binary files a/tests/output/test_add_redact_annotation/output_003.pdf and b/tests/output/test_add_redact_annotation/output_003.pdf differ diff --git a/tests/output/test_add_redact_annotation/output_004.pdf b/tests/output/test_add_redact_annotation/output_004.pdf index b8642634b..166509587 100644 Binary files a/tests/output/test_add_redact_annotation/output_004.pdf and b/tests/output/test_add_redact_annotation/output_004.pdf differ diff --git a/tests/output/test_add_redact_annotation/output_005.pdf b/tests/output/test_add_redact_annotation/output_005.pdf index f86aedb03..ece1b6c0d 100644 Binary files a/tests/output/test_add_redact_annotation/output_005.pdf and b/tests/output/test_add_redact_annotation/output_005.pdf differ diff --git a/tests/output/test_add_remote_go_to_annotation/output_001.pdf b/tests/output/test_add_remote_go_to_annotation/output_001.pdf index ebe565dd1..32c16cc29 100644 Binary files a/tests/output/test_add_remote_go_to_annotation/output_001.pdf and b/tests/output/test_add_remote_go_to_annotation/output_001.pdf differ diff --git a/tests/output/test_add_remote_go_to_annotation/output_002.pdf b/tests/output/test_add_remote_go_to_annotation/output_002.pdf index b57ad875c..6e7759519 100644 Binary files a/tests/output/test_add_remote_go_to_annotation/output_002.pdf and b/tests/output/test_add_remote_go_to_annotation/output_002.pdf differ diff --git a/tests/output/test_add_square_annotation/output.pdf b/tests/output/test_add_square_annotation/output.pdf index 50f7d368c..cf95328df 100644 Binary files a/tests/output/test_add_square_annotation/output.pdf and b/tests/output/test_add_square_annotation/output.pdf differ diff --git a/tests/output/test_add_square_annotation_in_free_space/output_001.pdf b/tests/output/test_add_square_annotation_in_free_space/output_001.pdf index 912bf1c01..6288c8f48 100644 Binary files a/tests/output/test_add_square_annotation_in_free_space/output_001.pdf and b/tests/output/test_add_square_annotation_in_free_space/output_001.pdf differ diff --git a/tests/output/test_add_square_annotation_in_free_space/output_002.pdf b/tests/output/test_add_square_annotation_in_free_space/output_002.pdf index 2f2bac9c1..5052f0af5 100644 Binary files a/tests/output/test_add_square_annotation_in_free_space/output_002.pdf and b/tests/output/test_add_square_annotation_in_free_space/output_002.pdf differ diff --git a/tests/output/test_add_squiggle_annotation/output_001.pdf b/tests/output/test_add_squiggle_annotation/output_001.pdf index ad31244a2..99fd68958 100644 Binary files a/tests/output/test_add_squiggle_annotation/output_001.pdf and b/tests/output/test_add_squiggle_annotation/output_001.pdf differ diff --git a/tests/output/test_add_squiggle_annotation/output_002.pdf b/tests/output/test_add_squiggle_annotation/output_002.pdf index 475e9b113..3ea1097a8 100644 Binary files a/tests/output/test_add_squiggle_annotation/output_002.pdf and b/tests/output/test_add_squiggle_annotation/output_002.pdf differ diff --git a/tests/output/test_add_strikeout_annotation/output_001.pdf b/tests/output/test_add_strikeout_annotation/output_001.pdf index 57bb2673f..88079daf1 100644 Binary files a/tests/output/test_add_strikeout_annotation/output_001.pdf and b/tests/output/test_add_strikeout_annotation/output_001.pdf differ diff --git a/tests/output/test_add_strikeout_annotation/output_002.pdf b/tests/output/test_add_strikeout_annotation/output_002.pdf index 5ed14ff17..33cd1ba5c 100644 Binary files a/tests/output/test_add_strikeout_annotation/output_002.pdf and b/tests/output/test_add_strikeout_annotation/output_002.pdf differ diff --git a/tests/output/test_add_super_mario_annotation/output.pdf b/tests/output/test_add_super_mario_annotation/output.pdf index 86f139463..82a033209 100644 Binary files a/tests/output/test_add_super_mario_annotation/output.pdf and b/tests/output/test_add_super_mario_annotation/output.pdf differ diff --git a/tests/output/test_add_text_annotation/output_001.pdf b/tests/output/test_add_text_annotation/output_001.pdf index d80d953b2..43693af72 100644 Binary files a/tests/output/test_add_text_annotation/output_001.pdf and b/tests/output/test_add_text_annotation/output_001.pdf differ diff --git a/tests/output/test_add_text_annotation/output_002.pdf b/tests/output/test_add_text_annotation/output_002.pdf index 60362afcb..242eccd8f 100644 Binary files a/tests/output/test_add_text_annotation/output_002.pdf and b/tests/output/test_add_text_annotation/output_002.pdf differ diff --git a/tests/output/test_add_underline_annotation/output_001.pdf b/tests/output/test_add_underline_annotation/output_001.pdf index 790726ea7..170d81246 100644 Binary files a/tests/output/test_add_underline_annotation/output_001.pdf and b/tests/output/test_add_underline_annotation/output_001.pdf differ diff --git a/tests/output/test_add_underline_annotation/output_002.pdf b/tests/output/test_add_underline_annotation/output_002.pdf index e7a5f18e1..84c7d468b 100644 Binary files a/tests/output/test_add_underline_annotation/output_002.pdf and b/tests/output/test_add_underline_annotation/output_002.pdf differ diff --git a/tests/output/test_analogous_color_scheme/output.pdf b/tests/output/test_analogous_color_scheme/output.pdf index 3746ae339..a2644cbe3 100644 Binary files a/tests/output/test_analogous_color_scheme/output.pdf and b/tests/output/test_analogous_color_scheme/output.pdf differ diff --git a/tests/output/test_append_embedded_file/output_001.pdf b/tests/output/test_append_embedded_file/output_001.pdf index 397f5e432..a77b76a18 100644 Binary files a/tests/output/test_append_embedded_file/output_001.pdf and b/tests/output/test_append_embedded_file/output_001.pdf differ diff --git a/tests/output/test_append_embedded_file/output_002.pdf b/tests/output/test_append_embedded_file/output_002.pdf index 01d138713..c3d30a322 100644 Binary files a/tests/output/test_append_embedded_file/output_002.pdf and b/tests/output/test_append_embedded_file/output_002.pdf differ diff --git a/tests/output/test_apply_redaction_annotations/output_001.pdf b/tests/output/test_apply_redaction_annotations/output_001.pdf index 4bed222de..6adde4776 100644 Binary files a/tests/output/test_apply_redaction_annotations/output_001.pdf and b/tests/output/test_apply_redaction_annotations/output_001.pdf differ diff --git a/tests/output/test_apply_redaction_annotations/output_002.pdf b/tests/output/test_apply_redaction_annotations/output_002.pdf index ce9f988f1..1137b5992 100644 Binary files a/tests/output/test_apply_redaction_annotations/output_002.pdf and b/tests/output/test_apply_redaction_annotations/output_002.pdf differ diff --git a/tests/output/test_apply_redaction_annotations/output_003.pdf b/tests/output/test_apply_redaction_annotations/output_003.pdf index f45c2feac..a0625230b 100644 Binary files a/tests/output/test_apply_redaction_annotations/output_003.pdf and b/tests/output/test_apply_redaction_annotations/output_003.pdf differ diff --git a/tests/output/test_apply_redaction_annotations/output_004.pdf b/tests/output/test_apply_redaction_annotations/output_004.pdf index b4b7af3c5..a6bf319a1 100644 Binary files a/tests/output/test_apply_redaction_annotations/output_004.pdf and b/tests/output/test_apply_redaction_annotations/output_004.pdf differ diff --git a/tests/output/test_apply_redaction_annotations/output_005.pdf b/tests/output/test_apply_redaction_annotations/output_005.pdf index 9da63a8f6..d768986e0 100644 Binary files a/tests/output/test_apply_redaction_annotations/output_005.pdf and b/tests/output/test_apply_redaction_annotations/output_005.pdf differ diff --git a/tests/output/test_apply_redaction_annotations/output_006.pdf b/tests/output/test_apply_redaction_annotations/output_006.pdf index c7a74ebc8..72cd05f01 100644 Binary files a/tests/output/test_apply_redaction_annotations/output_006.pdf and b/tests/output/test_apply_redaction_annotations/output_006.pdf differ diff --git a/tests/output/test_browser_layout_inline_next_line/output.pdf b/tests/output/test_browser_layout_inline_next_line/output.pdf index b60af80af..492ee1aba 100644 Binary files a/tests/output/test_browser_layout_inline_next_line/output.pdf and b/tests/output/test_browser_layout_inline_next_line/output.pdf differ diff --git a/tests/output/test_change_info_dictionary_author/output_001.pdf b/tests/output/test_change_info_dictionary_author/output_001.pdf index 3bc562d4b..cf5100e23 100644 Binary files a/tests/output/test_change_info_dictionary_author/output_001.pdf and b/tests/output/test_change_info_dictionary_author/output_001.pdf differ diff --git a/tests/output/test_change_info_dictionary_author/output_002.pdf b/tests/output/test_change_info_dictionary_author/output_002.pdf index 56a8c7ac9..0e0e2cb5e 100644 Binary files a/tests/output/test_change_info_dictionary_author/output_002.pdf and b/tests/output/test_change_info_dictionary_author/output_002.pdf differ diff --git a/tests/output/test_concat_documents/output_000.pdf b/tests/output/test_concat_documents/output_000.pdf index d09f365c2..3fb7a0023 100644 Binary files a/tests/output/test_concat_documents/output_000.pdf and b/tests/output/test_concat_documents/output_000.pdf differ diff --git a/tests/output/test_concat_documents/output_001.pdf b/tests/output/test_concat_documents/output_001.pdf index 715c51bd5..5c2045abe 100644 Binary files a/tests/output/test_concat_documents/output_001.pdf and b/tests/output/test_concat_documents/output_001.pdf differ diff --git a/tests/output/test_concat_documents/output_002.pdf b/tests/output/test_concat_documents/output_002.pdf index 9d5cb4a6f..6e910d01e 100644 Binary files a/tests/output/test_concat_documents/output_002.pdf and b/tests/output/test_concat_documents/output_002.pdf differ diff --git a/tests/output/test_copy_document_compare_size/0187_page_0_copy.pdf b/tests/output/test_copy_document_compare_size/0187_page_0_copy.pdf new file mode 100644 index 000000000..5ec2754e4 Binary files /dev/null and b/tests/output/test_copy_document_compare_size/0187_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_compare_size/0430_page_0_copy.pdf b/tests/output/test_copy_document_compare_size/0430_page_0_copy.pdf new file mode 100644 index 000000000..b6a481e84 Binary files /dev/null and b/tests/output/test_copy_document_compare_size/0430_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_compare_size/0462_page_0_copy.pdf b/tests/output/test_copy_document_compare_size/0462_page_0_copy.pdf new file mode 100644 index 000000000..42c5d874b Binary files /dev/null and b/tests/output/test_copy_document_compare_size/0462_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_compare_size/0495_page_0_copy.pdf b/tests/output/test_copy_document_compare_size/0495_page_0_copy.pdf new file mode 100644 index 000000000..d741e4b59 Binary files /dev/null and b/tests/output/test_copy_document_compare_size/0495_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_compare_size/0594_page_0_copy.pdf b/tests/output/test_copy_document_compare_size/0594_page_0_copy.pdf new file mode 100644 index 000000000..37981df87 Binary files /dev/null and b/tests/output/test_copy_document_compare_size/0594_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_compare_size/output.pdf b/tests/output/test_copy_document_compare_size/output.pdf index a947d27f3..06183b796 100644 Binary files a/tests/output/test_copy_document_compare_size/output.pdf and b/tests/output/test_copy_document_compare_size/output.pdf differ diff --git a/tests/output/test_copy_document_resize_images_compare_size/0187_page_0_copy.pdf b/tests/output/test_copy_document_resize_images_compare_size/0187_page_0_copy.pdf new file mode 100644 index 000000000..715bd87bc Binary files /dev/null and b/tests/output/test_copy_document_resize_images_compare_size/0187_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_resize_images_compare_size/0430_page_0_copy.pdf b/tests/output/test_copy_document_resize_images_compare_size/0430_page_0_copy.pdf new file mode 100644 index 000000000..f0516bb17 Binary files /dev/null and b/tests/output/test_copy_document_resize_images_compare_size/0430_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_resize_images_compare_size/0462_page_0_copy.pdf b/tests/output/test_copy_document_resize_images_compare_size/0462_page_0_copy.pdf new file mode 100644 index 000000000..4a31bf5e9 Binary files /dev/null and b/tests/output/test_copy_document_resize_images_compare_size/0462_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_resize_images_compare_size/0495_page_0_copy.pdf b/tests/output/test_copy_document_resize_images_compare_size/0495_page_0_copy.pdf new file mode 100644 index 000000000..cb4cf3b22 Binary files /dev/null and b/tests/output/test_copy_document_resize_images_compare_size/0495_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_resize_images_compare_size/0594_page_0_copy.pdf b/tests/output/test_copy_document_resize_images_compare_size/0594_page_0_copy.pdf new file mode 100644 index 000000000..c1d2fdedd Binary files /dev/null and b/tests/output/test_copy_document_resize_images_compare_size/0594_page_0_copy.pdf differ diff --git a/tests/output/test_copy_document_resize_images_compare_size/output.pdf b/tests/output/test_copy_document_resize_images_compare_size/output.pdf index 18036f258..e1bd1a8dc 100644 Binary files a/tests/output/test_copy_document_resize_images_compare_size/output.pdf and b/tests/output/test_copy_document_resize_images_compare_size/output.pdf differ diff --git a/tests/output/test_count_annotations/output_001.pdf b/tests/output/test_count_annotations/output_001.pdf index 48d0e0e91..8ad50225f 100644 Binary files a/tests/output/test_count_annotations/output_001.pdf and b/tests/output/test_count_annotations/output_001.pdf differ diff --git a/tests/output/test_count_annotations/output_002.pdf b/tests/output/test_count_annotations/output_002.pdf index eb6695b4e..2833116b2 100644 Binary files a/tests/output/test_count_annotations/output_002.pdf and b/tests/output/test_count_annotations/output_002.pdf differ diff --git a/tests/output/test_create_document_with_output_intent/output_001.pdf b/tests/output/test_create_document_with_output_intent/output_001.pdf index 6e6c8cd10..73b068f9d 100644 Binary files a/tests/output/test_create_document_with_output_intent/output_001.pdf and b/tests/output/test_create_document_with_output_intent/output_001.pdf differ diff --git a/tests/output/test_detect_table/input_000.pdf b/tests/output/test_detect_table/input_000.pdf index c633e6990..11ab4636b 100644 Binary files a/tests/output/test_detect_table/input_000.pdf and b/tests/output/test_detect_table/input_000.pdf differ diff --git a/tests/output/test_detect_table/input_001.pdf b/tests/output/test_detect_table/input_001.pdf index 7286be7b2..f7dcb7635 100644 Binary files a/tests/output/test_detect_table/input_001.pdf and b/tests/output/test_detect_table/input_001.pdf differ diff --git a/tests/output/test_detect_table/input_002.pdf b/tests/output/test_detect_table/input_002.pdf index ae051d240..c805d0561 100644 Binary files a/tests/output/test_detect_table/input_002.pdf and b/tests/output/test_detect_table/input_002.pdf differ diff --git a/tests/output/test_detect_table/input_003.pdf b/tests/output/test_detect_table/input_003.pdf index 7bf7caa29..6eec8a58e 100644 Binary files a/tests/output/test_detect_table/input_003.pdf and b/tests/output/test_detect_table/input_003.pdf differ diff --git a/tests/output/test_detect_table/input_004.pdf b/tests/output/test_detect_table/input_004.pdf index 1b6f6ab86..545bf1548 100644 Binary files a/tests/output/test_detect_table/input_004.pdf and b/tests/output/test_detect_table/input_004.pdf differ diff --git a/tests/output/test_detect_table/input_005.pdf b/tests/output/test_detect_table/input_005.pdf index df25bd48b..5d66eabc1 100644 Binary files a/tests/output/test_detect_table/input_005.pdf and b/tests/output/test_detect_table/input_005.pdf differ diff --git a/tests/output/test_detect_table/input_006.pdf b/tests/output/test_detect_table/input_006.pdf index 5d3804bc0..7f2e0fd7e 100644 Binary files a/tests/output/test_detect_table/input_006.pdf and b/tests/output/test_detect_table/input_006.pdf differ diff --git a/tests/output/test_detect_table/output_000.pdf b/tests/output/test_detect_table/output_000.pdf index a8ba991d1..ee90bef75 100644 Binary files a/tests/output/test_detect_table/output_000.pdf and b/tests/output/test_detect_table/output_000.pdf differ diff --git a/tests/output/test_detect_table/output_001.pdf b/tests/output/test_detect_table/output_001.pdf index 1f9f34654..baa72df1d 100644 Binary files a/tests/output/test_detect_table/output_001.pdf and b/tests/output/test_detect_table/output_001.pdf differ diff --git a/tests/output/test_detect_table/output_002.pdf b/tests/output/test_detect_table/output_002.pdf index 068ad0359..aac50c306 100644 Binary files a/tests/output/test_detect_table/output_002.pdf and b/tests/output/test_detect_table/output_002.pdf differ diff --git a/tests/output/test_detect_table/output_003.pdf b/tests/output/test_detect_table/output_003.pdf index 81c7f4515..95539fa54 100644 Binary files a/tests/output/test_detect_table/output_003.pdf and b/tests/output/test_detect_table/output_003.pdf differ diff --git a/tests/output/test_detect_table/output_004.pdf b/tests/output/test_detect_table/output_004.pdf index 030e0020e..4998253c6 100644 Binary files a/tests/output/test_detect_table/output_004.pdf and b/tests/output/test_detect_table/output_004.pdf differ diff --git a/tests/output/test_detect_table/output_005.pdf b/tests/output/test_detect_table/output_005.pdf index c08888a8e..68b4c0120 100644 Binary files a/tests/output/test_detect_table/output_005.pdf and b/tests/output/test_detect_table/output_005.pdf differ diff --git a/tests/output/test_detect_table/output_006.pdf b/tests/output/test_detect_table/output_006.pdf index 6fceeb9a8..84417b327 100644 Binary files a/tests/output/test_detect_table/output_006.pdf and b/tests/output/test_detect_table/output_006.pdf differ diff --git a/tests/output/test_digit_placement_ubuntu_font/output_001.pdf b/tests/output/test_digit_placement_ubuntu_font/output_001.pdf index f463eef35..8a99928bf 100644 Binary files a/tests/output/test_digit_placement_ubuntu_font/output_001.pdf and b/tests/output/test_digit_placement_ubuntu_font/output_001.pdf differ diff --git a/tests/output/test_digit_placement_ubuntu_font/output_001.png b/tests/output/test_digit_placement_ubuntu_font/output_001.png index 35cf41b38..6d4d859dd 100644 Binary files a/tests/output/test_digit_placement_ubuntu_font/output_001.png and b/tests/output/test_digit_placement_ubuntu_font/output_001.png differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_000.pdf b/tests/output/test_export_html_to_pdf/example_html_input_000.pdf index ba6397db1..6611e20dc 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_000.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_000.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_001.pdf b/tests/output/test_export_html_to_pdf/example_html_input_001.pdf index 507bb6694..4de391fa6 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_001.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_001.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_002.pdf b/tests/output/test_export_html_to_pdf/example_html_input_002.pdf index 4dcd742d5..64f582a7f 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_002.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_002.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_003.pdf b/tests/output/test_export_html_to_pdf/example_html_input_003.pdf index 75de67846..02c823767 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_003.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_003.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_004.pdf b/tests/output/test_export_html_to_pdf/example_html_input_004.pdf index d71acb9e5..e59d73326 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_004.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_004.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_005.pdf b/tests/output/test_export_html_to_pdf/example_html_input_005.pdf index 430bdc721..277517900 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_005.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_005.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_006.pdf b/tests/output/test_export_html_to_pdf/example_html_input_006.pdf index 72170cf3d..9e24ee5c5 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_006.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_006.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_007.pdf b/tests/output/test_export_html_to_pdf/example_html_input_007.pdf index 28c8a1695..9f86ec699 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_007.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_007.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_008.pdf b/tests/output/test_export_html_to_pdf/example_html_input_008.pdf index eedd43665..9ccfbf17d 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_008.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_008.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_009.pdf b/tests/output/test_export_html_to_pdf/example_html_input_009.pdf index 9725f1062..1c1a5efe8 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_009.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_009.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_010.pdf b/tests/output/test_export_html_to_pdf/example_html_input_010.pdf index 87a23b5f8..bce48d859 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_010.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_010.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_011.pdf b/tests/output/test_export_html_to_pdf/example_html_input_011.pdf index 1caeb0d6f..92cd802a6 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_011.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_011.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_012.pdf b/tests/output/test_export_html_to_pdf/example_html_input_012.pdf index 9c5a30b6d..7301d9934 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_012.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_012.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_013.pdf b/tests/output/test_export_html_to_pdf/example_html_input_013.pdf index 19bbd926c..c4dae73d1 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_013.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_013.pdf differ diff --git a/tests/output/test_export_html_to_pdf/example_html_input_014.pdf b/tests/output/test_export_html_to_pdf/example_html_input_014.pdf index 7829c42b5..5ad8379f9 100644 Binary files a/tests/output/test_export_html_to_pdf/example_html_input_014.pdf and b/tests/output/test_export_html_to_pdf/example_html_input_014.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-001.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-001.md.pdf index a45ff8267..a8450127c 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-001.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-001.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-002.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-002.md.pdf index 4e0c757bd..472e4a0d1 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-002.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-002.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-003.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-003.md.pdf index 46fb3e5bd..42548d695 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-003.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-003.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-004.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-004.md.pdf index a4a6a8a9f..a79892d03 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-004.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-004.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-005.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-005.md.pdf index 2ef95127b..b8a78fb45 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-005.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-005.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-006.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-006.md.pdf index 4ba32780d..80000bedf 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-006.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-006.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-007.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-007.md.pdf index c31f846ea..f7dd98598 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-007.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-007.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-008.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-008.md.pdf index 643583d8c..75f2145b2 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-008.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-008.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-009.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-009.md.pdf index 288c26094..d9ffd5a54 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-009.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-009.md.pdf differ diff --git a/tests/output/test_export_markdown_to_pdf/example-markdown-input-010.md.pdf b/tests/output/test_export_markdown_to_pdf/example-markdown-input-010.md.pdf index 2e8aa8ba6..6475a5bb3 100644 Binary files a/tests/output/test_export_markdown_to_pdf/example-markdown-input-010.md.pdf and b/tests/output/test_export_markdown_to_pdf/example-markdown-input-010.md.pdf differ diff --git a/tests/output/test_export_to_mp3/output.mp3 b/tests/output/test_export_to_mp3/output.mp3 index 769c46f5a..16a7e16fe 100644 Binary files a/tests/output/test_export_to_mp3/output.mp3 and b/tests/output/test_export_to_mp3/output.mp3 differ diff --git a/tests/output/test_extract_colors/output_001.pdf b/tests/output/test_extract_colors/output_001.pdf index 8cca98bdf..77767d3c6 100644 Binary files a/tests/output/test_extract_colors/output_001.pdf and b/tests/output/test_extract_colors/output_001.pdf differ diff --git a/tests/output/test_extract_colors/output_002.pdf b/tests/output/test_extract_colors/output_002.pdf index 13a52f1bd..c9a096624 100644 Binary files a/tests/output/test_extract_colors/output_002.pdf and b/tests/output/test_extract_colors/output_002.pdf differ diff --git a/tests/output/test_extract_colors/output_002.png b/tests/output/test_extract_colors/output_002.png index 53fc5a2e6..443b70a0c 100644 Binary files a/tests/output/test_extract_colors/output_002.png and b/tests/output/test_extract_colors/output_002.png differ diff --git a/tests/output/test_extract_courier_text/output_001.pdf b/tests/output/test_extract_courier_text/output_001.pdf index 0dcd81e3f..0e0c7be9e 100644 Binary files a/tests/output/test_extract_courier_text/output_001.pdf and b/tests/output/test_extract_courier_text/output_001.pdf differ diff --git a/tests/output/test_extract_font_names/output_001.pdf b/tests/output/test_extract_font_names/output_001.pdf index ff7a5b65f..44c5a950d 100644 Binary files a/tests/output/test_extract_font_names/output_001.pdf and b/tests/output/test_extract_font_names/output_001.pdf differ diff --git a/tests/output/test_extract_font_names/output_002.pdf b/tests/output/test_extract_font_names/output_002.pdf index 5c07408ce..fb3c8a45f 100644 Binary files a/tests/output/test_extract_font_names/output_002.pdf and b/tests/output/test_extract_font_names/output_002.pdf differ diff --git a/tests/output/test_extract_keywords/output_001.pdf b/tests/output/test_extract_keywords/output_001.pdf index a313e85f8..c8d2dc7ba 100644 Binary files a/tests/output/test_extract_keywords/output_001.pdf and b/tests/output/test_extract_keywords/output_001.pdf differ diff --git a/tests/output/test_extract_keywords/output_002.pdf b/tests/output/test_extract_keywords/output_002.pdf index d87dad8f4..3a58db84b 100644 Binary files a/tests/output/test_extract_keywords/output_002.pdf and b/tests/output/test_extract_keywords/output_002.pdf differ diff --git a/tests/output/test_extract_keywords/output_003.pdf b/tests/output/test_extract_keywords/output_003.pdf index 6753945af..ddd9112d6 100644 Binary files a/tests/output/test_extract_keywords/output_003.pdf and b/tests/output/test_extract_keywords/output_003.pdf differ diff --git a/tests/output/test_extract_red_text/output_001.pdf b/tests/output/test_extract_red_text/output_001.pdf index 16070d5eb..ae5dcc803 100644 Binary files a/tests/output/test_extract_red_text/output_001.pdf and b/tests/output/test_extract_red_text/output_001.pdf differ diff --git a/tests/output/test_extract_regex/output_001.pdf b/tests/output/test_extract_regex/output_001.pdf index d912521d7..838032c33 100644 Binary files a/tests/output/test_extract_regex/output_001.pdf and b/tests/output/test_extract_regex/output_001.pdf differ diff --git a/tests/output/test_extract_regex/output_002.pdf b/tests/output/test_extract_regex/output_002.pdf index fe4056fa9..dbe691a4e 100644 Binary files a/tests/output/test_extract_regex/output_002.pdf and b/tests/output/test_extract_regex/output_002.pdf differ diff --git a/tests/output/test_extract_text/output_001.pdf b/tests/output/test_extract_text/output_001.pdf index 350c9a6fd..3b99b1ff0 100644 Binary files a/tests/output/test_extract_text/output_001.pdf and b/tests/output/test_extract_text/output_001.pdf differ diff --git a/tests/output/test_extract_text/output_002.pdf b/tests/output/test_extract_text/output_002.pdf new file mode 100644 index 000000000..d9647e815 Binary files /dev/null and b/tests/output/test_extract_text/output_002.pdf differ diff --git a/tests/output/test_extract_text_expect_ground_truth/output.pdf b/tests/output/test_extract_text_expect_ground_truth/output.pdf index 8b15139cb..ee9e63814 100644 Binary files a/tests/output/test_extract_text_expect_ground_truth/output.pdf and b/tests/output/test_extract_text_expect_ground_truth/output.pdf differ diff --git a/tests/output/test_extract_text_from_self_made_invoice/output.pdf b/tests/output/test_extract_text_from_self_made_invoice/output.pdf index 92ee6b4ac..abdecec73 100644 Binary files a/tests/output/test_extract_text_from_self_made_invoice/output.pdf and b/tests/output/test_extract_text_from_self_made_invoice/output.pdf differ diff --git a/tests/output/test_margin_and_padding/output_001.pdf b/tests/output/test_margin_and_padding/output_001.pdf index a477e42d7..f5bc0ef38 100644 Binary files a/tests/output/test_margin_and_padding/output_001.pdf and b/tests/output/test_margin_and_padding/output_001.pdf differ diff --git a/tests/output/test_margin_and_padding/output_002.pdf b/tests/output/test_margin_and_padding/output_002.pdf index d171aff97..98e3b5c3d 100644 Binary files a/tests/output/test_margin_and_padding/output_002.pdf and b/tests/output/test_margin_and_padding/output_002.pdf differ diff --git a/tests/output/test_modify_image/output_001.pdf b/tests/output/test_modify_image/output_001.pdf index 499ab4d39..a26eb12ea 100644 Binary files a/tests/output/test_modify_image/output_001.pdf and b/tests/output/test_modify_image/output_001.pdf differ diff --git a/tests/output/test_modify_image/output_002.pdf b/tests/output/test_modify_image/output_002.pdf index 38f9ee3e1..7703a83cc 100644 Binary files a/tests/output/test_modify_image/output_002.pdf and b/tests/output/test_modify_image/output_002.pdf differ diff --git a/tests/output/test_open_document/output.pdf b/tests/output/test_open_document/output.pdf index be02cc49b..5d8736cab 100644 Binary files a/tests/output/test_open_document/output.pdf and b/tests/output/test_open_document/output.pdf differ diff --git a/tests/output/test_open_encrypted_document/output.pdf b/tests/output/test_open_encrypted_document/output.pdf index bc062a40f..a563efaef 100644 Binary files a/tests/output/test_open_encrypted_document/output.pdf and b/tests/output/test_open_encrypted_document/output.pdf differ diff --git a/tests/output/test_optimize_images/output_001.pdf b/tests/output/test_optimize_images/output_001.pdf index 06623154d..e64f499e9 100644 Binary files a/tests/output/test_optimize_images/output_001.pdf and b/tests/output/test_optimize_images/output_001.pdf differ diff --git a/tests/output/test_page_has_empty_resource_dictionary/output_001.pdf b/tests/output/test_page_has_empty_resource_dictionary/output_001.pdf index 26e5ff43c..62e7780a9 100644 Binary files a/tests/output/test_page_has_empty_resource_dictionary/output_001.pdf and b/tests/output/test_page_has_empty_resource_dictionary/output_001.pdf differ diff --git a/tests/output/test_redact_common_regular_expressions/output_001.pdf b/tests/output/test_redact_common_regular_expressions/output_001.pdf index 3307e8f89..87a9ce8fa 100644 Binary files a/tests/output/test_redact_common_regular_expressions/output_001.pdf and b/tests/output/test_redact_common_regular_expressions/output_001.pdf differ diff --git a/tests/output/test_redact_common_regular_expressions/output_002.pdf b/tests/output/test_redact_common_regular_expressions/output_002.pdf index e0dd53eef..23fbe4708 100644 Binary files a/tests/output/test_redact_common_regular_expressions/output_002.pdf and b/tests/output/test_redact_common_regular_expressions/output_002.pdf differ diff --git a/tests/output/test_redact_common_regular_expressions/output_003.pdf b/tests/output/test_redact_common_regular_expressions/output_003.pdf index ef4acb0a4..7e1c977d7 100644 Binary files a/tests/output/test_redact_common_regular_expressions/output_003.pdf and b/tests/output/test_redact_common_regular_expressions/output_003.pdf differ diff --git a/tests/output/test_remove_annotation/output_001.pdf b/tests/output/test_remove_annotation/output_001.pdf index b89403982..46e4f7d0a 100644 Binary files a/tests/output/test_remove_annotation/output_001.pdf and b/tests/output/test_remove_annotation/output_001.pdf differ diff --git a/tests/output/test_remove_annotation/output_002.pdf b/tests/output/test_remove_annotation/output_002.pdf index d970c37ed..af5a8059c 100644 Binary files a/tests/output/test_remove_annotation/output_002.pdf and b/tests/output/test_remove_annotation/output_002.pdf differ diff --git a/tests/output/test_remove_annotation/output_003.pdf b/tests/output/test_remove_annotation/output_003.pdf index 33f43fc1b..f571d6e50 100644 Binary files a/tests/output/test_remove_annotation/output_003.pdf and b/tests/output/test_remove_annotation/output_003.pdf differ diff --git a/tests/output/test_remove_page/output_001.pdf b/tests/output/test_remove_page/output_001.pdf index 2c17b755b..8f2a04acb 100644 Binary files a/tests/output/test_remove_page/output_001.pdf and b/tests/output/test_remove_page/output_001.pdf differ diff --git a/tests/output/test_remove_page/output_002.pdf b/tests/output/test_remove_page/output_002.pdf index 5227a809d..7060d3899 100644 Binary files a/tests/output/test_remove_page/output_002.pdf and b/tests/output/test_remove_page/output_002.pdf differ diff --git a/tests/output/test_remove_page/output_003.pdf b/tests/output/test_remove_page/output_003.pdf index a2fd2a61a..1b5f9ef73 100644 Binary files a/tests/output/test_remove_page/output_003.pdf and b/tests/output/test_remove_page/output_003.pdf differ diff --git a/tests/output/test_remove_page/output_004.pdf b/tests/output/test_remove_page/output_004.pdf index 1efdbf1f8..b80b0cf35 100644 Binary files a/tests/output/test_remove_page/output_004.pdf and b/tests/output/test_remove_page/output_004.pdf differ diff --git a/tests/output/test_rotate_page/output_001.pdf b/tests/output/test_rotate_page/output_001.pdf index 801fcec10..c4172829d 100644 Binary files a/tests/output/test_rotate_page/output_001.pdf and b/tests/output/test_rotate_page/output_001.pdf differ diff --git a/tests/output/test_rotate_page/output_002.pdf b/tests/output/test_rotate_page/output_002.pdf index be08dd9a7..21e94c5ca 100644 Binary files a/tests/output/test_rotate_page/output_002.pdf and b/tests/output/test_rotate_page/output_002.pdf differ diff --git a/tests/output/test_rotate_page/output_003.pdf b/tests/output/test_rotate_page/output_003.pdf index 766252247..85e59ca26 100644 Binary files a/tests/output/test_rotate_page/output_003.pdf and b/tests/output/test_rotate_page/output_003.pdf differ diff --git a/tests/output/test_split_complementary_color_scheme/output.pdf b/tests/output/test_split_complementary_color_scheme/output.pdf index af468145d..bb6c3d869 100644 Binary files a/tests/output/test_split_complementary_color_scheme/output.pdf and b/tests/output/test_split_complementary_color_scheme/output.pdf differ diff --git a/tests/output/test_tetradic_rectangle_color_scheme/output.pdf b/tests/output/test_tetradic_rectangle_color_scheme/output.pdf index 12f65c786..5bb0d30e2 100644 Binary files a/tests/output/test_tetradic_rectangle_color_scheme/output.pdf and b/tests/output/test_tetradic_rectangle_color_scheme/output.pdf differ diff --git a/tests/output/test_tetradic_square_color_scheme/output.pdf b/tests/output/test_tetradic_square_color_scheme/output.pdf index baeb1c238..877079e1f 100644 Binary files a/tests/output/test_tetradic_square_color_scheme/output.pdf and b/tests/output/test_tetradic_square_color_scheme/output.pdf differ diff --git a/tests/output/test_triadic_color_scheme/output.pdf b/tests/output/test_triadic_color_scheme/output.pdf index f4671f935..09dea1678 100644 Binary files a/tests/output/test_triadic_color_scheme/output.pdf and b/tests/output/test_triadic_color_scheme/output.pdf differ diff --git a/tests/output/test_write_2_scatter_plots/output.pdf b/tests/output/test_write_2_scatter_plots/output.pdf index bb943a21d..d9098e5ac 100644 Binary files a/tests/output/test_write_2_scatter_plots/output.pdf and b/tests/output/test_write_2_scatter_plots/output.pdf differ diff --git a/tests/output/test_write_3d_density_chart/output.pdf b/tests/output/test_write_3d_density_chart/output.pdf index 233cdfc3a..5aeea8eb8 100644 Binary files a/tests/output/test_write_3d_density_chart/output.pdf and b/tests/output/test_write_3d_density_chart/output.pdf differ diff --git a/tests/output/test_write_3d_surface_plot/output.pdf b/tests/output/test_write_3d_surface_plot/output.pdf index 18755fd45..d2a778376 100644 Binary files a/tests/output/test_write_3d_surface_plot/output.pdf and b/tests/output/test_write_3d_surface_plot/output.pdf differ diff --git a/tests/output/test_write_all_types_of_barcode/output.pdf b/tests/output/test_write_all_types_of_barcode/output.pdf index 2e1c6485c..f42768629 100644 Binary files a/tests/output/test_write_all_types_of_barcode/output.pdf and b/tests/output/test_write_all_types_of_barcode/output.pdf differ diff --git a/tests/output/test_write_battleship/output.pdf b/tests/output/test_write_battleship/output.pdf index 15e03fb9e..629e04702 100644 Binary files a/tests/output/test_write_battleship/output.pdf and b/tests/output/test_write_battleship/output.pdf differ diff --git a/tests/output/test_write_blobs/output.pdf b/tests/output/test_write_blobs/output.pdf index 2e1bf875c..bafba37d9 100644 Binary files a/tests/output/test_write_blobs/output.pdf and b/tests/output/test_write_blobs/output.pdf differ diff --git a/tests/output/test_write_check_box/output_001.pdf b/tests/output/test_write_check_box/output_001.pdf index 1d48baaa5..3cabb86e9 100644 Binary files a/tests/output/test_write_check_box/output_001.pdf and b/tests/output/test_write_check_box/output_001.pdf differ diff --git a/tests/output/test_write_check_box/output_002.pdf b/tests/output/test_write_check_box/output_002.pdf index 87ed9cd11..ad12017ac 100644 Binary files a/tests/output/test_write_check_box/output_002.pdf and b/tests/output/test_write_check_box/output_002.pdf differ diff --git a/tests/output/test_write_chunk_of_text/output.pdf b/tests/output/test_write_chunk_of_text/output.pdf index 82e8d5208..e8cf9d3cd 100644 Binary files a/tests/output/test_write_chunk_of_text/output.pdf and b/tests/output/test_write_chunk_of_text/output.pdf differ diff --git a/tests/output/test_write_chunk_of_text_escaped_chars/output.pdf b/tests/output/test_write_chunk_of_text_escaped_chars/output.pdf index 7178a5024..066d652a8 100644 Binary files a/tests/output/test_write_chunk_of_text_escaped_chars/output.pdf and b/tests/output/test_write_chunk_of_text_escaped_chars/output.pdf differ diff --git a/tests/output/test_write_chunk_of_text_in_rainbow_colors/output.pdf b/tests/output/test_write_chunk_of_text_in_rainbow_colors/output.pdf index c1a6a09b2..4d0c2bfc9 100644 Binary files a/tests/output/test_write_chunk_of_text_in_rainbow_colors/output.pdf and b/tests/output/test_write_chunk_of_text_in_rainbow_colors/output.pdf differ diff --git a/tests/output/test_write_chunks_of_text/output_001.pdf b/tests/output/test_write_chunks_of_text/output_001.pdf index 5dd428fe4..3dd1585bd 100644 Binary files a/tests/output/test_write_chunks_of_text/output_001.pdf and b/tests/output/test_write_chunks_of_text/output_001.pdf differ diff --git a/tests/output/test_write_chunks_of_text/output_002.pdf b/tests/output/test_write_chunks_of_text/output_002.pdf index 862196040..af2c0a0f8 100644 Binary files a/tests/output/test_write_chunks_of_text/output_002.pdf and b/tests/output/test_write_chunks_of_text/output_002.pdf differ diff --git a/tests/output/test_write_chunks_of_text/output_003.pdf b/tests/output/test_write_chunks_of_text/output_003.pdf index 2ef3955e4..25007fdd7 100644 Binary files a/tests/output/test_write_chunks_of_text/output_003.pdf and b/tests/output/test_write_chunks_of_text/output_003.pdf differ diff --git a/tests/output/test_write_chunks_of_text/output_004.pdf b/tests/output/test_write_chunks_of_text/output_004.pdf index 5d567599e..2face1ca5 100644 Binary files a/tests/output/test_write_chunks_of_text/output_004.pdf and b/tests/output/test_write_chunks_of_text/output_004.pdf differ diff --git a/tests/output/test_write_chunks_of_text/output_005.pdf b/tests/output/test_write_chunks_of_text/output_005.pdf index f5cb5dd0b..da1822b0f 100644 Binary files a/tests/output/test_write_chunks_of_text/output_005.pdf and b/tests/output/test_write_chunks_of_text/output_005.pdf differ diff --git a/tests/output/test_write_chunks_of_text_preserves_bounding_boxes/output.pdf b/tests/output/test_write_chunks_of_text_preserves_bounding_boxes/output.pdf index 58bed927d..bd4a7416d 100644 Binary files a/tests/output/test_write_chunks_of_text_preserves_bounding_boxes/output.pdf and b/tests/output/test_write_chunks_of_text_preserves_bounding_boxes/output.pdf differ diff --git a/tests/output/test_write_code_128_barcode/output.pdf b/tests/output/test_write_code_128_barcode/output.pdf index 8b2e39a99..0a5422459 100644 Binary files a/tests/output/test_write_code_128_barcode/output.pdf and b/tests/output/test_write_code_128_barcode/output.pdf differ diff --git a/tests/output/test_write_code_128_barcode_in_color/output.pdf b/tests/output/test_write_code_128_barcode_in_color/output.pdf index c0f2cf18e..b90298639 100644 Binary files a/tests/output/test_write_code_128_barcode_in_color/output.pdf and b/tests/output/test_write_code_128_barcode_in_color/output.pdf differ diff --git a/tests/output/test_write_codeblock/output.pdf b/tests/output/test_write_codeblock/output.pdf index 68c6d6d1a..2834d951d 100644 Binary files a/tests/output/test_write_codeblock/output.pdf and b/tests/output/test_write_codeblock/output.pdf differ diff --git a/tests/output/test_write_dragon_curve/output.pdf b/tests/output/test_write_dragon_curve/output.pdf index b91adcc33..79fe91e73 100644 Binary files a/tests/output/test_write_dragon_curve/output.pdf and b/tests/output/test_write_dragon_curve/output.pdf differ diff --git a/tests/output/test_write_drop_down_list/output_001.pdf b/tests/output/test_write_drop_down_list/output_001.pdf index 92460e102..6e28f494d 100644 Binary files a/tests/output/test_write_drop_down_list/output_001.pdf and b/tests/output/test_write_drop_down_list/output_001.pdf differ diff --git a/tests/output/test_write_drop_down_list/output_002.pdf b/tests/output/test_write_drop_down_list/output_002.pdf index 0e097e774..0e5635ef1 100644 Binary files a/tests/output/test_write_drop_down_list/output_002.pdf and b/tests/output/test_write_drop_down_list/output_002.pdf differ diff --git a/tests/output/test_write_emoji/output.pdf b/tests/output/test_write_emoji/output.pdf index 7dd1efb06..ea690fe6b 100644 Binary files a/tests/output/test_write_emoji/output.pdf and b/tests/output/test_write_emoji/output.pdf differ diff --git a/tests/output/test_write_empty_document/output.pdf b/tests/output/test_write_empty_document/output.pdf index b706a708f..cd6413f47 100644 Binary files a/tests/output/test_write_empty_document/output.pdf and b/tests/output/test_write_empty_document/output.pdf differ diff --git a/tests/output/test_write_fixed_column_width_table/output_001.pdf b/tests/output/test_write_fixed_column_width_table/output_001.pdf index 70a00c398..b227eabc2 100644 Binary files a/tests/output/test_write_fixed_column_width_table/output_001.pdf and b/tests/output/test_write_fixed_column_width_table/output_001.pdf differ diff --git a/tests/output/test_write_fixed_column_width_table/output_002.pdf b/tests/output/test_write_fixed_column_width_table/output_002.pdf index 6a6879660..20f4cbf9f 100644 Binary files a/tests/output/test_write_fixed_column_width_table/output_002.pdf and b/tests/output/test_write_fixed_column_width_table/output_002.pdf differ diff --git a/tests/output/test_write_fixed_column_width_table/output_003.pdf b/tests/output/test_write_fixed_column_width_table/output_003.pdf index 04ce2a96e..0300db1c5 100644 Binary files a/tests/output/test_write_fixed_column_width_table/output_003.pdf and b/tests/output/test_write_fixed_column_width_table/output_003.pdf differ diff --git a/tests/output/test_write_fixed_column_width_table/output_004.pdf b/tests/output/test_write_fixed_column_width_table/output_004.pdf index f5760b5dd..9af28bb35 100644 Binary files a/tests/output/test_write_fixed_column_width_table/output_004.pdf and b/tests/output/test_write_fixed_column_width_table/output_004.pdf differ diff --git a/tests/output/test_write_fixed_column_width_table/output_005.pdf b/tests/output/test_write_fixed_column_width_table/output_005.pdf index ddec4b74e..786c5f618 100644 Binary files a/tests/output/test_write_fixed_column_width_table/output_005.pdf and b/tests/output/test_write_fixed_column_width_table/output_005.pdf differ diff --git a/tests/output/test_write_flexi_table/output_001.pdf b/tests/output/test_write_flexi_table/output_001.pdf index e31effce2..02412f9dc 100644 Binary files a/tests/output/test_write_flexi_table/output_001.pdf and b/tests/output/test_write_flexi_table/output_001.pdf differ diff --git a/tests/output/test_write_flexi_table/output_002.pdf b/tests/output/test_write_flexi_table/output_002.pdf index a8a235fd0..0525f4f04 100644 Binary files a/tests/output/test_write_flexi_table/output_002.pdf and b/tests/output/test_write_flexi_table/output_002.pdf differ diff --git a/tests/output/test_write_flexi_table/output_003.pdf b/tests/output/test_write_flexi_table/output_003.pdf index fb0131a94..45d52c5d8 100644 Binary files a/tests/output/test_write_flexi_table/output_003.pdf and b/tests/output/test_write_flexi_table/output_003.pdf differ diff --git a/tests/output/test_write_flexi_table/output_004.pdf b/tests/output/test_write_flexi_table/output_004.pdf index ca36d9a9f..ef33dccf5 100644 Binary files a/tests/output/test_write_flexi_table/output_004.pdf and b/tests/output/test_write_flexi_table/output_004.pdf differ diff --git a/tests/output/test_write_flexi_table/output_005.pdf b/tests/output/test_write_flexi_table/output_005.pdf index 639ce9c37..a0ef602ac 100644 Binary files a/tests/output/test_write_flexi_table/output_005.pdf and b/tests/output/test_write_flexi_table/output_005.pdf differ diff --git a/tests/output/test_write_flexi_table_with_preferred_width/output.pdf b/tests/output/test_write_flexi_table_with_preferred_width/output.pdf index e61503447..64e253a99 100644 Binary files a/tests/output/test_write_flexi_table_with_preferred_width/output.pdf and b/tests/output/test_write_flexi_table_with_preferred_width/output.pdf differ diff --git a/tests/output/test_write_flowchart_line_art/output.pdf b/tests/output/test_write_flowchart_line_art/output.pdf index 0dfe3c3b3..9ec765534 100644 Binary files a/tests/output/test_write_flowchart_line_art/output.pdf and b/tests/output/test_write_flowchart_line_art/output.pdf differ diff --git a/tests/output/test_write_flyer/output.pdf b/tests/output/test_write_flyer/output.pdf index 9d645d578..e424a4c13 100644 Binary files a/tests/output/test_write_flyer/output.pdf and b/tests/output/test_write_flyer/output.pdf differ diff --git a/tests/output/test_write_grayscale_image/output.pdf b/tests/output/test_write_grayscale_image/output.pdf index f552b5eb2..826849931 100644 Binary files a/tests/output/test_write_grayscale_image/output.pdf and b/tests/output/test_write_grayscale_image/output.pdf differ diff --git a/tests/output/test_write_hello_world_with_monaco_font/output_001.pdf b/tests/output/test_write_hello_world_with_monaco_font/output_001.pdf index 82d61c537..2d3cc6347 100644 Binary files a/tests/output/test_write_hello_world_with_monaco_font/output_001.pdf and b/tests/output/test_write_hello_world_with_monaco_font/output_001.pdf differ diff --git a/tests/output/test_write_hello_world_with_simhei_font/output_001.pdf b/tests/output/test_write_hello_world_with_simhei_font/output_001.pdf new file mode 100644 index 000000000..ee37cd510 Binary files /dev/null and b/tests/output/test_write_hello_world_with_simhei_font/output_001.pdf differ diff --git a/tests/output/test_write_hello_world_with_simhei_font/output_001_ground_truth.png b/tests/output/test_write_hello_world_with_simhei_font/output_001_ground_truth.png new file mode 100644 index 000000000..08033abfa Binary files /dev/null and b/tests/output/test_write_hello_world_with_simhei_font/output_001_ground_truth.png differ diff --git a/tests/output/test_write_hyphenated_paragraph/output.pdf b/tests/output/test_write_hyphenated_paragraph/output.pdf index 5ede4b01f..fef65ff97 100644 Binary files a/tests/output/test_write_hyphenated_paragraph/output.pdf and b/tests/output/test_write_hyphenated_paragraph/output.pdf differ diff --git a/tests/output/test_write_image_aligned_center/output.pdf b/tests/output/test_write_image_aligned_center/output.pdf index c3235e039..ffa578c2b 100644 Binary files a/tests/output/test_write_image_aligned_center/output.pdf and b/tests/output/test_write_image_aligned_center/output.pdf differ diff --git a/tests/output/test_write_image_by_url/output.pdf b/tests/output/test_write_image_by_url/output.pdf index 29e40fc5b..43fe26763 100644 Binary files a/tests/output/test_write_image_by_url/output.pdf and b/tests/output/test_write_image_by_url/output.pdf differ diff --git a/tests/output/test_write_incomplete_table/output.pdf b/tests/output/test_write_incomplete_table/output.pdf index 222b31762..16fe70e87 100644 Binary files a/tests/output/test_write_incomplete_table/output.pdf and b/tests/output/test_write_incomplete_table/output.pdf differ diff --git a/tests/output/test_write_line_of_text_justified_center/output.pdf b/tests/output/test_write_line_of_text_justified_center/output.pdf index 85a1611a2..2daa9d9d2 100644 Binary files a/tests/output/test_write_line_of_text_justified_center/output.pdf and b/tests/output/test_write_line_of_text_justified_center/output.pdf differ diff --git a/tests/output/test_write_line_of_text_justified_full/output.pdf b/tests/output/test_write_line_of_text_justified_full/output.pdf index 618246830..8ac016cb0 100644 Binary files a/tests/output/test_write_line_of_text_justified_full/output.pdf and b/tests/output/test_write_line_of_text_justified_full/output.pdf differ diff --git a/tests/output/test_write_line_of_text_justified_right/output.pdf b/tests/output/test_write_line_of_text_justified_right/output.pdf index 5897948db..d96251ab1 100644 Binary files a/tests/output/test_write_line_of_text_justified_right/output.pdf and b/tests/output/test_write_line_of_text_justified_right/output.pdf differ diff --git a/tests/output/test_write_lissajours_line_art/output.pdf b/tests/output/test_write_lissajours_line_art/output.pdf index 2405c4a23..0973b470f 100644 Binary files a/tests/output/test_write_lissajours_line_art/output.pdf and b/tests/output/test_write_lissajours_line_art/output.pdf differ diff --git a/tests/output/test_write_long_unordered_list/output.pdf b/tests/output/test_write_long_unordered_list/output.pdf index f834bacf4..e36982ca3 100644 Binary files a/tests/output/test_write_long_unordered_list/output.pdf and b/tests/output/test_write_long_unordered_list/output.pdf differ diff --git a/tests/output/test_write_multiple_pages/output.pdf b/tests/output/test_write_multiple_pages/output.pdf index 0e64667cb..361bd14a5 100644 Binary files a/tests/output/test_write_multiple_pages/output.pdf and b/tests/output/test_write_multiple_pages/output.pdf differ diff --git a/tests/output/test_write_nested_ordered_list/output.pdf b/tests/output/test_write_nested_ordered_list/output.pdf index 0d8dea8b9..a4f225405 100644 Binary files a/tests/output/test_write_nested_ordered_list/output.pdf and b/tests/output/test_write_nested_ordered_list/output.pdf differ diff --git a/tests/output/test_write_nested_unordered_list/output.pdf b/tests/output/test_write_nested_unordered_list/output.pdf index ebf5648e6..998ca7782 100644 Binary files a/tests/output/test_write_nested_unordered_list/output.pdf and b/tests/output/test_write_nested_unordered_list/output.pdf differ diff --git a/tests/output/test_write_nested_unordered_list/output.png b/tests/output/test_write_nested_unordered_list/output.png index aeb8e6f6e..18996abde 100644 Binary files a/tests/output/test_write_nested_unordered_list/output.png and b/tests/output/test_write_nested_unordered_list/output.png differ diff --git a/tests/output/test_write_ordered_list/output_001.pdf b/tests/output/test_write_ordered_list/output_001.pdf index 5fcf86282..171fa1375 100644 Binary files a/tests/output/test_write_ordered_list/output_001.pdf and b/tests/output/test_write_ordered_list/output_001.pdf differ diff --git a/tests/output/test_write_ordered_list/output_002.pdf b/tests/output/test_write_ordered_list/output_002.pdf index 5858580c1..c851b1bc4 100644 Binary files a/tests/output/test_write_ordered_list/output_002.pdf and b/tests/output/test_write_ordered_list/output_002.pdf differ diff --git a/tests/output/test_write_paragraph/output.pdf b/tests/output/test_write_paragraph/output.pdf index 3f0aa9890..84d18155e 100644 Binary files a/tests/output/test_write_paragraph/output.pdf and b/tests/output/test_write_paragraph/output.pdf differ diff --git a/tests/output/test_write_paragraph_alignment/output.pdf b/tests/output/test_write_paragraph_alignment/output.pdf index 2d7103f86..e4a9b5457 100644 Binary files a/tests/output/test_write_paragraph_alignment/output.pdf and b/tests/output/test_write_paragraph_alignment/output.pdf differ diff --git a/tests/output/test_write_paragraph_border_left/output.pdf b/tests/output/test_write_paragraph_border_left/output.pdf index 36a73dc24..db922577c 100644 Binary files a/tests/output/test_write_paragraph_border_left/output.pdf and b/tests/output/test_write_paragraph_border_left/output.pdf differ diff --git a/tests/output/test_write_paragraph_force_split/output.pdf b/tests/output/test_write_paragraph_force_split/output.pdf index 2114e99fe..fbdbb37f1 100644 Binary files a/tests/output/test_write_paragraph_force_split/output.pdf and b/tests/output/test_write_paragraph_force_split/output.pdf differ diff --git a/tests/output/test_write_paragraph_justified_center/output_001.pdf b/tests/output/test_write_paragraph_justified_center/output_001.pdf index 94d0f76b6..47a0d57bd 100644 Binary files a/tests/output/test_write_paragraph_justified_center/output_001.pdf and b/tests/output/test_write_paragraph_justified_center/output_001.pdf differ diff --git a/tests/output/test_write_paragraph_justified_center/output_002.pdf b/tests/output/test_write_paragraph_justified_center/output_002.pdf index 8601dc0fe..7c1907e56 100644 Binary files a/tests/output/test_write_paragraph_justified_center/output_002.pdf and b/tests/output/test_write_paragraph_justified_center/output_002.pdf differ diff --git a/tests/output/test_write_paragraph_justified_center_with_padding/output.pdf b/tests/output/test_write_paragraph_justified_center_with_padding/output.pdf index e16d8dc12..e3403b0fb 100644 Binary files a/tests/output/test_write_paragraph_justified_center_with_padding/output.pdf and b/tests/output/test_write_paragraph_justified_center_with_padding/output.pdf differ diff --git a/tests/output/test_write_paragraph_justified_center_with_padding_and_border/output.pdf b/tests/output/test_write_paragraph_justified_center_with_padding_and_border/output.pdf index d804208cc..eaeea1cc5 100644 Binary files a/tests/output/test_write_paragraph_justified_center_with_padding_and_border/output.pdf and b/tests/output/test_write_paragraph_justified_center_with_padding_and_border/output.pdf differ diff --git a/tests/output/test_write_paragraph_justified_center_with_padding_and_border_and_background/output.pdf b/tests/output/test_write_paragraph_justified_center_with_padding_and_border_and_background/output.pdf index c760a9038..742900910 100644 Binary files a/tests/output/test_write_paragraph_justified_center_with_padding_and_border_and_background/output.pdf and b/tests/output/test_write_paragraph_justified_center_with_padding_and_border_and_background/output.pdf differ diff --git a/tests/output/test_write_paragraph_justified_full/output.pdf b/tests/output/test_write_paragraph_justified_full/output.pdf index bb5638f80..d1a7aeb55 100644 Binary files a/tests/output/test_write_paragraph_justified_full/output.pdf and b/tests/output/test_write_paragraph_justified_full/output.pdf differ diff --git a/tests/output/test_write_paragraph_justified_right/output.pdf b/tests/output/test_write_paragraph_justified_right/output.pdf index 602a742b4..fa15f6209 100644 Binary files a/tests/output/test_write_paragraph_justified_right/output.pdf and b/tests/output/test_write_paragraph_justified_right/output.pdf differ diff --git a/tests/output/test_write_paragraph_preserve_space/output.pdf b/tests/output/test_write_paragraph_preserve_space/output.pdf index 33a746928..44dbd5c3b 100644 Binary files a/tests/output/test_write_paragraph_preserve_space/output.pdf and b/tests/output/test_write_paragraph_preserve_space/output.pdf differ diff --git a/tests/output/test_write_paragraph_save_twice/output_001.pdf b/tests/output/test_write_paragraph_save_twice/output_001.pdf index 0063fe26c..a6a4ae3f9 100644 Binary files a/tests/output/test_write_paragraph_save_twice/output_001.pdf and b/tests/output/test_write_paragraph_save_twice/output_001.pdf differ diff --git a/tests/output/test_write_paragraph_save_twice/output_002.pdf b/tests/output/test_write_paragraph_save_twice/output_002.pdf index ad3c0fd75..c41fc42a4 100644 Binary files a/tests/output/test_write_paragraph_save_twice/output_002.pdf and b/tests/output/test_write_paragraph_save_twice/output_002.pdf differ diff --git a/tests/output/test_write_paragraph_with_accented_letters/output.pdf b/tests/output/test_write_paragraph_with_accented_letters/output.pdf index d3461a034..521a0ac49 100644 Binary files a/tests/output/test_write_paragraph_with_accented_letters/output.pdf and b/tests/output/test_write_paragraph_with_accented_letters/output.pdf differ diff --git a/tests/output/test_write_paragraphs_using_multi_column_layout/output.pdf b/tests/output/test_write_paragraphs_using_multi_column_layout/output.pdf index 1806ada96..60b2f238e 100644 Binary files a/tests/output/test_write_paragraphs_using_multi_column_layout/output.pdf and b/tests/output/test_write_paragraphs_using_multi_column_layout/output.pdf differ diff --git a/tests/output/test_write_paragraphs_using_single_column_layout/output.pdf b/tests/output/test_write_paragraphs_using_single_column_layout/output.pdf index e2774baa3..1b6198e61 100644 Binary files a/tests/output/test_write_paragraphs_using_single_column_layout/output.pdf and b/tests/output/test_write_paragraphs_using_single_column_layout/output.pdf differ diff --git a/tests/output/test_write_paragraphs_with_headings/output.pdf b/tests/output/test_write_paragraphs_with_headings/output.pdf index 9255cdfa3..f33d7608b 100644 Binary files a/tests/output/test_write_paragraphs_with_headings/output.pdf and b/tests/output/test_write_paragraphs_with_headings/output.pdf differ diff --git a/tests/output/test_write_pdf_a_1b/output_001.pdf b/tests/output/test_write_pdf_a_1b/output_001.pdf index 9102abbde..0f81b8c6d 100644 Binary files a/tests/output/test_write_pdf_a_1b/output_001.pdf and b/tests/output/test_write_pdf_a_1b/output_001.pdf differ diff --git a/tests/output/test_write_pdf_a_1b/output_002.pdf b/tests/output/test_write_pdf_a_1b/output_002.pdf index 5405d8a50..21db9e78e 100644 Binary files a/tests/output/test_write_pdf_a_1b/output_002.pdf and b/tests/output/test_write_pdf_a_1b/output_002.pdf differ diff --git a/tests/output/test_write_pil_image/output.pdf b/tests/output/test_write_pil_image/output.pdf index c734cb51e..6d1029416 100644 Binary files a/tests/output/test_write_pil_image/output.pdf and b/tests/output/test_write_pil_image/output.pdf differ diff --git a/tests/output/test_write_png_image_by_url/output.pdf b/tests/output/test_write_png_image_by_url/output.pdf index fd514255d..6d913ab0d 100644 Binary files a/tests/output/test_write_png_image_by_url/output.pdf and b/tests/output/test_write_png_image_by_url/output.pdf differ diff --git a/tests/output/test_write_radar_plot/output.pdf b/tests/output/test_write_radar_plot/output.pdf index 99a4dbf59..6c59396bb 100644 Binary files a/tests/output/test_write_radar_plot/output.pdf and b/tests/output/test_write_radar_plot/output.pdf differ diff --git a/tests/output/test_write_table_with_col_span/output.pdf b/tests/output/test_write_table_with_col_span/output.pdf index f29ac7531..545c5c206 100644 Binary files a/tests/output/test_write_table_with_col_span/output.pdf and b/tests/output/test_write_table_with_col_span/output.pdf differ diff --git a/tests/output/test_write_table_with_image/output.pdf b/tests/output/test_write_table_with_image/output.pdf index 9b0d0f930..f8e1ab0e1 100644 Binary files a/tests/output/test_write_table_with_image/output.pdf and b/tests/output/test_write_table_with_image/output.pdf differ diff --git a/tests/output/test_write_table_with_non_black_paragraphs/output.pdf b/tests/output/test_write_table_with_non_black_paragraphs/output.pdf index c8b15b1e1..f9b963075 100644 Binary files a/tests/output/test_write_table_with_non_black_paragraphs/output.pdf and b/tests/output/test_write_table_with_non_black_paragraphs/output.pdf differ diff --git a/tests/output/test_write_table_with_rainbow_background/output.pdf b/tests/output/test_write_table_with_rainbow_background/output.pdf index 80b5b1809..44cfa539f 100644 Binary files a/tests/output/test_write_table_with_rainbow_background/output.pdf and b/tests/output/test_write_table_with_rainbow_background/output.pdf differ diff --git a/tests/output/test_write_table_with_row_span/output.pdf b/tests/output/test_write_table_with_row_span/output.pdf index d26ab3152..03bc75c4d 100644 Binary files a/tests/output/test_write_table_with_row_span/output.pdf and b/tests/output/test_write_table_with_row_span/output.pdf differ diff --git a/tests/output/test_write_table_with_special_characters/output.pdf b/tests/output/test_write_table_with_special_characters/output.pdf index ef6fb425b..7517ab5a4 100644 Binary files a/tests/output/test_write_table_with_special_characters/output.pdf and b/tests/output/test_write_table_with_special_characters/output.pdf differ diff --git a/tests/output/test_write_tents_and_trees/output.pdf b/tests/output/test_write_tents_and_trees/output.pdf index 157eb6c7d..4cfbd23b5 100644 Binary files a/tests/output/test_write_tents_and_trees/output.pdf and b/tests/output/test_write_tents_and_trees/output.pdf differ diff --git a/tests/output/test_write_text_area/output_001.pdf b/tests/output/test_write_text_area/output_001.pdf index 84faa0673..bd40c5b68 100644 Binary files a/tests/output/test_write_text_area/output_001.pdf and b/tests/output/test_write_text_area/output_001.pdf differ diff --git a/tests/output/test_write_text_area/output_002.pdf b/tests/output/test_write_text_area/output_002.pdf index edd8a2b1e..17852efc5 100644 Binary files a/tests/output/test_write_text_area/output_002.pdf and b/tests/output/test_write_text_area/output_002.pdf differ diff --git a/tests/output/test_write_text_field/output_001.pdf b/tests/output/test_write_text_field/output_001.pdf index bc7874e66..8944389e1 100644 Binary files a/tests/output/test_write_text_field/output_001.pdf and b/tests/output/test_write_text_field/output_001.pdf differ diff --git a/tests/output/test_write_text_field/output_002.pdf b/tests/output/test_write_text_field/output_002.pdf index 9d11dc0a4..01448dd79 100644 Binary files a/tests/output/test_write_text_field/output_002.pdf and b/tests/output/test_write_text_field/output_002.pdf differ diff --git a/tests/output/test_write_unordered_list/output.pdf b/tests/output/test_write_unordered_list/output.pdf index 3adfe79ed..5eb544824 100644 Binary files a/tests/output/test_write_unordered_list/output.pdf and b/tests/output/test_write_unordered_list/output.pdf differ diff --git a/tests/output/test_write_using_low_level_instructions/output.pdf b/tests/output/test_write_using_low_level_instructions/output.pdf index bf6ec75c2..f1d23c2d9 100644 Binary files a/tests/output/test_write_using_low_level_instructions/output.pdf and b/tests/output/test_write_using_low_level_instructions/output.pdf differ diff --git a/tests/output/test_write_with_truetype_font/output_001.pdf b/tests/output/test_write_with_truetype_font/output_001.pdf index 6ed316dfa..e6941b508 100644 Binary files a/tests/output/test_write_with_truetype_font/output_001.pdf and b/tests/output/test_write_with_truetype_font/output_001.pdf differ diff --git a/tests/output/test_write_with_truetype_font/output_002.pdf b/tests/output/test_write_with_truetype_font/output_002.pdf index 508a2ef8e..d6d6ca86a 100644 Binary files a/tests/output/test_write_with_truetype_font/output_002.pdf and b/tests/output/test_write_with_truetype_font/output_002.pdf differ diff --git a/tests/output/test_write_with_truetype_font/output_002.png b/tests/output/test_write_with_truetype_font/output_002.png index a4e99e2db..49fbdc01f 100644 Binary files a/tests/output/test_write_with_truetype_font/output_002.png and b/tests/output/test_write_with_truetype_font/output_002.png differ diff --git a/tests/output/test_write_with_truetype_font/output_003.pdf b/tests/output/test_write_with_truetype_font/output_003.pdf index 00286580e..a1a93e584 100644 Binary files a/tests/output/test_write_with_truetype_font/output_003.pdf and b/tests/output/test_write_with_truetype_font/output_003.pdf differ diff --git a/tests/output/test_write_with_truetype_font/output_003_ground_truth.png b/tests/output/test_write_with_truetype_font/output_003_ground_truth.png index 284988579..702027fef 100644 Binary files a/tests/output/test_write_with_truetype_font/output_003_ground_truth.png and b/tests/output/test_write_with_truetype_font/output_003_ground_truth.png differ diff --git a/tests/output/test_write_xl_image/output.pdf b/tests/output/test_write_xl_image/output.pdf index 271ec30e9..5e7b0a4f4 100644 Binary files a/tests/output/test_write_xl_image/output.pdf and b/tests/output/test_write_xl_image/output.pdf differ diff --git a/tests/pdf/canvas/font/SimHei.ttf b/tests/pdf/canvas/font/SimHei.ttf new file mode 100644 index 000000000..c5030aea8 Binary files /dev/null and b/tests/pdf/canvas/font/SimHei.ttf differ diff --git a/tests/pdf/canvas/font/test_write_hello_world_with_simhei_font.py b/tests/pdf/canvas/font/test_write_hello_world_with_simhei_font.py new file mode 100644 index 000000000..32bca97a5 --- /dev/null +++ b/tests/pdf/canvas/font/test_write_hello_world_with_simhei_font.py @@ -0,0 +1,71 @@ +import unittest +from decimal import Decimal +from pathlib import Path + +from borb.pdf.canvas.font.simple_font.true_type_font import TrueTypeFont +from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout +from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout +from borb.pdf.canvas.layout.text.paragraph import Paragraph +from borb.pdf.document import Document +from borb.pdf.page.page import Page +from borb.pdf.pdf import PDF +from tests.test_util import compare_visually_to_ground_truth + + +class TestWriteHelloWorldWithSimHeiFont(unittest.TestCase): + """ + This test loads a truetype _font from a .ttf file and attempts to use it to write 2 paragraphs of lorem ipsum. + """ + + def __init__(self, methodName="runTest"): + super().__init__(methodName) + + # find output dir + p: Path = Path(__file__).parent + while "output" not in [x.stem for x in p.iterdir() if x.is_dir()]: + p = p.parent + p = p / "output" + self.output_dir = Path(p, Path(__file__).stem.replace(".py", "")) + if not self.output_dir.exists(): + self.output_dir.mkdir() + + def test_write_document_001(self): + + # create document + pdf = Document() + + # add page + page = Page() + pdf.append_page(page) + + # layout + layout: PageLayout = SingleColumnLayout(page) + + # path to _font + font_path: Path = Path(__file__).parent / "SimHei.ttf" + assert font_path.exists() + + # load font + ttf = TrueTypeFont.true_type_font_from_file(font_path) + + # add paragraph 1 + layout.add( + Paragraph( + "你好世界", + font=ttf, + font_size=Decimal(14), + ) + ) + + # determine output location + out_file = self.output_dir / "output_001.pdf" + + # attempt to store PDF + with open(out_file, "wb") as in_file_handle: + PDF.dumps(in_file_handle, pdf) + + # attempt to re-open PDF + with open(out_file, "rb") as in_file_handle: + PDF.loads(in_file_handle) + + compare_visually_to_ground_truth(out_file) diff --git a/tests/pdf/conformance/test_write_pdf_a_1b.py b/tests/pdf/conformance/test_write_pdf_a_1b.py index 0ee0599b1..d3ec011e4 100644 --- a/tests/pdf/conformance/test_write_pdf_a_1b.py +++ b/tests/pdf/conformance/test_write_pdf_a_1b.py @@ -45,11 +45,11 @@ def test_write_pdf_a_1b(self): layout.add(Paragraph("Hello World!")) info_dictionary: Dictionary = Dictionary() - info_dictionary[Name("Title")] = String("Lorem Ipsum (T)") - info_dictionary[Name("Subject")] = String("Lorem Ipsum (S)") - info_dictionary[Name("Creator")] = String("Joris Schellekens (C)") - info_dictionary[Name("Author")] = String("Joris Schellekens (A)") - info_dictionary[Name("Keywords")] = String("Lorem Ipsum Dolor Sit Amet") + info_dictionary[Name("Title")] = String("Title Value") + info_dictionary[Name("Subject")] = String("Subject Value") + info_dictionary[Name("Creator")] = String("Creator Value") + info_dictionary[Name("Author")] = String("Author Value") + info_dictionary[Name("Keywords")] = String("Keyword1 Keyword2 Keyword3") pdf["XRef"]["Trailer"][Name("Info")] = info_dictionary # attempt to store PDF @@ -66,11 +66,11 @@ def test_re_open_pdfa_1_b(self): # assert XMP meta data xmp = pdf.get_xmp_document_info() - assert xmp.get_title() == "Lorem Ipsum (T)" - assert xmp.get_creator() == "Joris Schellekens (C)" - assert xmp.get_author() == "Joris Schellekens (A)" - assert xmp.get_subject() == "Lorem Ipsum (S)" - assert xmp.get_keywords() == "Lorem Ipsum Dolor Sit Amet" + assert xmp.get_title() == "Title Value" + assert xmp.get_creator() == "Creator Value" + assert xmp.get_author() == "Author Value" + assert xmp.get_subject() == "Subject Value" + assert xmp.get_keywords() == "Keyword1 Keyword2 Keyword3" def test_re_save_pdf_a_1_b(self): @@ -90,10 +90,10 @@ def test_re_save_pdf_a_1_b(self): # assert XMP meta data xmp = pdf.get_xmp_document_info() - assert xmp.get_title() == "Lorem Ipsum (T)" - assert xmp.get_creator() == "Joris Schellekens (C)" - assert xmp.get_author() == "Joris Schellekens (A)" - assert xmp.get_subject() == "Lorem Ipsum (S)" - assert xmp.get_keywords() == "Lorem Ipsum Dolor Sit Amet" + assert xmp.get_title() == "Title Value" + assert xmp.get_creator() == "Creator Value" + assert xmp.get_author() == "Author Value" + assert xmp.get_subject() == "Subject Value" + assert xmp.get_keywords() == "Keyword1 Keyword2 Keyword3" compare_visually_to_ground_truth(out_file) diff --git a/tests/pdf/page/annotations/test_add_all_rubber_stamp_annotations.py b/tests/pdf/page/annotations/test_add_all_rubber_stamp_annotations.py index 24b4c2b01..cb4e4fff5 100644 --- a/tests/pdf/page/annotations/test_add_all_rubber_stamp_annotations.py +++ b/tests/pdf/page/annotations/test_add_all_rubber_stamp_annotations.py @@ -3,17 +3,18 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import X11Color from borb.pdf.canvas.geometry.rectangle import Rectangle -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page, RubberStampAnnotationIconType from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth class TestAddAllRubberStampAnnotations(unittest.TestCase): diff --git a/tests/pdf/page/annotations/test_add_circle_annotation.py b/tests/pdf/page/annotations/test_add_circle_annotation.py index 21a6706d0..4c81f4bcb 100644 --- a/tests/pdf/page/annotations/test_add_circle_annotation.py +++ b/tests/pdf/page/annotations/test_add_circle_annotation.py @@ -3,17 +3,18 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth class TestAddCircleAnnotation(unittest.TestCase): diff --git a/tests/pdf/page/annotations/test_add_free_text_annotation.py b/tests/pdf/page/annotations/test_add_free_text_annotation.py index e8eec1701..cfd017126 100644 --- a/tests/pdf/page/annotations/test_add_free_text_annotation.py +++ b/tests/pdf/page/annotations/test_add_free_text_annotation.py @@ -3,22 +3,22 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.font.simple_font.font_type_1 import StandardType1Font from borb.pdf.canvas.geometry.rectangle import Rectangle from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) -from tests.test_util import compare_visually_to_ground_truth +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/annotations/test_add_highlight_annotation.py b/tests/pdf/page/annotations/test_add_highlight_annotation.py index afaf0b627..26385cca8 100644 --- a/tests/pdf/page/annotations/test_add_highlight_annotation.py +++ b/tests/pdf/page/annotations/test_add_highlight_annotation.py @@ -3,19 +3,19 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) -from tests.test_util import compare_visually_to_ground_truth +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction class TestAddHighlightAnnotation(unittest.TestCase): diff --git a/tests/pdf/page/annotations/test_add_line_annotation.py b/tests/pdf/page/annotations/test_add_line_annotation.py index f01d600cd..07b734a03 100644 --- a/tests/pdf/page/annotations/test_add_line_annotation.py +++ b/tests/pdf/page/annotations/test_add_line_annotation.py @@ -5,17 +5,16 @@ from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/annotations/test_add_polygon_annotation_using_line_art_factory.py b/tests/pdf/page/annotations/test_add_polygon_annotation_using_line_art_factory.py index 0772f8413..d1996cd04 100644 --- a/tests/pdf/page/annotations/test_add_polygon_annotation_using_line_art_factory.py +++ b/tests/pdf/page/annotations/test_add_polygon_annotation_using_line_art_factory.py @@ -3,19 +3,20 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.canvas.line_art.line_art_factory import LineArtFactory from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth class TestAddAllLineArtAnnotations(unittest.TestCase): diff --git a/tests/pdf/page/annotations/test_add_polyline_annotation_using_line_art_factory.py b/tests/pdf/page/annotations/test_add_polyline_annotation_using_line_art_factory.py index 4f39c6998..65c11b6d7 100644 --- a/tests/pdf/page/annotations/test_add_polyline_annotation_using_line_art_factory.py +++ b/tests/pdf/page/annotations/test_add_polyline_annotation_using_line_art_factory.py @@ -5,10 +5,10 @@ from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.canvas.line_art.line_art_factory import LineArtFactory from borb.pdf.document import Document diff --git a/tests/pdf/page/annotations/test_add_redact_annotation.py b/tests/pdf/page/annotations/test_add_redact_annotation.py index 870416ad5..3367677aa 100644 --- a/tests/pdf/page/annotations/test_add_redact_annotation.py +++ b/tests/pdf/page/annotations/test_add_redact_annotation.py @@ -4,22 +4,22 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.font.simple_font.true_type_font import TrueTypeFont from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) -from tests.test_util import compare_visually_to_ground_truth +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/annotations/test_add_remote_go_to_annotation.py b/tests/pdf/page/annotations/test_add_remote_go_to_annotation.py index 5c4ba30a3..02f2ed940 100644 --- a/tests/pdf/page/annotations/test_add_remote_go_to_annotation.py +++ b/tests/pdf/page/annotations/test_add_remote_go_to_annotation.py @@ -4,17 +4,20 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import FixedColumnWidthTable +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable from borb.pdf.canvas.layout.text.chunk_of_text import ChunkOfText from borb.pdf.canvas.layout.text.chunks_of_text import HeterogeneousParagraph from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/annotations/test_add_square_annotation.py b/tests/pdf/page/annotations/test_add_square_annotation.py index b4f80981d..f982e28e7 100644 --- a/tests/pdf/page/annotations/test_add_square_annotation.py +++ b/tests/pdf/page/annotations/test_add_square_annotation.py @@ -3,17 +3,18 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth class TestAddSquareAnnotation(unittest.TestCase): diff --git a/tests/pdf/page/annotations/test_add_square_annotation_in_free_space.py b/tests/pdf/page/annotations/test_add_square_annotation_in_free_space.py index 812f82bdc..a6b91443f 100644 --- a/tests/pdf/page/annotations/test_add_square_annotation_in_free_space.py +++ b/tests/pdf/page/annotations/test_add_square_annotation_in_free_space.py @@ -3,11 +3,12 @@ from math import ceil from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle from borb.pdf.canvas.layout.free_space_finder import FreeSpaceFinder from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/annotations/test_add_squiggle_annotation.py b/tests/pdf/page/annotations/test_add_squiggle_annotation.py index fcb27dd0a..4be103b2d 100644 --- a/tests/pdf/page/annotations/test_add_squiggle_annotation.py +++ b/tests/pdf/page/annotations/test_add_squiggle_annotation.py @@ -5,17 +5,16 @@ from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/annotations/test_add_strikeout_annotation.py b/tests/pdf/page/annotations/test_add_strikeout_annotation.py index 5385482a4..b32043071 100644 --- a/tests/pdf/page/annotations/test_add_strikeout_annotation.py +++ b/tests/pdf/page/annotations/test_add_strikeout_annotation.py @@ -5,17 +5,16 @@ from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/redact/test_apply_redaction_annotations.py b/tests/pdf/page/redact/test_apply_redaction_annotations.py index 1cb3f8230..1a47d9596 100644 --- a/tests/pdf/page/redact/test_apply_redaction_annotations.py +++ b/tests/pdf/page/redact/test_apply_redaction_annotations.py @@ -4,23 +4,23 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.io.read.types import Decimal as pDecimal from borb.io.read.types import Dictionary, List, Name, Stream from borb.pdf.canvas.color.color import X11Color from borb.pdf.canvas.layout.layout_element import Alignment -from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout +from borb.pdf.canvas.layout.page_layout.multi_column_layout import \ + SingleColumnLayout from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.canvas.layout.table.fixed_column_width_table import ( - FixedColumnWidthTable as Table, -) +from borb.pdf.canvas.layout.table.fixed_column_width_table import \ + FixedColumnWidthTable as Table from borb.pdf.canvas.layout.text.paragraph import Paragraph from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from borb.toolkit.text.regular_expression_text_extraction import ( - RegularExpressionTextExtraction, -) -from tests.test_util import compare_visually_to_ground_truth +from borb.toolkit.text.regular_expression_text_extraction import \ + RegularExpressionTextExtraction unittest.TestLoader.sortTestMethodsUsing = None diff --git a/tests/pdf/page/shape/test_page_has_empty_resource_dictionary.py b/tests/pdf/page/shape/test_page_has_empty_resource_dictionary.py index b36ca2be0..55d760667 100644 --- a/tests/pdf/page/shape/test_page_has_empty_resource_dictionary.py +++ b/tests/pdf/page/shape/test_page_has_empty_resource_dictionary.py @@ -2,6 +2,8 @@ from decimal import Decimal from pathlib import Path +from tests.test_util import compare_visually_to_ground_truth + from borb.io.read.types import Dictionary from borb.pdf.canvas.color.color import HexColor from borb.pdf.canvas.geometry.rectangle import Rectangle @@ -10,7 +12,6 @@ from borb.pdf.document import Document from borb.pdf.page.page import Page from borb.pdf.pdf import PDF -from tests.test_util import compare_visually_to_ground_truth class TestPageHasEmptyResourceDictionary(unittest.TestCase): diff --git a/tests/toolkit/text/test_extract_text.py b/tests/toolkit/text/test_extract_text.py index e8fabae6a..3943e4ca1 100644 --- a/tests/toolkit/text/test_extract_text.py +++ b/tests/toolkit/text/test_extract_text.py @@ -34,7 +34,7 @@ def __init__(self, methodName="runTest"): if not self.output_dir.exists(): self.output_dir.mkdir() - def test_write_document(self): + def test_write_document_001(self): # create document pdf = Document() @@ -83,7 +83,7 @@ def test_write_document(self): with open(self.output_dir / "output_001.pdf", "wb") as out_file_handle: PDF.dumps(out_file_handle, pdf) - def test_extract_text_from_document(self): + def test_extract_text_from_document_001(self): doc = None l = SimpleTextExtraction() @@ -103,6 +103,34 @@ def test_extract_text_from_document(self): for w in re.split("[^a-zA-Z]+", ground_truth): assert w in page_content, "Word '%s' not found in extracted text" % w + def test_write_document_002(self): + # create document + pdf = Document() + + # add page(s) + for s in ["Lorem Ipsum", "Dolor Sit Amet"]: + page = Page() + pdf.append_page(page) + layout = SingleColumnLayout(page) + layout.add(Paragraph(s)) + + # attempt to store PDF + with open(self.output_dir / "output_002.pdf", "wb") as out_file_handle: + PDF.dumps(out_file_handle, pdf) + + def test_extract_text_from_document_002(self): + + doc = None + l = SimpleTextExtraction() + with open(self.output_dir / "output_002.pdf", "rb") as file_handle: + doc = PDF.loads(file_handle, [l]) + + page_content_0: str = l.get_text_for_page(0) + assert page_content_0 == "Lorem Ipsum" + + page_content_1: str = l.get_text_for_page(1) + assert page_content_1 == "Dolor Sit Amet" + if __name__ == "__main__": unittest.main()