Skip to content

Commit

Permalink
release v2.0.14
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisschellekens committed Nov 9, 2021
1 parent a496503 commit 710eb5d
Show file tree
Hide file tree
Showing 374 changed files with 648 additions and 1,953 deletions.
2 changes: 1 addition & 1 deletion borb/io/read/any_object_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from borb.io.read.primitive.string_transformer import StringTransformer
from borb.io.read.reference.reference_transformer import ReferenceTransformer
from borb.io.read.reference.xref_transformer import XREFTransformer
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
11 changes: 9 additions & 2 deletions borb/io/read/encryption/rc4.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self):
self._p: int = 0
self._q: int = 0

def set_key(self, key: bytes):
def _set_key(self, key: bytes):
self._state = [n for n in range(256)]
self._p = 0
self._q = 0
Expand All @@ -54,5 +54,12 @@ def _byte_generator(self):
return self._state[(self._state[self._p] + self._state[self._q]) % 256]

def encrypt(self, key: bytes, input: bytes):
self.set_key(key)
"""
This function encrypts a given byte array with a given key,
returning the encrypted bytes.
:param key: the key to be used for encrypting
:param input: the input byte array to be encrypted
:return: the encrypted bytes
"""
self._set_key(key)
return bytes([p ^ self._byte_generator() for p in input])
68 changes: 45 additions & 23 deletions borb/io/read/encryption/standard_security_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@

from borb.io.read.encryption.rc4 import RC4
from borb.io.read.types import (
String,
AnyPDFType,
Boolean,
Decimal,
HexadecimalString,
Boolean,
AnyPDFType,
Name,
Reference,
Stream,
Name,
String,
)


Expand Down Expand Up @@ -59,27 +59,40 @@ def __init__(
# used in determining whether to prompt the user for a password and, if so,
# whether a valid user or owner password was entered. For more
# information, see 7.6.3.4, "Password Algorithms."
self._u: bytes = StandardSecurityHandler._str_to_bytes(
StandardSecurityHandler._unescape_pdf_syntax(encryption_dictionary.get("U"))
self._u: bytes = (
StandardSecurityHandler._str_to_bytes(
StandardSecurityHandler._unescape_pdf_syntax(
encryption_dictionary.get("U")
)
)
or b""
)
assert self._u is not None
assert len(self._u) == 32

# (Required) A 32-byte string, based on both the owner and user passwords,
# that shall be used in computing the encryption key and in determining
# whether a valid owner password was entered. For more information, see
# 7.6.3.3, "Encryption Key Algorithm," and 7.6.3.4, "Password Algorithms."
self._o: bytes = StandardSecurityHandler._str_to_bytes(
StandardSecurityHandler._unescape_pdf_syntax(encryption_dictionary.get("O"))
self._o: bytes = (
StandardSecurityHandler._str_to_bytes(
StandardSecurityHandler._unescape_pdf_syntax(
encryption_dictionary.get("O")
)
)
or b""
)
assert self._o is not None
assert len(self._o) == 32

# /ID
trailer: dict = encryption_dictionary.get_parent()
trailer: dict = encryption_dictionary.get_parent() # type: ignore [attr-defined]
if "ID" in trailer:
self._document_id: bytes = trailer["ID"][0].get_content_bytes()

# (Required) A set of flags specifying which operations shall be permitted
# when the document is opened with user access (see Table 22).
assert "P" in encryption_dictionary
self._permissions: int = int(encryption_dictionary.get("P"))

# (Optional; PDF 1.4; only if V is 2 or 3) The length of the encryption key, in bits.
Expand Down Expand Up @@ -112,15 +125,19 @@ def __init__(
password = bytes(owner_password, encoding="charmap")

# calculate encryption_key
assert password is not None
self._encryption_key: bytes = self._compute_encryption_key(password)

def _encrypt_data(self, object: AnyPDFType) -> None:
def _encrypt_data(self, object: AnyPDFType) -> AnyPDFType:
# a) Obtain the object number and generation number from the object identifier of the string or stream to be
# encrypted (see 7.3.10, "Indirect Objects"). If the string is a direct object, use the identifier of the indirect
# object containing it.
reference: typing.Optional[Reference] = object.get_reference()
reference: typing.Optional[Reference] = object.get_reference() # type: ignore [union-attr]
if reference is None:
reference = object.get_parent().get_reference()
reference = object.get_parent().get_reference() # type: ignore [union-attr]
assert reference is not None
assert reference.object_number is not None
assert reference.generation_number is not None
object_number: int = reference.object_number
generation_number: int = reference.generation_number

Expand Down Expand Up @@ -153,24 +170,27 @@ def _encrypt_data(self, object: AnyPDFType) -> None:
# The output is the encrypted data to be stored in the PDF file.
n_plus_5: int = min(16, n + 5)
if isinstance(object, String):
new_content_bytes: bytes = RC4().encrypt(
str_new_content_bytes: bytes = RC4().encrypt(
h.digest()[0:n_plus_5], object.get_content_bytes()
)
# TODO
if isinstance(object, HexadecimalString):
new_content_bytes: bytes = RC4().encrypt(
hex_str_new_content_bytes: bytes = RC4().encrypt(
h.digest()[0:n_plus_5], object.get_content_bytes()
)
# TODO
if isinstance(object, Stream):
new_content_bytes: bytes = RC4().encrypt(
stream_new_content_bytes: bytes = RC4().encrypt(
h.digest()[0:n_plus_5], object["DecodedBytes"]
)
object[Name("DecodedBytes")] = new_content_bytes
object[Name("DecodedBytes")] = stream_new_content_bytes
object[Name("Bytes")] = zlib.compress(object["DecodedBytes"], 9)
return object

def _decrypt_data(self, object: AnyPDFType) -> None:
# default
return object

def _decrypt_data(self, object: AnyPDFType) -> AnyPDFType:
return self._encrypt_data(object)

def _compute_encryption_key(self, password: bytes) -> bytes:
Expand Down Expand Up @@ -211,7 +231,6 @@ def _compute_encryption_key(self, password: bytes) -> bytes:
# MD5 hash and pass the first n bytes of the output as input into a new MD5 hash, where n is the number of
# bytes of the encryption key as defined by the value of the encryption dictionary’s Length entry.
if self._revision >= 3:
# THIS IS WHERE THE ALGORITHM GOES WRONG
n: int = int(self._key_length / 8)
for _ in range(0, 50):
h2 = hashlib.md5()
Expand Down Expand Up @@ -296,19 +315,22 @@ def _compute_encryption_dictionary_u_value(self, user_password_string: bytes):
if self._revision == 2:
# a) Create an encryption key based on the user password string, as described in "Algorithm 2: Computing an
# encryption key".
key: bytes = self._compute_encryption_key(user_password_string)
key_rev_2: bytes = self._compute_encryption_key(user_password_string)

# b) Encrypt the 32-byte padding string shown in step (a) of "Algorithm 2: Computing an encryption key", using
# an RC4 encryption function with the encryption key from the preceding step.

# c) Store the result of step (b) as the value of the U entry in the encryption dictionary.
self._u = RC4().encrypt(key, StandardSecurityHandler._pad_or_truncate(None))
self._u = RC4().encrypt(
key_rev_2, StandardSecurityHandler._pad_or_truncate(None)
)

return self._u

if self._revision >= 3:
# a) Create an encryption key based on the user password string, as described in "Algorithm 2: Computing an
# encryption key".
key: bytes = self._compute_encryption_key(user_password_string)
key_rev_3: bytes = self._compute_encryption_key(user_password_string)

# b) Initialize the MD5 hash function and pass the 32-byte padding string shown in step (a) of "Algorithm 2:
# Computing an encryption key" as input to this function.
Expand All @@ -322,15 +344,15 @@ def _compute_encryption_dictionary_u_value(self, user_password_string: bytes):

# d) Encrypt the 16-byte result of the hash, using an RC4 encryption function with the encryption key from step
# (a).
digest = RC4().encrypt(key, digest)
digest = RC4().encrypt(key_rev_3, digest)

# e) Do the following 19 times: Take the output from the previous invocation of the RC4 function and pass it as
# input to a new invocation of the function; use an encryption key generated by taking each byte of the
# original encryption key obtained in step (a) and performing an XOR (exclusive or) operation between that
# byte and the single-byte value of the iteration counter (from 1 to 19).
if self._revision >= 3:
for i in range(1, 20):
key2: bytes = bytes([b ^ i for b in key])
key2: bytes = bytes([b ^ i for b in key_rev_3])
digest = RC4().encrypt(key2, digest)

# f) Append 16 bytes of arbitrary padding to the output from the final invocation of the RC4 function and store
Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/font/font_dictionary_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
from typing import Any, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Dictionary, Stream
from borb.pdf.canvas.event.event_listener import EventListener
from borb.pdf.canvas.font.composite_font.cid_font_type_0 import CIDType0Font
Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/function/function_dictionary_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Any, Optional, Union

from borb.io.filter.stream_decode_util import decode_stream
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Dictionary, Function, Name, Reference, Stream
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/image/ccitt_fax_image_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from PIL import Image # type: ignore [import]

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Stream, add_base_methods
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/image/compressed_jpeg_image_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from PIL import Image # type: ignore [import]

from borb.io.filter.stream_decode_util import decode_stream
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Name, Stream, add_base_methods
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/image/grayscale_image_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from PIL import Image # type: ignore [import]

from borb.io.filter.stream_decode_util import decode_stream
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Reference, Stream, add_base_methods
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/image/jbig2_image_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from PIL import Image # type: ignore [import]

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Stream, add_base_methods
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/image/jpeg_2000_image_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from PIL import Image # type: ignore [import]

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Stream, add_base_methods
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/image/jpeg_image_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from PIL import Image # type: ignore [import]

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Stream, add_base_methods
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/object/array_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
from typing import Any, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, List
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/object/dictionary_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
from typing import Any, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Dictionary
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/object/stream_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Any, Optional, Union

from borb.io.filter.stream_decode_util import decode_stream
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Reference, Stream
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
17 changes: 11 additions & 6 deletions borb/io/read/page/page_dictionary_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import zlib
from typing import Any, Dict, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType
from borb.io.read.types import Decimal as pDecimal
from borb.io.read.types import Dictionary, List, Name, Stream
Expand Down Expand Up @@ -59,7 +59,9 @@ def transform(
# avoid circular reference
if k == "Parent":
continue
v = self.get_root_transformer().transform(v, page_out, context, [])
v = self.get_root_transformer().transform(
v, page_out, context, event_listeners
)
if v is not None:
page_out[k] = v

Expand Down Expand Up @@ -91,10 +93,13 @@ def transform(
# create Canvas
canvas = Canvas().set_parent(page_out) # type: ignore [attr-defined]

# create CanvasStreamProcessor
CanvasStreamProcessor(page_out, canvas, []).read(
io.BytesIO(contents["DecodedBytes"]), event_listeners
)
# If there are no event listeners, processing the page has no effect,
# so we may as well skip it (because it is very labour-intensive).
if len(event_listeners) > 0:
# create CanvasStreamProcessor
CanvasStreamProcessor(page_out, canvas, []).read(
io.BytesIO(contents["DecodedBytes"]), event_listeners
)

# send out EndPageEvent
for l in event_listeners:
Expand Down
5 changes: 4 additions & 1 deletion borb/io/read/page/root_dictionary_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Any, Dict, List, Optional, Union

from borb.io.read.object.dictionary_transformer import DictionaryTransformer
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Decimal, Dictionary
from borb.io.read.types import List as pList
from borb.io.read.types import Name
Expand All @@ -34,6 +34,9 @@ def can_be_transformed(
and object["Type"] == "Catalog"
)

def _re_order_pages(self):
pass

def transform(
self,
object_to_transform: Union[io.BufferedIOBase, io.RawIOBase, AnyPDFType],
Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/primitive/number_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
from typing import Any, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Decimal
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/primitive/string_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
from typing import Any, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, HexadecimalString, Name, String
from borb.pdf.canvas.event.event_listener import EventListener

Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/reference/reference_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import typing
from typing import Any, Optional, Union

from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Reference
from borb.pdf.canvas.event.event_listener import EventListener
from borb.pdf.xref.xref import XREF
Expand Down
2 changes: 1 addition & 1 deletion borb/io/read/reference/xref_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from borb.io.read.encryption.standard_security_handler import StandardSecurityHandler
from borb.io.read.tokenize.high_level_tokenizer import HighLevelTokenizer
from borb.io.read.transformer import Transformer, ReadTransformerState
from borb.io.read.transformer import ReadTransformerState, Transformer
from borb.io.read.types import AnyPDFType, Dictionary, Name
from borb.pdf.canvas.event.event_listener import Event, EventListener
from borb.pdf.document import Document
Expand Down
Loading

0 comments on commit 710eb5d

Please sign in to comment.