Skip to content

Commit

Permalink
release v2.1.21
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisschellekens committed Jan 22, 2024
1 parent 4365c9e commit 9250dd5
Show file tree
Hide file tree
Showing 856 changed files with 3,138 additions and 1,974 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,6 @@ dmypy.json
.prof

# secrets
tests/secrets.py
tests/borb_secrets.py

# End of https://www.toptal.com/developers/gitignore/api/python,pycharm
26 changes: 13 additions & 13 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://borbpdf.com. For AGPL licensing, see below.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://borbpdf.com. For AGPL licensing, see below.

AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[![Corpus Coverage : 100.0%](https://img.shields.io/badge/corpus%20coverage-100.0%25-green)]()
[![Text Extraction : 93.1%](https://img.shields.io/badge/text%20extraction-93.1%25-green)]()
[![Public Method Documentation : 100%](https://img.shields.io/badge/public%20method%20documentation-100%25-green)]()
[![Number of Tests : 735](https://img.shields.io/badge/number%20of%20tests-735-green)]()
[![Number of Tests : 751](https://img.shields.io/badge/number%20of%20tests-751-green)]()
[![Python : 3.8 | 3.9 | 3.10 ](https://img.shields.io/badge/python-3.8%20&#124;%203.9%20&#124;%203.10-green)]()

[![Downloads](https://pepy.tech/badge/borb)](https://pepy.tech/project/borb)
Expand Down
9 changes: 9 additions & 0 deletions SECURITY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Reporting Security Issues

The `borb` team and community take security bugs in `borb` seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions.

To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/jorisschellekens/borb/security/advisories/new) tab.

The `borb` team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.

Report security bugs in third-party modules to the person or team maintaining the module.
12 changes: 7 additions & 5 deletions borb/io/filter/lzw_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ class bitarray:
#

def __init__(self, input: bytes):
self._src: bytes = input
self._pos: int = -1
self._buffer: typing.List[int] = []
self._default_to_return: int = 256
self._pos: int = -1
self._src: bytes = input

#
# PRIVATE
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(self):
# PRIVATE
#

def _add_to_lookup_table(self, prev_bytes: bytearray, new_bytes: bytes):
def _add_to_lookup_table(self, new_bytes: bytes, prev_bytes: bytearray):
self._lookup_table[self._table_index] = prev_bytes + new_bytes
self._table_index += 1
if self._table_index == 511:
Expand Down Expand Up @@ -126,13 +126,15 @@ def decode(self, input: bytes):
if code < self._table_index:
x = self._lookup_table[code]
bytes_out += x
self._add_to_lookup_table(self._lookup_table[prev_code], x[0:1])
self._add_to_lookup_table(
new_bytes=x[0:1], prev_bytes=self._lookup_table[prev_code]
)
prev_code = code
else:
x = self._lookup_table[prev_code]
x = x + x[0:1]
bytes_out += x
self._add_to_lookup_table(x, bytearray())
self._add_to_lookup_table(new_bytes=bytearray(), prev_bytes=x)
prev_code = code

# return bytes
Expand Down
5 changes: 5 additions & 0 deletions borb/io/filter/stream_decode_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ def decode_stream(s: Stream) -> Stream:
assert ("Bytes" in s), "decode_stream only works on Stream objects with a `Bytes` key."
# fmt: on

# IF stream already has /DecodedBytes
# THEN return stream
if "DecodedBytes" in s:
return s

# determine filter(s) to apply
filters: typing.List[str] = []
if "Filter" in s:
Expand Down
12 changes: 6 additions & 6 deletions borb/io/read/any_object_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,11 @@
import io
import typing

# fmt: off
from borb.io.read.font.font_dictionary_transformer import FontDictionaryTransformer
from borb.io.read.function.function_dictionary_transformer import (
FunctionDictionaryTransformer,
)
from borb.io.read.function.function_dictionary_transformer import FunctionDictionaryTransformer
from borb.io.read.image.ccitt_fax_image_transformer import CCITTFaxImageTransformer
from borb.io.read.image.compressed_jpeg_image_transformer import (
CompressedJPEGImageTransformer,
)
from borb.io.read.image.compressed_jpeg_image_transformer import CompressedJPEGImageTransformer
from borb.io.read.image.grayscale_image_transformer import GrayscaleImageTransformer
from borb.io.read.image.jbig2_image_transformer import JBIG2ImageTransformer
from borb.io.read.image.jpeg_2000_image_transformer import JPEG2000ImageTransformer
Expand All @@ -36,6 +33,9 @@
from borb.pdf.canvas.event.event_listener import EventListener


# fmt: on


class AnyObjectTransformer(Transformer):
"""
This implementation of ReadBaseTransformer aggregates all other implementations
Expand Down
185 changes: 92 additions & 93 deletions borb/io/read/encryption/standard_security_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
"""
import hashlib
import typing
import zlib

from borb.io.read.encryption.rc4 import RC4
from borb.io.read.pdf_object import PDFObject
from borb.io.read.types import AnyPDFType
from borb.io.read.types import Boolean
from borb.io.read.types import Decimal as bDecimal
Expand Down Expand Up @@ -88,7 +86,7 @@ def __init__(
assert len(self._o) == 32

# /ID
trailer: typing.Optional[PDFObject] = encryption_dictionary.get_parent()
trailer: typing.Optional["PDFObject"] = encryption_dictionary.get_parent()
assert trailer is not None
assert isinstance(trailer, Dictionary)
if "ID" in trailer:
Expand Down Expand Up @@ -122,10 +120,10 @@ def __init__(
# verify password(s)
password: typing.Optional[bytes] = None
if user_password is not None:
self._authenticate_user_password(bytes(user_password, encoding="charmap"))
self.authenticate_user_password(bytes(user_password, encoding="charmap"))
password = bytes(user_password, encoding="charmap")
if owner_password is not None:
self._authenticate_owner_password(bytes(owner_password, encoding="charmap"))
self.authenticate_owner_password(bytes(owner_password, encoding="charmap"))
password = bytes(owner_password, encoding="charmap")

# calculate encryption_key
Expand All @@ -136,46 +134,6 @@ def __init__(
# PRIVATE
#

def _authenticate_owner_password(self, owner_password: bytes) -> bool:
    """
    Algorithm 7: Authenticating the owner password

    This is a placeholder: none of the steps listed below is carried out,
    so every supplied password is rejected.

    The procedure that remains to be implemented is:

    a) Compute an encryption key from the supplied password string, as
       described in steps (a) to (d) of "Algorithm 3: Computing the
       encryption dictionary’s O (owner password) value".

    b) (Security handlers of revision 2 only) Decrypt the value of the
       encryption dictionary’s O entry, using an RC4 encryption function
       with the encryption key computed in step (a).
       (Security handlers of revision 3 or greater) Do the following 20
       times: Decrypt the value of the encryption dictionary’s O entry
       (first iteration) or the output from the previous iteration (all
       subsequent iterations), using an RC4 encryption function with a
       different encryption key at each iteration. The key shall be
       generated by taking the original key (obtained in step (a)) and
       performing an XOR (exclusive or) operation between each byte of the
       key and the single-byte value of the iteration counter (from 19 to 0).

    c) The result of step (b) purports to be the user password.
       Authenticate this user password using "Algorithm 6: Authenticating
       the user password". If it is correct, the password supplied is the
       correct owner password.

    :param owner_password:  the candidate owner password (currently unused)
    :return:                always False
    """
    # no part of Algorithm 7 is executed, hence the unconditional rejection
    is_owner_password: bool = False
    return is_owner_password

def _authenticate_user_password(self, user_password: bytes) -> bool:
    """
    Algorithm 6: Authenticating the user password

    a) Perform all but the last step of "Algorithm 4: Computing the
       encryption dictionary’s U (user password) value (Security handlers
       of revision 2)" or "Algorithm 5: Computing the encryption
       dictionary’s U (user password) value (Security handlers of revision
       3 or greater)" using the supplied password string.

    b) If the result of step (a) is equal to the value of the encryption
       dictionary’s U entry (comparing on the first 16 bytes in the case of
       security handlers of revision 3 or greater), the password supplied
       is the correct user password. The key obtained in step (a) shall be
       used to decrypt the document.

    :param user_password:   the candidate user password
    :return:                True if the candidate reproduces the stored /U value
    """
    # The helper is read back through self._u, so the stored value is parked
    # here and put back afterwards.
    stored_u_value: bytes = self._u
    try:
        self._compute_encryption_dictionary_u_value(user_password)
        computed_u_value: bytes = self._u
    finally:
        # restore /U even when the computation raises, so a failed attempt
        # cannot leave the handler holding a half-computed value
        self._u = stored_u_value

    # revision 3 or greater: only the first 16 bytes are significant
    if self._revision >= 3:
        return stored_u_value[0:16] == computed_u_value[0:16]
    return stored_u_value == computed_u_value

def _compute_encryption_dictionary_o_value(
self,
owner_password: typing.Optional[bytes],
Expand Down Expand Up @@ -346,16 +304,99 @@ def _compute_encryption_key(self, password: typing.Optional[bytes]) -> bytes:

return encryption_key

def _decrypt_data(self, object: AnyPDFType) -> AnyPDFType:
    """
    Decrypt `object` by handing it to `_encrypt_data` unchanged.

    NOTE(review): this presumably relies on the cipher being symmetric
    (the same operation encrypts and decrypts) - confirm against
    `_encrypt_data`.

    :param object:  the object to decrypt
    :return:        whatever `_encrypt_data` returns for `object`
    """
    decrypted_object = self._encrypt_data(object)
    return decrypted_object
@staticmethod
def _pad_or_truncate(b: typing.Optional[bytes]) -> bytes:
    """
    Bring `b` to a length of exactly 32 bytes.

    Shorter input is filled up with the leading bytes of a fixed 32-byte
    padding string, longer input is cut off after 32 bytes, and input of
    exactly 32 bytes is returned as is. `None` yields the padding string.

    :param b:   the bytes to normalize, or None
    :return:    32 bytes
    """
    # fmt: off
    padding_string: bytes = bytes([40, 191, 78, 94, 78, 117, 138, 65,
                                   100, 0, 78, 86, 255, 250, 1, 8,
                                   46, 46, 0, 182, 208, 104, 62, 128,
                                   47, 12, 169, 254, 100, 83, 105, 122])
    # fmt: on
    if b is None:
        return padding_string
    if len(b) == 32:
        return b
    # appending the padding and keeping the first 32 bytes covers both the
    # "too short" and the "too long" case
    return (b + padding_string)[0:32]

@staticmethod
def _str_to_bytes(s: typing.Optional[str]) -> typing.Optional[bytes]:
    """
    Encode `s` with the "charmap" codec, passing None through untouched.

    :param s:   the text to encode, or None
    :return:    the encoded bytes, or None if `s` is None
    """
    return None if s is None else s.encode("charmap")

def _encrypt_data(self, object: AnyPDFType) -> AnyPDFType:
@staticmethod
def _unescape_pdf_syntax(
    s: typing.Union[str, String, None]
) -> typing.Optional[str]:
    """
    Return the content bytes of `s`, decoded as latin1.

    A plain `str` is first wrapped in a `String` so that both input kinds
    go through the same `get_content_bytes` call.

    :param s:   a str, a String, or None
    :return:    the decoded content, or None if `s` is None
    """
    if s is None:
        return None
    pdf_string = s if isinstance(s, String) else String(s)
    return str(pdf_string.get_content_bytes(), encoding="latin1")

#
# PUBLIC
#

def authenticate_owner_password(self, owner_password: bytes) -> bool:
    """
    Algorithm 7: Authenticating the owner password

    This is a placeholder: none of the steps listed below is carried out,
    so every supplied password is rejected.

    The procedure that remains to be implemented is:

    a) Compute an encryption key from the supplied password string, as
       described in steps (a) to (d) of "Algorithm 3: Computing the
       encryption dictionary’s O (owner password) value".

    b) (Security handlers of revision 2 only) Decrypt the value of the
       encryption dictionary’s O entry, using an RC4 encryption function
       with the encryption key computed in step (a).
       (Security handlers of revision 3 or greater) Do the following 20
       times: Decrypt the value of the encryption dictionary’s O entry
       (first iteration) or the output from the previous iteration (all
       subsequent iterations), using an RC4 encryption function with a
       different encryption key at each iteration. The key shall be
       generated by taking the original key (obtained in step (a)) and
       performing an XOR (exclusive or) operation between each byte of the
       key and the single-byte value of the iteration counter (from 19 to 0).

    c) The result of step (b) purports to be the user password.
       Authenticate this user password using "Algorithm 6: Authenticating
       the user password". If it is correct, the password supplied is the
       correct owner password.

    :param owner_password:  the candidate owner password (currently unused)
    :return:                always False
    """
    # TODO
    # no part of Algorithm 7 is executed, hence the unconditional rejection
    is_owner_password: bool = False
    return is_owner_password

def authenticate_user_password(self, user_password: bytes) -> bool:
    """
    Algorithm 6: Authenticating the user password

    a) Perform all but the last step of "Algorithm 4: Computing the
       encryption dictionary’s U (user password) value (Security handlers
       of revision 2)" or "Algorithm 5: Computing the encryption
       dictionary’s U (user password) value (Security handlers of revision
       3 or greater)" using the supplied password string.

    b) If the result of step (a) is equal to the value of the encryption
       dictionary’s U entry (comparing on the first 16 bytes in the case of
       security handlers of revision 3 or greater), the password supplied
       is the correct user password. The key obtained in step (a) shall be
       used to decrypt the document.

    :param user_password:   the candidate user password
    :return:                True if the candidate reproduces the stored /U value
    """
    # The helper is read back through self._u, so the stored value is parked
    # here and put back afterwards.
    stored_u_value: bytes = self._u
    try:
        self._compute_encryption_dictionary_u_value(user_password)
        computed_u_value: bytes = self._u
    finally:
        # restore /U even when the computation raises, so a failed attempt
        # cannot leave the handler holding a half-computed value
        self._u = stored_u_value

    # revision 3 or greater: only the first 16 bytes are significant
    if self._revision >= 3:
        return stored_u_value[0:16] == computed_u_value[0:16]
    return stored_u_value == computed_u_value

def decrypt(self, object: AnyPDFType) -> AnyPDFType:
    """
    Decrypt `object` by handing it to `encrypt` unchanged.

    NOTE(review): this presumably relies on the cipher being symmetric
    (the same operation encrypts and decrypts) - confirm against `encrypt`.

    :param object:  the object to decrypt
    :return:        whatever `encrypt` returns for `object`
    """
    decrypted_object = self.encrypt(object)
    return decrypted_object

def encrypt(self, object: AnyPDFType) -> AnyPDFType:
# a) Obtain the object number and generation number from the object identifier of the string or stream to be
# encrypted (see 7.3.10, "Indirect Objects"). If the string is a direct object, use the identifier of the indirect
# object containing it.
reference: typing.Optional[Reference] = object.get_reference()
if reference is None:
parent: typing.Optional[PDFObject] = object.get_parent()
parent: typing.Optional["PDFObject"] = object.get_parent()
assert parent is not None
reference = parent.get_reference()
assert reference is not None
Expand Down Expand Up @@ -403,52 +444,10 @@ def _encrypt_data(self, object: AnyPDFType) -> AnyPDFType:
)
# TODO
if isinstance(object, Stream):
stream_new_content_bytes: bytes = RC4().encrypt(
h.digest()[0:n_plus_5], object["DecodedBytes"]
object[Name("Bytes")] = RC4().encrypt(
h.digest()[0:n_plus_5], object["Bytes"]
)
object[Name("DecodedBytes")] = stream_new_content_bytes
object[Name("Bytes")] = zlib.compress(object["DecodedBytes"], 9)
return object

# default
return object

@staticmethod
def _pad_or_truncate(b: typing.Optional[bytes]) -> bytes:
    """
    Bring `b` to a length of exactly 32 bytes.

    Shorter input is filled up with the leading bytes of a fixed 32-byte
    padding string, longer input is cut off after 32 bytes, and input of
    exactly 32 bytes is returned as is. `None` yields the padding string.

    :param b:   the bytes to normalize, or None
    :return:    32 bytes
    """
    # fmt: off
    padding_string: bytes = bytes([40, 191, 78, 94, 78, 117, 138, 65,
                                   100, 0, 78, 86, 255, 250, 1, 8,
                                   46, 46, 0, 182, 208, 104, 62, 128,
                                   47, 12, 169, 254, 100, 83, 105, 122])
    # fmt: on
    if b is None:
        return padding_string
    if len(b) == 32:
        return b
    # appending the padding and keeping the first 32 bytes covers both the
    # "too short" and the "too long" case
    return (b + padding_string)[0:32]

@staticmethod
def _str_to_bytes(s: typing.Optional[str]) -> typing.Optional[bytes]:
    """
    Encode `s` with the "charmap" codec, passing None through untouched.

    :param s:   the text to encode, or None
    :return:    the encoded bytes, or None if `s` is None
    """
    return None if s is None else s.encode("charmap")

@staticmethod
def _unescape_pdf_syntax(
    s: typing.Union[str, String, None]
) -> typing.Optional[str]:
    """
    Return the content bytes of `s`, decoded as latin1.

    A plain `str` is first wrapped in a `String` so that both input kinds
    go through the same `get_content_bytes` call.

    :param s:   a str, a String, or None
    :return:    the decoded content, or None if `s` is None
    """
    if s is None:
        return None
    pdf_string = s if isinstance(s, String) else String(s)
    return str(pdf_string.get_content_bytes(), encoding="latin1")

#
# PUBLIC
#
5 changes: 5 additions & 0 deletions borb/io/read/object/stream_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def transform(
v = xref.get_object(v, context.source, context.tokenizer)
object_to_transform[k] = v

# IF context.security_handler is present
# THEN apply decryption
if context.security_handler is not None:
object_to_transform = context.security_handler.decrypt(object_to_transform)

# apply filter(s)
object_to_transform = decode_stream(object_to_transform)

Expand Down
Loading

0 comments on commit 9250dd5

Please sign in to comment.