diff --git a/pre_commit_nb/base64_to_external_storage.py b/pre_commit_nb/base64_to_external_storage.py
new file mode 100644
index 0000000..f32aacd
--- /dev/null
+++ b/pre_commit_nb/base64_to_external_storage.py
@@ -0,0 +1,91 @@
+import argparse
+import mimetypes
+import os
+import urllib.request
+from typing import Optional, Sequence, Tuple
+from urllib.parse import urlparse
+
+from .common import process_nb, base64_string_to_bytes
+
+
+def base64_to_blob_storage(
+        base64_string: str,
+        az_blob_container_sas_url: str,
+        image_path: str) -> Tuple[int, str]:
+    print("Uploading image to blob storage...")
+    image_bytes = base64_string_to_bytes(base64_string)
+
+    o = urlparse(az_blob_container_sas_url)
+    # Remove first / from path
+    if o.path[0] == '/':
+        blob_storage_path = o.path[1:]
+    else:
+        blob_storage_path = o.path
+
+    storage_account = o.scheme + "://" + o.netloc + "/"
+    file_name_only = os.path.basename(image_path)
+
+    response_status, url_path = http_put(
+        storage_account, blob_storage_path,
+        file_name_only, o.query, image_path, image_bytes
+    )
+
+    return response_status, url_path
+
+
+def http_put(
+        storage_url: str, container_name: str, blob_name: str,
+        qry_string: str, image_name: str, image_bytes) -> Tuple[int, str]:
+
+    file_name_only = os.path.basename(image_name)
+
+    file_ext = os.path.splitext(file_name_only)[1]
+
+    url = storage_url + container_name + '/' + blob_name + '?' + qry_string
+
+    req = urllib.request.Request(
+        url, data=image_bytes, method='PUT',
+        headers={
+            'content-type': mimetypes.types_map[file_ext],
+            'x-ms-blob-type': 'BlockBlob'
+        })
+    response_code = urllib.request.urlopen(req).code
+    # response_code = requests.put(
+    #     url,
+    #     data=image_bytes,
+    #     headers={
+    #         'content-type': mimetypes.types_map[file_ext],
+    #         'x-ms-blob-type': 'BlockBlob'
+    #     },
+    #     params={'file': file_name_only}
+    # ).status_code
+    return response_code, url
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    parser.add_argument(
+        '--az-blob-container-url',
+        default=None,
+        help='If provided, images will be uploaded to an external Azure Blob Storage container rather than saved as local files')  # NOQA E501
+    parser.add_argument(
+        '--add-changes-to-staging',
+        default=False, action='store_true',
+        help='Automatically add new and changed files to staging')
+    parser.add_argument(
+        '--auto-commit-changes', default=False, action='store_true',
+        help='Automatically commit added and changed files in staging')
+    args = parser.parse_args(argv)
+
+    retv = 0
+
+    for filename in args.filenames:
+        return_value = process_nb(filename=filename, **vars(args))
+        retv |= return_value
+
+    return retv
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/pre_commit_nb/base64_to_image_files.py b/pre_commit_nb/base64_to_image_files.py
index 1c548a8..2677536 100644
--- a/pre_commit_nb/base64_to_image_files.py
+++ b/pre_commit_nb/base64_to_image_files.py
@@ -1,169 +1,19 @@
 import argparse
-import base64
-import mimetypes
 import os
-import re
-import subprocess
-import urllib.request
-import uuid
 from typing import Optional, Sequence
-from urllib.parse import urlparse
-
-
-def base64_to_blob_storage(
-        base64_string: str,
-        sas_url: str,
-        image_path: str):
-    print("Uploading image to blob storage...")
-    image_bytes = base64.decodebytes(base64_string.encode())
-
-    o = urlparse(sas_url)
-    # Remove first / from path
-    if o.path[0] == '/':
-        blob_storage_path = o.path[1:]
-    else:
-        blob_storage_path = o.path
-
-    storage_account = o.scheme + "://" + o.netloc + "/"
-    file_name_only = os.path.basename(image_path)
-
-    response_status, url_path = put_blob(
-        storage_account, blob_storage_path,
-        file_name_only, o.query, image_path, image_bytes
-    )
-
-    if response_status >= 200 and response_status < 300:
-        print(f"Successfully uploaded image to blob storage: {url_path}")
-    else:
-        print(f"Uploading process failed with response code: {response_status}")  # NOQA E501
-
-    return url_path
-
-
-def put_blob(
-        storage_url: str, container_name: str, blob_name: str,
-        qry_string: str, image_name: str, image_bytes):
-
-    file_name_only = os.path.basename(image_name)
-
-    file_ext = os.path.splitext(file_name_only)[1]
-
-    url = storage_url + container_name + '/' + blob_name + '?' + qry_string
-
-    # with open(file_name_full_path, 'rb') as fh:
-    req = urllib.request.Request(
-        url, data=image_bytes, method='PUT',
-        headers={
-            'content-type': mimetypes.types_map[file_ext],
-            'x-ms-blob-type': 'BlockBlob'
-        })
-    response_code = urllib.request.urlopen(req).code
-    # response_code = requests.put(
-    #     url,
-    #     data=image_bytes,
-    #     headers={
-    #         'content-type': mimetypes.types_map[file_ext],
-    #         'x-ms-blob-type': 'BlockBlob'
-    #     },
-    #     params={'file': file_name_only}
-    # ).status_code
-    return response_code, url
+from .common import base64_string_to_bytes, process_nb
 
 
 def base64_to_local_file(base64_string: str, image_path: str):
     os.makedirs(os.path.dirname(image_path), exist_ok=True)
     with open(image_path, "wb") as fh:
-        fh.write(base64.decodebytes(base64_string.encode()))
-
-
-def create_nb_cell_output(url: str) -> str:
-    return """"text/html": [
-       "<img src=\\"%s\\"/>"
-      ]""" % url
-
-
-def process_nb(
-        filename: str,
-        add_changes_to_staging: bool,
-        auto_commit_changes: bool,
-        az_blob_container_url: str,
-        **kwargs
-        ) -> int:
-    print("==================")
-    print(add_changes_to_staging, auto_commit_changes)
-    print("Processing %s" % filename)
-    with open(filename, 'r') as file:
-        org_data = " ".join(file.readlines())
-        data = org_data
-        matches = re.findall(
-            r"\"image/(?:gif|png|jpeg|bmp|webp)\": \".*[a-zA-Z0-9+/=]\"",
-            data)
-
-        new_files = ""
-
-        for match in matches:
-            ext = "." + re.findall(r"image/[a-zA-Z]*", match)[0].split('/')[1]
+ re.findall(r"image/[a-zA-Z]*", match)[0].split('/')[1] - image_path = "nb_images" + "/" + str(uuid.uuid4()) + ext - - full_path = "./" + os.path.dirname(filename) + "/" + image_path - - base64_string = ( - match.split(':')[1] - .replace('"', '') - .replace(' ', '') - .replace('\\n', '') - ) - - if az_blob_container_url: - url_path = base64_to_blob_storage( - base64_string, az_blob_container_url, full_path - ) - else: - print("Converting base64 to image file and saving as %s" % full_path) # NOQA E501 - base64_to_local_file( - base64_string, full_path - ) - url_path = "./" + image_path - new_files += " " + full_path - - data = data.replace(match, create_nb_cell_output(url_path)) - - if org_data != data: - with open(filename, 'w') as file: - file.write(data) - new_files += " " + filename - new_files = new_files.strip() - - if add_changes_to_staging: - print("'--add_changes_to_staging' flag set to 'True' - added new and changed files to staging.") - git_add(new_files) - - if auto_commit_changes: - print("'--auto_commit_changes' flag set to 'True' - git hook set to return exit code 0.") - return 0 - - return 1 - else: - print("Didn't find any base64 strings...") - return 0 - - -def git_add(filenames: str): - process = subprocess.Popen( - ['git', 'add', *filenames.split()], - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate() + fh.write(base64_string_to_bytes(base64_string)) def main(argv: Optional[Sequence[str]] = None) -> int: parser = argparse.ArgumentParser() parser.add_argument('filenames', nargs='*', help='Filenames to fix') - parser.add_argument( - '--az-blob-container-url', - default=None, - help='If provided it will upload images to external Azure Blob Storage container rather than local files') # NOQA E501 parser.add_argument( '--add-changes-to-staging', default=False, action='store_true', @@ -176,10 +26,7 @@ def main(argv: Optional[Sequence[str]] = None) -> int: retv = 0 for filename in args.filenames: - # print(f'Processing {filename}') return_value = process_nb(filename=filename, **vars(args)) - # if return_value != 0: - # print(f'Done converting base64 strings to files for {filename}') retv |= return_value return retv diff --git a/pre_commit_nb/common.py b/pre_commit_nb/common.py new file mode 100644 index 0000000..d7c3c7e --- /dev/null +++ b/pre_commit_nb/common.py @@ -0,0 +1,99 @@ +import base64 +import os +import re +import subprocess +import uuid + +from .base64_to_external_storage import base64_to_blob_storage +from .base64_to_image_files import base64_to_local_file + + +def create_nb_cell_output(url: str) -> str: + return """"text/html": [ + "" + ]""" % url + + +def git_add(filenames: str): + process = subprocess.Popen( + ['git', 'add', *filenames.split()], + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + + +def base64_string_to_bytes(base64_string: str) -> bytes: + return base64.decodebytes(base64_string.encode()) + + +def process_nb( + filename: str, + add_changes_to_staging: bool, + auto_commit_changes: bool, + az_blob_container_url: str = None, + **kwargs + ) -> int: + print("==================") + print(add_changes_to_staging, auto_commit_changes) + print("Processing %s" % filename) + with open(filename, 'r') as file: + org_data = " ".join(file.readlines()) + data = org_data + matches = re.findall( + r"\"image/(?:gif|png|jpeg|bmp|webp)\": \".*[a-zA-Z0-9+/=]\"", + data) + + new_files = "" + + for match in matches: + ext = "." 
+ re.findall(r"image/[a-zA-Z]*", match)[0].split('/')[1] + image_path = "nb_images" + "/" + str(uuid.uuid4()) + ext + + full_path = "./" + os.path.dirname(filename) + "/" + image_path + + base64_string = ( + match.split(':')[1] + .replace('"', '') + .replace(' ', '') + .replace('\\n', '') + ) + + if az_blob_container_url: + response_status, url_path = base64_to_blob_storage( + base64_string, az_blob_container_url, full_path + ) + + if response_status >= 200 and response_status < 300: + print(f"Successfully uploaded image to blob storage: {url_path}") # NOQA E501 + else: + print(f"Uploading process failed with response code: {response_status}") # NOQA E501 + else: + print("Converting base64 to image file and saving as %s" % full_path) # NOQA E501 + base64_to_local_file( + base64_string, full_path + ) + url_path = "./" + image_path + new_files += " " + full_path + + data = data.replace(match, create_nb_cell_output(url_path)) + + if org_data != data: + with open(filename, 'w') as file: + file.write(data) + new_files += " " + filename + new_files = new_files.strip() + + if add_changes_to_staging: + print("'--add_changes_to_staging' flag set to 'True' - adding new and changed files to staging...") # NOQA E501 + print(new_files) + git_add(new_files) + + if auto_commit_changes: + print("'--auto_commit_changes' flag set to 'True' - git hook set to return exit code 0.") # NOQA E501 + return 0 + + return 1 + else: + print("Didn't find any base64 strings...") + return 0 diff --git a/setup.py b/setup.py index b03474d..8feefb7 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,8 @@ setup( name="pre_commit_nb", - version="0.2.1a4", - description="Set of git pre-commit hooks for Jupyter Notebooks compatible with https://pre-commit.com/ framework", + version="0.2.2a0", + description="Set of git pre-commit hooks for Jupyter Notebooks compatible with https://pre-commit.com/ framework", # NOQA E501 long_description=long_description, long_description_content_type="text/markdown", # This is important! url="http://github.com/karolzak/pre-commit-nb", @@ -32,7 +32,8 @@ install_requires=[], entry_points={ "console_scripts": [ - "base64-to-image-files = pre_commit_nb.base64_to_image_files:main" + "base64-to-image-files = pre_commit_nb.base64_to_image_files:main", + "base64-to-external-storage = pre_commit_nb.base64_to_external_storage:main" # NOQA E501 ] } )