Skip to content

Commit

Permalink
ref code
Browse files Browse the repository at this point in the history
  • Loading branch information
karolzak committed Sep 7, 2020
1 parent a88018e commit d7fd295
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 158 deletions.
91 changes: 91 additions & 0 deletions pre_commit_nb/base64_to_external_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import argparse
import mimetypes
import os
import urllib.request
from typing import Optional, Sequence
from urllib.parse import urlparse

from .common import process_nb, base64_string_to_bytes


def base64_to_blob_storage(
        base64_string: str,
        az_blob_container_sas_url: str,
        image_path: str) -> (int, str):
    """Decode a base64 image and upload it to an Azure Blob Storage container.

    Args:
        base64_string: Base64-encoded image payload.
        az_blob_container_sas_url: Container URL; its path is used as the
            container name and its query string is forwarded to the upload
            request unchanged.
        image_path: Path of the image; only its base name is used as the
            blob name, and its extension selects the content type.

    Returns:
        A ``(response_status, url_path)`` tuple exactly as returned by
        ``http_put``.

    Raises:
        ValueError: If the URL carries no container path.
    """
    print("Uploading image to blob storage...")

    parsed = urlparse(az_blob_container_sas_url)
    # Drop a single leading "/" from the path. startswith() is safe on an
    # empty path, where indexing [0] would raise IndexError.
    if parsed.path.startswith('/'):
        blob_storage_path = parsed.path[1:]
    else:
        blob_storage_path = parsed.path

    # Fail early and clearly instead of sending a request to a malformed URL.
    if not blob_storage_path:
        raise ValueError(
            "Blob container URL has no container path: %r"
            % az_blob_container_sas_url)

    image_bytes = base64_string_to_bytes(base64_string)

    storage_account = parsed.scheme + "://" + parsed.netloc + "/"
    file_name_only = os.path.basename(image_path)

    response_status, url_path = http_put(
        storage_account, blob_storage_path,
        file_name_only, parsed.query, image_path, image_bytes
    )

    return response_status, url_path


def http_put(
        storage_url: str, container_name: str, blob_name: str,
        qry_string: str, image_name: str, image_bytes) -> (int, str):
    """Upload raw image bytes with a single HTTP PUT request.

    Args:
        storage_url: Base URL, expected to end with "/".
        container_name: Path segment placed right after ``storage_url``.
        blob_name: Name of the blob inside the container.
        qry_string: Query string appended after "?".
        image_name: Image file name/path; only its extension is used, to
            pick the ``content-type`` header.
        image_bytes: Request body.

    Returns:
        A ``(response_code, url)`` tuple. ``url`` is the full request URL,
        including the query string.

    Raises:
        ValueError: If the assembled URL has no scheme.
        urllib.error.URLError: If the request fails (this includes
            ``HTTPError`` for error status codes, per the urllib docs).
    """
    file_ext = os.path.splitext(os.path.basename(image_name))[1]
    # .get() with a generic fallback: indexing types_map raised KeyError for
    # extensions missing from the table (or written in upper case).
    content_type = mimetypes.types_map.get(
        file_ext.lower(), 'application/octet-stream')

    # NOTE(review): the returned URL still contains the query string
    # (presumably a SAS token) -- confirm callers may expose it.
    url = storage_url + container_name + '/' + blob_name + '?' + qry_string

    req = urllib.request.Request(
        url, data=image_bytes, method='PUT',
        headers={
            'content-type': content_type,
            'x-ms-blob-type': 'BlockBlob'
        })
    # The context manager closes the HTTP response/connection deterministically.
    with urllib.request.urlopen(req) as response:
        response_code = response.code
    return response_code, url


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point: run ``process_nb`` on every given file.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The bitwise OR of the values returned by ``process_nb`` for each
        file (0 when no file names are given).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filenames', nargs='*', help='Filenames to fix')
    cli.add_argument(
        '--az-blob-container-url', default=None,
        help='If provided it will upload images to external Azure Blob Storage container rather than local files')  # NOQA E501
    cli.add_argument(
        '--add-changes-to-staging', action='store_true', default=False,
        help='Automatically add new and changed files to staging')
    cli.add_argument(
        '--auto-commit-changes', action='store_true', default=False,
        help='Automatically commits added and changed files in staging')
    options = cli.parse_args(argv)

    # Every parsed option is forwarded as a keyword argument.
    forwarded = vars(options)

    exit_code = 0
    for notebook in options.filenames:
        exit_code |= process_nb(filename=notebook, **forwarded)
    return exit_code


if __name__ == '__main__':
    # exit() is a helper injected by the site module and may be missing
    # (e.g. `python -S`); raising SystemExit works everywhere.
    raise SystemExit(main())
157 changes: 2 additions & 155 deletions pre_commit_nb/base64_to_image_files.py
Original file line number Diff line number Diff line change
@@ -1,169 +1,19 @@
import argparse
import base64
import mimetypes
import os
import re
import subprocess
import urllib.request
import uuid
from typing import Optional, Sequence
from urllib.parse import urlparse


def base64_to_blob_storage(
        base64_string: str,
        sas_url: str,
        image_path: str):
    """Decode a base64 image, upload it to blob storage and report the result.

    Args:
        base64_string: Base64-encoded image payload.
        sas_url: Container URL; its path is used as the container name and
            its query string is forwarded to the upload request unchanged.
        image_path: Path of the image; only its base name is used as the
            blob name.

    Returns:
        The URL returned by ``put_blob``.

    Raises:
        ValueError: If the URL carries no container path.
    """
    print("Uploading image to blob storage...")

    parsed = urlparse(sas_url)
    # Drop a single leading "/" from the path. startswith() is safe on an
    # empty path, where indexing [0] would raise IndexError.
    if parsed.path.startswith('/'):
        blob_storage_path = parsed.path[1:]
    else:
        blob_storage_path = parsed.path

    # Fail early and clearly instead of sending a request to a malformed URL.
    if not blob_storage_path:
        raise ValueError(
            "Blob container URL has no container path: %r" % sas_url)

    image_bytes = base64.decodebytes(base64_string.encode())

    storage_account = parsed.scheme + "://" + parsed.netloc + "/"
    file_name_only = os.path.basename(image_path)

    response_status, url_path = put_blob(
        storage_account, blob_storage_path,
        file_name_only, parsed.query, image_path, image_bytes
    )

    if 200 <= response_status < 300:
        print(f"Successfully uploaded image to blob storage: {url_path}")
    else:
        print(f"Uploading process failed with response code: {response_status}")  # NOQA E501

    return url_path


def put_blob(
        storage_url: str, container_name: str, blob_name: str,
        qry_string: str, image_name: str, image_bytes):
    """Upload raw image bytes with a single HTTP PUT request.

    Args:
        storage_url: Base URL, expected to end with "/".
        container_name: Path segment placed right after ``storage_url``.
        blob_name: Name of the blob inside the container.
        qry_string: Query string appended after "?".
        image_name: Image file name/path; only its extension is used, to
            pick the ``content-type`` header.
        image_bytes: Request body.

    Returns:
        A ``(response_code, url)`` tuple. ``url`` is the full request URL,
        including the query string.

    Raises:
        ValueError: If the assembled URL has no scheme.
        urllib.error.URLError: If the request fails (this includes
            ``HTTPError`` for error status codes, per the urllib docs).
    """
    file_ext = os.path.splitext(os.path.basename(image_name))[1]
    # .get() with a generic fallback: indexing types_map raised KeyError for
    # extensions missing from the table (or written in upper case).
    content_type = mimetypes.types_map.get(
        file_ext.lower(), 'application/octet-stream')

    url = storage_url + container_name + '/' + blob_name + '?' + qry_string

    req = urllib.request.Request(
        url, data=image_bytes, method='PUT',
        headers={
            'content-type': content_type,
            'x-ms-blob-type': 'BlockBlob'
        })
    # The context manager closes the HTTP response/connection deterministically.
    with urllib.request.urlopen(req) as response:
        response_code = response.code
    return response_code, url
from .common import base64_string_to_bytes, process_nb


def base64_to_local_file(base64_string: str, image_path: str):
    """Decode a base64 string and write the bytes to ``image_path``.

    Missing parent directories are created first.

    Args:
        base64_string: Base64-encoded file content.
        image_path: Destination file path; an existing file is overwritten.
    """
    target_dir = os.path.dirname(image_path)
    # A bare file name has no directory part, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(image_path, "wb") as fh:
        fh.write(base64.decodebytes(base64_string.encode()))


def create_nb_cell_output(url: str) -> str:
    """Build the notebook JSON fragment that shows ``url`` as an HTML image.

    The result is a ``"text/html"`` entry whose single line is an ``<img>``
    tag; the tag's quotes are backslash-escaped because the fragment is
    pasted into JSON text.
    """
    template = '"text/html": [\n"<img src=\\"%s\\"/>"\n]'
    return template % url


def process_nb(
        filename: str,
        add_changes_to_staging: bool,
        auto_commit_changes: bool,
        az_blob_container_url: str,
        **kwargs
        ) -> int:
    """Move base64-embedded images out of a notebook file.

    Every ``"image/<type>": "<base64>"`` entry found in the file is either
    uploaded to blob storage (when ``az_blob_container_url`` is truthy) or
    written to ``nb_images/<uuid>.<type>`` next to the notebook, and the
    entry is replaced by a ``"text/html"`` ``<img>`` reference.

    Args:
        filename: Path of the notebook to process.
        add_changes_to_staging: Run ``git add`` on new and changed files.
        auto_commit_changes: When staging is enabled too, return 0 for a
            modified notebook.
        az_blob_container_url: Blob container URL, or a falsy value to
            store images locally.
        **kwargs: Ignored; lets callers pass extra parsed options.

    Returns:
        0 if nothing was changed (or staged with ``auto_commit_changes``),
        otherwise 1.
    """
    print("==================")
    print(add_changes_to_staging, auto_commit_changes)
    print("Processing %s" % filename)
    # Explicit UTF-8 avoids depending on the platform default encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        # read() keeps the text verbatim; joining readlines() with " "
        # prefixed every line after the first with an extra space.
        org_data = file.read()
    data = org_data
    matches = re.findall(
        r"\"image/(?:gif|png|jpeg|bmp|webp)\": \".*[a-zA-Z0-9+/=]\"",
        data)

    # Space-separated list of paths that is later handed to git_add().
    new_files = ""

    for match in matches:
        ext = "." + re.findall(r"image/[a-zA-Z]*", match)[0].split('/')[1]
        image_path = "nb_images" + "/" + str(uuid.uuid4()) + ext

        full_path = "./" + os.path.dirname(filename) + "/" + image_path

        # Keep only the payload: drop quotes, spaces and literal "\n" text.
        base64_string = (
            match.split(':')[1]
            .replace('"', '')
            .replace(' ', '')
            .replace('\\n', '')
        )

        if az_blob_container_url:
            url_path = base64_to_blob_storage(
                base64_string, az_blob_container_url, full_path
            )
        else:
            print("Converting base64 to image file and saving as %s" % full_path)  # NOQA E501
            base64_to_local_file(
                base64_string, full_path
            )
            url_path = "./" + image_path
            new_files += " " + full_path

        data = data.replace(match, create_nb_cell_output(url_path))

    if org_data == data:
        print("Didn't find any base64 strings...")
        return 0

    with open(filename, 'w', encoding='utf-8') as file:
        file.write(data)
    new_files = (new_files + " " + filename).strip()

    if add_changes_to_staging:
        print("'--add_changes_to_staging' flag set to 'True' - added new and changed files to staging.")
        git_add(new_files)

        if auto_commit_changes:
            print("'--auto_commit_changes' flag set to 'True' - git hook set to return exit code 0.")
            return 0

    return 1


def git_add(filenames: str):
    """Stage files with ``git add``.

    Args:
        filenames: Whitespace-separated list of paths to stage.
    """
    paths = filenames.split()
    if not paths:
        # Nothing to stage; skip spawning git.
        return

    try:
        # No shell: with shell=True and a list, POSIX passes only "git" to
        # the shell and the remaining items are not given to git at all.
        # "--" keeps paths starting with "-" from being read as options.
        completed = subprocess.run(
            ['git', 'add', '--', *paths],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    except OSError as err:
        print("Could not run 'git add': %s" % err)
        return

    if completed.returncode != 0:
        print("'git add' failed: %s"
              % completed.stderr.decode(errors='replace').strip())
fh.write(base64_string_to_bytes(base64_string))


def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
parser.add_argument(
'--az-blob-container-url',
default=None,
help='If provided it will upload images to external Azure Blob Storage container rather than local files') # NOQA E501
parser.add_argument(
'--add-changes-to-staging',
default=False, action='store_true',
Expand All @@ -176,10 +26,7 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
retv = 0

for filename in args.filenames:
# print(f'Processing {filename}')
return_value = process_nb(filename=filename, **vars(args))
# if return_value != 0:
# print(f'Done converting base64 strings to files for {filename}')
retv |= return_value

return retv
Expand Down
99 changes: 99 additions & 0 deletions pre_commit_nb/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import base64
import os
import re
import subprocess
import uuid

from .base64_to_external_storage import base64_to_blob_storage
from .base64_to_image_files import base64_to_local_file


def create_nb_cell_output(url: str) -> str:
    """Build the notebook JSON fragment that shows ``url`` as an HTML image.

    The result is a ``"text/html"`` entry whose single line is an ``<img>``
    tag; the tag's quotes are backslash-escaped because the fragment is
    pasted into JSON text.
    """
    template = '"text/html": [\n"<img src=\\"%s\\"/>"\n]'
    return template % url


def git_add(filenames: str):
    """Stage files with ``git add``.

    Args:
        filenames: Whitespace-separated list of paths to stage.
    """
    paths = filenames.split()
    if not paths:
        # Nothing to stage; skip spawning git.
        return

    try:
        # No shell: with shell=True and a list, POSIX passes only "git" to
        # the shell and the remaining items are not given to git at all.
        # "--" keeps paths starting with "-" from being read as options.
        completed = subprocess.run(
            ['git', 'add', '--', *paths],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    except OSError as err:
        print("Could not run 'git add': %s" % err)
        return

    if completed.returncode != 0:
        print("'git add' failed: %s"
              % completed.stderr.decode(errors='replace').strip())


def base64_string_to_bytes(base64_string: str) -> bytes:
    """Decode base64 text into the raw bytes it represents."""
    encoded = base64_string.encode()
    decoded = base64.decodebytes(encoded)
    return decoded


def process_nb(
        filename: str,
        add_changes_to_staging: bool,
        auto_commit_changes: bool,
        az_blob_container_url: str = None,
        **kwargs
        ) -> int:
    """Move base64-embedded images out of a notebook file.

    Every ``"image/<type>": "<base64>"`` entry found in the file is either
    uploaded to blob storage (when ``az_blob_container_url`` is truthy) or
    written to ``nb_images/<uuid>.<type>`` next to the notebook, and the
    entry is replaced by a ``"text/html"`` ``<img>`` reference.

    NOTE(review): this module imports ``base64_to_blob_storage`` and
    ``base64_to_local_file`` at top level from modules that import from
    this one in turn; that looks like a circular import at load time --
    confirm, and consider importing them inside this function instead.

    Args:
        filename: Path of the notebook to process.
        add_changes_to_staging: Run ``git add`` on new and changed files.
        auto_commit_changes: When staging is enabled too, return 0 for a
            modified notebook.
        az_blob_container_url: Blob container URL, or a falsy value to
            store images locally.
        **kwargs: Ignored; lets callers pass extra parsed options.

    Returns:
        0 if nothing needed changing (or changes were staged with
        ``auto_commit_changes``), otherwise 1.
    """
    print("==================")
    print(add_changes_to_staging, auto_commit_changes)
    print("Processing %s" % filename)
    # Explicit UTF-8 avoids depending on the platform default encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        # read() keeps the text verbatim; joining readlines() with " "
        # prefixed every line after the first with an extra space.
        org_data = file.read()
    data = org_data
    matches = re.findall(
        r"\"image/(?:gif|png|jpeg|bmp|webp)\": \".*[a-zA-Z0-9+/=]\"",
        data)

    # Space-separated list of paths that is later handed to git_add().
    new_files = ""
    upload_failed = False

    for match in matches:
        ext = "." + re.findall(r"image/[a-zA-Z]*", match)[0].split('/')[1]
        image_path = "nb_images" + "/" + str(uuid.uuid4()) + ext

        full_path = "./" + os.path.dirname(filename) + "/" + image_path

        # Keep only the payload: drop quotes, spaces and literal "\n" text.
        base64_string = (
            match.split(':')[1]
            .replace('"', '')
            .replace(' ', '')
            .replace('\\n', '')
        )

        if az_blob_container_url:
            response_status, url_path = base64_to_blob_storage(
                base64_string, az_blob_container_url, full_path
            )

            if 200 <= response_status < 300:
                print(f"Successfully uploaded image to blob storage: {url_path}")  # NOQA E501
            else:
                print(f"Uploading process failed with response code: {response_status}")  # NOQA E501
                # Keep the embedded image: replacing it with a link to a
                # blob that was never stored would lose the picture.
                upload_failed = True
                continue
        else:
            print("Converting base64 to image file and saving as %s" % full_path)  # NOQA E501
            base64_to_local_file(
                base64_string, full_path
            )
            url_path = "./" + image_path
            new_files += " " + full_path

        data = data.replace(match, create_nb_cell_output(url_path))

    if org_data == data:
        if upload_failed:
            # Images were found but none could be moved out.
            return 1
        print("Didn't find any base64 strings...")
        return 0

    with open(filename, 'w', encoding='utf-8') as file:
        file.write(data)
    new_files = (new_files + " " + filename).strip()

    if add_changes_to_staging:
        print("'--add_changes_to_staging' flag set to 'True' - adding new and changed files to staging...")  # NOQA E501
        print(new_files)
        git_add(new_files)

        if auto_commit_changes:
            print("'--auto_commit_changes' flag set to 'True' - git hook set to return exit code 0.")  # NOQA E501
            return 0

    return 1
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

setup(
name="pre_commit_nb",
version="0.2.1a4",
description="Set of git pre-commit hooks for Jupyter Notebooks compatible with https://pre-commit.com/ framework",
version="0.2.2a0",
description="Set of git pre-commit hooks for Jupyter Notebooks compatible with https://pre-commit.com/ framework", # NOQA E501
long_description=long_description,
long_description_content_type="text/markdown", # This is important!
url="http://github.com/karolzak/pre-commit-nb",
Expand All @@ -32,7 +32,8 @@
install_requires=[],
entry_points={
"console_scripts": [
"base64-to-image-files = pre_commit_nb.base64_to_image_files:main"
"base64-to-image-files = pre_commit_nb.base64_to_image_files:main",
"base64-to-external-storage = pre_commit_nb.base64_to_external_storage:main" # NOQA E501
]
}
)

0 comments on commit d7fd295

Please sign in to comment.