"""Pre-commit hook: convert base64-encoded images embedded in Jupyter
notebook outputs into standalone image files referenced by the notebook."""
import argparse
import base64
import os
import re
import subprocess
import uuid
from typing import List
from typing import Optional
from typing import Sequence


def base64_to_image_file(base64_string: str, image_path: str):
    """Decode *base64_string* and write the raw bytes to *image_path*.

    Parent directories are created on demand so callers can pass a path
    under a not-yet-existing ``nb_images/`` folder.
    """
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    with open(image_path, "wb") as fh:
        # decodebytes tolerates the newlines that notebooks embed in
        # base64 payloads.
        fh.write(base64.decodebytes(base64_string.encode()))


def create_nb_cell_output(url: str) -> str:
    """Return a notebook ``text/html`` cell-output fragment that loads the
    image from *url* instead of an inline base64 payload.

    NOTE(review): the original source was garbled at this point (the HTML
    tag inside the template was stripped); reconstructed as an ``<img>``
    tag, which is what the ``% url`` substitution requires — confirm
    against upstream.
    """
    return """"text/html": [
       "<img src=\\"%s\\"/>"
      ]""" % url
def process_nb(
        filename: str,
        add_changes_to_staging: bool,
        auto_commit_changes: bool,
        **kwargs
        ) -> int:
    """Replace base64-encoded images in the notebook *filename* with files.

    Scans the notebook text for base64 image payloads, writes each one to
    a uniquely-named file under ``nb_images/`` next to the notebook, and
    rewrites the cell output to reference that file instead.

    ``**kwargs`` absorbs the extra argparse namespace entries (e.g.
    ``filenames``) that :func:`main` forwards via ``**vars(args)``.

    Returns 0 when nothing changed, or when changes were made and
    ``auto_commit_changes`` allows the hook to pass; returns 1 when the
    notebook was modified and the commit should be blocked for review.
    """
    print("==================")
    print("Processing %s" % filename)
    with open(filename, 'r') as file:
        # Join lines with spaces; JSON tolerates the extra whitespace
        # between tokens.
        data = " ".join(file.readlines())
    # Matches notebook output entries like "image/png": "<base64...>".
    matches = re.findall(
        r"\"image/(?:gif|png|jpeg|bmp|webp)\": \".*[a-zA-Z0-9+/=]\"",
        data)

    new_files = ""

    for match in matches:
        # e.g. 'image/png' -> '.png'
        ext = "." + re.findall(r"image/[a-zA-Z]*", match)[0].split('/')[1]
        image_path = "nb_images" + "/" + str(uuid.uuid4()) + ext

        full_path = "./" + os.path.dirname(filename) + "/" + image_path
        print(
            "Base64 string found. Converting it to image file "
            "and saving as %s" % full_path)
        # Strip the JSON key/quoting and the escaped newlines ('\n' as
        # two characters) that notebooks embed in base64 payloads.
        base64_string = match.split(':')[1] \
            .replace('"', '').replace(' ', '').replace('\\n', '')
        base64_to_image_file(base64_string, full_path)
        new_files += " " + full_path

        # The notebook references the image relative to its own folder.
        url_path = "./" + image_path
        data = data.replace(match, create_nb_cell_output(url_path))

    if len(new_files) > 0:
        with open(filename, 'w') as file:
            file.write(data)
        new_files += " " + filename

        if add_changes_to_staging:
            print(
                "'--add_changes_to_staging' flag set to 'True' - "
                "added new and changed files to staging.")
            git_add(new_files)

        if auto_commit_changes:
            print(
                "'--auto_commit_changes' flag set to 'True' - "
                "git hook set to return exit code 0.")
            return 0

        return 1
    else:
        print("Didn't find any base64 strings...")
        return 0


def git_add(filenames: str):
    """Stage the whitespace-separated paths in *filenames* via ``git add``.

    BUG FIX: the original used ``subprocess.Popen([...], shell=True)``;
    passing an argument *list* together with ``shell=True`` on POSIX runs
    only ``git`` through the shell and silently drops the arguments, so
    nothing was ever staged. Run the argument vector directly instead.
    """
    subprocess.run(
        ['git', 'add', *filenames.split()],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """CLI entry point: process each notebook and OR the exit codes.

    Returns non-zero (blocking the commit) if any notebook was modified
    and auto-commit was not requested.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*',
                        help='Filenames to fix')
    parser.add_argument(
        '--add_changes_to_staging',
        default=False, action='store_true',
        help='Automatically add new and changed files to staging')
    parser.add_argument(
        '--auto_commit_changes', default=False, action='store_true',
        help='Automatically commits added and changed files in staging')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        # Forward the whole namespace; process_nb's **kwargs swallows
        # the extra 'filenames' entry.
        return_value = process_nb(filename=filename, **vars(args))
        retv |= return_value

    return retv