Extract deploy logic into seperate function

Both deploy_command and recalculate_checksum use very similar body. Only differences are: 1) Deploy deals with V, R, A migrations, while recalculation deals only with R migrations. 2) Deploy also applies the script, while recalculation on updates schemachangetables.
salemove · Feb 6, 2024 · 5c4b02c · 5c4b02c
1 parent a5267d2
commit 5c4b02c
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 105 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -7,7 +7,7 @@ jobs:
 
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: [3.8, 3.9]
         github-runner: ['ubuntu-latest', 'windows-latest']
 
     runs-on: ${{ matrix.github-runner }}

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
 Jinja2==3.0.0
 pandas==1.3.0
-PyYAML==5.4
-snowflake-connector-python==2.8.0
+PyYAML==6.0
+snowflake-connector-python==3.6.0
diff --git a/schemachange/cli.py b/schemachange/cli.py
@@ -470,8 +470,7 @@ def record_change_script(self, script, script_content, change_history_table, exe
     query = self._q_ch_log.format(**frmt_args)
     self.execute_snowflake_query(query)
 
-
-def deploy_command(config):
+def setup_session(config):
   req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse'])
   validate_auth_config(config, req_args)
 
@@ -481,11 +480,9 @@ def deploy_command(config):
   print(_log_config_details.format(**config))
 
   #connect to snowflake and maintain connection
-  session = SnowflakeSchemachangeSession(config)
-
-  scripts_skipped = 0
-  scripts_applied = 0
+  return SnowflakeSchemachangeSession(config)
 
+def calculate_repeatable_migration_checksum(config, session):
   # Deal with the change history table (create if specified)
   change_history_table = get_change_history_table_details(config['change_history_table'])
   change_history_metadata = session.fetch_change_history_metadata(change_history_table)
@@ -515,15 +512,20 @@ def deploy_command(config):
     max_published_version_display = 'None'
   print(_log_ch_max_version.format(max_published_version_display=max_published_version_display))
 
-  # Find all scripts in the root folder (recursively) and sort them correctly
-  all_scripts = get_all_scripts_recursively(config['root_folder'], config['verbose'])
-  all_script_names = list(all_scripts.keys())
-  # Sort scripts such that versioned scripts get applied first and then the repeatable ones.
-  all_script_names_sorted =   sorted_alphanumeric([script for script in all_script_names if script[0] == 'V']) \
-                            + sorted_alphanumeric([script for script in all_script_names if script[0] == 'R']) \
-                            + sorted_alphanumeric([script for script in all_script_names if script[0] == 'A'])
+  return [change_history_table, r_scripts_checksum, max_published_version]
+
+def apply_scripts(config, all_scripts, all_script_names_sorted, apply = True):
+  session = setup_session(config)
+
+  scripts_skipped = 0
+  scripts_applied = 0
+
+  [
+    change_history_table,
+    r_scripts_checksum,
+    max_published_version
+  ] = calculate_repeatable_migration_checksum(config, session)
 
-  # Loop through each script in order and apply any required changes
   for script_name in all_script_names_sorted:
     script = all_scripts[script_name]
 
@@ -557,26 +559,36 @@ def deploy_command(config):
         scripts_skipped += 1
         continue
 
-    print(_log_apply.format(**script))
+    if apply:
+      print(_log_apply.format(**script))
+    else:
+      print(_log_recalculate.format(**script))
 
     if not config['dry_run']:
-      execution_time = session.apply_change_script(script, content, change_history_table)
+      execution_time = 0
+      if apply:
+        execution_time = session.apply_change_script(script, content, change_history_table)
       session.record_change_script(script, content, change_history_table, execution_time)
       scripts_applied += 1
 
-  print(_log_apply_set_complete.format(scripts_applied=scripts_applied, scripts_skipped=scripts_skipped))
+  return [scripts_skipped, scripts_applied]
 
-def undo_command(config):
-  req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse', 'step'])
-  validate_auth_config(config, req_args)
+def deploy_command(config):
+  # Find all scripts in the root folder (recursively) and sort them correctly
+  all_scripts = get_all_scripts_recursively(config['root_folder'], config['verbose'])
+  all_script_names = list(all_scripts.keys())
+  # Sort scripts such that versioned scripts get applied first and then the repeatable ones.
+  all_script_names_sorted =   sorted_alphanumeric([script for script in all_script_names if script[0] == 'V']) \
+                            + sorted_alphanumeric([script for script in all_script_names if script[0] == 'R']) \
+                            + sorted_alphanumeric([script for script in all_script_names if script[0] == 'A'])
 
-  # Log some additional details
-  if config['dry_run']:
-    print("Running in dry-run mode")
-  print(_log_config_details.format(**config))
+  # Loop through each script in order and apply any required changes
+  [scripts_skipped, scripts_applied] = apply_scripts(config, all_scripts, all_script_names_sorted, True)
 
-  #connect to snowflake and maintain connection
-  session = SnowflakeSchemachangeSession(config)
+  print(_log_apply_set_complete.format(scripts_applied=scripts_applied, scripts_skipped=scripts_skipped))
+
+def undo_command(config):
+  session = setup_session(config)
 
   # Deal with the change history table (raise if not provided)
   change_history_table = get_change_history_table_details(config['change_history_table'])
@@ -615,86 +627,13 @@ def undo_command(config):
   print(_log_undo_set_complete.format(scripts_applied=scripts_applied))
 
 def recalculate_checksum_command(config):
-  req_args = set(['snowflake_account','snowflake_user','snowflake_role','snowflake_warehouse'])
-  validate_auth_config(config, req_args)
-
-  # Log some additional details
-  if config['dry_run']:
-    print("Running in dry-run mode")
-  print(_log_config_details.format(**config))
-
-  #connect to snowflake and maintain connection
-  session = SnowflakeSchemachangeSession(config)
-
-  scripts_skipped = 0
-  scripts_applied = 0
-
-  # Deal with the change history table (create if specified)
-  change_history_table = get_change_history_table_details(config['change_history_table'])
-  change_history_metadata = session.fetch_change_history_metadata(change_history_table)
-  if change_history_metadata:
-    print(_log_ch_use.format(last_altered=change_history_metadata['last_altered'], **change_history_table))
-  elif config['create_change_history_table']:
-    # Create the change history table (and containing objects) if it don't exist.
-    if not config['dry_run']:
-      session.create_change_history_table_if_missing(change_history_table)
-    print(_log_ch_create.format(**change_history_table))
-  else:
-    raise ValueError(_err_ch_missing.format(**change_history_table))
-
-  # Find the max published version
-  max_published_version = ''
-
-  change_history = None
-  r_scripts_checksum = None
-  if (config['dry_run'] and change_history_metadata) or not config['dry_run']:
-    change_history = session.fetch_change_history(change_history_table)
-    r_scripts_checksum = session.fetch_r_scripts_checksum(change_history_table)
-
-  if change_history:
-    max_published_version = change_history[0]
-  max_published_version_display = max_published_version
-  if max_published_version_display == '':
-    max_published_version_display = 'None'
-  print(_log_ch_max_version.format(max_published_version_display=max_published_version_display))
-
   # Find all scripts in the root folder (recursively) and sort them correctly
   all_scripts = get_all_scripts_recursively(config['root_folder'], config['verbose'])
   all_script_names = list(all_scripts.keys())
   # Sort scripts such that versioned scripts get applied first and then the repeatable ones.
   all_script_names_sorted = sorted_alphanumeric([script for script in all_script_names if script[0] == 'R'])
 
-  # Loop through each script in order and apply any required changes
-  for script_name in all_script_names_sorted:
-    script = all_scripts[script_name]
-
-    # Always process with jinja engine
-    jinja_processor = JinjaTemplateProcessor(project_root = config['root_folder'], modules_folder = config['modules_folder'])
-    content = jinja_processor.render(jinja_processor.relpath(script['script_full_path']), config['vars'], config['verbose'])
-
-    # Apply only R scripts where the checksum changed compared to the last execution of snowchange
-    if script_name[0] == 'R':
-      # Compute the checksum for the script
-      checksum_current = hashlib.sha224(content.encode('utf-8')).hexdigest()
-
-      # check if R file was already executed
-      if (r_scripts_checksum is not None) and script_name in list(r_scripts_checksum['script_name']):
-        checksum_last = list(r_scripts_checksum.loc[r_scripts_checksum['script_name'] == script_name, 'checksum'])[0]
-      else:
-        checksum_last = ''
-
-      # check if there is a change of the checksum in the script
-      if checksum_current == checksum_last:
-        if config['verbose']:
-          print(_log_skip_r.format(**script))
-        scripts_skipped += 1
-        continue
-
-    print(_log_recalculate.format(**script))
-
-    if not config['dry_run']:
-      session.record_change_script(script, content, change_history_table, 0)
-      scripts_applied += 1
+  [scripts_applied, scripts_skipped] = apply_scripts(config, all_scripts, all_script_names_sorted, False)
 
   print(_log_apply_set_complete.format(scripts_applied=scripts_applied, scripts_skipped=scripts_skipped))
 

diff --git a/setup.cfg b/setup.cfg
@@ -16,9 +16,9 @@ classifiers =
 packages = schemachange
 python_requires = >=3.7
 install_requires =
-    snowflake-connector-python~=2.8
+    snowflake-connector-python~=3.6
     pandas~=1.3
-    pyyaml~=5.4
+    pyyaml~=6.0
     jinja2~=3.0
 include_package_data = True