Skip to content

Commit

Permalink
switched from gsutil cp to gcloud storage cp
Browse files (browse the repository at this point in the history)
  • Loading branch information
bw2 committed Mar 17, 2024
1 parent 039ae01 commit 3797ce4
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 11 additions & 11 deletions step_pipeline/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ def _preprocess_input_spec(self, input_spec):
raise ValueError(f"Expected gs:// path but instead found '{input_spec.local_dir}'")
self.gcloud_auth_activate_service_account()
self.command(f"mkdir -p '{input_spec.local_dir}'")
self.command(self._generate_gsutil_copy_command(
self.command(self._generate_gcloud_copy_command(
input_spec.original_source_path, output_dir=input_spec.local_dir))
self.command(f"ls -lh '{input_spec.local_path}'") # check that file was copied successfully

Expand Down Expand Up @@ -817,7 +817,7 @@ def _preprocess_input_spec(self, input_spec):
self._paths_localized_via_temp_bucket.add(temp_file_path)

# copy file to temp bucket
gsutil_command = self._generate_gsutil_copy_command(source_path, output_dir=temp_dir)
gsutil_command = self._generate_gcloud_copy_command(source_path, output_dir=temp_dir)
self.command(gsutil_command)

# create an InputSpec with the updated source path
Expand Down Expand Up @@ -885,7 +885,7 @@ def _get_size_of_all_inputs_localized_by_copy(self):

return total_size_bytes

def _generate_gsutil_copy_command(self, source_path, output_dir=None, output_path=None, ignore_nonzero_exit_code=False):
def _generate_gcloud_copy_command(self, source_path, output_dir=None, output_path=None, ignore_nonzero_exit_code=False):
"""Utility method that puts together the gsutil command for copying the given source path to an output path
or directory. Either the output path or the output directory must be provided.
Expand All @@ -899,28 +899,28 @@ def _generate_gsutil_copy_command(self, source_path, output_dir=None, output_pat
str: gsutil command string
"""
args = self._pipeline.parse_known_args()
gsutil_command = f"gsutil"
gcloud_copy_command = f"gcloud "
if args.gcloud_project:
gsutil_command += f" -u {args.gcloud_project}"

gcloud_copy_command += f"--project {args.gcloud_project} "
gcloud_copy_command += "storage cp "
if output_path:
destination = output_path
elif output_dir:
destination = output_dir.rstrip("/") + "/"
else:
raise ValueError("Neither output_path nor output_dir arg was specified")

full_gsutil_command = f"time {gsutil_command} -m cp -r '{source_path}' '{destination}'"
full_gcloud_copy_command = f"time {gcloud_copy_command} --recursive '{source_path}' '{destination}'"

if ignore_nonzero_exit_code:
gsutil_command_with_error_handling = (
f"({full_gsutil_command}) || (touch {os.path.basename(source_path)}{MARK_FILE_SUFFIX} && "
f"{gsutil_command} -m cp -r '{os.path.basename(source_path)}{MARK_FILE_SUFFIX}' '{destination}{MARK_FILE_SUFFIX}' "
f"({full_gcloud_copy_command}) || (touch {os.path.basename(source_path)}{MARK_FILE_SUFFIX} && "
f"{gcloud_copy_command} cp --recursive '{os.path.basename(source_path)}{MARK_FILE_SUFFIX}' '{destination}{MARK_FILE_SUFFIX}' "
f") || true"
)
return gsutil_command_with_error_handling
else:
return full_gsutil_command
return full_gcloud_copy_command

def _handle_input_transfer_using_cloudfuse(self, input_spec):
"""Utility method that implements localizing an input via cloudfuse.
Expand Down Expand Up @@ -985,7 +985,7 @@ def _preprocess_output_spec(self, output_spec):
raise ValueError(f"{output_spec.output_path} Destination path must start with gs://")

self.gcloud_auth_activate_service_account()
self.command(self._generate_gsutil_copy_command(
self.command(self._generate_gcloud_copy_command(
output_spec.local_path,
output_path=output_spec.output_path,
ignore_nonzero_exit_code=output_spec.optional))
Expand Down
5 changes: 4 additions & 1 deletion step_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,10 @@ def switch_gcloud_auth_to_user_account(self, gcloud_credentials_path=None, gclou
self.command(f"gcloud auth list")

self.gcloud_auth_activate_service_account()
self.command(f"gsutil -m cp -r {os.path.join(gcloud_credentials_path, '.config')} /tmp/")
self.command("python3 -m pip install -U crcmod")
self.command(f"gcloud storage cp --recursive {os.path.join(gcloud_credentials_path, '.config')} /tmp/")
self.command("echo done copying .config")

self.command(f"rm -rf ~/.config")
self.command(f"mv /tmp/.config ~/")
self.command(f"gcloud config set account {gcloud_user_account}")
Expand Down

0 comments on commit 3797ce4

Please sign in to comment.