Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new command flepimop-push, flepimop-pull #296

Open
wants to merge 40 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d85f6e4
add new command flepimop-pull
fang19911030 Jul 25, 2024
edfbb68
bug fix
fang19911030 Jul 25, 2024
1cb9ef1
change format
fang19911030 Jul 25, 2024
d9d59a2
add the test
fang19911030 Jul 25, 2024
c18ecde
change argument type
fang19911030 Jul 25, 2024
ea59026
add file check
fang19911030 Jul 29, 2024
aafc88b
add new file for push command
fang19911030 Aug 5, 2024
d3a4cac
Merge branch 'main' into python_script_resume
fang19911030 Aug 5, 2024
9e838b0
Merge branch 'main' into python_script_resume
fang19911030 Aug 8, 2024
62c0979
add function creating file names for pushing
fang19911030 Aug 8, 2024
5407c71
add body for flepimop-push
fang19911030 Aug 8, 2024
e8c1c42
add command flepimop-push
fang19911030 Aug 8, 2024
cde74d4
change error message
fang19911030 Aug 8, 2024
f1a57fb
fix wrong parameter
fang19911030 Aug 12, 2024
8c6b65f
rename file
fang19911030 Aug 12, 2024
b0d8895
wrong file name
fang19911030 Aug 12, 2024
fc8b4fa
update doc and fix format
fang19911030 Aug 12, 2024
5fce4d4
fix
fang19911030 Aug 12, 2024
ce734c4
black fix format
fang19911030 Aug 12, 2024
6cacb69
print message
fang19911030 Aug 13, 2024
34b18cf
clean
fang19911030 Aug 13, 2024
534d932
correct variable name
fang19911030 Aug 13, 2024
72ef61b
correct tests
fang19911030 Aug 13, 2024
1cbce6c
Merge branch 'main' into python_script_resume
jcblemai Sep 13, 2024
d4ba408
Merge branch 'main' into python_script_resume
jcblemai Sep 13, 2024
cee4259
Merge branch 'main' into python_script_resume
fang19911030 Oct 22, 2024
da0b989
address comments
fang19911030 Oct 23, 2024
3c21a82
address comments 2
fang19911030 Oct 23, 2024
1d8879a
Merge branch 'python_script_resume' of https://github.com/HopkinsIDD/…
fang19911030 Oct 23, 2024
c9a2307
Merge branch 'main' into python_script_resume
fang19911030 Oct 31, 2024
29cf95d
change doc string of file_paths
fang19911030 Nov 14, 2024
62c56f0
remove main
fang19911030 Nov 14, 2024
8194214
remove main and relocate import
fang19911030 Nov 14, 2024
14193ff
add test file
fang19911030 Nov 19, 2024
22f5188
change
fang19911030 Nov 20, 2024
a417e38
new unit test
fang19911030 Nov 20, 2024
f796ce2
add test
fang19911030 Nov 20, 2024
25d79d3
change click type
fang19911030 Nov 20, 2024
fea9449
change click string to path
fang19911030 Dec 5, 2024
1522e8f
format change
fang19911030 Dec 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions flepimop/gempyor_pkg/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ console_scripts =
gempyor-seir = gempyor.simulate_seir:simulate
gempyor-simulate = gempyor.simulate:simulate
flepimop-calibrate = gempyor.calibrate:calibrate
flepimop-pull = gempyor.resume_pull:fetching_resume_files
flepimop-push = gempyor.flepimop_push:flepimop_push
Comment on lines +56 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's my intention that these will be shortly replaced by interacting with this capability via the core flepimop cli. makes sense to add them for the time being, but people should be advised that they will migrate soon (ideally) to the overall flepimop cli.


[options.packages.find]
where = src
Expand Down
79 changes: 53 additions & 26 deletions flepimop/gempyor_pkg/src/gempyor/file_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,7 @@ def create_file_name(
"""
if create_directory:
os.makedirs(
create_dir_name(
run_id,
prefix,
ftype,
inference_filepath_suffix,
inference_filename_prefix,
),
create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix,),
jcblemai marked this conversation as resolved.
Show resolved Hide resolved
exist_ok=True,
)

Expand Down Expand Up @@ -123,13 +117,7 @@ def create_file_name_without_extension(
"""
if create_directory:
os.makedirs(
create_dir_name(
run_id,
prefix,
ftype,
inference_filepath_suffix,
inference_filename_prefix,
),
create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix,),
jcblemai marked this conversation as resolved.
Show resolved Hide resolved
exist_ok=True,
)
filename = Path(
Expand Down Expand Up @@ -169,11 +157,7 @@ def run_id(timestamp: None | datetime = None) -> str:


def create_dir_name(
run_id: str,
prefix: str,
ftype: str,
inference_filepath_suffix: str,
inference_filename_prefix: str,
run_id: str, prefix: str, ftype: str, inference_filepath_suffix: str, inference_filename_prefix: str,
jcblemai marked this conversation as resolved.
Show resolved Hide resolved
) -> str:
"""
Generate a directory name based on the given parameters.
Expand All @@ -198,12 +182,55 @@ def create_dir_name(
"""
return os.path.dirname(
create_file_name_without_extension(
run_id,
prefix,
1,
ftype,
inference_filepath_suffix,
inference_filename_prefix,
create_directory=False,
run_id, prefix, 1, ftype, inference_filepath_suffix, inference_filename_prefix, create_directory=False,
jcblemai marked this conversation as resolved.
Show resolved Hide resolved
)
)


def create_file_name_for_push(
flepi_run_index: str, prefix: str, flepi_slot_index: str, flepi_block_index: str
) -> list[str]:
"""
Generate a list of file names for different types of inference results.

This function generates a list of file names based on the provided run index, prefix, slot index,
and block index. Each file name corresponds to a different type of inference result, such as
"seir", "hosp", "llik", etc. The file names are generated using the `create_file_name` function,
with specific extensions based on the type: "csv" for "seed" and "parquet" for all other types.

Args:
flepi_run_index :
The index of the run. This is used to uniquely identify the run.

prefix :
A prefix string to be included in the file names. This is typically used to categorize or
identify the files.

flepi_slot_index :
The slot index used in the filename. This is formatted as a zero-padded nine-digit number.

flepi_block_index :
The block index used in the filename. This typically indicates a specific block or segment
of the data being processed.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could the spacing be changed just slightly to match this https://google.github.io/styleguide/pyguide.html#doc-function-args a bit better?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I changed comment according to this style. Let me know if you think we need further change.


Returns:
list[str]
fang19911030 marked this conversation as resolved.
Show resolved Hide resolved
A list of generated file names, each corresponding to a different type of inference result.
The file names include the provided prefix, run index, slot index, block index, type, and
the appropriate file extension (either "csv" or "parquet").
"""
type_list = ["seir", "hosp", "llik", "spar", "snpi", "hnpi", "hpar", "init", "seed"]
name_list = []
for type_name in type_list:
extension = "csv" if type_name == "seed" else "parquet"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor: feels like mild code smell to have this if test inside the loop right next to the variables outside. bit less weird as, dunno, a list comprehension outside with the test, then use the key/value pairs in the loop.

but like i said, minor complaint.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this also seems like something that we should use the gempyor.utils.get_filetype_for_resume to get? Although, just trying to do that right now would be a circular import. Punt to a new issue?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

create key/value pairs outside of the loop

file_name = create_file_name(
run_id=flepi_run_index,
prefix=prefix,
inference_filename_prefix="{:09d}.".format(int(flepi_slot_index)),
inference_filepath_suffix="chimeric/intermediate",
index=flepi_block_index,
ftype=type_name,
extension=extension,
)
name_list.append(file_name)
return name_list
164 changes: 164 additions & 0 deletions flepimop/gempyor_pkg/src/gempyor/flepimop_push.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import os
import click
import shutil
from gempyor.file_paths import create_file_name_for_push
fang19911030 marked this conversation as resolved.
Show resolved Hide resolved


@click.command()
fang19911030 marked this conversation as resolved.
Show resolved Hide resolved
@click.option("--s3_upload", "s3_upload", envvar="S3_UPLOAD", help="push files to aws", required=True)
@click.option("--data-path", "data_path", envvar="PROJECT_PATH", type=click.Path(exists=True), required=True)
@click.option("--flepi_run_index", "flepi_run_index", envvar="FLEPI_RUN_INDEX", type=click.STRING, required=True)
@click.option("--flepi_prefix", "flepi_prefix", envvar="FLEPI_PREFIX", type=click.STRING, required=True)
@click.option("--flepi_block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.STRING, required=True)
@click.option("--flepi_slot_index", "flepi_slot_index", envvar="FLEPI_SLOT_INDEX", type=click.STRING, required=True)
@click.option(
"--s3_results_path", "s3_results_path", envvar="S3_RESULTS_PATH", type=click.STRING, default="", required=False
)
@click.option(
"--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.STRING, default="", required=False
)
fang19911030 marked this conversation as resolved.
Show resolved Hide resolved
def flepimop_push(
    s3_upload: str,
    data_path: str,
    flepi_run_index: str,
    flepi_prefix: str,
    flepi_slot_index: str,
    flepi_block_index: str,
    s3_results_path: str = "",
    fs_results_path: str = "",
) -> None:
    """
    Push the result files of one slot/block to AWS S3 and/or a filesystem path.

    The candidate file names come from `create_file_name_for_push`. Only the
    candidates that exist under `data_path` are pushed. Failures of individual
    uploads or copies are reported on stdout and do not stop the remaining
    files from being processed.

    Args:
        s3_upload: Whether to upload to AWS S3. Uploading happens when this
            equals "true" (compared case-insensitively, surrounding whitespace
            ignored); any other value skips the S3 step.
        data_path: Local directory the generated file names are relative to.
        flepi_run_index: Run identifier forwarded to
            `create_file_name_for_push`.
        flepi_prefix: File name prefix forwarded to
            `create_file_name_for_push`.
        flepi_slot_index: Slot index forwarded to
            `create_file_name_for_push`.
        flepi_block_index: Block index forwarded to
            `create_file_name_for_push`.
        s3_results_path: Destination of the form `s3://<bucket>[/<key prefix>]`.
            Required when `s3_upload` is true.
        fs_results_path: Destination directory for a filesystem copy. An empty
            value skips the copy step. Missing directories are created.

    Raises:
        ValueError: If `s3_upload` is true and `s3_results_path` is empty or is
            not an `s3://<bucket>/...` URI.
        ModuleNotFoundError: If `s3_upload` is true and `boto3` cannot be
            imported.

    Example:
        ```bash
        flepimop-push --s3_upload true --data-path /path/to/data --flepi_run_index run_01 --flepi_prefix prefix_01 \
            --flepi_slot_index 1 --flepi_block_index 1 --s3_results_path s3://my-bucket/results/
        ```
    """
    file_name_list = create_file_name_for_push(
        flepi_run_index=flepi_run_index,
        prefix=flepi_prefix,
        flepi_slot_index=flepi_slot_index,
        flepi_block_index=flepi_block_index,
    )
    exist_files = [
        file_name
        for file_name in file_name_list
        if os.path.exists(os.path.join(data_path, file_name))
    ]
    print("flepimop-push found these existing files: " + " ".join(exist_files))

    # Track failed uploads/copies separately so each destination gets its own report.
    failed_s3_uploads = []
    failed_fs_copies = []

    # `str(...)` keeps programmatic callers that pass a real bool working.
    if str(s3_upload).strip().lower() == "true":
        try:
            # Imported lazily: boto3 is only needed when uploading to S3.
            import boto3
            from boto3.exceptions import S3UploadFailedError
            from botocore.exceptions import ClientError
        except ModuleNotFoundError as err:
            raise ModuleNotFoundError(
                "No module named 'boto3', which is required for "
                "gempyor.flepimop_push.flepimop_push. Please install the aws target."
            ) from err
        if not s3_results_path:
            raise ValueError(
                "--s3_upload is set to true, so you must provide --s3_results_path "
                "or set the environment variable S3_RESULTS_PATH."
            )
        # Validate the destination once, before any upload is attempted.
        bucket, key_prefix = _split_s3_results_path(s3_results_path)
        s3 = boto3.client("s3")
        for file in exist_files:
            # S3 object keys always use "/" regardless of the local separator.
            object_name = key_prefix + file.replace(os.sep, "/")
            try:
                s3.upload_file(os.path.join(data_path, file), bucket, object_name)
            except (ClientError, S3UploadFailedError) as e:
                # `upload_file` reports client errors as S3UploadFailedError.
                print(f"Failed to upload {file} to S3: {e}")
                failed_s3_uploads.append(file)
            else:
                print(f"Uploaded {file} to S3 successfully.")

    if fs_results_path:
        for file in exist_files:
            dst = os.path.join(fs_results_path, file)
            try:
                # Directory creation is inside the try so that a failure for
                # one file does not abort the remaining copies.
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.copy(os.path.join(data_path, file), dst)
            except OSError as e:
                print(f"Failed to copy {file} to local filesystem: {e}")
                failed_fs_copies.append(file)
            else:
                print(f"Copied {file} to local filesystem successfully.")

    # Print failed files for S3 uploads
    if failed_s3_uploads:
        print("The following files failed to upload to S3:")
        for file in failed_s3_uploads:
            print(file)

    # Print failed files for local filesystem copies
    if failed_fs_copies:
        print("The following files failed to copy to the local filesystem:")
        for file in failed_fs_copies:
            print(file)

    # Success message if no failures
    if not failed_s3_uploads and not failed_fs_copies:
        print("flepimop-push successfully pushed all existing files.")


# NOTE: kept below `flepimop_push` on purpose so that the click decorators
# preceding the command's `def` line continue to apply to the command itself.
def _split_s3_results_path(s3_results_path: str) -> tuple[str, str]:
    """Split `s3://<bucket>[/<prefix>]` into `(bucket, prefix)`; the prefix is "" or ends with "/"."""
    scheme, separator, remainder = s3_results_path.partition("://")
    bucket, _, key_prefix = remainder.partition("/")
    if not separator or scheme.lower() != "s3" or not bucket:
        raise ValueError(
            f"The S3 results path must look like 's3://<bucket>/<path>', got {s3_results_path!r}."
        )
    if key_prefix and not key_prefix.endswith("/"):
        key_prefix += "/"
    return bucket, key_prefix


# Script entry point: invoke the `flepimop_push` command when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    flepimop_push()
Loading
Loading