-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
new command flepimop-push, flepimop-pull #296
base: main
Are you sure you want to change the base?
Changes from 29 commits
d85f6e4
edfbb68
1cb9ef1
d9d59a2
c18ecde
ea59026
aafc88b
d3a4cac
9e838b0
62c0979
5407c71
e8c1c42
cde74d4
f1a57fb
8c6b65f
b0d8895
fc8b4fa
5fce4d4
ce734c4
6cacb69
34b18cf
534d932
72ef61b
1cbce6c
d4ba408
cee4259
da0b989
3c21a82
1d8879a
c9a2307
29cf95d
62c56f0
8194214
14193ff
22f5188
a417e38
f796ce2
25d79d3
fea9449
1522e8f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,13 +58,7 @@ def create_file_name( | |
""" | ||
if create_directory: | ||
os.makedirs( | ||
create_dir_name( | ||
run_id, | ||
prefix, | ||
ftype, | ||
inference_filepath_suffix, | ||
inference_filename_prefix, | ||
), | ||
create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix,), | ||
jcblemai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
exist_ok=True, | ||
) | ||
|
||
|
@@ -123,13 +117,7 @@ def create_file_name_without_extension( | |
""" | ||
if create_directory: | ||
os.makedirs( | ||
create_dir_name( | ||
run_id, | ||
prefix, | ||
ftype, | ||
inference_filepath_suffix, | ||
inference_filename_prefix, | ||
), | ||
create_dir_name(run_id, prefix, ftype, inference_filepath_suffix, inference_filename_prefix,), | ||
jcblemai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
exist_ok=True, | ||
) | ||
filename = Path( | ||
|
@@ -169,11 +157,7 @@ def run_id(timestamp: None | datetime = None) -> str: | |
|
||
|
||
def create_dir_name( | ||
run_id: str, | ||
prefix: str, | ||
ftype: str, | ||
inference_filepath_suffix: str, | ||
inference_filename_prefix: str, | ||
run_id: str, prefix: str, ftype: str, inference_filepath_suffix: str, inference_filename_prefix: str, | ||
jcblemai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) -> str: | ||
""" | ||
Generate a directory name based on the given parameters. | ||
|
@@ -198,12 +182,55 @@ def create_dir_name( | |
""" | ||
return os.path.dirname( | ||
create_file_name_without_extension( | ||
run_id, | ||
prefix, | ||
1, | ||
ftype, | ||
inference_filepath_suffix, | ||
inference_filename_prefix, | ||
create_directory=False, | ||
run_id, prefix, 1, ftype, inference_filepath_suffix, inference_filename_prefix, create_directory=False, | ||
jcblemai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
) | ||
|
||
|
||
def create_file_name_for_push(
    flepi_run_index: str, prefix: str, flepi_slot_index: str, flepi_block_index: str
) -> list[str]:
    """Build the inference output file names that a push should look for.

    One name is produced per output type, in this fixed order: "seir", "hosp",
    "llik", "spar", "snpi", "hnpi", "hpar", "init" and "seed". The "seed" file
    gets a "csv" extension; every other type gets "parquet". Each name is
    produced by `create_file_name` with the "chimeric/intermediate" filepath
    suffix.

    NOTE(review): `create_file_name` is called without `create_directory`, so
    its default applies -- confirm whether that creates directories as a side
    effect of merely computing these names.

    Args:
        flepi_run_index: Identifier of the run, passed to `create_file_name`
            as `run_id`.
        prefix: Prefix passed through to `create_file_name`.
        flepi_slot_index: Slot index. It is converted with `int()` and rendered
            as a zero-padded nine-digit number followed by a dot, which is used
            as the filename prefix.
        flepi_block_index: Block index, passed to `create_file_name` as
            `index`.

    Returns:
        The generated file names, one per output type, in the order listed
        above.

    Raises:
        ValueError: If `flepi_slot_index` cannot be converted to an integer.
    """
    # Map every output type to its extension up front; dict insertion order
    # fixes the order of the returned names.
    extension_by_type = {
        ftype: "parquet"
        for ftype in ("seir", "hosp", "llik", "spar", "snpi", "hnpi", "hpar", "init")
    }
    extension_by_type["seed"] = "csv"

    # The slot prefix is identical for every file, so format it only once.
    slot_prefix = f"{int(flepi_slot_index):09d}."

    return [
        create_file_name(
            run_id=flepi_run_index,
            prefix=prefix,
            inference_filename_prefix=slot_prefix,
            inference_filepath_suffix="chimeric/intermediate",
            index=flepi_block_index,
            ftype=ftype,
            extension=extension,
        )
        for ftype, extension in extension_by_type.items()
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
import os | ||
import click | ||
import shutil | ||
from gempyor.file_paths import create_file_name_for_push | ||
fang19911030 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def _parse_s3_results_path(s3_results_path: str) -> tuple[str, str]:
    """Split an ``s3://bucket/prefix`` URI into ``(bucket, key_prefix)``.

    Raises:
        ValueError: If the path does not start with ``s3://`` or names no bucket.
    """
    scheme = "s3://"
    if s3_results_path[: len(scheme)].lower() != scheme:
        raise ValueError(
            f"The S3 results path must look like 's3://<bucket>/<prefix>', got '{s3_results_path}'."
        )
    bucket, _, key_prefix = s3_results_path[len(scheme) :].partition("/")
    if not bucket:
        raise ValueError(f"The S3 results path '{s3_results_path}' does not name a bucket.")
    return bucket, key_prefix


@click.command()
@click.option("--s3_upload", "s3_upload", envvar="S3_UPLOAD", help="push files to aws", required=True)
@click.option("--data-path", "data_path", envvar="PROJECT_PATH", type=click.Path(exists=True), required=True)
@click.option("--flepi_run_index", "flepi_run_index", envvar="FLEPI_RUN_INDEX", type=click.STRING, required=True)
@click.option("--flepi_prefix", "flepi_prefix", envvar="FLEPI_PREFIX", type=click.STRING, required=True)
@click.option("--flepi_block_index", "flepi_block_index", envvar="FLEPI_BLOCK_INDEX", type=click.STRING, required=True)
@click.option("--flepi_slot_index", "flepi_slot_index", envvar="FLEPI_SLOT_INDEX", type=click.STRING, required=True)
@click.option(
    "--s3_results_path", "s3_results_path", envvar="S3_RESULTS_PATH", type=click.STRING, default="", required=False
)
@click.option(
    "--fs_results_path", "fs_results_path", envvar="FS_RESULTS_PATH", type=click.STRING, default="", required=False
)
def flepimop_push(
    s3_upload: str,
    data_path: str,
    flepi_run_index: str,
    flepi_prefix: str,
    flepi_slot_index: str,
    flepi_block_index: str,
    s3_results_path: str = "",
    fs_results_path: str = "",
) -> None:
    """Push existing inference output files to AWS S3 and/or a filesystem path.

    The candidate file names come from `create_file_name_for_push`. Only the
    candidates that exist under `data_path` are pushed. The S3 upload and the
    filesystem copy are independent: either, both, or neither may run. A file
    that fails to upload or copy is reported at the end and does not stop the
    remaining files from being processed.

    Args:
        s3_upload: Upload to S3 when this is the string "true" (compared
            case-insensitively). Any other value skips the S3 upload.
        data_path: Local directory that the generated file names are relative
            to.
        flepi_run_index: Run identifier used to generate the file names.
        flepi_prefix: Prefix used to generate the file names.
        flepi_slot_index: Slot index used to generate the file names.
        flepi_block_index: Block index used to generate the file names.
        s3_results_path: Destination of the form "s3://<bucket>/<prefix>".
            Required when `s3_upload` is true.
        fs_results_path: Destination directory for filesystem copies. An empty
            string (the default) skips the filesystem copy.

    Raises:
        ModuleNotFoundError: If `s3_upload` is true and `boto3` is not
            installed.
        ValueError: If `s3_upload` is true and `s3_results_path` is empty or is
            not an "s3://" URI naming a bucket.

    Example:
        flepimop-push --s3_upload true --data-path /path/to/data
        --flepi_run_index run_01 --flepi_prefix prefix_01 --flepi_slot_index 1
        --flepi_block_index 1 --s3_results_path s3://my-bucket/results/
    """
    candidate_files = create_file_name_for_push(
        flepi_run_index=flepi_run_index,
        prefix=flepi_prefix,
        flepi_slot_index=flepi_slot_index,
        flepi_block_index=flepi_block_index,
    )
    # Only files that are actually present under data_path are pushed.
    exist_files = [name for name in candidate_files if os.path.exists(os.path.join(data_path, name))]
    print("flepimop-push found these existing files: " + " ".join(exist_files))

    # Track failed uploads/copies separately so each can be reported on its own.
    failed_s3_uploads: list[str] = []
    failed_fs_copies: list[str] = []

    if s3_upload.strip().lower() == "true":
        try:
            import boto3
            from boto3.exceptions import S3UploadFailedError
            from botocore.exceptions import ClientError
        except ModuleNotFoundError as err:
            raise ModuleNotFoundError(
                "No module named 'boto3', which is required for "
                "gempyor.flepimop_push.flepimop_push. Please install the aws target."
            ) from err
        if s3_results_path == "":
            raise ValueError(
                "--s3_upload is set to true, so you must provide --s3_results_path "
                "or set the S3_RESULTS_PATH environment variable."
            )
        # S3 keys always use "/" regardless of the local OS path separator.
        import posixpath

        bucket, key_prefix = _parse_s3_results_path(s3_results_path)
        s3 = boto3.client("s3")
        for file_name in exist_files:
            object_name = posixpath.join(key_prefix, file_name.replace(os.sep, "/"))
            try:
                s3.upload_file(os.path.join(data_path, file_name), bucket, object_name)
            except (ClientError, S3UploadFailedError) as e:
                # upload_file reports failed transfers as S3UploadFailedError,
                # which is not a ClientError subclass, so both must be caught.
                print(f"Failed to upload {file_name} to S3: {e}")
                failed_s3_uploads.append(file_name)
            else:
                print(f"Uploaded {file_name} to S3 successfully.")

    if fs_results_path != "":
        for file_name in exist_files:
            dst = os.path.join(fs_results_path, file_name)
            try:
                # Creating the destination directory can fail as well; treat it
                # as a failure of this file rather than aborting the whole push.
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.copy(os.path.join(data_path, file_name), dst)
            except OSError as e:
                print(f"Failed to copy {file_name} to local filesystem: {e}")
                failed_fs_copies.append(file_name)
            else:
                print(f"Copied {file_name} to local filesystem successfully.")

    # Print failed files for S3 uploads
    if failed_s3_uploads:
        print("The following files failed to upload to S3:")
        for file_name in failed_s3_uploads:
            print(file_name)

    # Print failed files for local filesystem copies
    if failed_fs_copies:
        print("The following files failed to copy to the local filesystem:")
        for file_name in failed_fs_copies:
            print(file_name)

    # Success message if no failures
    if not failed_s3_uploads and not failed_fs_copies:
        print("flepimop-push successfully pushed all existing files.")
|
||
|
||
# Allow this module to be run directly as a script; the click command object
# parses the command-line arguments (and their environment-variable fallbacks).
if __name__ == "__main__":
    flepimop_push()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's my intention that these commands will shortly be replaced by access to this capability through the core flepimop CLI. It makes sense to add them for the time being, but users should be advised that they will (ideally) soon migrate to the overall flepimop CLI.