diff --git a/config.yaml b/config.yaml index 2ab8b2a..bcee187 100644 --- a/config.yaml +++ b/config.yaml @@ -14,7 +14,7 @@ main: random_seed: 42 data: train_data: "mlops-creditcard_fraud_predictive/train_data.csv:latest" - file_url: "https://www.kaggle.com/mlg-ulb/creditcardfraud.csv" + file_url: "https://www.kaggle.com/mlg-ulb/creditcardfraud?select=creditcard.csv" reference_dataset: "mlops-creditcard_fraud_predictive/raw_data.csv:latest" # Threshold for Kolmogorov-Smirnov test ks_alpha: 0.05 diff --git a/download/download_data.py b/download/download_data.py index 0284d11..096c82d 100644 --- a/download/download_data.py +++ b/download/download_data.py @@ -27,8 +27,8 @@ def process_args(args): logger.info("Creating run") with wandb.init(job_type="download_data") as run: # Load the file - # od.download(args.file_url) - with open("creditcard.csv", 'rb') as file: + od.download(args.file_url) + with open("./creditcard.csv", 'rb') as file: fp.writelines(file) # Download the file streaming and write to open temp file # with requests.get(args.file_url, stream=True) as r: