# config.yaml

# Top-level run settings: project and experiment identifiers, the list of
# steps to execute, and the shared random seed.
main:
  project_name: mlops-creditcard_fraud_predictive
  experiment_name: dev
  # Steps selected for execution.
  # NOTE(review): presumably executed in the order listed - confirm against
  # the code that consumes this list.
  execute_steps:
    - download
    - preprocess
    - check_data
    - segregate
    - decision_tree
    - evaluate
  # This seed will be used to seed the random number generator
  # to ensure repeatability of the data splits and other
  # pseudo-random operations
  random_seed: 42
# Dataset locations plus the parameters used for data checks and splitting.
data:
  # NOTE(review): the "<project>/<file>:latest" form looks like a versioned
  # artifact reference under main.project_name - confirm with the consumer.
  train_data: "mlops-creditcard_fraud_predictive/train_data.csv:latest"
  # NOTE(review): this URL appears to point at a Kaggle dataset web page
  # rather than a direct CSV download - confirm the download step handles it.
  file_url: "https://www.kaggle.com/mlg-ulb/creditcardfraud?select=creditcard.csv"
  reference_dataset: "mlops-creditcard_fraud_predictive/raw_data.csv:latest"
  # Threshold for Kolmogorov-Smirnov test
  ks_alpha: 0.05
  # NOTE(review): presumably the fractions of data held out for the test and
  # validation splits - confirm how val_size is applied (to the full data or
  # to the remaining training portion).
  test_size: 0.3
  val_size: 0.3
  # Stratify according to the target when splitting the data
  # in train/test or in train/val
  stratify: Class
# Settings for the decision tree pipeline: model hyperparameters, the
# numerical preprocessing option, and the name of the exported artifact.
decision_tree_pipeline:
  # NOTE(review): these keys look like scikit-learn DecisionTreeClassifier
  # arguments (criterion, splitter, max_depth) - confirm against the
  # training step.
  decision_tree:
    criterion: "entropy"
    splitter: "best"
    max_depth: 13
  # NOTE(review): the meaning of `model: 0` is not evident from this file
  # (possibly an index selecting a preprocessing variant) - confirm and
  # document.
  numerical_pipe:
    model: 0
  # Name under which the trained pipeline is exported.
  export_artifact: "model_export"