# examples/rl_order_execution/exp_configs/backtest_ppo.yml

# Orders file used for this run (a pickle file, judging by the extension).
order_file: ./data/orders/test_orders.pkl
# Time window of the run. Both values are kept quoted so they stay strings:
# an unquoted digits-and-colon scalar such as 9:30 is read as a base-60
# integer by YAML 1.1 parsers.
start_time: '9:30'
end_time: '14:54'
# Bar size of the input data, given as a string.
data_granularity: '5min'
# Qlib settings.
# NOTE(review): the key name suggests this is the data provider location for
# 5-minute data; confirm against the code that loads this file.
qlib:
  provider_uri_5min: ./data/bin/
# Exchange settings used by the backtest.
exchange:
  # null: no limit threshold is configured in this file.
  limit_threshold: null
  # Two price expressions, both set to the close price.
  # NOTE(review): presumably [buy price, sell price]; confirm with the consumer.
  deal_price:
    - '$close'
    - '$close'
  # null: no volume threshold is configured in this file.
  volume_threshold: null
# Strategy configuration: one entry per key under `strategies`. Each entry
# names a class, the module it is imported from, and its keyword arguments.
strategies:
  # Entry `1day`: an SAOEIntStrategy assembled from an action interpreter,
  # a network, a policy and a state interpreter.
  1day:
    class: SAOEIntStrategy
    kwargs:
      # NOTE(review): presumably minutes, mirroring the top-level "5min"
      # data_granularity setting; confirm.
      data_granularity: 5
      action_interpreter:
        class: CategoricalActionInterpreter
        kwargs:
          # Same value as the state interpreter's max_step further down.
          # NOTE(review): the two presumably need to stay in sync; confirm.
          max_step: 8
          values: 4
        module_path: qlib.rl.order_execution.interpreter
      network:
        class: Recurrent
        # No keyword arguments are passed to the network.
        kwargs: {}
        module_path: qlib.rl.order_execution.network
      policy:
        # The original author lists PPO and DQN as the options for this field.
        class: PPO
        kwargs:
          lr: 0.0001
          # Restore `weight_file` once the training workflow finishes. You can
          # point it at whichever checkpoint file you want to use.
          # weight_file: outputs/ppo/checkpoints/latest.pth
        module_path: qlib.rl.order_execution.policy
      state_interpreter:
        class: FullHistoryStateInterpreter
        kwargs:
          # Equals the number of feature columns listed per day below (five).
          data_dim: 5
          data_ticks: 48
          # Same value as the action interpreter's max_step above.
          max_step: 8
          processed_data_provider:
            class: HandlerProcessedDataProvider
            kwargs:
              data_dir: ./data/pickle/
              # Feature columns for the current day.
              feature_columns_today:
                - '$high'
                - '$low'
                - '$open'
                - '$close'
                - '$volume'
              # The same five fields with a `_1` suffix, for the previous day.
              feature_columns_yesterday:
                - '$high_1'
                - '$low_1'
                - '$open_1'
                - '$close_1'
                - '$volume_1'
            module_path: qlib.rl.data.native
        module_path: qlib.rl.order_execution.interpreter
    module_path: qlib.rl.order_execution.strategy
  # Entry `30min`: a TWAPStrategy from qlib.contrib.strategy.rule_strategy,
  # created with no keyword arguments.
  # NOTE(review): the key presumably names the 30-minute execution level; confirm.
  30min:
    class: TWAPStrategy
    kwargs: {}
    module_path: qlib.contrib.strategy.rule_strategy
# NOTE(review): presumably the number of parallel workers for the run; confirm.
concurrency: 16
# Directory for run outputs. The commented-out `weight_file` under
# strategies.1day.kwargs.policy.kwargs points inside this directory.
output_dir: outputs/ppo/