# code_single/configs/waymo/streetsurf/withmask_withlidar_joint.240219.yaml

#------------------------------------------------------------
#------------    Some shortcut configs
#------------------------------------------------------------

# NOTE(review): -1 presumably selects every visible device - confirm with the launcher.
device_ids: -1

# Rays per training batch; read by training.dataloader.{pixel,lidar}_dataset.num_rays.
num_rays_pixel: 8192
num_rays_lidar: 8192

# Ray bounds; read by renderer.common.{near,far}.
near: 0.1
far: 200.0
# To visualize / colorize depth when rendering or evaluating.
depth_max: 120.0
# Read by assetbank_cfg.Street.asset_params.populate_cfg.extend_size.
extend_size: 60.0

# Ray-query settings of the Street model (ray_query_cfg.query_param).
num_coarse: 128  # Number of coarse samples on each ray
step_size: 0.2  # Ray-marching step size
upsample_inv_s: 64.0
upsample_inv_s_factors: [1.0, 4.0, 16.0]
num_fine: [8, 8, 32]  # Number of samples of the 3 upsample stages (alternative: [8, 8, 8])

# NeRF++ background (distant-view model) sampling.
radius_scale_min: 1  # Nearest sampling shell
radius_scale_max: 1000  # Furthest sampling shell
distant_interval_type: inverse_proportional
distant_mode: fixed_cuboid_shells
distant_nsample: 64

# The real-world length represented by one unit of SDF.
sdf_scale: 25.0

# Photometric loss; read by training.losses.rgb.
rgb_fn: l1
rgb_fn_param: {}

# LiDAR losses; read by training.losses.lidar.{depth,line_of_sight}.
lidar_fn: l1
lidar_fn_param: {}
w_lidar: 0.02
w_los: 0.1
# eps_los: anneal_1.5_0.75_0.5

# Weight of training.losses.occupancy_mask.
w_mask: 0.3

# Read by training.uniform_sample.Street.
num_uniform: ${eval:"2**16"}

# Eikonal regularization; read by training.losses.eikonal.
w_eikonal: 0.01
on_render_ratio: 0.2
on_occ_ratio: 1.0
on_render_type: both
safe_mse: true
errlim: 5

# Sparsity regularization; read by training.losses.sparsity.
w_sparsity: 0.002
sparsity_anneal_for: 1000
sparsity_enable_after: 0

# Clearance regularization; read by training.losses.clearance.
clbeta: 10.0
clw: 0.2
clearance_sdf: 0.02  # 0.02 * (sdf_scale=25) = 0.5m

# Optimization schedule and per-asset learning rates.
num_iters: 15000
warmup_steps: 2000
min_factor: 0.06
fglr: 1.0e-2  # Street
bglr: 1.0e-2  # Distant
skylr: 1.0e-3  # Sky
emblr: 2.0e-2  # ImageEmbeddings
image_embedding_dim: 4

# Encoding level annealing of the Street surface (encoding_cfg.anneal_cfg).
start_it: 0
start_level: 2
stop_it: 4000  # !!! Important for stable training.
# Variance control of the Street model (var_ctrl_cfg).
final_inv_s: 2400.0
ctrl_start: 3000
lnini: 0.1

use_estimate_alpha: false

geo_init_method: pretrain_after_zero_out  # alternative: pretrain

# Sensors to load; the 3-camera variant is kept for reference.
# camera_list: [camera_FRONT_LEFT, camera_FRONT, camera_FRONT_RIGHT]
camera_list: [camera_SIDE_LEFT, camera_FRONT_LEFT, camera_FRONT, camera_FRONT_RIGHT, camera_SIDE_RIGHT]
lidar_list: [lidar_TOP, lidar_FRONT, lidar_REAR, lidar_SIDE_LEFT, lidar_SIDE_RIGHT]
# One weight per entry of lidar_list, in the same order.
lidar_weight: [0.4, 0.1, 0.1, 0.1, 0.1]  # Will be normalized when used

#------------------------------------------------------------
#------------    Full configs
#------------------------------------------------------------
# Disabled experiment-directory templates, kept for reference.
# NOTE: the first template interpolates ${eps_los}, which is itself commented out in the
# shortcut section; define it before re-enabling this line.
# exp_dir: logs/streetsurf_refactor/dbgfix5.2_withmask_withlidar_seg938501_${lidar_fn}=${w_lidar}_lnini=${lnini}_invs=${final_inv_s}_${ctrl_start}_sdfscale=${sdf_scale}_wsp=${w_sparsity}_for=${sparsity_anneal_for}_wlos=${w_los}_eps=${eps_los}_weik=${w_eikonal}_on=${on_render_type}_onocc=${on_occ_ratio}_a=${on_render_ratio}_errlim=${errlim}_stlv=${start_level}_ini262144_softplus_stop=${stop_it}_cl=${clw}_${clbeta}_${clearance_sdf}_ego2.0
# exp_parent_dir: logs/streetsurf_230814_5cams_static_32.joint
# Active experiment directory (a fixed path, no interpolation).
exp_dir: logs/streetsurf/seg100613.withmask_withlidar_exp1

# Dataset backend selection.
# NOTE(review): `param` is presumably handed to the class named by `target` - confirm.
dataset_cfg:
  target: dataio.autonomous_driving.WaymoDataset
  param:
    # Dataset root. This is a machine-specific absolute path: switch to one of the
    # commented alternatives (or your own location) when running elsewhere.
    # root: /nvme/guojianfei/waymo/processed/
    root: /data1/waymo/processed/
    # root: /home/ventus/datasets/waymo/processed/
    # root: ./data/waymo/processed/
    # Sub-directory names for each data modality.
    rgb_dirname: images
    lidar_dirname: lidars
    mask_dirname: masks

# Scenes to load and the observers (sensors) attached to them.
scenebank_cfg:
  # NOTE: scene_id[,start_frame[,n_frames]]
  # Each entry is one string. Exactly one scenario is active; the rest are kept
  # commented out as ready-to-use alternatives.
  scenarios:
    - segment-10061305430875486848_1080_000_1100_000_with_camera_labels, 0, 163
    # - segment-15868625208244306149_4340_000_4360_000_with_camera_labels, 70
    # - segment-1172406780360799916_1660_000_1680_000_with_camera_labels
    # - segment-13476374534576730229_240_000_260_000_with_camera_labels, 0, 140
    # - segment-14869732972903148657_2420_000_2440_000_with_camera_labels
    # - segment-15221704733958986648_1400_000_1420_000_with_camera_labels
    # - segment-15270638100874320175_2720_000_2740_000_with_camera_labels, 30
    # - segment-15365821471737026848_1160_000_1180_000_with_camera_labels, 0, 170
    # - segment-3425716115468765803_977_756_997_756_with_camera_labels, 0, 120
    # - segment-10676267326664322837_311_180_331_180_with_camera_labels
    # - segment-4058410353286511411_3980_000_4000_000_with_camera_labels, 90
    # - segment-16608525782988721413_100_000_120_000_with_camera_labels, 0, 120
    # - segment-15062351272945542584_5921_360_5941_360_with_camera_labels
    # - segment-16646360389507147817_3320_000_3340_000_with_camera_labels
    # - segment-10275144660749673822_5755_561_5775_561_with_camera_labels
    # - segment-11379226583756500423_6230_810_6250_810_with_camera_labels
    # - segment-13238419657658219864_4630_850_4650_850_with_camera_labels
    # - segment-14424804287031718399_1281_030_1301_030_with_camera_labels
    # - segment-15349503153813328111_2160_000_2180_000_with_camera_labels, 80
    # - segment-17761959194352517553_5448_420_5468_420_with_camera_labels
    # - segment-3224923476345749285_4480_000_4500_000_with_camera_labels
    # - segment-3988957004231180266_5566_500_5586_500_with_camera_labels
    # - segment-9385013624094020582_2547_650_2567_650_with_camera_labels
    # - segment-8811210064692949185_3066_770_3086_770_with_camera_labels
    # - segment-12879640240483815315_5852_605_5872_605_with_camera_labels
    # - segment-13142190313715360621_3888_090_3908_090_with_camera_labels, 17
    # - segment-13196796799137805454_3036_940_3056_940_with_camera_labels
    # - segment-14348136031422182645_3360_000_3380_000_with_camera_labels
    # - segment-16470190748368943792_4369_490_4389_490_with_camera_labels
    # - segment-13085453465864374565_2040_000_2060_000_with_camera_labels
    # - segment-14004546003548947884_2331_861_2351_861_with_camera_labels, 24
    # - segment-16345319168590318167_1420_000_1440_000_with_camera_labels
  # Observers per class; both lists come from the top-level camera_list / lidar_list.
  observer_cfgs: 
    Camera:
      list: ${camera_list}
    RaysLidar:
      list: ${lidar_list}
  on_load:
    no_objects: true # Set to true to skip loading foreground objects into scene graph
    align_orientation: true
    consider_distortion: true
    scene_graph_has_ego_car: true # !!! Convenient for NVS

assetbank_cfg:
  # Street asset (model class app.models.single.LoTDNeuSStreet).
  # Every `${name}` below resolves to the top-level shortcut key of the same name;
  # `${training.*}` values resolve into the `training` section of this file.
  Street:
    model_class: app.models.single.LoTDNeuSStreet
    model_params:
      dtype: half
      var_ctrl_cfg:
        ln_inv_s_init: ${lnini}
        ln_inv_s_factor: 10.0
        ctrl_type: mix_linear
        start_it: ${ctrl_start}
        stop_it: ${training.num_iters} # Same value as the total number of training iterations
        final_inv_s: ${final_inv_s}
      cos_anneal_cfg: null
      surface_cfg:
        sdf_scale: ${sdf_scale}
        encoding_cfg:
          lotd_use_cuboid: true
          lotd_auto_compute_cfg:
            type: ngp
            target_num_params: ${eval:"32*(2**20)"} # 64 MiB float16 params -> 32 Mi params
            min_res: 16
            n_feats: 2
            log2_hashmap_size: 20
            max_num_levels: null
          param_init_cfg:
            type: uniform_to_type
            bound: 1.0e-4
          anneal_cfg:
            type: hardmask
            start_it: ${start_it}
            start_level: ${start_level} # (need to be small: so the training is stable; not too small, so there's still valid initialize pretraining.)
            stop_it: ${stop_it} # Not for too much iters; should end very soon to not hinder quality
        decoder_cfg: 
          type: mlp
          D: 1
          W: 64
          # select_n_levels: 14
          activation:
            type: softplus
            beta: 100.0
        n_extra_feat_from_output: 0
        geo_init_method: ${geo_init_method}
      radiance_cfg:
        use_pos: true
        use_view_dirs: true
        dir_embed_cfg: 
          type: spherical
          degree: 4
        D: 2
        W: 64
        # Must match the embedding width produced by the ImageEmbeddings asset
        # (both read image_embedding_dim).
        n_appear_embedding: ${image_embedding_dim}
      use_tcnn_backend: false
      accel_cfg:
        type: occ_grid
        vox_size: 1.0
        # resolution: [64,64,64]
        occ_val_fn_cfg:
          type: sdf
          inv_s: 256.0 # => +- 0.01 sdf @ 0.3 thre
        occ_thre: 0.3
        ema_decay: 0.95
        init_cfg:
          mode: from_net
          num_steps: 4
          num_pts: ${eval:"2**20"}
        update_from_net_cfg:
          num_steps: 4
          num_pts: ${eval:"2**20"}
        update_from_samples_cfg: {}
        n_steps_between_update: 16
        n_steps_warmup: 256
      ray_query_cfg:
        query_mode: march_occ_multi_upsample_compressed
        # query_mode: march_occ_multi_upsample
        query_param:
          nablas_has_grad: true
          num_coarse: ${num_coarse}
          num_fine: ${num_fine}
          coarse_step_cfg:
            step_mode: linear
          march_cfg:
            step_size: ${step_size} # Typical value: (far-near) / 4000
            max_steps: 4096
          upsample_inv_s: ${upsample_inv_s}
          upsample_inv_s_factors: ${upsample_inv_s_factors}
          upsample_use_estimate_alpha: ${use_estimate_alpha}
    asset_params:
      # Settings of the geometry initialization stage; it has its own lr / num_iters,
      # separate from the main training schedule.
      initialize_cfg: 
        target_shape: road_surface
        obs_ref: camera_FRONT # Reference observer. Its trajectory will be used for initialization.
        lr: 1.0e-3
        num_iters: 1000
        num_points: 262144
        w_eikonal: 3.0e-3
        floor_dim: z
        floor_up_sign: 1
        ego_height: 2.0
      preload_cfg: {}
      populate_cfg:
        extend_size: ${extend_size}
      training_cfg:
        lr: ${fglr}
        eps: 1.0e-15
        # NOTE(review): second beta is 0.991 here but 0.99 for the Distant asset -
        # confirm the difference is intentional.
        betas: [0.9, 0.991]
        invs_betas: [0.9, 0.999]
        scheduler: ${training.scheduler} # Shared learning-rate schedule
  # Distant asset (model class app.models.single.LoTDNeRFDistant).
  # `${name}` values resolve to the top-level shortcut keys of the same name.
  Distant:
    model_class: app.models.single.LoTDNeRFDistant
    model_params:
      dtype: half
      encoding_cfg:
        input_ch: 4
        lotd_use_cuboid: true
        lotd_auto_compute_cfg:
          type: ngp4d
          target_num_params: ${eval:"16*(2**20)"} # 16 Mi params
          min_res_xyz: 16
          min_res_w: 4
          n_feats: 2
          log2_hashmap_size: 19
          per_level_scale: 1.382
        param_init_cfg:
          type: uniform_to_type
          bound: 1.0e-4
        # Disabled level annealing. NOTE: it interpolates ${bg_start_level}, which is not
        # defined among the shortcut keys of this file; define it before re-enabling.
        # anneal_cfg:
        #   type: hardmask
        #   start_it: ${start_it}
        #   start_level: ${bg_start_level} # (need to be small: so the training is stable; not too small, so there's still valid initialize pretraining.)
        #   stop_it: ${stop_it} # Not for too much iters; should end very soon to not hinder quality
      extra_pos_embed_cfg:
        type: identity
      density_decoder_cfg: 
        type: mlp
        D: 1
        W: 64
        output_activation: softplus
      radiance_decoder_cfg:
        use_pos: false
        # pos_embed_cfg:
        #   type: identity
        use_view_dirs: false
        # dir_embed_cfg:
        #   type: spherical
        #   degree: 4
        use_nablas: false
        D: 2
        W: 64
        n_appear_embedding: ${image_embedding_dim}
      n_extra_feat_from_output: 0
      use_tcnn_backend: false
      include_inf_distance: false # !!! has sky
      radius_scale_min: ${radius_scale_min}
      radius_scale_max: ${radius_scale_max}
      ray_query_cfg:
        query_mode: march
        query_param:
          march_cfg:
            interval_type: ${distant_interval_type}
            sample_mode: ${distant_mode}
            max_steps: ${distant_nsample}
    asset_params:
      populate_cfg:
        # Names the `Street` asset class defined in this same assetbank_cfg.
        cr_obj_classname: Street
      training_cfg:
        lr: ${bglr}
        eps: 1.0e-15
        betas: [0.9, 0.99]
        scheduler: ${training.scheduler} # Shared learning-rate schedule
  # Sky asset (model class app.models.env.SimpleSky).
  Sky:
    model_class: app.models.env.SimpleSky
    model_params: 
      dir_embed_cfg:
        type: sinusoidal
        n_frequencies: 10
        # Set for the direction embedding; the model-level flag is set separately below.
        use_tcnn_backend: false
      D: 2
      W: 256
      use_tcnn_backend: false
      n_appear_embedding: ${image_embedding_dim}
    asset_params:
      training_cfg: 
        lr: ${skylr}
        scheduler: ${training.scheduler} # Shared learning-rate schedule
  # Image embeddings asset (model class app.models.scene.ImageEmbeddings).
  # `dims` reads the same image_embedding_dim that Street / Distant / Sky use for
  # `n_appear_embedding`.
  ImageEmbeddings:
    model_class: app.models.scene.ImageEmbeddings
    model_params:
      dims: ${image_embedding_dim}
      weight_init: uniform
      weight_init_std: 1.0e-4
    asset_params:
      training_cfg: 
        lr: ${emblr}
        scheduler: ${training.scheduler} # Shared learning-rate schedule
  #--- Pose refine related
  # Only ego-motion refinement is configured; camera intrinsics / extrinsics
  # refinement entries are set to null.
  LearnableParams:
    model_class: app.models.scene.LearnableParams
    model_params:
      refine_ego_motion: 
        # node_id: ego_car
        class_name: Camera
      refine_camera_intr: null
      refine_camera_extr: null
      # NOTE(review): presumably the iteration after which refinement starts - confirm.
      enable_after: 500
    asset_params:
      training_cfg:
        ego_motion:
          lr: 0.001
          alpha_lr_rotation: 0.05
        scheduler: ${training.scheduler} # Shared learning-rate schedule

# Renderer settings: `common` plus per-mode (`train` / `val`) entries.
# NOTE(review): `common` is presumably merged into both modes - confirm with the renderer.
renderer:
  common:
    with_env: true # !!! has sky
    with_rgb: true
    with_normal: true
    near: ${near} # NOTE: Critical to scene scale!
    far: ${far}
  train:
    depth_use_normalized_vw: false # For meaningful depth supervision (if any)
    perturb: true
  val:
    depth_use_normalized_vw: true # For correct depth rendering
    perturb: false
    rayschunk: 4096

training:
  #---------- Dataset and sampling
  # Training data: what to load (`tags`) and how pixel / lidar rays are sampled.
  dataloader:
    preload: true
    preload_on_gpu: false
    tags:
      camera:
        downscale: 1
        list: ${camera_list}
      image_occupancy_mask: {}
      image_human_mask: {}
      image_ignore_mask:
        ignore_not_occupied: false
        ignore_dynamic: false
        ignore_human: true
      # This whole `lidar` mapping is reused by training.val_dataloader.tags.lidar.
      lidar:
        list: ${lidar_list}
        multi_lidar_merge: true
        filter_when_preload: true
        filter_kwargs:
          filter_in_cams: true
    pixel_dataset:
      # Two alternative configurations; the joint one is active.
      #---------- Frame and pixel dataloader
      # joint: false
      # equal_mode: ray_batch
      # num_rays: ${num_rays_pixel}
      # frame_sample_mode: uniform
      # pixel_sample_mode: error_map
      #---------- Joint frame-pixel dataloader
      joint: true
      equal_mode: ray_batch
      num_rays: ${num_rays_pixel}
    lidar_dataset:
      equal_mode: ray_batch
      num_rays: ${num_rays_lidar}
      frame_sample_mode: uniform
      lidar_sample_mode: merged_weighted
      multi_lidar_weight: ${lidar_weight} # Will be normalized when used
  # Validation data. Differs from the training dataloader in `preload` (false) and
  # camera `downscale` (4); the mask tags repeat the training values.
  val_dataloader:
    preload: false
    tags:
      camera:
        downscale: 4
        list: ${camera_list}
      image_occupancy_mask: {}
      image_human_mask: {}
      image_ignore_mask:
        ignore_not_occupied: false
        ignore_dynamic: false
        ignore_human: true
      # Reuses the training lidar tag settings verbatim.
      lidar: ${training.dataloader.tags.lidar}
    image_dataset:
      camera_sample_mode: all_list # !!!
      frame_sample_mode: uniform

  # Error-map settings.
  # NOTE(review): the only visible reference to error-map sampling
  # (`pixel_sample_mode: error_map`) is commented out in training.dataloader.pixel_dataset;
  # confirm whether this section is still consumed in joint mode.
  error_map:
    error_map_hw: [32,64]
    frac_uniform: 0.5
    frac_mask_err: 0
    n_steps_max: 500 # NOTE: The actual effective time of this number now needs to be multiplied by the number of cameras! (Because each iteration samples a camera uniformly at random)

  #---------- Training losses
  # `${name}` values resolve to the top-level shortcut keys of the same name.
  uniform_sample: 
    Street: ${num_uniform}
  losses:
    rgb: 
      fn_type: ${rgb_fn}
      fn_param: ${rgb_fn_param}
      respect_ignore_mask: true
    occupancy_mask:
      w: ${w_mask}
      safe_bce: true
      pred_clip: 0
    mask_entropy:
      w: 0.005
      mode: crisp_cr
      enable_after: 2000
      # Linear anneal from 0 at iteration 2000 (= enable_after) to 0.005 (= w) at iteration 5000.
      anneal:
        type: linear
        start_it: 2000
        stop_it: 5000
        start_val: 0
        stop_val: 0.005
        update_every: 100
    lidar:
      discard_outliers: 0
      discard_outliers_median: 100.0
      discard_toofar: 80.0
      depth: 
        w: ${w_lidar}
        fn_type: ${lidar_fn}
        fn_param: ${lidar_fn_param}
      line_of_sight:
        w: ${w_los}
        fn_type: neus_unisim
        fn_param:
          # epsilon: ${eps_los}
          # Three values for two milestones.
          # NOTE(review): presumably 1.5 before it 5000, 0.75 until it 10000, then 0.5 - confirm.
          epsilon_anneal:
            type: milestones
            milestones: [5000, 10000]
            vals: [1.5, 0.75, 0.5]
    eikonal:
      safe_mse: ${safe_mse}
      safe_mse_err_limit: ${errlim}
      alpha_reg_zero: 0
      on_occ_ratio: ${on_occ_ratio}
      on_render_type: ${on_render_type}
      on_render_ratio: ${on_render_ratio}
      class_name_cfgs:
        Street:
          w: ${w_eikonal}
    sparsity:
      enable_after: ${sparsity_enable_after}
      class_name_cfgs:
        Street:
          key: sdf
          type: normalized_logistic_density
          inv_scale: 16.0
          w: ${w_sparsity}
          # Linear anneal from 0 up to w_sparsity. stop_it evaluates to
          # sparsity_anneal_for + sparsity_enable_after (1000 + 0 with the shortcut values).
          anneal:
            type: linear
            start_it: ${sparsity_enable_after}
            start_val: 0
            stop_it: ${eval:"${sparsity_anneal_for}+${sparsity_enable_after}"}
            stop_val: ${w_sparsity}
            update_every: 100
    clearance:
      class_name_cfgs:
        Street:
          w: ${clw}
          beta: ${clbeta}
          thresh: ${clearance_sdf}
    # Same weight-regularization settings for both Street and Distant.
    weight_reg:
      class_name_cfgs:
        Street:
          norm_type: 2.0
          w: 1.0e-6
        Distant:
          norm_type: 2.0
          w: 1.0e-6

  # Total training iterations (top-level shortcut `num_iters`). Also read by
  # assetbank_cfg.Street.model_params.var_ctrl_cfg.stop_it and by the scheduler below.
  num_iters: ${num_iters}

  # Learning-rate schedule shared by every asset's training_cfg.scheduler.
  # Exactly one variant is active (exponential); the others are kept for reference.
  scheduler:
    #---------- exponential
    type: exponential
    total_steps: ${training.num_iters}
    warmup_steps: ${warmup_steps}
    decay_target_factor: ${min_factor}
    # decay_interval: 3000
    #---------- exponential_plenoxels
    # type: exponential_plenoxels
    # total_steps: ${training.num_iters}
    # warmup_steps: ${warmup_steps}
    # decay_target_factor: ${min_factor}
    #---------- cosine
    # type: warmup_cosine
    # num_iters: ${training.num_iters}
    # min_factor: ${min_factor}
    # warmup_steps: ${warmup_steps}
    #---------- milestone
    # type: multistep
    # milestones: [20000, 30000]
    # gamma: 0.33

  #---------- Logging and validation
  # NOTE: the intervals use different units - i_val / i_backup are in iterations,
  # i_save is in seconds.
  i_val: 1500      # unit: iters
  # NOTE(review): -1 presumably disables periodic backups - confirm.
  i_backup: -1 # unit: iters
  i_save: 900     # unit: seconds
  # NOTE(review): no unit is given for i_log; presumably iterations - confirm.
  i_log: 20
  log_grad: false
  log_param: false

  # Checkpoint loading options; no checkpoint file is configured here.
  ckpt_file: null
  ckpt_ignore_keys: []
  ckpt_only_use_keys: null