-
Notifications
You must be signed in to change notification settings - Fork 142
/
petrv2_vovnet_gridmask_p4_1600x640_dn_multiscale_amp.yml
207 lines (201 loc) · 5.86 KB
/
petrv2_vovnet_gridmask_p4_1600x640_dn_multiscale_amp.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# PETRv2 (VoVNet-99, GridMask, P4, 1600x640, denoise, multi-scale, AMP)
# training configuration — nesting reconstructed with standard 2-space indent.
batch_size: 1
epochs: 24

amp_cfg:
  use_amp: true
  # only enable backbone and fpn
  enable: false
  level: O1
  scaler:
    init_loss_scaling: 512.0

train_dataset:
  type: NuscenesMVDataset
  dataset_root: data/nuscenes/
  ann_file: data/nuscenes/petr_nuscenes_annotation_train.pkl
  mode: train
  use_valid_flag: true
  class_names: [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
    'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
  ]
  transforms:
    - type: LoadMultiViewImageFromFiles
      to_float32: true
    - type: LoadMultiViewImageFromMultiSweepsFiles
      sweeps_num: 1
      to_float32: true
      pad_empty_sweeps: true
      sweep_range: [3, 27]
      test_mode: false
    - type: LoadAnnotations3D
      with_bbox_3d: true
      with_label_3d: true
    - type: SampleRangeFilter
      point_cloud_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
    - type: SampleNameFilter
      classes: [
        'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
        'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
      ]
    - type: ResizeCropFlipImage
      sample_aug_cfg:
        resize_lim: [0.94, 1.25]
        final_dim: [640, 1600]
        bot_pct_lim: [0.0, 0.0]
        rot_lim: [0.0, 0.0]
        H: 900
        W: 1600
        rand_flip: true
      training: true
    - type: GlobalRotScaleTransImage
      rot_range: [-0.3925, 0.3925]
      translation_std: [0, 0, 0]
      scale_ratio_range: [0.95, 1.05]
      reverse_angle: true
      training: true
    - type: NormalizeMultiviewImage
      mean: [103.530, 116.280, 123.675]
      std: [57.375, 57.120, 58.395]
    - type: PadMultiViewImage
      size_divisor: 32
    # NOTE(review): "SampleFilerByKey" looks like a typo for "Filter", but it
    # must match the transform name registered in the framework — verify
    # against the codebase before renaming.
    - type: SampleFilerByKey
      keys: ['gt_bboxes_3d', 'gt_labels_3d', 'img']
      meta_keys: ['filename', 'ori_shape', 'img_shape', 'lidar2img',
                  'intrinsics', 'extrinsics', 'pad_shape',
                  'scale_factor', 'flip', 'box_type_3d', 'img_norm_cfg',
                  'sample_idx', 'timestamp', 'gt_bboxes_3d', 'gt_labels_3d']

val_dataset:
  type: NuscenesMVDataset
  dataset_root: data/nuscenes/
  ann_file: data/nuscenes/petr_nuscenes_annotation_val.pkl
  mode: val
  class_names: ['car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                'traffic_cone']
  transforms:
    - type: LoadMultiViewImageFromFiles
      to_float32: true
    - type: LoadMultiViewImageFromMultiSweepsFiles
      sweeps_num: 1
      to_float32: true
      pad_empty_sweeps: true
      sweep_range: [3, 27]
    - type: ResizeCropFlipImage
      sample_aug_cfg:
        resize_lim: [0.94, 1.25]
        final_dim: [640, 1600]
        bot_pct_lim: [0.0, 0.0]
        rot_lim: [0.0, 0.0]
        H: 900
        W: 1600
        rand_flip: true
      training: false
    - type: NormalizeMultiviewImage
      mean: [103.530, 116.280, 123.675]
      std: [57.375, 57.120, 58.395]
    - type: PadMultiViewImage
      size_divisor: 32
    - type: SampleFilerByKey
      keys: ['img']
      meta_keys: ['filename', 'ori_shape', 'img_shape', 'lidar2img',
                  'intrinsics', 'extrinsics', 'pad_shape',
                  'scale_factor', 'flip', 'box_type_3d', 'img_norm_cfg',
                  'sample_idx', 'timestamp']

optimizer:
  type: AdamW
  weight_decay: 0.01
  grad_clip:
    type: ClipGradByGlobalNorm
    clip_norm: 35
    # auto_skip_clip: True

lr_scheduler:
  type: LinearWarmup
  learning_rate:
    type: CosineAnnealingDecay
    learning_rate: 0.0002
    T_max: 84408  # 3517 * 24
    eta_min: 0.0000002
  warmup_steps: 500
  start_lr: 0.00006666666
  end_lr: 0.0002

model:
  type: Petr3D
  use_recompute: true
  use_grid_mask: true
  # NOTE(review): "us_ms" looks like a typo for "use_ms", but the key must
  # match the model's constructor argument — verify before renaming.
  us_ms: true
  multi_scale: [0.5, 1.0]
  backbone:
    type: VoVNet  # can't use checkpoint here
    spec_name: V-99-eSE
    norm_eval: true
    frozen_stages: -1
    input_ch: 3
    # NOTE(review): parsed by YAML as the literal string "('stage4','stage5',)";
    # presumably eval'd by the config loader — confirm before reformatting.
    out_features: ('stage4','stage5',)
  neck:
    type: CPFPN  # remove unused parameters
    in_channels: [768, 1024]
    out_channels: 256
    num_outs: 2
  pts_bbox_head:
    type: PETRHead
    num_classes: 10
    in_channels: 512  # multi scale features concat
    num_query: 900
    LID: true
    with_multiview: true
    with_position: true
    with_fpe: true
    with_time: true
    with_multi: true
    with_denoise: true
    scalar: 10  # noise groups
    noise_scale: 1.0
    dn_weight: 1.0  # dn loss weight
    split: 0.75  # positive rate
    position_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
    code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    normedlinear: false
    transformer:
      type: PETRDNTransformer
      embed_dims: 256
      decoder:
        type: PETRTransformerDecoder
        return_intermediate: true
        num_layers: 6
        transformerlayers:
          type: PETRTransformerDecoderLayer
          attns:
            - type: MultiHeadAttention
              embed_dims: 256
              num_heads: 8
              attn_drop: 0.1
              drop_prob: 0.1
            - type: PETRMultiheadAttention
              embed_dims: 256
              num_heads: 8
              attn_drop: 0.1
              drop_prob: 0.1
              batch_first: true
          feedforward_channels: 2048
          ffn_dropout: 0.1
          operation_order: ['self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']
    positional_encoding:
      type: SinePositionalEncoding3D
      num_feats: 128
      normalize: true
    bbox_coder:
      type: NMSFreeCoder
      post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
      point_cloud_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
      max_num: 300
      voxel_size: [0.2, 0.2, 8]
      num_classes: 10
    loss_cls:
      type: WeightedFocalLoss
      gamma: 2.0
      alpha: 0.25
      loss_weight: 2.0
      reduction: sum
    loss_bbox:
      type: WeightedL1Loss
      loss_weight: 0.25
      reduction: sum