initial

kddresearch · Jul 28, 2024 · b0622ef · b0622ef
commit b0622ef
Show file tree

Hide file tree

Showing 8 changed files with 454 additions and 0 deletions.
diff --git a/I3D/annotation.xlsx b/I3D/annotation.xlsx
diff --git a/I3D/bash.txt b/I3D/bash.txt
@@ -0,0 +1,7 @@
+python tools/train.py configs/recognition/i3d/i3d_r50_32x2x1_100e_goodbad.py     --work-dir work_dirs/goodbad_rgb2     --validate --seed 0 --deterministic
+
+python tools/test.py configs/recognition/i3d/i3d_r50_32x2x1_100e_goodbad.py     work_dirs/goodbad_rgb/latest.pth --eval top_k_accuracy mean_class_accuracy     --out result.json --average-clips prob
+
+https://github.com/open-mmlab/mmaction2/blob/master/configs/recognition/i3d/README.md  (download the pretrained I3D checkpoint listed here before training)
+https://github.com/open-mmlab/mmaction2
+delete 599
diff --git a/I3D/get_frame.py b/I3D/get_frame.py
@@ -0,0 +1,38 @@
+import cv2
+import numpy as np
+import os
+from multiprocessing import Pool, cpu_count
+import os
+import time
+import pdb
+
+if __name__ == '__main__':
+
+    root_path = '/home/yeling/yeling/data/yeling_code/TackleVideos-190/'
+    mx_path_save = '/home/yeling/yeling/data/yeling_code/frame_data'
+    if not os.path.exists(mx_path_save):
+        os.mkdir(mx_path_save)
+    for fi in  os.listdir(root_path):
+        mx_path=os.path.join(root_path,fi)
+
+        videos = sorted(os.listdir(mx_path))
+        for video_name in videos:
+
+            video_path=os.path.join(mx_path_save,video_name.split('.')[0])
+
+            if not os.path.exists(video_path):
+                os.mkdir(video_path)
+            cap = cv2.VideoCapture(os.path.join(mx_path,video_name))
+            c_frame = 1
+            while (True):
+
+                # print(cap.isOpened(),cap.read(),c_frame,os.path.join(mx_path,video_name))
+                ret, frame = cap.read()
+                # pdb.set_trace()
+                if ret:
+                    cv2.imwrite(os.path.join(video_path, 'img_' + str('{:05d}'.format(c_frame)) + '.jpg'), frame)
+                else:
+
+                    break
+                c_frame = c_frame + 1
+            cap.release()
diff --git a/I3D/goodbad_txt.py b/I3D/goodbad_txt.py
@@ -0,0 +1,42 @@
+import os
+import random
+
+path='frame_data'
+
+root_path = '/home/yeling/yeling/data/yeling_code/TackleVideos-190/'
+
+txt1=open('label_frame_all.txt','w')
+txt2=open('label_frame_train.txt','w')
+txt3=open('label_frame_val.txt','w')
+
+
+list_data=[]
+for fi in  os.listdir(root_path):
+	mx_path=os.path.join(root_path,fi)
+
+	videos = sorted(os.listdir(mx_path))
+
+	for v in videos:
+
+
+		vname=v.split('.')[0]
+
+
+		if fi=='safe':
+			list_data.append([vname,len(os.listdir(os.path.join(path,vname))),1])
+
+		if fi=='risky':
+			list_data.append([vname,len(os.listdir(os.path.join(path,vname))),0])
+
+
+random.shuffle(list_data)
+
+
+for idx ,data in enumerate(list_data):
+
+	if idx <len(list_data)*0.7:
+
+		txt2.writelines([data[0],' ',str(data[1]),' ',str(data[2]),'\n'])
+	else:
+		txt3.writelines([data[0],' ',str(data[1]),' ',str(data[2]),'\n'])
+		txt1.writelines([data[0],' ',str(data[1]),' ',str(data[2]),'\n'])
diff --git a/I3D/i3d_r50_32x2x1_100e_goodbad.py b/I3D/i3d_r50_32x2x1_100e_goodbad.py
@@ -0,0 +1,122 @@
+# model settings
+# 3D recognizer: a depth-50 ResNet3d backbone followed by an I3DHead
+# classification head.
+model = dict(
+    type='Recognizer3D',
+    backbone=dict(
+        type='ResNet3d',
+        # Backbone weights start from the 2D torchvision ResNet-50 checkpoint
+        # (pretrained2d=True); `inflate` presumably selects, per stage and
+        # block, which 2D kernels are inflated to 3D -- confirm against the
+        # mmaction2 ResNet3d documentation.
+        pretrained2d=True,
+        pretrained='torchvision://resnet50',
+        depth=50,
+        conv_cfg=dict(type='Conv3d'),
+        norm_eval=False,
+        inflate=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 1, 0)),
+        zero_init_residual=False),
+    cls_head=dict(
+        type='I3DHead',
+        # Two classes, matching the labels written by goodbad_txt.py
+        # (1 for the 'safe' folder, 0 for the 'risky' folder).
+        num_classes=2,
+        in_channels=2048,
+        spatial_type='avg',
+        dropout_ratio=0.5,
+        init_std=0.01))
+# model training and testing settings
+train_cfg = None
+# NOTE(review): the test command in bash.txt passes `--average-clips prob`,
+# which presumably overrides average_clips=None at test time -- confirm.
+test_cfg = dict(average_clips=None)
+# dataset settings
+# Frames are read from per-video folders of extracted images (RawframeDataset).
+dataset_type = 'RawframeDataset'
+# Absolute, machine-specific paths. NOTE(review): get_frame.py writes frames
+# under /home/yeling/.../frame_data and goodbad_txt.py writes the label files
+# to its working directory, so they presumably have to be copied/linked to the
+# locations below -- confirm.
+data_root = '/media/sdg/yiliu/mmaction2/frame_data/'
+data_root_val = '/media/sdg/yiliu/mmaction2/frame_data/'
+ann_file_train = '/media/sdg/yiliu/mmaction2/label_frame_train.txt'
+ann_file_val = '/media/sdg/yiliu/mmaction2/label_frame_val.txt'
+# Same file as ann_file_val: there is no separate held-out test split.
+# This name is not referenced by the `data` dict in this config.
+ann_file_test = '/media/sdg/yiliu/mmaction2/label_frame_val.txt'
+# Per-channel normalisation constants shared by all three pipelines
+# (presumably the usual ImageNet RGB mean/std -- confirm).
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
+# Training pipeline: one 32-frame clip (frame_interval=2) per video, resized,
+# multi-scale cropped, resized to 224x224, randomly flipped and normalised.
+# NOTE(review): several videos in the label files have fewer than 64 frames
+# (e.g. 49, 54, 55), i.e. less than clip_len * frame_interval; how
+# SampleFrames handles such short videos should be confirmed.
+train_pipeline = [
+    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
+    dict(type='RawFrameDecode'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(
+        type='MultiScaleCrop',
+        input_size=224,
+        scales=(1, 0.8),
+        random_crop=False,
+        max_wh_scale_gap=0),
+    dict(type='Resize', scale=(224, 224), keep_ratio=False),
+    dict(type='Flip', flip_ratio=0.5),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs', 'label'])
+]
+# Validation pipeline: one 32-frame clip sampled in test mode, 224 centre
+# crop, no flipping (flip_ratio=0).
+val_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=32,
+        frame_interval=2,
+        num_clips=1,
+        test_mode=True),
+    dict(type='RawFrameDecode'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='CenterCrop', crop_size=224),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+# Test pipeline: ten 32-frame clips per video, each passed through ThreeCrop
+# with crop_size=256, no flipping.
+test_pipeline = [
+    dict(
+        type='SampleFrames',
+        clip_len=32,
+        frame_interval=2,
+        num_clips=10,
+        test_mode=True),
+    dict(type='RawFrameDecode'),
+    dict(type='Resize', scale=(-1, 256)),
+    dict(type='ThreeCrop', crop_size=256),
+    dict(type='Flip', flip_ratio=0),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='FormatShape', input_format='NCTHW'),
+    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
+    dict(type='ToTensor', keys=['imgs'])
+]
+# Dataset / dataloader wiring: 8 videos and 4 workers per GPU.
+data = dict(
+    videos_per_gpu=8,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        ann_file=ann_file_train,
+        data_prefix=data_root,
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=val_pipeline),
+    # The test split reuses the validation annotation file and data prefix
+    # (ann_file_val, not ann_file_test; both point at the same file), so
+    # "test" results are computed on the validation videos.
+    test=dict(
+        type=dataset_type,
+        ann_file=ann_file_val,
+        data_prefix=data_root_val,
+        pipeline=test_pipeline))
+# optimizer
+optimizer = dict(
+    type='SGD', lr=0.005, momentum=0.9,
+    weight_decay=0.0001)  # this lr is used for 8 gpus
+# Gradient clipping by L2 norm (max_norm=40).
+optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
+# learning policy
+# NOTE(review): the step milestones are epochs 40 and 80, but total_epochs is
+# 3, so the learning rate is presumably never decayed in this run -- confirm
+# that the short schedule is intentional (the file name says 100e).
+lr_config = dict(policy='step', step=[40, 80])
+total_epochs = 3
+# Save a checkpoint and run evaluation after every epoch.
+checkpoint_config = dict(interval=1)
+# NOTE(review): the model has num_classes=2, so a top-5 accuracy
+# (topk=(1,5)) carries no information here -- consider confirming whether
+# only top-1 is wanted.
+evaluation = dict(
+    interval=1, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1,5))
+# Text log entry every 20 iterations; the Tensorboard hook is disabled.
+log_config = dict(
+    interval=20,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        # dict(type='TensorboardLoggerHook'),
+    ])
+# runtime settings
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+# NOTE(review): the training command in bash.txt passes
+# `--work-dir work_dirs/goodbad_rgb2`, while the test command reads
+# work_dirs/goodbad_rgb/latest.pth -- confirm which directory is intended.
+work_dir = './work_dirs/goodbad_rgb/'
+# Absolute, machine-specific path to the checkpoint loaded before training
+# (file name indicates I3D R50 32x2x1 100e Kinetics-400 RGB weights).
+load_from = '/media/sdg/yiliu/mmaction2/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb_20200614-c25ef9a4.pth'
+resume_from = None
+workflow = [('train', 1)]
diff --git a/I3D/label_frame_all.txt b/I3D/label_frame_all.txt
@@ -0,0 +1,56 @@
+569 636 0
+481_LOOP0017 269 1
+468_LOOP0004 301 0
+002 49 1
+005 54 1
+529 74 1
+012 77 0
+482_LOOP0018 299 1
+011 60 1
+030 193 1
+025 77 1
+505 82 0
+034 192 1
+558 266 0
+596 202 1
+521 63 1
+540 362 1
+559 237 0
+007 71 0
+506 96 0
+560 277 1
+466_LOOP0002 246 0
+592 191 1
+567 221 1
+013 165 0
+503 105 0
+008 55 1
+583 258 0
+470_LOOP0006 265 1
+479_LOOP0015 351 1
+565 247 0
+598 257 0
+500 92 1
+546 299 1
+033 167 1
+024 62 1
+616 326 1
+566 248 1
+547 280 1
+023 84 1
+601 336 1
+603 248 1
+541 338 1
+016 65 1
+499 112 1
+039 234 1
+523 88 0
+492_LOOP0028 292 1
+535 302 1
+476_LOOP0012 279 1
+542 321 0
+572 234 1
+006 73 1
+613 590 0
+573 217 1
+514 99 1