-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
186 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,7 @@ olddata | |
output_flow/ | ||
exp_data/ | ||
**/.DS_Store | ||
arguments/i2v.py | ||
train.py | ||
guidance/zero123_utils.py | ||
output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import torch | ||
from diffusers import StableVideoDiffusionPipeline | ||
from diffusers.utils import load_image, export_to_video, export_to_gif | ||
|
||
from PIL import Image | ||
import numpy as np | ||
|
||
import cv2 | ||
import rembg | ||
import os | ||
import argparse | ||
|
||
def add_margin(pil_img, top, right, bottom, left, color):
    """Return ``pil_img`` pasted onto a larger canvas filled with ``color``.

    The canvas is created in the same mode as ``pil_img``. It is
    ``left + right`` pixels wider and ``top + bottom`` pixels taller than
    the input, and the input is pasted at offset ``(left, top)``.
    """
    src_w, src_h = pil_img.size
    canvas_size = (src_w + right + left, src_h + top + bottom)
    canvas = Image.new(pil_img.mode, canvas_size, color)
    canvas.paste(pil_img, (left, top))
    return canvas
|
||
def resize_image(image, output_size=(1024, 576)):
    """Resize ``image`` to a square and pad it left and right.

    Args:
        image: PIL image to fit.
        output_size: ``(width, height)`` of the target frame. The image is
            first resized to ``height x height``; the remaining width is
            split evenly between a left and a right margin.

    Returns:
        The padded PIL image. When ``width - height`` is odd, the result is
        one pixel narrower than ``width`` because both margins use the
        floored half.
    """
    target_w, target_h = output_size[0], output_size[1]
    image = image.resize((target_h, target_h))
    pad_size = (target_w - target_h) // 2
    # Use the top-left pixel as the margin colour. getpixel returns a plain
    # int for single-band modes and a tuple of ints for multi-band modes,
    # so this also works for grayscale images, where converting a NumPy
    # scalar pixel to a tuple would raise.
    fill = image.getpixel((0, 0))
    return add_margin(image, 0, pad_size, 0, pad_size, fill)
|
||
|
||
def load_image(file, W, H, bg='white'):
    """Load an image, remove its background with rembg and return it as RGB.

    NOTE: this definition shadows the ``load_image`` helper imported from
    ``diffusers.utils`` at the top of the file.

    Args:
        file: Path of the image to read with OpenCV.
        W: Output width in pixels.
        H: Output height in pixels.
        bg: ``'white'`` composites the foreground over white using the
            alpha channel produced by rembg; ``'black'`` keeps the colour
            channels as they are, without applying the mask.

    Returns:
        A PIL image built from the ``H x W x 3`` uint8 RGB array.

    Raises:
        NotImplementedError: If ``bg`` is neither ``'white'`` nor ``'black'``.
        FileNotFoundError: If OpenCV cannot read ``file``.
    """
    if bg not in ('white', 'black'):
        # Reject unsupported backgrounds before any expensive work is done.
        raise NotImplementedError(f'unsupported background: {bg!r}')

    print(f'[INFO] load image from {file}...')
    img = cv2.imread(file, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {file}')

    bg_remover = rembg.new_session()
    img = rembg.remove(img, session=bg_remover)
    img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
    img = img.astype(np.float32) / 255.0
    # Channel 3 is used as the foreground mask.
    input_mask = img[..., 3:]
    if bg == 'white':
        input_img = img[..., :3] * input_mask + (1 - input_mask)
    else:
        input_img = img[..., :3]
    # OpenCV stores channels as BGR; reverse them to RGB for PIL.
    input_img = input_img[..., ::-1].copy()
    # Round rather than truncate so the float round trip cannot darken a
    # pixel by one level.
    pixels = np.clip(np.rint(input_img * 255), 0, 255).astype(np.uint8)
    return Image.fromarray(pixels)
|
||
def load_image_w_bg(file, W, H):
    """Load an image with its background kept and return it as RGB.

    Args:
        file: Path of the image to read with OpenCV.
        W: Output width in pixels.
        H: Output height in pixels.

    Returns:
        A PIL image built from the ``H x W x 3`` uint8 RGB array. Any alpha
        channel is dropped.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``file``.
    """
    print(f'[INFO] load image from {file}...')
    img = cv2.imread(file, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {file}')
    if img.ndim == 2:
        # A single-channel read is 2-D; without this the channel slice
        # below would cut the width down to three columns.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
    img = img.astype(np.float32) / 255.0
    input_img = img[..., :3]
    # OpenCV stores channels as BGR; reverse them to RGB for PIL.
    input_img = input_img[..., ::-1].copy()
    # Round rather than truncate so the float round trip cannot darken a
    # pixel by one level.
    pixels = np.clip(np.rint(input_img * 255), 0, 255).astype(np.uint8)
    return Image.fromarray(pixels)
|
||
# Pipelines already loaded in this process, keyed by model id.
_SVD_PIPELINES = {}


def _get_svd_pipeline(model_id="stabilityai/stable-video-diffusion-img2vid"):
    """Return the fp16 SVD pipeline on CUDA, loading it only on first use."""
    pipe = _SVD_PIPELINES.get(model_id)
    if pipe is None:
        pipe = StableVideoDiffusionPipeline.from_pretrained(
            model_id, torch_dtype=torch.float16, variant="fp16"
        )
        # pipe.enable_model_cpu_offload()
        pipe.to("cuda")
        _SVD_PIPELINES[model_id] = pipe
    return pipe


def gen_vid(data_path, name, seed, bg, is_pad):
    """Generate a video from one image and save it with its frames.

    Outputs are written to ``data/{name}_svd/{seed}``: an mp4, a gif and
    one ``{idx}.png`` per generated frame.

    Args:
        data_path: Path of the conditioning image.
        name: Run name used in the output directory and file names.
        seed: Seed passed to ``torch.manual_seed``.
        bg: Background mode forwarded to ``load_image``.
        is_pad: If true, run at 1024x576 with the image resized to a
            576x576 square and padded sideways, then crop each saved frame
            back to the central square. Otherwise run at 512x512.
    """
    # Reuse the pipeline across calls; reloading it for every seed of a
    # sweep repeats the same model load each time.
    pipe = _get_svd_pipeline()

    if is_pad:
        height, width = 576, 1024
    else:
        height, width = 512, 512

    save_dir = f"data/{name}_svd/{seed}"
    os.makedirs(save_dir, exist_ok=True)

    if is_pad:
        # Load the square directly. Loading at width x height first would
        # stretch the image to 1024x576 only for resize_image to squash it
        # back to 576x576, resampling it twice.
        image = load_image(data_path, height, height, bg)
        image = resize_image(image, output_size=(width, height))
    else:
        image = load_image(data_path, width, height, bg)

    generator = torch.manual_seed(seed)
    frames = pipe(image, height=height, width=width, generator=generator).frames[0]

    export_to_video(frames, f"{save_dir}/{name}_generated.mp4", fps=8)
    export_to_gif(frames, f"{save_dir}/{name}_generated.gif")

    # Width of each side margin added by resize_image.
    pad = (width - height) // 2
    for idx, img in enumerate(frames):
        if is_pad:
            img = img.crop((pad, 0, width - pad, height))
        img.save(f"{save_dir}/{idx}.png")
|
||
|
||
def str2bool(value):
    """Parse a command-line boolean such as ``True``, ``false``, ``1`` or ``0``.

    ``argparse`` with ``type=bool`` treats every non-empty string as true,
    so ``--is_pad False`` would enable padding. This parser reads the text.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, required=True)
    parser.add_argument("--name", type=str, required=True)
    parser.add_argument("--seed", type=int, default=None)
    parser.add_argument("--bg", type=str, default='white')
    parser.add_argument("--is_pad", type=str2bool, default=False)
    args, extras = parser.parse_known_args()

    # Without an explicit seed, sweep seeds 0..29.
    seeds = range(30) if args.seed is None else [args.seed]
    for seed in seeds:
        gen_vid(args.data_path, args.name, seed, args.bg, args.is_pad)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import os
import random
import shlex
import subprocess
from concurrent.futures import ThreadPoolExecutor

directory_path = '/data/users/yyy/4dgen_exp/data/harmonyview_testset'

# Jobs are pinned to a random device id in 0..NUM_GPUS-1.
NUM_GPUS = 5


def make_job_name(file):
    """Return the ``harm_``-prefixed run name for a file.

    The name is the final path component up to its first dot.
    """
    return 'harm_' + file.split('/')[-1].split('.')[0]


def build_command(file_path, file_name):
    """Return the argv list that runs ``image_to_video.py`` for one image."""
    return [
        'python', 'image_to_video.py',
        '--data_path', file_path,
        '--name', file_name,
    ]


def process_file(file):
    """Run ``image_to_video.py`` on one file of ``directory_path``.

    The child process is pinned to a randomly chosen GPU through
    ``CUDA_VISIBLE_DEVICES``. Failures are reported but do not raise, so
    one bad input does not stop the rest of the batch.
    """
    file_name = make_job_name(file)
    print(file_name)

    file_path = os.path.join(directory_path, file)
    cuda_device = random.randrange(NUM_GPUS)
    argv = build_command(file_path, file_name)
    printable = ' '.join(shlex.quote(arg) for arg in argv)
    print(f'CUDA_VISIBLE_DEVICES="{cuda_device}" {printable}')

    # Pass an argv list instead of a shell string so file names containing
    # spaces or shell metacharacters reach the child unchanged.
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(cuda_device))
    try:
        completed = subprocess.run(argv, env=env, check=False)
    except OSError as err:
        print(f'[WARN] could not start job for {file_name}: {err}')
        return
    if completed.returncode != 0:
        print(f'[WARN] {file_name} exited with status {completed.returncode}')


def main():
    """Process every file in ``directory_path`` once, in parallel."""
    file_list = os.listdir(directory_path)
    # Submit the list a single time. Wrapping this in a loop over the same
    # list would re-run every file once per file.
    with ThreadPoolExecutor() as executor:
        # Drain the lazy map result so worker errors surface.
        list(executor.map(process_file, file_list))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters