From 3dc4f1fe1be1fad9aec80fe4de34dcee7a0d71ae Mon Sep 17 00:00:00 2001 From: Ruizhi Shao <2238454358@qq.com> Date: Fri, 5 Jan 2024 17:59:35 +0800 Subject: [PATCH] fix hifa batch bug (#400) --- threestudio/models/guidance/stable_diffusion_guidance.py | 4 ++-- .../models/guidance/stable_diffusion_unified_guidance.py | 6 +++--- .../models/guidance/stable_diffusion_vsd_guidance.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/threestudio/models/guidance/stable_diffusion_guidance.py b/threestudio/models/guidance/stable_diffusion_guidance.py index f58d12e6..c5c91bd6 100644 --- a/threestudio/models/guidance/stable_diffusion_guidance.py +++ b/threestudio/models/guidance/stable_diffusion_guidance.py @@ -264,8 +264,8 @@ def compute_grad_sds( f"Unknown weighting strategy: {self.cfg.weighting_strategy}" ) - alpha = self.alphas[t] ** 0.5 - sigma = (1 - self.alphas[t]) ** 0.5 + alpha = (self.alphas[t] ** 0.5).view(-1, 1, 1, 1) + sigma = ((1 - self.alphas[t]) ** 0.5).view(-1, 1, 1, 1) latents_denoised = (latents_noisy - sigma * noise_pred) / alpha image_denoised = self.decode_latents(latents_denoised) diff --git a/threestudio/models/guidance/stable_diffusion_unified_guidance.py b/threestudio/models/guidance/stable_diffusion_unified_guidance.py index 8ea4a490..5774b1d4 100644 --- a/threestudio/models/guidance/stable_diffusion_unified_guidance.py +++ b/threestudio/models/guidance/stable_diffusion_unified_guidance.py @@ -673,7 +673,7 @@ def forward( if self.cfg.guidance_type == "vsd": latents_denoised_est = ( latents_noisy - self.sigmas[t] * eps_phi - ) / self.alphas[t] + ) / self.alphas[t].view(-1, 1, 1, 1) image_denoised_est = self.vae_decode( self.pipe.vae, latents_denoised_est ) @@ -682,8 +682,8 @@ def forward( grad_img = ( w * (image_denoised_est - image_denoised_pretrain) - * self.alphas[t] - / self.sigmas[t] + * self.alphas[t].view(-1, 1, 1, 1) + / self.sigmas[t].view(-1, 1, 1, 1) ) if self.grad_clip_val is not None: grad_img = grad_img.clamp(-self.grad_clip_val, self.grad_clip_val) diff --git a/threestudio/models/guidance/stable_diffusion_vsd_guidance.py b/threestudio/models/guidance/stable_diffusion_vsd_guidance.py index 4f970f3c..8a380baf 100644 --- a/threestudio/models/guidance/stable_diffusion_vsd_guidance.py +++ b/threestudio/models/guidance/stable_diffusion_vsd_guidance.py @@ -541,8 +541,8 @@ def compute_grad_vsd( grad = w * (noise_pred_pretrain - noise_pred_est) - alpha = self.alphas[t] ** 0.5 - sigma = (1 - self.alphas[t]) ** 0.5 + alpha = (self.alphas[t] ** 0.5).view(-1, 1, 1, 1) + sigma = ((1 - self.alphas[t]) ** 0.5).view(-1, 1, 1, 1) # image-space SDS proposed in HiFA: https://hifa-team.github.io/HiFA-site/ if self.cfg.use_img_loss: latents_denoised_pretrain = (