From 3dc4f1fe1be1fad9aec80fe4de34dcee7a0d71ae Mon Sep 17 00:00:00 2001
From: Ruizhi Shao <2238454358@qq.com>
Date: Fri, 5 Jan 2024 17:59:35 +0800
Subject: [PATCH] fix hifa batch bug (#400)

---
 threestudio/models/guidance/stable_diffusion_guidance.py    | 4 ++--
 .../models/guidance/stable_diffusion_unified_guidance.py    | 6 +++---
 .../models/guidance/stable_diffusion_vsd_guidance.py        | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/threestudio/models/guidance/stable_diffusion_guidance.py b/threestudio/models/guidance/stable_diffusion_guidance.py
index f58d12e6..c5c91bd6 100644
--- a/threestudio/models/guidance/stable_diffusion_guidance.py
+++ b/threestudio/models/guidance/stable_diffusion_guidance.py
@@ -264,8 +264,8 @@ def compute_grad_sds(
                 f"Unknown weighting strategy: {self.cfg.weighting_strategy}"
             )
 
-        alpha = self.alphas[t] ** 0.5
-        sigma = (1 - self.alphas[t]) ** 0.5
+        alpha = (self.alphas[t] ** 0.5).view(-1, 1, 1, 1)
+        sigma = ((1 - self.alphas[t]) ** 0.5).view(-1, 1, 1, 1)
         latents_denoised = (latents_noisy - sigma * noise_pred) / alpha
         image_denoised = self.decode_latents(latents_denoised)
 
diff --git a/threestudio/models/guidance/stable_diffusion_unified_guidance.py b/threestudio/models/guidance/stable_diffusion_unified_guidance.py
index 8ea4a490..5774b1d4 100644
--- a/threestudio/models/guidance/stable_diffusion_unified_guidance.py
+++ b/threestudio/models/guidance/stable_diffusion_unified_guidance.py
@@ -673,7 +673,7 @@ def forward(
             if self.cfg.guidance_type == "vsd":
                 latents_denoised_est = (
-                    latents_noisy - self.sigmas[t] * eps_phi
-                ) / self.alphas[t]
+                    latents_noisy - self.sigmas[t].view(-1, 1, 1, 1) * eps_phi
+                ) / self.alphas[t].view(-1, 1, 1, 1)
                 image_denoised_est = self.vae_decode(
                     self.pipe.vae, latents_denoised_est
                 )
@@ -682,8 +682,8 @@ def forward(
             grad_img = (
                 w
                 * (image_denoised_est - image_denoised_pretrain)
-                * self.alphas[t]
-                / self.sigmas[t]
+                * self.alphas[t].view(-1, 1, 1, 1)
+                / self.sigmas[t].view(-1, 1, 1, 1)
             )
             if self.grad_clip_val is not None:
                 grad_img = grad_img.clamp(-self.grad_clip_val, self.grad_clip_val)
diff --git a/threestudio/models/guidance/stable_diffusion_vsd_guidance.py b/threestudio/models/guidance/stable_diffusion_vsd_guidance.py
index 4f970f3c..8a380baf 100644
--- a/threestudio/models/guidance/stable_diffusion_vsd_guidance.py
+++ b/threestudio/models/guidance/stable_diffusion_vsd_guidance.py
@@ -541,8 +541,8 @@ def compute_grad_vsd(
 
         grad = w * (noise_pred_pretrain - noise_pred_est)
 
-        alpha = self.alphas[t] ** 0.5
-        sigma = (1 - self.alphas[t]) ** 0.5
+        alpha = (self.alphas[t] ** 0.5).view(-1, 1, 1, 1)
+        sigma = ((1 - self.alphas[t]) ** 0.5).view(-1, 1, 1, 1)
         # image-space SDS proposed in HiFA: https://hifa-team.github.io/HiFA-site/
         if self.cfg.use_img_loss:
             latents_denoised_pretrain = (