kabachuha · FederalSafe987 · Nov 12, 2023 · Nov 12, 2023 · Dec 4, 2023
diff --git a/scripts/modelscope/process_modelscope.py b/scripts/modelscope/process_modelscope.py
@@ -131,7 +131,7 @@ def process_modelscope(args_dict, extra_args=None):
 
         print(f"Converted the frames to tensor {bfchw.shape}")
 
-        vd_out = torch.from_numpy(bcfhw).to("cuda")
+        vd_out = torch.from_numpy(bcfhw).to("cpu")
 
         # should be -1,1, not 0,1
         vd_out = 2 * vd_out - 1
@@ -187,7 +187,7 @@ def process_modelscope(args_dict, extra_args=None):
 
             print(f"Converted the frames to tensor {bfchw.shape}")
 
-            vd_out = torch.from_numpy(bcfhw).to("cuda")
+            vd_out = torch.from_numpy(bcfhw).to("cpu")
 
             # should be -1,1, not 0,1
             vd_out = 2 * vd_out - 1

diff --git a/scripts/modelscope/t2v_pipeline.py b/scripts/modelscope/t2v_pipeline.py
@@ -100,8 +100,8 @@ def __init__(self, model_dir):
             strict=True,
         )
         self.sd_model.eval()
-        if not devices.has_mps() or torch.cuda.is_available() == True:
-            self.sd_model.half()
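+        # NOTE(review): fp16 cast disabled; presumably because this patch forces CPU inference - confirm before merging.
+        # Previously: if not devices.has_mps() or torch.cuda.is_available() == True: self.sd_model.half()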
 
         # Initialize diffusion
         betas = beta_schedule(
@@ -145,8 +145,8 @@ def __init__(self, model_dir):
             self.clip_encoder.to("cpu")
         self.noise_gen = torch.Generator(device='cpu')
 
-    def compute_latents(self, vd_out, cpu_vae='GPU (half precision)', device=torch.device('cuda')):
-        self.device = device
+    def compute_latents(self, vd_out, cpu_vae='CPU', device=torch.device('cpu')):
+        self.device = torch.device('cpu')
         with torch.no_grad():
             bs_vd, c, max_frames, height, width = vd_out.shape
             scale_factor = 0.18215
@@ -205,7 +205,7 @@ def infer(
         width=256, 
         height=256, 
         eta=0.0, 
-        cpu_vae='GPU (half precision)', 
+        cpu_vae='CPU', 
         device=torch.device('cpu'), 
         latents=None, 
         skip_steps=0,
@@ -245,7 +245,7 @@ def infer(
             A generated video (as list of np.arrays).
         """
 
-        self.device = device
+        self.device = torch.device('cpu')
         self.clip_encoder.to(self.device)
         self.clip_encoder.device = self.device
         steps = steps - skip_steps

diff --git a/scripts/samplers/ddim/sampler.py b/scripts/samplers/ddim/sampler.py
@@ -8,7 +8,7 @@
 from t2v_helpers.general_utils import reconstruct_conds
 
 class DDIMSampler(object):
-    def __init__(self, model, schedule="linear", device=torch.device("cuda"), **kwargs):
+    def __init__(self, model, schedule="linear", device=torch.device("cpu"), **kwargs):
         super().__init__()
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps