From 39b1fc4cccc829e341f35c05b2fae0902c1ecdc2 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 16 Dec 2024 23:31:10 -0500 Subject: [PATCH] Adjust used dtypes for hunyuan video VAE and diffusion model. --- comfy/sd.py | 1 + comfy/supported_models.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/comfy/sd.py b/comfy/sd.py index 0d498fef..89f0eb24 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -323,6 +323,7 @@ class VAE: self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1]) self.memory_used_decode = lambda shape, dtype: (1500 * shape[2] * shape[3] * shape[4] * (4 * 8 * 8)) * model_management.dtype_size(dtype) self.memory_used_encode = lambda shape, dtype: (900 * max(shape[2], 2) * shape[3] * shape[4]) * model_management.dtype_size(dtype) + self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32] elif "decoder.layers.1.layers.0.beta" in sd: self.first_stage_model = AudioOobleckVAE() diff --git a/comfy/supported_models.py b/comfy/supported_models.py index df232e83..ed3af9d1 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -753,7 +753,7 @@ class HunyuanVideo(supported_models_base.BASE): memory_usage_factor = 2.0 #TODO - supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + supported_inference_dtypes = [torch.bfloat16, torch.float32] vae_key_prefix = ["vae."] text_encoder_key_prefix = ["text_encoders."]