diff --git a/comfy/model_base.py b/comfy/model_base.py
index 3d6879ae6..786c9cf47 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -166,8 +166,8 @@ class BaseModel(torch.nn.Module):
     def memory_required(self, input_shape):
         if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
             #TODO: this needs to be tweaked
-            area = max(input_shape[0], 3) * input_shape[2] * input_shape[3]
-            return (area * comfy.model_management.dtype_size(self.get_dtype()) / 60) * (1024 * 1024)
+            area = input_shape[0] * input_shape[2] * input_shape[3]
+            return (area * comfy.model_management.dtype_size(self.get_dtype()) / 50) * (1024 * 1024)
         else:
             #TODO: this formula might be too aggressive since I tweaked the sub-quad and split algorithms to use less memory.
             area = input_shape[0] * input_shape[2] * input_shape[3]
diff --git a/comfy/sample.py b/comfy/sample.py
index 4bfdb8ce5..034db97ee 100644
--- a/comfy/sample.py
+++ b/comfy/sample.py
@@ -83,7 +83,7 @@ def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
 
     real_model = None
     models, inference_memory = get_additional_models(positive, negative, model.model_dtype())
-    comfy.model_management.load_models_gpu([model] + models, model.memory_required(noise_shape) + inference_memory)
+    comfy.model_management.load_models_gpu([model] + models, model.memory_required([noise_shape[0] * 2] + list(noise_shape[1:])) + inference_memory)
     real_model = model.model
 
     return real_model, positive, negative, noise_mask, models
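
Note on the estimate (a minimal sketch, not part of the patch): the old code clamped the batch dimension with max(input_shape[0], 3) inside memory_required; the new code drops the clamp and instead has prepare_sampling double the batch dimension before calling it, presumably so the estimate covers both the positive and negative (CFG) conditioning passes. Dividing by 50 instead of 60 also raises the per-element estimate by 20%. The walkthrough below uses assumed example values (a 512x512 image, i.e. a [1, 4, 64, 64] latent, sampled in fp16); none of these numbers come from the patch itself.

# Hypothetical walkthrough of the new estimate; shapes and dtype are
# assumed example values, not taken from this patch.
noise_shape = [1, 4, 64, 64]                             # latent for a 512x512 image
doubled = [noise_shape[0] * 2] + list(noise_shape[1:])   # what prepare_sampling now passes

dtype_size = 2                                           # bytes per element for fp16
area = doubled[0] * doubled[2] * doubled[3]              # batch * height * width = 8192
estimate = (area * dtype_size / 50) * (1024 * 1024)      # new formula: ~343.6 MB

print(round(estimate / (1024 ** 3), 2), "GiB")           # -> 0.32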