diff --git a/comfy/model_base.py b/comfy/model_base.py
index 7ba25347..f6de0b25 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -157,6 +157,16 @@ class BaseModel(torch.nn.Module):
     def set_inpaint(self):
         self.inpaint_model = True
 
+    def memory_required(self, input_shape):
+        area = input_shape[0] * input_shape[2] * input_shape[3]
+        if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
+            #TODO: this needs to be tweaked
+            return (area / 20) * (1024 * 1024)
+        else:
+            #TODO: this formula might be too aggressive since I tweaked the sub-quad and split algorithms to use less memory.
+            return (((area * 0.6) / 0.9) + 1024) * (1024 * 1024)
+
+
 def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0):
     adm_inputs = []
     weights = []
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 53582fc7..799e52ba 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -579,27 +579,6 @@ def get_free_memory(dev=None, torch_free_too=False):
     else:
         return mem_free_total
 
-def batch_area_memory(area):
-    if xformers_enabled() or pytorch_attention_flash_attention():
-        #TODO: these formulas are copied from maximum_batch_area below
-        return (area / 20) * (1024 * 1024)
-    else:
-        return (((area * 0.6) / 0.9) + 1024) * (1024 * 1024)
-
-def maximum_batch_area():
-    global vram_state
-    if vram_state == VRAMState.NO_VRAM:
-        return 0
-
-    memory_free = get_free_memory() / (1024 * 1024)
-    if xformers_enabled() or pytorch_attention_flash_attention():
-        #TODO: this needs to be tweaked
-        area = 20 * memory_free
-    else:
-        #TODO: this formula is because AMD sucks and has memory management issues which might be fixed in the future
-        area = ((memory_free - 1024) * 0.9) / (0.6)
-    return int(max(area, 0))
-
 def cpu_mode():
     global cpu_state
     return cpu_state == CPUState.CPU
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 9dc09791..1c36855d 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -52,6 +52,9 @@ class ModelPatcher:
             return True
         return False
 
+    def memory_required(self, input_shape):
+        return self.model.memory_required(input_shape=input_shape)
+
     def set_model_sampler_cfg_function(self, sampler_cfg_function):
         if len(inspect.signature(sampler_cfg_function).parameters) == 3:
             self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way
diff --git a/comfy/sample.py b/comfy/sample.py
index b3fcd165..4bfdb8ce 100644
--- a/comfy/sample.py
+++ b/comfy/sample.py
@@ -83,7 +83,7 @@ def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
 
     real_model = None
     models, inference_memory = get_additional_models(positive, negative, model.model_dtype())
-    comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise_shape[0] * noise_shape[2] * noise_shape[3]) + inference_memory)
+    comfy.model_management.load_models_gpu([model] + models, model.memory_required(noise_shape) + inference_memory)
     real_model = model.model
 
     return real_model, positive, negative, noise_mask, models
diff --git a/comfy/samplers.py b/comfy/samplers.py
index a2c784a4..5340dd01 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -134,7 +134,7 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
 
         return out
 
-    def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, max_total_area, model_options):
+    def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
         out_cond = torch.zeros_like(x_in)
         out_count = torch.ones_like(x_in) * 1e-37
 
@@ -170,9 +170,11 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
             to_batch_temp.reverse()
             to_batch = to_batch_temp[:1]
 
+            free_memory = model_management.get_free_memory(x_in.device)
             for i in range(1, len(to_batch_temp) + 1):
                 batch_amount = to_batch_temp[:len(to_batch_temp)//i]
-                if (len(batch_amount) * first_shape[0] * first_shape[2] * first_shape[3] < max_total_area):
+                input_shape = [len(batch_amount) * first_shape[0]] + list(first_shape)[1:]
+                if model.memory_required(input_shape) < free_memory:
                     to_batch = batch_amount
                     break
 
@@ -242,11 +244,10 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
         return out_cond, out_uncond
 
 
-    max_total_area = model_management.maximum_batch_area()
     if math.isclose(cond_scale, 1.0):
         uncond = None
 
-    cond, uncond = calc_cond_uncond_batch(model, cond, uncond, x, timestep, max_total_area, model_options)
+    cond, uncond = calc_cond_uncond_batch(model, cond, uncond, x, timestep, model_options)
     if "sampler_cfg_function" in model_options:
         args = {"cond": x - cond, "uncond": x - uncond, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep}
         return x - model_options["sampler_cfg_function"](args)
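
Below is a small, self-contained sketch (not part of the patch) of how the new per-model memory estimate drives the batch-size decision in calc_cond_uncond_batch. The names DummyModel and pick_batch, the latent shape, and the hard-coded free-memory figure are illustrative assumptions only; the formula copies the non-flash-attention branch added to BaseModel.memory_required, and the loop mirrors the one changed in comfy/samplers.py, which in the real code compares against model_management.get_free_memory(x_in.device).

# Illustrative sketch, not part of the patch.
class DummyModel:
    def memory_required(self, input_shape):
        # Same estimate as the BaseModel fallback branch in this patch
        # (sub-quad / split attention, no xformers or flash attention).
        area = input_shape[0] * input_shape[2] * input_shape[3]
        return (((area * 0.6) / 0.9) + 1024) * (1024 * 1024)

def pick_batch(model, to_batch_temp, first_shape, free_memory):
    # Mirror of the loop in calc_cond_uncond_batch: start from the full list
    # of batchable conds and shrink the candidate count (len // i) until the
    # estimated memory for that batch fits in the currently free memory.
    to_batch = to_batch_temp[:1]
    for i in range(1, len(to_batch_temp) + 1):
        batch_amount = to_batch_temp[:len(to_batch_temp)//i]
        input_shape = [len(batch_amount) * first_shape[0]] + list(first_shape)[1:]
        if model.memory_required(input_shape) < free_memory:
            to_batch = batch_amount
            break
    return to_batch

if __name__ == "__main__":
    model = DummyModel()
    first_shape = (1, 4, 64, 64)      # latent for one 512x512 image (assumed)
    candidates = list(range(8))       # eight conds that could be batched together
    free = 4 * 1024 * 1024 * 1024     # pretend 4 GiB are free (assumed)
    print(pick_batch(model, candidates, first_shape, free))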