Fix lowvram bug when using a model multiple times in a row.

The memory system would load an extra 64MB each time until either the
model was completely in memory or OOM.
This commit is contained in:
comfyanonymous 2024-12-19 16:04:56 -05:00
parent c441048a4f
commit 3ad3248ad7

View File

@ -505,7 +505,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM) and not force_full_load:
model_size = loaded_model.model_memory_required(torch_dev)
current_free_mem = get_free_memory(torch_dev)
lowvram_model_memory = max(64 * (1024 * 1024), (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory()))
lowvram_model_memory = max(1, (current_free_mem - minimum_memory_required), min(current_free_mem * 0.4, current_free_mem - minimum_inference_memory()))
if model_size <= lowvram_model_memory: #only switch to lowvram if really necessary
lowvram_model_memory = 0