Add a --reserve-vram argument for when you don't want ComfyUI to use all of your VRAM.

--reserve-vram 1.0, for example, will make ComfyUI try to keep 1GB of VRAM free.

This can also be useful if workflows are failing because of OOM errors; in that case,
please report whether --reserve-vram improves your situation.
comfyanonymous 2024-08-19 17:16:18 -04:00
parent 4d341b78e8
commit 045377ea89
2 changed files with 16 additions and 2 deletions
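For a sense of what the flag means in practice, the sketch below works out the budget left to ComfyUI once a reserve is subtracted. It is a minimal illustration only: the helper name and the 8GB card are made up for the example and are not part of this commit.

# Hypothetical illustration: a --reserve-vram value in GB simply shrinks the
# VRAM budget ComfyUI will try to fill. All numbers here are example values.
def usable_vram_bytes(total_vram_bytes, reserve_gb):
    reserved = int(reserve_gb * 1024 * 1024 * 1024)  # GB -> bytes
    return max(0, total_vram_bytes - reserved)

total = 8 * 1024 * 1024 * 1024           # e.g. an 8GB card
print(usable_vram_bytes(total, 1.0))     # 7516192768 bytes left for models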

comfy/cli_args.py

@@ -116,6 +116,9 @@ vram_group.add_argument("--lowvram", action="store_true", help="Split the unet i
 vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.")
 vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
+parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reverved depending on your OS.")
 parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha1', 'sha256', 'sha512'], default='sha256', help="Allows you to choose the hash function to use for duplicate filename / contents comparison. Default is sha256.")
 parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")

comfy/model_management.py

@@ -367,6 +367,17 @@ def offloaded_memory(loaded_models, device):
 def minimum_inference_memory():
     return (1024 * 1024 * 1024) * 1.2
+EXTRA_RESERVED_VRAM = 200 * 1024 * 1024
+if any(platform.win32_ver()):
+    EXTRA_RESERVED_VRAM = 400 * 1024 * 1024 #Windows is higher because of the shared vram issue
+if args.reserve_vram is not None:
+    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
+    logging.debug("Reserving {}MB vram for other applications.".format(EXTRA_RESERVED_VRAM / (1024 * 1024)))
+def extra_reserved_memory():
+    return EXTRA_RESERVED_VRAM
 def unload_model_clones(model, unload_weights_only=True, force_unload=True):
     to_unload = []
     for i in range(len(current_loaded_models)):
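The added reservation logic can be exercised on its own; the function below mirrors it under the assumption that the --reserve-vram value has already been parsed (pick_reserved_vram is an illustrative name, not from the commit).

# Stand-alone sketch of the default reservation: ~200MB normally, ~400MB on
# Windows (shared VRAM issue), with an explicit --reserve-vram overriding both.
import platform

def pick_reserved_vram(reserve_vram_gb=None):
    reserved = 200 * 1024 * 1024
    if any(platform.win32_ver()):        # tuple entries are non-empty only on Windows
        reserved = 400 * 1024 * 1024
    if reserve_vram_gb is not None:
        reserved = int(reserve_vram_gb * 1024 * 1024 * 1024)
    return reserved

print(pick_reserved_vram() / (1024 * 1024))      # 200.0 or 400.0 MB
print(pick_reserved_vram(1.0) / (1024 * 1024))   # 1024.0 MB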
@@ -436,11 +447,11 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
     global vram_state
     inference_memory = minimum_inference_memory()
-    extra_mem = max(inference_memory, memory_required + 300 * 1024 * 1024)
+    extra_mem = max(inference_memory, memory_required + extra_reserved_memory())
     if minimum_memory_required is None:
         minimum_memory_required = extra_mem
     else:
-        minimum_memory_required = max(inference_memory, minimum_memory_required + 300 * 1024 * 1024)
+        minimum_memory_required = max(inference_memory, minimum_memory_required + extra_reserved_memory())
     models = set(models)
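To see how the reservation feeds the budget computed in load_models_gpu, the arithmetic can be isolated as below; extra_mem here is a simplified stand-in for the in-function expression, and the sample sizes are invented for the example.

# The reserve is folded into the working-memory estimate: take whichever is
# larger, the 1.2GB inference floor or the model's requirement plus the reserve.
def minimum_inference_memory():
    return int((1024 * 1024 * 1024) * 1.2)

def extra_mem(memory_required, reserved_vram):
    return max(minimum_inference_memory(), memory_required + reserved_vram)

# Example: a model needing ~2GB with the default 200MB reserve.
print(extra_mem(2 * 1024**3, 200 * 1024**2) / 1024**2)   # 2248.0 MB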