From 045377ea893d0703e515d87f891936784cb2f5de Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 19 Aug 2024 17:16:18 -0400
Subject: [PATCH] Add a --reserve-vram argument if you don't want comfy to use all of it.

--reserve-vram 1.0 for example will make ComfyUI try to keep 1GB vram free.

This can also be useful if workflows are failing because of OOM errors, but in that case please report whether --reserve-vram improves your situation.
---
 comfy/cli_args.py         |  3 +++
 comfy/model_management.py | 15 +++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index a895c7e1..77009c91 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -116,6 +116,9 @@ vram_group.add_argument("--lowvram", action="store_true", help="Split the unet i
 vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.")
 vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
 
+parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
+
+
 parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha1', 'sha256', 'sha512'], default='sha256', help="Allows you to choose the hash function to use for duplicate filename / contents comparison. Default is sha256.")
 
 parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 6387c8d0..fb136bd1 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -367,6 +367,17 @@ def offloaded_memory(loaded_models, device):
 def minimum_inference_memory():
     return (1024 * 1024 * 1024) * 1.2
 
+EXTRA_RESERVED_VRAM = 200 * 1024 * 1024
+if any(platform.win32_ver()):
+    EXTRA_RESERVED_VRAM = 400 * 1024 * 1024 #Windows is higher because of the shared vram issue
+
+if args.reserve_vram is not None:
+    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
+    logging.debug("Reserving {}MB vram for other applications.".format(EXTRA_RESERVED_VRAM / (1024 * 1024)))
+
+def extra_reserved_memory():
+    return EXTRA_RESERVED_VRAM
+
 def unload_model_clones(model, unload_weights_only=True, force_unload=True):
     to_unload = []
     for i in range(len(current_loaded_models)):
@@ -436,11 +447,11 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
     global vram_state
 
     inference_memory = minimum_inference_memory()
-    extra_mem = max(inference_memory, memory_required + 300 * 1024 * 1024)
+    extra_mem = max(inference_memory, memory_required + extra_reserved_memory())
     if minimum_memory_required is None:
         minimum_memory_required = extra_mem
     else:
-        minimum_memory_required = max(inference_memory, minimum_memory_required + 300 * 1024 * 1024)
+        minimum_memory_required = max(inference_memory, minimum_memory_required + extra_reserved_memory())
 
     models = set(models)
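
As a reading aid (not part of the patch), here is a self-contained sketch of the reservation logic the model_management.py hunk above introduces; the argparse wiring is condensed from cli_args.py, and the 200MB/400MB platform defaults are taken straight from the diff:

import argparse
import logging
import platform

parser = argparse.ArgumentParser()
parser.add_argument("--reserve-vram", type=float, default=None,
                    help="Amount of vram in GB to keep free for the OS/other software.")
args = parser.parse_args()

# Platform default: 200MB, raised to 400MB on Windows because of the shared vram issue.
EXTRA_RESERVED_VRAM = 200 * 1024 * 1024
if any(platform.win32_ver()):
    EXTRA_RESERVED_VRAM = 400 * 1024 * 1024

# An explicit --reserve-vram value (given in GB) overrides the platform default.
if args.reserve_vram is not None:
    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
    logging.debug("Reserving {}MB vram for other applications.".format(EXTRA_RESERVED_VRAM / (1024 * 1024)))

def extra_reserved_memory():
    return EXTRA_RESERVED_VRAM

Running this with --reserve-vram 1.0 resolves extra_reserved_memory() to 1GB, matching the behaviour described in the commit message; without the flag the 200MB (or 400MB on Windows) default applies.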
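
To make the new budget arithmetic in load_models_gpu concrete, a small worked example, assuming --reserve-vram 1.0 (the model size below is illustrative, not from the patch):

GB = 1024 * 1024 * 1024

inference_memory = 1.2 * GB   # minimum_inference_memory()
extra_reserved = 1.0 * GB     # extra_reserved_memory() with --reserve-vram 1.0
memory_required = 4.5 * GB    # hypothetical size of the models being loaded

# Same expression the patch now uses instead of the old fixed 300MB pad:
extra_mem = max(inference_memory, memory_required + extra_reserved)
print(extra_mem / GB)         # 5.5 -> the loader budgets model size plus the reserve

The reserve therefore replaces the hard-coded 300 * 1024 * 1024 pad, so a larger --reserve-vram value directly shrinks the amount of vram the loader is willing to fill.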