diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index a895c7e1..77009c91 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -116,6 +116,9 @@ vram_group.add_argument("--lowvram", action="store_true", help="Split the unet i
 vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.")
 vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
 
+parser.add_argument("--reserve-vram", type=float, default=None, help="Set the amount of vram in GB you want to reserve for use by your OS/other software. By default some amount is reserved depending on your OS.")
+
+
 parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha1', 'sha256', 'sha512'], default='sha256', help="Allows you to choose the hash function to use for duplicate filename / contents comparison. Default is sha256.")
 
 parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 6387c8d0..fb136bd1 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -367,6 +367,17 @@ def offloaded_memory(loaded_models, device):
 def minimum_inference_memory():
     return (1024 * 1024 * 1024) * 1.2
 
+EXTRA_RESERVED_VRAM = 200 * 1024 * 1024
+if any(platform.win32_ver()):
+    EXTRA_RESERVED_VRAM = 400 * 1024 * 1024 #Windows is higher because of the shared vram issue
+
+if args.reserve_vram is not None:
+    EXTRA_RESERVED_VRAM = args.reserve_vram * 1024 * 1024 * 1024
+    logging.debug("Reserving {}MB vram for other applications.".format(EXTRA_RESERVED_VRAM / (1024 * 1024)))
+
+def extra_reserved_memory():
+    return EXTRA_RESERVED_VRAM
+
 def unload_model_clones(model, unload_weights_only=True, force_unload=True):
     to_unload = []
     for i in range(len(current_loaded_models)):
@@ -436,11 +447,11 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
     global vram_state
 
     inference_memory = minimum_inference_memory()
-    extra_mem = max(inference_memory, memory_required + 300 * 1024 * 1024)
+    extra_mem = max(inference_memory, memory_required + extra_reserved_memory())
     if minimum_memory_required is None:
         minimum_memory_required = extra_mem
     else:
-        minimum_memory_required = max(inference_memory, minimum_memory_required + 300 * 1024 * 1024)
+        minimum_memory_required = max(inference_memory, minimum_memory_required + extra_reserved_memory())
 
     models = set(models)
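
Note (not part of the diff): a minimal sketch of how the reservation introduced above resolves, assuming the defaults shown in model_management.py. The helper name resolve_extra_reserved_vram is hypothetical and used only to illustrate the arithmetic.

    import platform

    def resolve_extra_reserved_vram(reserve_vram_gb=None):
        # Default reservation: 200MB, bumped to 400MB on Windows because of the shared vram issue.
        reserved = 400 * 1024 * 1024 if any(platform.win32_ver()) else 200 * 1024 * 1024
        if reserve_vram_gb is not None:
            # --reserve-vram is given in GB and converted to bytes, overriding the default.
            reserved = reserve_vram_gb * 1024 * 1024 * 1024
        return reserved

    # e.g. launching with `--reserve-vram 1.5` reserves 1536MB on any OS:
    print(resolve_extra_reserved_vram(1.5) / (1024 * 1024))  # 1536.0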