From 43a74c0de175933782e255e1d0443c413af7c6f3 Mon Sep 17 00:00:00 2001
From: catboxanon <122327233+catboxanon@users.noreply.github.com>
Date: Sat, 8 Feb 2025 17:00:56 -0500
Subject: [PATCH] Allow FP16 accumulation with `--fast` (#6453)

This currently only takes effect on PyTorch nightly builds (>= 20250208); on older builds the attribute does not exist, so the setting is silently skipped.
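
A minimal sketch of the runtime behavior this enables, assuming a build that exposes
torch.backends.cuda.matmul.allow_fp16_accumulation; the helper name is hypothetical,
and the broad exception probe mirrors the try/except added in the diff below:

    import torch

    def try_enable_fp16_accumulation() -> bool:
        # Hypothetical helper: turn on FP16 matmul accumulation if the
        # current PyTorch build exposes the knob (nightly >= 20250208).
        try:
            torch.backends.cuda.matmul.allow_fp16_accumulation = True
            return True
        except Exception:
            # Older builds reject or lack the attribute; accumulation
            # stays in FP32 and nothing else changes.
            return False

    # Usage in context: only attempted on NVIDIA GPUs when the user
    # passed --fast, matching the guard in comfy/model_management.py:
    #   if is_nvidia() and args.fast:
    #       try_enable_fp16_accumulation()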
---
 comfy/model_management.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 225a83e0..ca84f206 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -241,6 +241,12 @@ if ENABLE_PYTORCH_ATTENTION:
     torch.backends.cuda.enable_flash_sdp(True)
     torch.backends.cuda.enable_mem_efficient_sdp(True)
 
+try:
+    if is_nvidia() and args.fast:
+        torch.backends.cuda.matmul.allow_fp16_accumulation = True
+except:
+    pass
+
 try:
     if int(torch_version[0]) == 2 and int(torch_version[2]) >= 5:
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)