Fix for running via DirectML

Fix DirectML empty-image generation issue with Flux1. Add CPU fallback for an unsupported path. Verified the model works on AMD GPUs.
Chowdhury, Hisham 2025-01-20 16:22:09 -08:00
parent a00e1489d2
commit b4c7540fbc
3 changed files with 13 additions and 2 deletions
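
Background: DirectML support in ComfyUI is opt-in; the module-level directml_enabled flag referenced below is only set when the user launches with the --directml argument. A minimal sketch of that setup, assuming the separate torch-directml package (variable names here are illustrative, not ComfyUI's exact code):

    import torch_directml  # separate pip package: torch-directml

    # Hypothetical setup mirroring an opt-in --directml <index> flag.
    directml_enabled = True
    device_index = 0
    directml_device = torch_directml.device(device_index)
    print("DirectML adapter:", torch_directml.device_name(device_index))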


@@ -104,7 +104,11 @@ class CLIPTextModel_(torch.nn.Module):
             mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, attention_mask.shape[-1], attention_mask.shape[-1])
             mask = mask.masked_fill(mask.to(torch.bool), float("-inf"))
 
-        causal_mask = torch.empty(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1)
+        if comfy.model_management.is_directml_enabled():
+            causal_mask = torch.empty(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device).triu_(1)
+        else:
+            causal_mask = torch.empty(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1)
+
         if mask is not None:
             mask += causal_mask
         else:
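
Note on the hunk above: the old code always filled the causal mask with float("-inf"), which, per the commit message, left Flux1 generating empty images under DirectML; the new DirectML branch builds the mask without the -inf fill. A standalone sketch of the two constructions (run on CPU here, purely for illustration):

    import torch

    seq, dtype, device = 4, torch.float32, torch.device("cpu")

    # Default path: strict upper triangle is -inf, so softmax assigns future
    # tokens zero attention weight; the diagonal and below stay 0.
    causal_default = torch.empty(seq, seq, dtype=dtype, device=device).fill_(float("-inf")).triu_(1)

    # DirectML path from the diff: same shape, no -inf fill. triu_(1) still
    # zeroes the diagonal and below; torch.empty leaves the rest uninitialized
    # in general, and only the DirectML backend takes this branch.
    causal_dml = torch.empty(seq, seq, dtype=dtype, device=device).triu_(1)

    # The surrounding code then folds the causal mask into any padding mask:
    padding_mask = torch.zeros(seq, seq, dtype=dtype, device=device)
    padding_mask += causal_default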


@@ -22,7 +22,7 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
 
 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
     assert dim % 2 == 0
-    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu():
+    if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled():
         device = torch.device("cpu")
     else:
         device = pos.device
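
The rope() change extends an existing CPU fallback, already used for Apple MPS and Intel XPU, to DirectML: the rotary embedding is computed on CPU, then the result is moved back to the caller's device. A self-contained sketch modeled on Flux's helper, with a cpu_fallback flag standing in for the backend checks:

    import torch
    from torch import Tensor

    def rope(pos: Tensor, dim: int, theta: int, cpu_fallback: bool = False) -> Tensor:
        assert dim % 2 == 0
        # Backends such as MPS/XPU/DirectML lack support for the float64 math
        # below, so compute on CPU when asked.
        device = torch.device("cpu") if cpu_fallback else pos.device
        scale = torch.linspace(0, (dim - 2) / dim, steps=dim // 2, dtype=torch.float64, device=device)
        omega = 1.0 / (theta ** scale)
        out = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float64, device=device), omega)
        out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
        out = out.reshape(*out.shape[:-1], 2, 2)  # last axis becomes a 2x2 rotation matrix
        # Hand the result back on the caller's original device.
        return out.to(dtype=torch.float32, device=pos.device)

    pos = torch.arange(8)[None, :]                            # (batch, seq_len)
    emb = rope(pos, dim=16, theta=10_000, cpu_fallback=True)  # (1, 8, 8, 2, 2)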


@@ -993,6 +993,13 @@ def is_device_mps(device):
 
 def is_device_cuda(device):
     return is_device_type(device, 'cuda')
 
+def is_directml_enabled():
+    global directml_enabled
+    if directml_enabled:
+        return True
+
+    return False
+
 def should_use_fp16(device=None, model_params=0, prioritize_performance=True, manual_cast=False):
     global directml_enabled
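
The new is_directml_enabled() helper simply reports the module-level flag set at startup; callers branch on it as the two hunks above do. A sketch of a typical call site (pick_device is an illustrative name, not part of the commit):

    import torch
    import comfy.model_management

    def pick_device(requested: torch.device) -> torch.device:
        # Route work that misbehaves under DirectML to the CPU instead.
        if comfy.model_management.is_directml_enabled():
            return torch.device("cpu")
        return requested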