allow passing attention mask in flux attention

Raphael Walker 2024-12-05 11:12:54 +01:00
parent 927418464e
commit 8954dafb44
2 changed files with 3 additions and 3 deletions


@@ -4,11 +4,11 @@ from torch import Tensor
 from comfy.ldm.modules.attention import optimized_attention
 import comfy.model_management
-def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
+def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
     q, k = apply_rope(q, k, pe)
     heads = q.shape[1]
-    x = optimized_attention(q, k, v, heads, skip_reshape=True)
+    x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask)
     return x
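
With this change the flux attention helper simply forwards the new optional mask argument to optimized_attention. The following is a minimal, self-contained sketch of how a caller could use it; the shapes, the boolean mask layout, and the use of torch's scaled_dot_product_attention as a stand-in for optimized_attention (with rope omitted) are illustrative assumptions, not the actual ComfyUI code.

import torch
import torch.nn.functional as F

def attention_sketch(q, k, v, pe=None, mask=None):
    # Stand-in for the patched helper: apply_rope is omitted for brevity;
    # the point is that an optional mask is now forwarded to the attention kernel.
    return F.scaled_dot_product_attention(q, k, v, attn_mask=mask)

B, H, L, D = 1, 24, 256, 128          # illustrative sizes
q, k, v = (torch.randn(B, H, L, D) for _ in range(3))

# Boolean mask broadcast to (B, H, L, L): True = attend, False = ignore.
# Here the last 56 key tokens (e.g. text padding) are masked out for every query.
mask = torch.ones(L, L, dtype=torch.bool)
mask[:, 200:] = False

out = attention_sketch(q, k, v, mask=mask)
print(out.shape)  # torch.Size([1, 24, 256, 128])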


@@ -377,7 +377,7 @@ def attention_xformers(q, k, v, heads, mask=None, attn_precision=None, skip_resh
     if mask is not None:
         pad = 8 - mask.shape[-1] % 8
         # we assume the mask is either (B, M, N) or (M, N)
-        # this way we avoid allocating a huge
+        # this way we avoid allocating a huge matrix
         mask_batch_size = mask.shape[0] if len(mask.shape) == 3 else 1
         # if skip_reshape, then q, k, v have merged heads and batch size
         if skip_reshape:
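
The xformers hunk only completes a comment in the mask-padding path: by assuming the mask is (B, M, N) or (M, N), the code can pad the attention bias to xformers' 8-element alignment without allocating a second full-size matrix. A rough sketch of that padding trick follows; the helper name, dtype handling and shapes are assumptions for illustration, not the exact ComfyUI implementation.

import torch

def pad_attn_bias(mask, dtype=torch.float32, device="cpu"):
    # xformers' memory_efficient_attention wants the attention bias aligned to a
    # multiple of 8 in its last dimension. Allocate one padded buffer, copy the
    # mask in, and hand back a view of the original logical size, so no extra
    # full-size (B, M, N) matrix is materialised.
    pad = 8 - mask.shape[-1] % 8
    padded_shape = list(mask.shape)
    padded_shape[-1] += pad
    mask_out = torch.empty(padded_shape, dtype=dtype, device=device)
    mask_out[..., :mask.shape[-1]] = mask
    return mask_out[..., :mask.shape[-1]]

m = torch.zeros(77, 4098)     # e.g. an (M, N) text-to-image mask
aligned = pad_attn_bias(m)
print(aligned.shape)          # torch.Size([77, 4098]) -- same logical size
print(aligned.stride())       # (4104, 1) -- storage padded to a multiple of 8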