Make loras work with --async-offload (#7824)

2025-06-12 14:32:08 +08:00 · 2025-04-26 16:56:22 -07:00 · 2025-04-26 16:56:22 -07:00 · ac10a0d69e
commit ac10a0d69e
parent 0dcc75ca54
1 changed files with 13 additions and 4 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -22,6 +22,7 @@ import comfy.model_management
 from comfy.cli_args import args, PerformanceFeature
 import comfy.float
 import comfy.rmsnorm
 import contextlib
 cast_to = comfy.model_management.cast_to #TODO: remove once no more references
@ -38,20 +39,28 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
            device = input.device
    offload_stream = comfy.model_management.get_offload_stream(device)
    if offload_stream is not None:
        wf_context = offload_stream
    else:
        wf_context = contextlib.nullcontext()
    bias = None
    non_blocking = comfy.model_management.device_supports_non_blocking(device)
    if s.bias is not None:
        has_function = len(s.bias_function) > 0
        bias = comfy.model_management.cast_to(s.bias, bias_dtype, device, non_blocking=non_blocking, copy=has_function, stream=offload_stream)
        if has_function:
-            for f in s.bias_function:
+            with wf_context:
-                bias = f(bias)
+                for f in s.bias_function:
                    bias = f(bias)
    has_function = len(s.weight_function) > 0
    weight = comfy.model_management.cast_to(s.weight, dtype, device, non_blocking=non_blocking, copy=has_function, stream=offload_stream)
    if has_function:
-        for f in s.weight_function:
+        with wf_context:
-            weight = f(weight)
+            for f in s.weight_function:
                weight = f(weight)
    comfy.model_management.sync_stream(device, offload_stream)
    return weight, bias