Compare commits

...

19 Commits

Author SHA1 Message Date
comfyanonymous
a0651359d7
Return proper error if diffusion model not detected properly. (#8272) 2025-05-25 05:28:11 -04:00
comfyanonymous
ad3bd8aa49 ComfyUI version 0.3.36 2025-05-24 17:30:37 -04:00
comfyanonymous
5a87757ef9
Better error if sageattention is installed but a dependency is missing. (#8264) 2025-05-24 06:43:12 -04:00
Christian Byrne
464aece92b
update frontend package to v1.20.5 (#8260) 2025-05-23 21:53:49 -07:00
comfyanonymous
0b50d4c0db
Add argument to explicitly enable fp8 compute support. (#8257)
This can be used to test if your current GPU/pytorch version supports fp8 matrix mult in combination with --fast or the fp8_e4m3fn_fast dtype.
2025-05-23 17:43:50 -04:00
drhead
30b2eb8a93
create arange on-device (#8255) 2025-05-23 16:15:06 -04:00
comfyanonymous
f85c08df06
Make VACE conditionings stackable. (#8240) 2025-05-22 19:22:26 -04:00
comfyanonymous
4202e956a0
Add append feature to conditioning_set_values (#8239)
Refactor unclipconditioning node.
2025-05-22 08:11:13 -04:00
Terry Jia
b838c36720
remove mtl from 3d model file list (#8192) 2025-05-22 08:08:36 -04:00
Chenlei Hu
fc39184ea9
Update frontend to 1.20 (#8232) 2025-05-22 02:24:36 -04:00
ComfyUI Wiki
ded60c33a0
Update templates to 0.1.18 (#8224) 2025-05-21 11:40:08 -07:00
Michael Abrahams
8bb858e4d3
Improve performance with large number of queued prompts (#8176)
* get_current_queue_volatile

* restore get_current_queue method

* remove extra import
2025-05-21 05:14:17 -04:00
编程界的小学生
57893c843f
Code Optimization and Issue Fixes in ComfyUI server (#8196)
* Update server.py

* Update server.py
2025-05-21 04:59:42 -04:00
Jedrzej Kosinski
65da29aaa9
Make torch.compile LoRA/key-compatible (#8213)
* Make torch compile node use a wrapper instead of object_patch for the entire diffusion_models object, allowing key associations on diffusion_models to not break (loras, getting attributes, etc.)

* Moved torch compile code into comfy_api so it can be used by custom nodes with a degree of confidence

* Refactor set_torch_compile_wrapper to support a list of keys instead of just diffusion_model, as well as additional torch.compile args

* remove unused import

* Moved torch compile kwargs to be stored in model_options instead of attachments; attachments are more intended for things to be 'persisted', AKA not deepcopied

* Add some comments

* Remove random line of code, not sure how it got there
2025-05-21 04:56:56 -04:00
comfyanonymous
10024a38ea ComfyUI version v0.3.35 2025-05-21 04:50:37 -04:00
comfyanonymous
87f9130778
Revert "This doesn't seem to be needed on chroma. (#8209)" (#8210)
This reverts commit 7e84bf53737879ace37a68dc93e0df7704a53514.
2025-05-20 05:39:55 -04:00
comfyanonymous
7e84bf5373
This doesn't seem to be needed on chroma. (#8209) 2025-05-20 05:29:23 -04:00
filtered
4f3b50ba51
Update README ROCm text to match link (#8199)
- Follow-up on #8198
2025-05-19 16:40:55 -04:00
comfyanonymous
e930a387d6
Update AMD instructions in README. (#8198) 2025-05-19 04:58:41 -04:00
21 changed files with 143 additions and 45 deletions

View File

@ -197,11 +197,11 @@ Put your VAE in: models/vae
### AMD GPUs (Linux only)
AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:
-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.3```
-This is the command to install the nightly with ROCm 6.3 which might have some performance improvements:
+This is the command to install the nightly with ROCm 6.4 which might have some performance improvements:
-```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3```
+```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.4```
### Intel GPUs (Windows and Linux)

View File

@ -88,6 +88,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE"
parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.")
parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
class LatentPreviewMethod(enum.Enum):
NoPreviews = "none"

View File

@ -163,7 +163,7 @@ class Chroma(nn.Module):
distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype)
# get all modulation index
-modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype)
+modulation_index = timestep_embedding(torch.arange(mod_index_length, device=img.device), 32).to(img.device, img.dtype)
# we need to broadcast the modulation index here so each batch has all of the index
modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype)
# and we need to broadcast timestep and guidance along too
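The one-line change above creates the index tensor directly on the model's device instead of building it on the CPU and copying it over on every forward pass. A minimal illustration of the difference (the length and device name below are placeholders, not values taken from the model):

```python
import torch

# Built on the CPU first, then transferred inside .to(...): an extra
# host-to-device copy and a potential sync point on every call.
idx = torch.arange(344).to("cuda")

# Allocated directly on the target device: no intermediate CPU tensor.
idx = torch.arange(344, device="cuda")
```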

View File

@ -20,8 +20,11 @@ if model_management.xformers_enabled():
if model_management.sage_attention_enabled():
try:
from sageattention import sageattn
-except ModuleNotFoundError:
-logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention")
+except ModuleNotFoundError as e:
+if e.name == "sageattention":
+logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention")
+else:
+raise e
exit(-1)
if model_management.flash_attention_enabled():

View File

@ -635,7 +635,7 @@ class VaceWanModel(WanModel):
t,
context,
vace_context,
-vace_strength=1.0,
+vace_strength,
clip_fea=None,
freqs=None,
transformer_options={},
@ -661,8 +661,11 @@ class VaceWanModel(WanModel):
context = torch.concat([context_clip, context], dim=1)
context_img_len = clip_fea.shape[-2]
+orig_shape = list(vace_context.shape)
+vace_context = vace_context.movedim(0, 1).reshape([-1] + orig_shape[2:])
c = self.vace_patch_embedding(vace_context.float()).to(vace_context.dtype)
c = c.flatten(2).transpose(1, 2)
+c = list(c.split(orig_shape[0], dim=0))
# arguments
x_orig = x
@ -682,8 +685,9 @@ class VaceWanModel(WanModel):
ii = self.vace_layers_mapping.get(i, None)
if ii is not None:
-c_skip, c = self.vace_blocks[ii](c, x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len)
-x += c_skip * vace_strength
+for iii in range(len(c)):
+c_skip, c[iii] = self.vace_blocks[ii](c[iii], x=x_orig, e=e0, freqs=freqs, context=context, context_img_len=context_img_len)
+x += c_skip * vace_strength[iii]
del c_skip
# head
x = self.head(x, e)

View File

@ -1062,20 +1062,25 @@ class WAN21_Vace(WAN21):
vace_frames = kwargs.get("vace_frames", None)
if vace_frames is None:
noise_shape[1] = 32
-vace_frames = torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)
-for i in range(0, vace_frames.shape[1], 16):
-vace_frames = vace_frames.clone()
-vace_frames[:, i:i + 16] = self.process_latent_in(vace_frames[:, i:i + 16])
+vace_frames = [torch.zeros(noise_shape, device=noise.device, dtype=noise.dtype)]
mask = kwargs.get("vace_mask", None)
if mask is None:
noise_shape[1] = 64
-mask = torch.ones(noise_shape, device=noise.device, dtype=noise.dtype)
+mask = [torch.ones(noise_shape, device=noise.device, dtype=noise.dtype)] * len(vace_frames)
-out['vace_context'] = comfy.conds.CONDRegular(torch.cat([vace_frames.to(noise), mask.to(noise)], dim=1))
+vace_frames_out = []
+for j in range(len(vace_frames)):
+vf = vace_frames[j].clone()
+for i in range(0, vf.shape[1], 16):
+vf[:, i:i + 16] = self.process_latent_in(vf[:, i:i + 16])
+vf = torch.cat([vf, mask[j]], dim=1)
+vace_frames_out.append(vf)
-vace_strength = kwargs.get("vace_strength", 1.0)
+vace_frames = torch.stack(vace_frames_out, dim=1)
+out['vace_context'] = comfy.conds.CONDRegular(vace_frames)
+vace_strength = kwargs.get("vace_strength", [1.0] * len(vace_frames_out))
out['vace_strength'] = comfy.conds.CONDConstant(vace_strength)
return out

View File

@ -620,6 +620,9 @@ def convert_config(unet_config):
def unet_config_from_diffusers_unet(state_dict, dtype=None):
if "conv_in.weight" not in state_dict:
return None
match = {}
transformer_depth = []

View File

@ -1257,6 +1257,9 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
return False
def supports_fp8_compute(device=None):
+if args.supports_fp8_compute:
+return True
if not is_nvidia():
return False
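This early return, combined with the new --supports-fp8-compute command-line flag added earlier in this compare, lets the fp8 path be forced on hardware that the autodetection below would otherwise reject. A small sketch of checking the override from Python, assuming ComfyUI was started with the flag; only args.supports_fp8_compute and supports_fp8_compute() are taken from the diff, the rest is illustrative:

```python
# Sketch: with `--supports-fp8-compute` on the command line, the capability
# probe is short-circuited before any NVIDIA/device checks run, so fp8 matrix
# multiplication can be tested together with --fast or the fp8_e4m3fn_fast dtype.
from comfy.cli_args import args
import comfy.model_management as mm

print("fp8 compute forced on:", args.supports_fp8_compute)
print("supports_fp8_compute():", mm.supports_fp8_compute())
```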

View File

@ -0,0 +1,5 @@
from .torch_compile import set_torch_compile_wrapper
__all__ = [
"set_torch_compile_wrapper",
]

View File

@ -0,0 +1,69 @@
from __future__ import annotations
import torch
import comfy.utils
from comfy.patcher_extension import WrappersMP
from typing import TYPE_CHECKING, Callable, Optional
if TYPE_CHECKING:
from comfy.model_patcher import ModelPatcher
from comfy.patcher_extension import WrapperExecutor
COMPILE_KEY = "torch.compile"
TORCH_COMPILE_KWARGS = "torch_compile_kwargs"
def apply_torch_compile_factory(compiled_module_dict: dict[str, Callable]) -> Callable:
'''
Create a wrapper that will refer to the compiled_diffusion_model.
'''
def apply_torch_compile_wrapper(executor: WrapperExecutor, *args, **kwargs):
try:
orig_modules = {}
for key, value in compiled_module_dict.items():
orig_modules[key] = comfy.utils.get_attr(executor.class_obj, key)
comfy.utils.set_attr(executor.class_obj, key, value)
return executor(*args, **kwargs)
finally:
for key, value in orig_modules.items():
comfy.utils.set_attr(executor.class_obj, key, value)
return apply_torch_compile_wrapper
def set_torch_compile_wrapper(model: ModelPatcher, backend: str, options: Optional[dict[str,str]]=None,
mode: Optional[str]=None, fullgraph=False, dynamic: Optional[bool]=None,
keys: list[str]=["diffusion_model"], *args, **kwargs):
'''
Perform torch.compile that will be applied at sample time for either the whole model or specific params of the BaseModel instance.
When keys is None, it will default to using ["diffusion_model"], compiling the whole diffusion_model.
When a list of keys is provided, it will perform torch.compile on only the selected modules.
'''
# clear out any other torch.compile wrappers
model.remove_wrappers_with_key(WrappersMP.APPLY_MODEL, COMPILE_KEY)
# if no keys, default to 'diffusion_model'
if not keys:
keys = ["diffusion_model"]
# create kwargs dict that can be referenced later
compile_kwargs = {
"backend": backend,
"options": options,
"mode": mode,
"fullgraph": fullgraph,
"dynamic": dynamic,
}
# get a dict of compiled keys
compiled_modules = {}
for key in keys:
compiled_modules[key] = torch.compile(
model=model.get_model_object(key),
**compile_kwargs,
)
# add torch.compile wrapper
wrapper_func = apply_torch_compile_factory(
compiled_module_dict=compiled_modules,
)
# store wrapper to run on BaseModel's apply_model function
model.add_wrapper_with_key(WrappersMP.APPLY_MODEL, COMPILE_KEY, wrapper_func)
# keep compile kwargs for reference
model.model_options[TORCH_COMPILE_KWARGS] = compile_kwargs
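Because the helper now lives under comfy_api, custom nodes can call it directly instead of object-patching diffusion_model themselves. A minimal sketch of a node using it (the node name and backend choices are illustrative; the structure mirrors the updated TorchCompileModel node further below):

```python
# Sketch: a custom node that compiles only selected submodules. `model` is a
# ModelPatcher (a MODEL input); the keys name attributes of the BaseModel.
from comfy_api.torch_helpers import set_torch_compile_wrapper

class TorchCompileDiffusionOnly:  # illustrative node name
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"model": ("MODEL",), "backend": (["inductor", "cudagraphs"],)}}

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"

    def patch(self, model, backend):
        m = model.clone()
        # The compiled modules are swapped in through an APPLY_MODEL wrapper at
        # sample time, so LoRA patches and attribute lookups on diffusion_model
        # keep working against the uncompiled object.
        set_torch_compile_wrapper(model=m, backend=backend, keys=["diffusion_model"])
        return (m,)
```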

View File

@ -16,7 +16,7 @@ class Load3D():
os.makedirs(input_dir, exist_ok=True)
-files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.obj', '.mtl', '.fbx', '.stl'))]
+files = [normalize_path(os.path.join("3d", f)) for f in os.listdir(input_dir) if f.endswith(('.gltf', '.glb', '.obj', '.fbx', '.stl'))]
return {"required": {
"model_file": (sorted(files), {"file_upload": True}),

View File

@ -1,4 +1,5 @@
import torch
+from comfy_api.torch_helpers import set_torch_compile_wrapper
class TorchCompileModel:
@classmethod
@ -14,7 +15,7 @@ class TorchCompileModel:
def patch(self, model, backend):
m = model.clone()
-m.add_object_patch("diffusion_model", torch.compile(model=m.get_model_object("diffusion_model"), backend=backend))
+set_torch_compile_wrapper(model=m, backend=backend)
return (m, )
NODE_CLASS_MAPPINGS = {

View File

@ -268,8 +268,9 @@ class WanVaceToVideo:
trim_latent = reference_image.shape[2]
mask = mask.unsqueeze(0)
positive = node_helpers.conditioning_set_values(positive, {"vace_frames": control_video_latent, "vace_mask": mask, "vace_strength": strength})
negative = node_helpers.conditioning_set_values(negative, {"vace_frames": control_video_latent, "vace_mask": mask, "vace_strength": strength})
positive = node_helpers.conditioning_set_values(positive, {"vace_frames": [control_video_latent], "vace_mask": [mask], "vace_strength": [strength]}, append=True)
negative = node_helpers.conditioning_set_values(negative, {"vace_frames": [control_video_latent], "vace_mask": [mask], "vace_strength": [strength]}, append=True)
latent = torch.zeros([batch_size, 16, latent_length, height // 8, width // 8], device=comfy.model_management.intermediate_device())
out_latent = {}
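Since the values are now appended as one-element lists, chaining two WanVaceToVideo nodes stacks their control branches instead of overwriting them, and VaceWanModel weights each branch with its own strength. A rough sketch of the resulting conditioning contents (tensor shapes and variable names are placeholders):

```python
# Sketch: two chained applications accumulate parallel list entries.
import torch
import node_helpers

cond = [[torch.zeros(1, 4), {}]]  # placeholder conditioning entry
latent_a, latent_b = torch.zeros(1, 96, 1, 8, 8), torch.zeros(1, 96, 1, 8, 8)
mask_a, mask_b = torch.ones(1, 64, 1, 8, 8), torch.ones(1, 64, 1, 8, 8)

cond = node_helpers.conditioning_set_values(
    cond, {"vace_frames": [latent_a], "vace_mask": [mask_a], "vace_strength": [1.0]}, append=True)
cond = node_helpers.conditioning_set_values(
    cond, {"vace_frames": [latent_b], "vace_mask": [mask_b], "vace_strength": [0.5]}, append=True)

print(cond[0][1]["vace_strength"])  # [1.0, 0.5]: one weight per stacked branch
```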

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.3.34"
__version__ = "0.3.36"

View File

@ -909,7 +909,6 @@ class PromptQueue:
self.currently_running = {}
self.history = {}
self.flags = {}
-server.prompt_queue = self
def put(self, item):
with self.mutex:
@ -954,6 +953,7 @@ class PromptQueue:
self.history[prompt[1]].update(history_result)
self.server.queue_updated()
+# Note: slow
def get_current_queue(self):
with self.mutex:
out = []
@ -961,6 +961,13 @@ class PromptQueue:
out += [x]
return (out, copy.deepcopy(self.queue))
+# read-safe as long as queue items are immutable
+def get_current_queue_volatile(self):
+with self.mutex:
+running = [x for x in self.currently_running.values()]
+queued = copy.copy(self.queue)
+return (running, queued)
def get_tasks_remaining(self):
with self.mutex:
return len(self.queue) + len(self.currently_running)
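get_current_queue_volatile() drops the deepcopy that made the /queue endpoint slow once thousands of prompts were pending; a shallow copy of the list is safe to hand out as long as the queued items themselves are never mutated, as the comment above notes. A rough sketch of why the cost differs (queue size and item shape are made up):

```python
# Sketch: shallow vs deep copy of a large queue of prompt-like tuples.
import copy
import timeit

queue = [(i, f"id-{i}", {"nodes": dict.fromkeys(range(200))}) for i in range(5000)]

print(timeit.timeit(lambda: copy.copy(queue), number=10))      # copies 5000 references
print(timeit.timeit(lambda: copy.deepcopy(queue), number=10))  # clones every nested dict
```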

View File

@ -260,7 +260,6 @@ def start_comfyui(asyncio_loop=None):
asyncio_loop = asyncio.new_event_loop()
asyncio.set_event_loop(asyncio_loop)
prompt_server = server.PromptServer(asyncio_loop)
-q = execution.PromptQueue(prompt_server)
hook_breaker_ac10a0.save_functions()
nodes.init_extra_nodes(init_custom_nodes=not args.disable_all_custom_nodes, init_api_nodes=not args.disable_api_nodes)
@ -271,7 +270,7 @@ def start_comfyui(asyncio_loop=None):
prompt_server.add_routes()
hijack_progress(prompt_server)
-threading.Thread(target=prompt_worker, daemon=True, args=(q, prompt_server,)).start()
+threading.Thread(target=prompt_worker, daemon=True, args=(prompt_server.prompt_queue, prompt_server,)).start()
if args.quick_test_for_ci:
exit(0)

View File

@ -5,12 +5,18 @@ from comfy.cli_args import args
from PIL import ImageFile, UnidentifiedImageError
-def conditioning_set_values(conditioning, values={}):
+def conditioning_set_values(conditioning, values={}, append=False):
c = []
for t in conditioning:
n = [t[0], t[1].copy()]
for k in values:
-n[1][k] = values[k]
+val = values[k]
+if append:
+old_val = n[1].get(k, None)
+if old_val is not None:
+val = old_val + val
+n[1][k] = val
c.append(n)
return c
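A toy illustration of the new flag (the entry and values are made up): with append=False the key is overwritten as before, while append=True concatenates the new value onto any existing one, which is what the VACE and unCLIP nodes in this compare rely on:

```python
import node_helpers

cond = [[None, {"unclip_conditioning": [{"strength": 1.0}]}]]  # toy conditioning entry
cond = node_helpers.conditioning_set_values(
    cond, {"unclip_conditioning": [{"strength": 0.25}]}, append=True)

print(len(cond[0][1]["unclip_conditioning"]))  # 2: old entry kept, new one appended
```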

View File

@ -1103,16 +1103,7 @@ class unCLIPConditioning:
if strength == 0:
return (conditioning, )
-c = []
-for t in conditioning:
-o = t[1].copy()
-x = {"clip_vision_output": clip_vision_output, "strength": strength, "noise_augmentation": noise_augmentation}
-if "unclip_conditioning" in o:
-o["unclip_conditioning"] = o["unclip_conditioning"][:] + [x]
-else:
-o["unclip_conditioning"] = [x]
-n = [t[0], o]
-c.append(n)
+c = node_helpers.conditioning_set_values(conditioning, {"unclip_conditioning": [{"clip_vision_output": clip_vision_output, "strength": strength, "noise_augmentation": noise_augmentation}]}, append=True)
return (c, )
class GLIGENLoader:

View File

@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.3.34"
version = "0.3.36"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.9"

View File

@ -1,5 +1,5 @@
-comfyui-frontend-package==1.19.9
-comfyui-workflow-templates==0.1.14
+comfyui-frontend-package==1.20.5
+comfyui-workflow-templates==0.1.18
torch
torchsde
torchvision

View File

@ -29,6 +29,7 @@ import comfy.model_management
import node_helpers
from comfyui_version import __version__
from app.frontend_management import FrontendManager
from app.user_manager import UserManager
from app.model_manager import ModelFileManager
from app.custom_node_manager import CustomNodeManager
@ -159,7 +160,7 @@ class PromptServer():
self.custom_node_manager = CustomNodeManager()
self.internal_routes = InternalRoutes(self)
self.supports = ["custom_nodes_from_web"]
-self.prompt_queue = None
+self.prompt_queue = execution.PromptQueue(self)
self.loop = loop
self.messages = asyncio.Queue()
self.client_session:Optional[aiohttp.ClientSession] = None
@ -226,7 +227,7 @@ class PromptServer():
return response
@routes.get("/embeddings")
-def get_embeddings(self):
+def get_embeddings(request):
embeddings = folder_paths.get_filename_list("embeddings")
return web.json_response(list(map(lambda a: os.path.splitext(a)[0], embeddings)))
@ -282,7 +283,6 @@ class PromptServer():
a.update(f.read())
b.update(image.file.read())
image.file.seek(0)
-f.close()
return a.hexdigest() == b.hexdigest()
return False
@ -621,7 +621,7 @@ class PromptServer():
@routes.get("/queue")
async def get_queue(request):
queue_info = {}
-current_queue = self.prompt_queue.get_current_queue()
+current_queue = self.prompt_queue.get_current_queue_volatile()
queue_info['queue_running'] = current_queue[0]
queue_info['queue_pending'] = current_queue[1]
return web.json_response(queue_info)