Merge branch 'comfyanonymous:master' into nix-support

This commit is contained in:
807 2025-03-15 13:04:24 +08:00 committed by GitHub
commit cc8715c0e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
161 changed files with 2988 additions and 335573 deletions

View File

@ -0,0 +1,2 @@
.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
pause

View File

@ -22,7 +22,7 @@ on:
description: 'Python patch version'
required: true
type: string
default: "8"
default: "9"
jobs:

View File

@ -1,58 +0,0 @@
name: Update Frontend Release
on:
workflow_dispatch:
inputs:
version:
description: "Frontend version to update to (e.g., 1.0.0)"
required: true
type: string
jobs:
update-frontend:
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout ComfyUI
uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install requirements
run: |
python -m pip install --upgrade pip
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install -r requirements.txt
pip install wait-for-it
# Frontend asset will be downloaded to ComfyUI/web_custom_versions/Comfy-Org_ComfyUI_frontend/{version}
- name: Start ComfyUI server
run: |
python main.py --cpu --front-end-version Comfy-Org/ComfyUI_frontend@${{ github.event.inputs.version }} 2>&1 | tee console_output.log &
wait-for-it --service 127.0.0.1:8188 -t 30
- name: Configure Git
run: |
git config --global user.name "GitHub Action"
git config --global user.email "action@github.com"
# Replace existing frontend content with the new version and remove .js.map files
# See https://github.com/Comfy-Org/ComfyUI_frontend/issues/2145 for why we remove .js.map files
- name: Update frontend content
run: |
rm -rf web/
cp -r web_custom_versions/Comfy-Org_ComfyUI_frontend/${{ github.event.inputs.version }} web/
rm web/**/*.js.map
- name: Create Pull Request
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.PR_BOT_PAT }}
commit-message: "Update frontend to v${{ github.event.inputs.version }}"
title: "Frontend Update: v${{ github.event.inputs.version }}"
body: |
Automated PR to update frontend content to version ${{ github.event.inputs.version }}
This PR was created automatically by the frontend update workflow.
branch: release-${{ github.event.inputs.version }}
base: master
labels: Frontend,dependencies

View File

@ -29,7 +29,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "8"
default: "9"
# push:
# branches:
# - master

View File

@ -7,7 +7,7 @@ on:
description: 'cuda version'
required: true
type: string
default: "126"
default: "128"
python_minor:
description: 'python minor version'
@ -19,7 +19,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "1"
default: "2"
# push:
# branches:
# - master
@ -34,7 +34,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
fetch-depth: 30
persist-credentials: false
- uses: actions/setup-python@v5
with:
@ -74,7 +74,7 @@ jobs:
pause" > ./update/update_comfyui_and_python_dependencies.bat
cd ..
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI_windows_portable_nightly_pytorch
"C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI_windows_portable_nightly_pytorch
mv ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI/ComfyUI_windows_portable_nvidia_or_cpu_nightly_pytorch.7z
cd ComfyUI_windows_portable_nightly_pytorch

View File

@ -19,7 +19,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "8"
default: "9"
# push:
# branches:
# - master

View File

@ -11,14 +11,14 @@
/notebooks/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata @Kosinkadink
/script_examples/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata @Kosinkadink
/.github/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata @Kosinkadink
/requirements.txt @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata @Kosinkadink
/pyproject.toml @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata @Kosinkadink
# Python web server
/api_server/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata
/app/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata
/utils/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata
# Frontend assets
/web/ @huchenlei @webfiltered @pythongosssss @yoland68 @robinjhuang
# Extra nodes
/comfy_extras/ @yoland68 @robinjhuang @huchenlei @pythongosssss @ltdrdata @Kosinkadink
# Node developers
/comfy_extras/ @yoland68 @robinjhuang @huchenlei @pythongosssss @ltdrdata @Kosinkadink @webfiltered
/comfy/comfy_types/ @yoland68 @robinjhuang @huchenlei @pythongosssss @ltdrdata @Kosinkadink @webfiltered

View File

@ -1,7 +1,7 @@
<div align="center">
# ComfyUI
**The most powerful and modular diffusion model GUI and backend.**
**The most powerful and modular visual AI engine and application.**
[![Website][website-shield]][website-url]
@ -46,7 +46,7 @@ ComfyUI lets you design and execute advanced stable diffusion pipelines using a
#### [Manual Install](#manual-install-windows-linux)
Supports all operating systems and GPU types (NVIDIA, AMD, Intel, Apple Silicon, Ascend).
## Examples
## [Examples](https://comfyanonymous.github.io/ComfyUI_examples/)
See what ComfyUI can do with the [example workflows](https://comfyanonymous.github.io/ComfyUI_examples/).
@ -68,6 +68,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
- [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
- [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
- [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/)
- [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
- [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
- Asynchronous Queue system
- Many optimizations: Only re-executes the parts of the workflow that changes between executions.
@ -214,9 +215,9 @@ Nvidia users should install stable pytorch using this command:
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu126```
This is the command to install pytorch nightly instead which might have performance improvements:
This is the command to install pytorch nightly instead which supports the new blackwell 50xx series GPUs and might have performance improvements.
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126```
```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128```
#### Troubleshooting
@ -260,6 +261,13 @@ For models compatible with Ascend Extension for PyTorch (torch_npu). To get star
3. Next, install the necessary packages for torch-npu by adhering to the platform-specific instructions on the [Installation](https://ascend.github.io/docs/sources/pytorch/install.html#pytorch) page.
4. Finally, adhere to the [ComfyUI manual installation](#manual-install-windows-linux) guide for Linux. Once all components are installed, you can run ComfyUI as described earlier.
#### Cambricon MLUs
For models compatible with Cambricon Extension for PyTorch (torch_mlu). Here's a step-by-step guide tailored to your platform and installation method:
1. Install the Cambricon CNToolkit by adhering to the platform-specific instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cntoolkit_3.7.2/cntoolkit_install_3.7.2/index.html)
2. Next, install the PyTorch(torch_mlu) following the instructions on the [Installation](https://www.cambricon.com/docs/sdk_1.15.0/cambricon_pytorch_1.17.0/user_guide_1.9/index.html)
3. Launch ComfyUI by running `python main.py`
# Running

View File

@ -3,16 +3,51 @@ import argparse
import logging
import os
import re
import sys
import tempfile
import zipfile
import importlib
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import TypedDict, Optional
from importlib.metadata import version
import requests
from typing_extensions import NotRequired
from comfy.cli_args import DEFAULT_VERSION_STRING
import app.logger
# The path to the requirements.txt file
req_path = Path(__file__).parents[1] / "requirements.txt"
def frontend_install_warning_message():
"""The warning message to display when the frontend version is not up to date."""
extra = ""
if sys.flags.no_user_site:
extra = "-s "
return f"Please install the updated requirements.txt file by running:\n{sys.executable} {extra}-m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem"
def check_frontend_version():
"""Check if the frontend version is up to date."""
def parse_version(version: str) -> tuple[int, int, int]:
return tuple(map(int, version.split(".")))
try:
frontend_version_str = version("comfyui-frontend-package")
frontend_version = parse_version(frontend_version_str)
with open(req_path, "r", encoding="utf-8") as f:
required_frontend = parse_version(f.readline().split("=")[-1])
if frontend_version < required_frontend:
app.logger.log_startup_warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message()))
else:
logging.info("ComfyUI frontend version: {}".format(frontend_version_str))
except Exception as e:
logging.error(f"Failed to check frontend version: {e}")
REQUEST_TIMEOUT = 10 # seconds
@ -109,9 +144,17 @@ def download_release_asset_zip(release: Release, destination_path: str) -> None:
class FrontendManager:
DEFAULT_FRONTEND_PATH = str(Path(__file__).parents[1] / "web")
CUSTOM_FRONTENDS_ROOT = str(Path(__file__).parents[1] / "web_custom_versions")
@classmethod
def default_frontend_path(cls) -> str:
try:
import comfyui_frontend_package
return str(importlib.resources.files(comfyui_frontend_package) / "static")
except ImportError:
logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. {frontend_install_warning_message()}\n********** ERROR **********\n")
sys.exit(-1)
@classmethod
def parse_version_string(cls, value: str) -> tuple[str, str, str]:
"""
@ -148,7 +191,8 @@ class FrontendManager:
main error source might be request timeout or invalid URL.
"""
if version_string == DEFAULT_VERSION_STRING:
return cls.DEFAULT_FRONTEND_PATH
check_frontend_version()
return cls.default_frontend_path()
repo_owner, repo_name, version = cls.parse_version_string(version_string)
@ -201,4 +245,5 @@ class FrontendManager:
except Exception as e:
logging.error("Failed to initialize frontend: %s", e)
logging.info("Falling back to the default frontend.")
return cls.DEFAULT_FRONTEND_PATH
check_frontend_version()
return cls.default_frontend_path()

View File

@ -82,3 +82,17 @@ def setup_logger(log_level: str = 'INFO', capacity: int = 300, use_stdout: bool
logger.addHandler(stdout_handler)
logger.addHandler(stream_handler)
STARTUP_WARNINGS = []
def log_startup_warning(msg):
logging.warning(msg)
STARTUP_WARNINGS.append(msg)
def print_startup_warnings():
for s in STARTUP_WARNINGS:
logging.warning(s)
STARTUP_WARNINGS.clear()

View File

@ -1,7 +1,6 @@
import argparse
import enum
import os
from typing import Optional
import comfy.options
@ -107,6 +106,7 @@ attn_group.add_argument("--use-split-cross-attention", action="store_true", help
attn_group.add_argument("--use-quad-cross-attention", action="store_true", help="Use the sub-quadratic cross attention optimization . Ignored when xformers is used.")
attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.")
attn_group.add_argument("--use-sage-attention", action="store_true", help="Use sage attention.")
attn_group.add_argument("--use-flash-attention", action="store_true", help="Use FlashAttention.")
parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.")
@ -130,7 +130,12 @@ parser.add_argument("--default-hashing-function", type=str, choices=['md5', 'sha
parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--deterministic", action="store_true", help="Make pytorch use slower deterministic algorithms when it can. Note that this might not make images deterministic in all cases.")
parser.add_argument("--fast", action="store_true", help="Enable some untested and potentially quality deteriorating optimizations.")
class PerformanceFeature(enum.Enum):
Fp16Accumulation = "fp16_accumulation"
Fp8MatrixMultiplication = "fp8_matrix_mult"
parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: fp16_accumulation fp8_matrix_mult")
parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.")
@ -161,13 +166,14 @@ parser.add_argument(
""",
)
def is_valid_directory(path: Optional[str]) -> Optional[str]:
"""Validate if the given path is a directory."""
if path is None:
return None
def is_valid_directory(path: str) -> str:
"""Validate if the given path is a directory, and check permissions."""
if not os.path.exists(path):
raise argparse.ArgumentTypeError(f"The path '{path}' does not exist.")
if not os.path.isdir(path):
raise argparse.ArgumentTypeError(f"{path} is not a valid directory.")
raise argparse.ArgumentTypeError(f"'{path}' is not a directory.")
if not os.access(path, os.R_OK):
raise argparse.ArgumentTypeError(f"You do not have read permissions for '{path}'.")
return path
parser.add_argument(
@ -194,3 +200,14 @@ if args.disable_auto_launch:
if args.force_fp16:
args.fp16_unet = True
# '--fast' is not provided, use an empty set
if args.fast is None:
args.fast = set()
# '--fast' is provided with an empty list, enable all optimizations
elif args.fast == []:
args.fast = set(PerformanceFeature)
# '--fast' is provided with a list of performance features, use that list
else:
args.fast = set(args.fast)

View File

@ -97,8 +97,12 @@ class CLIPTextModel_(torch.nn.Module):
self.encoder = CLIPEncoder(num_layers, embed_dim, heads, intermediate_size, intermediate_activation, dtype, device, operations)
self.final_layer_norm = operations.LayerNorm(embed_dim, dtype=dtype, device=device)
def forward(self, input_tokens, attention_mask=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=torch.float32):
x = self.embeddings(input_tokens, dtype=dtype)
def forward(self, input_tokens=None, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=torch.float32):
if embeds is not None:
x = embeds + comfy.ops.cast_to(self.embeddings.position_embedding.weight, dtype=dtype, device=embeds.device)
else:
x = self.embeddings(input_tokens, dtype=dtype)
mask = None
if attention_mask is not None:
mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, attention_mask.shape[-1], attention_mask.shape[-1])
@ -116,7 +120,10 @@ class CLIPTextModel_(torch.nn.Module):
if i is not None and final_layer_norm_intermediate:
i = self.final_layer_norm(i)
pooled_output = x[torch.arange(x.shape[0], device=x.device), (torch.round(input_tokens).to(dtype=torch.int, device=x.device) == self.eos_token_id).int().argmax(dim=-1),]
if num_tokens is not None:
pooled_output = x[list(range(x.shape[0])), list(map(lambda a: a - 1, num_tokens))]
else:
pooled_output = x[torch.arange(x.shape[0], device=x.device), (torch.round(input_tokens).to(dtype=torch.int, device=x.device) == self.eos_token_id).int().argmax(dim=-1),]
return x, i, pooled_output
class CLIPTextModel(torch.nn.Module):
@ -204,6 +211,15 @@ class CLIPVision(torch.nn.Module):
pooled_output = self.post_layernorm(x[:, 0, :])
return x, i, pooled_output
class LlavaProjector(torch.nn.Module):
def __init__(self, in_dim, out_dim, dtype, device, operations):
super().__init__()
self.linear_1 = operations.Linear(in_dim, out_dim, bias=True, device=device, dtype=dtype)
self.linear_2 = operations.Linear(out_dim, out_dim, bias=True, device=device, dtype=dtype)
def forward(self, x):
return self.linear_2(torch.nn.functional.gelu(self.linear_1(x[:, 1:])))
class CLIPVisionModelProjection(torch.nn.Module):
def __init__(self, config_dict, dtype, device, operations):
super().__init__()
@ -213,7 +229,16 @@ class CLIPVisionModelProjection(torch.nn.Module):
else:
self.visual_projection = lambda a: a
if "llava3" == config_dict.get("projector_type", None):
self.multi_modal_projector = LlavaProjector(config_dict["hidden_size"], 4096, dtype, device, operations)
else:
self.multi_modal_projector = None
def forward(self, *args, **kwargs):
x = self.vision_model(*args, **kwargs)
out = self.visual_projection(x[2])
return (x[0], x[1], out)
projected = None
if self.multi_modal_projector is not None:
projected = self.multi_modal_projector(x[1])
return (x[0], x[1], out, projected)

View File

@ -65,6 +65,7 @@ class ClipVisionModel():
outputs["last_hidden_state"] = out[0].to(comfy.model_management.intermediate_device())
outputs["image_embeds"] = out[2].to(comfy.model_management.intermediate_device())
outputs["penultimate_hidden_states"] = out[1].to(comfy.model_management.intermediate_device())
outputs["mm_projected"] = out[3]
return outputs
def convert_to_transformers(sd, prefix):
@ -104,7 +105,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
if sd["vision_model.encoder.layers.0.layer_norm1.weight"].shape[0] == 1152:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_siglip_384.json")
elif sd["vision_model.embeddings.position_embedding.weight"].shape[0] == 577:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json")
if "multi_modal_projector.linear_1.bias" in sd:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336_llava.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json")
else:
json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
else:

View File

@ -0,0 +1,19 @@
{
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "quick_gelu",
"hidden_size": 1024,
"image_size": 336,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-5,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 24,
"patch_size": 14,
"projection_dim": 768,
"projector_type": "llava3",
"torch_dtype": "float32"
}

View File

@ -1,6 +1,6 @@
import torch
from typing import Callable, Protocol, TypedDict, Optional, List
from .node_typing import IO, InputTypeDict, ComfyNodeABC, CheckLazyMixin
from .node_typing import IO, InputTypeDict, ComfyNodeABC, CheckLazyMixin, FileLocator
class UnetApplyFunction(Protocol):
@ -42,4 +42,5 @@ __all__ = [
InputTypeDict.__name__,
ComfyNodeABC.__name__,
CheckLazyMixin.__name__,
FileLocator.__name__,
]

View File

@ -2,6 +2,7 @@
from __future__ import annotations
from typing import Literal, TypedDict
from typing_extensions import NotRequired
from abc import ABC, abstractmethod
from enum import Enum
@ -26,6 +27,7 @@ class IO(StrEnum):
BOOLEAN = "BOOLEAN"
INT = "INT"
FLOAT = "FLOAT"
COMBO = "COMBO"
CONDITIONING = "CONDITIONING"
SAMPLER = "SAMPLER"
SIGMAS = "SIGMAS"
@ -66,6 +68,7 @@ class IO(StrEnum):
b = frozenset(value.split(","))
return not (b.issubset(a) or a.issubset(b))
class RemoteInputOptions(TypedDict):
route: str
"""The route to the remote source."""
@ -80,6 +83,14 @@ class RemoteInputOptions(TypedDict):
refresh: int
"""The TTL of the remote input's value in milliseconds. Specifies the interval at which the remote input's value is refreshed."""
class MultiSelectOptions(TypedDict):
placeholder: NotRequired[str]
"""The placeholder text to display in the multi-select widget when no items are selected."""
chip: NotRequired[bool]
"""Specifies whether to use chips instead of comma separated values for the multi-select widget."""
class InputTypeOptions(TypedDict):
"""Provides type hinting for the return type of the INPUT_TYPES node function.
@ -114,7 +125,7 @@ class InputTypeOptions(TypedDict):
# default: bool
label_on: str
"""The label to use in the UI when the bool is True (``BOOLEAN``)"""
label_on: str
label_off: str
"""The label to use in the UI when the bool is False (``BOOLEAN``)"""
# class InputTypeString(InputTypeOptions):
# default: str
@ -133,7 +144,22 @@ class InputTypeOptions(TypedDict):
"""Specifies which folder to get preview images from if the input has the ``image_upload`` flag.
"""
remote: RemoteInputOptions
"""Specifies the configuration for a remote input."""
"""Specifies the configuration for a remote input.
Available after ComfyUI frontend v1.9.7
https://github.com/Comfy-Org/ComfyUI_frontend/pull/2422"""
control_after_generate: bool
"""Specifies whether a control widget should be added to the input, adding options to automatically change the value after each prompt is queued. Currently only used for INT and COMBO types."""
options: NotRequired[list[str | int | float]]
"""COMBO type only. Specifies the selectable options for the combo widget.
Prefer:
["COMBO", {"options": ["Option 1", "Option 2", "Option 3"]}]
Over:
[["Option 1", "Option 2", "Option 3"]]
"""
multi_select: NotRequired[MultiSelectOptions]
"""COMBO type only. Specifies the configuration for a multi-select widget.
Available after ComfyUI frontend v1.13.4
https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
class HiddenInputTypeDict(TypedDict):
@ -293,3 +319,14 @@ class CheckLazyMixin:
need = [name for name in kwargs if kwargs[name] is None]
return need
class FileLocator(TypedDict):
"""Provides type hinting for the file location"""
filename: str
"""The filename of the file."""
subfolder: str
"""The subfolder of the file."""
type: Literal["input", "output", "temp"]
"""The root folder of the file."""

View File

@ -418,10 +418,7 @@ def controlnet_config(sd, model_options={}):
weight_dtype = comfy.utils.weight_dtype(sd)
supported_inference_dtypes = list(model_config.supported_inference_dtypes)
if weight_dtype is not None:
supported_inference_dtypes.append(weight_dtype)
unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes)
unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes, weight_dtype=weight_dtype)
load_device = comfy.model_management.get_torch_device()
manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
@ -689,10 +686,7 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}):
if supported_inference_dtypes is None:
supported_inference_dtypes = [comfy.model_management.unet_dtype()]
if weight_dtype is not None:
supported_inference_dtypes.append(weight_dtype)
unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes)
unet_dtype = comfy.model_management.unet_dtype(model_params=-1, supported_dtypes=supported_inference_dtypes, weight_dtype=weight_dtype)
load_device = comfy.model_management.get_torch_device()

View File

@ -688,10 +688,10 @@ def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=N
if len(sigmas) <= 1:
return x
extra_args = {} if extra_args is None else extra_args
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
seed = extra_args.get("seed", None)
noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
extra_args = {} if extra_args is None else extra_args
s_in = x.new_ones([x.shape[0]])
sigma_fn = lambda t: t.neg().exp()
t_fn = lambda sigma: sigma.log().neg()
@ -762,10 +762,10 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
if solver_type not in {'heun', 'midpoint'}:
raise ValueError('solver_type must be \'heun\' or \'midpoint\'')
extra_args = {} if extra_args is None else extra_args
seed = extra_args.get("seed", None)
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
extra_args = {} if extra_args is None else extra_args
s_in = x.new_ones([x.shape[0]])
old_denoised = None
@ -808,10 +808,10 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
if len(sigmas) <= 1:
return x
extra_args = {} if extra_args is None else extra_args
seed = extra_args.get("seed", None)
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
extra_args = {} if extra_args is None else extra_args
s_in = x.new_ones([x.shape[0]])
denoised_1, denoised_2 = None, None
@ -858,7 +858,7 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
if len(sigmas) <= 1:
return x
extra_args = {} if extra_args is None else extra_args
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler)
@ -867,7 +867,7 @@ def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di
def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
if len(sigmas) <= 1:
return x
extra_args = {} if extra_args is None else extra_args
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type)
@ -876,7 +876,7 @@ def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di
def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
if len(sigmas) <= 1:
return x
extra_args = {} if extra_args is None else extra_args
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r)
@ -1366,3 +1366,59 @@ def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None,
x = x + d_bar * dt
old_d = d
return x
@torch.no_grad()
def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, s_noise=1., noise_sampler=None, noise_scaler=None, max_stage=3):
"""
Extended Reverse-Time SDE solver (VE ER-SDE-Solver-3). Arxiv: https://arxiv.org/abs/2309.06169.
Code reference: https://github.com/QinpengCui/ER-SDE-Solver/blob/main/er_sde_solver.py.
"""
extra_args = {} if extra_args is None else extra_args
seed = extra_args.get("seed", None)
noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler
s_in = x.new_ones([x.shape[0]])
def default_noise_scaler(sigma):
return sigma * ((sigma ** 0.3).exp() + 10.0)
noise_scaler = default_noise_scaler if noise_scaler is None else noise_scaler
num_integration_points = 200.0
point_indice = torch.arange(0, num_integration_points, dtype=torch.float32, device=x.device)
old_denoised = None
old_denoised_d = None
for i in trange(len(sigmas) - 1, disable=disable):
denoised = model(x, sigmas[i] * s_in, **extra_args)
if callback is not None:
callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
stage_used = min(max_stage, i + 1)
if sigmas[i + 1] == 0:
x = denoised
elif stage_used == 1:
r = noise_scaler(sigmas[i + 1]) / noise_scaler(sigmas[i])
x = r * x + (1 - r) * denoised
else:
r = noise_scaler(sigmas[i + 1]) / noise_scaler(sigmas[i])
x = r * x + (1 - r) * denoised
dt = sigmas[i + 1] - sigmas[i]
sigma_step_size = -dt / num_integration_points
sigma_pos = sigmas[i + 1] + point_indice * sigma_step_size
scaled_pos = noise_scaler(sigma_pos)
# Stage 2
s = torch.sum(1 / scaled_pos) * sigma_step_size
denoised_d = (denoised - old_denoised) / (sigmas[i] - sigmas[i - 1])
x = x + (dt + s * noise_scaler(sigmas[i + 1])) * denoised_d
if stage_used >= 3:
# Stage 3
s_u = torch.sum((sigma_pos - sigmas[i]) / scaled_pos) * sigma_step_size
denoised_u = (denoised_d - old_denoised_d) / ((sigmas[i] - sigmas[i - 2]) / 2)
x = x + ((dt ** 2) / 2 + s_u * noise_scaler(sigmas[i + 1])) * denoised_u
old_denoised_d = denoised_d
if s_noise != 0 and sigmas[i + 1] > 0:
x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (sigmas[i + 1] ** 2 - sigmas[i] ** 2 * r ** 2).sqrt()
old_denoised = denoised
return x

View File

@ -407,3 +407,52 @@ class Cosmos1CV8x8x8(LatentFormat):
]
latent_rgb_factors_bias = [-0.1223, -0.1889, -0.1976]
class Wan21(LatentFormat):
latent_channels = 16
latent_dimensions = 3
latent_rgb_factors = [
[-0.1299, -0.1692, 0.2932],
[ 0.0671, 0.0406, 0.0442],
[ 0.3568, 0.2548, 0.1747],
[ 0.0372, 0.2344, 0.1420],
[ 0.0313, 0.0189, -0.0328],
[ 0.0296, -0.0956, -0.0665],
[-0.3477, -0.4059, -0.2925],
[ 0.0166, 0.1902, 0.1975],
[-0.0412, 0.0267, -0.1364],
[-0.1293, 0.0740, 0.1636],
[ 0.0680, 0.3019, 0.1128],
[ 0.0032, 0.0581, 0.0639],
[-0.1251, 0.0927, 0.1699],
[ 0.0060, -0.0633, 0.0005],
[ 0.3477, 0.2275, 0.2950],
[ 0.1984, 0.0913, 0.1861]
]
latent_rgb_factors_bias = [-0.1835, -0.0868, -0.3360]
def __init__(self):
self.scale_factor = 1.0
self.latents_mean = torch.tensor([
-0.7571, -0.7089, -0.9113, 0.1075, -0.1745, 0.9653, -0.1517, 1.5508,
0.4134, -0.0715, 0.5517, -0.3632, -0.1922, -0.9497, 0.2503, -0.2921
]).view(1, self.latent_channels, 1, 1, 1)
self.latents_std = torch.tensor([
2.8184, 1.4541, 2.3275, 2.6558, 1.2196, 1.7708, 2.6052, 2.0743,
3.2687, 2.1526, 2.8652, 1.5579, 1.6382, 1.1253, 2.8251, 1.9160
]).view(1, self.latent_channels, 1, 1, 1)
self.taesd_decoder_name = None #TODO
def process_in(self, latent):
latents_mean = self.latents_mean.to(latent.device, latent.dtype)
latents_std = self.latents_std.to(latent.device, latent.dtype)
return (latent - latents_mean) * self.scale_factor / latents_std
def process_out(self, latent):
latents_mean = self.latents_mean.to(latent.device, latent.dtype)
latents_std = self.latents_std.to(latent.device, latent.dtype)
return latent * latents_std / self.scale_factor + latents_mean

View File

@ -19,6 +19,10 @@
import torch
from torch import nn
from torch.autograd import Function
import comfy.ops
ops = comfy.ops.disable_weight_init
class vector_quantize(Function):
@staticmethod
@ -121,15 +125,15 @@ class ResBlock(nn.Module):
self.norm1 = nn.LayerNorm(c, elementwise_affine=False, eps=1e-6)
self.depthwise = nn.Sequential(
nn.ReplicationPad2d(1),
nn.Conv2d(c, c, kernel_size=3, groups=c)
ops.Conv2d(c, c, kernel_size=3, groups=c)
)
# channelwise
self.norm2 = nn.LayerNorm(c, elementwise_affine=False, eps=1e-6)
self.channelwise = nn.Sequential(
nn.Linear(c, c_hidden),
ops.Linear(c, c_hidden),
nn.GELU(),
nn.Linear(c_hidden, c),
ops.Linear(c_hidden, c),
)
self.gammas = nn.Parameter(torch.zeros(6), requires_grad=True)
@ -171,16 +175,16 @@ class StageA(nn.Module):
# Encoder blocks
self.in_block = nn.Sequential(
nn.PixelUnshuffle(2),
nn.Conv2d(3 * 4, c_levels[0], kernel_size=1)
ops.Conv2d(3 * 4, c_levels[0], kernel_size=1)
)
down_blocks = []
for i in range(levels):
if i > 0:
down_blocks.append(nn.Conv2d(c_levels[i - 1], c_levels[i], kernel_size=4, stride=2, padding=1))
down_blocks.append(ops.Conv2d(c_levels[i - 1], c_levels[i], kernel_size=4, stride=2, padding=1))
block = ResBlock(c_levels[i], c_levels[i] * 4)
down_blocks.append(block)
down_blocks.append(nn.Sequential(
nn.Conv2d(c_levels[-1], c_latent, kernel_size=1, bias=False),
ops.Conv2d(c_levels[-1], c_latent, kernel_size=1, bias=False),
nn.BatchNorm2d(c_latent), # then normalize them to have mean 0 and std 1
))
self.down_blocks = nn.Sequential(*down_blocks)
@ -191,7 +195,7 @@ class StageA(nn.Module):
# Decoder blocks
up_blocks = [nn.Sequential(
nn.Conv2d(c_latent, c_levels[-1], kernel_size=1)
ops.Conv2d(c_latent, c_levels[-1], kernel_size=1)
)]
for i in range(levels):
for j in range(bottleneck_blocks if i == 0 else 1):
@ -199,11 +203,11 @@ class StageA(nn.Module):
up_blocks.append(block)
if i < levels - 1:
up_blocks.append(
nn.ConvTranspose2d(c_levels[levels - 1 - i], c_levels[levels - 2 - i], kernel_size=4, stride=2,
ops.ConvTranspose2d(c_levels[levels - 1 - i], c_levels[levels - 2 - i], kernel_size=4, stride=2,
padding=1))
self.up_blocks = nn.Sequential(*up_blocks)
self.out_block = nn.Sequential(
nn.Conv2d(c_levels[0], 3 * 4, kernel_size=1),
ops.Conv2d(c_levels[0], 3 * 4, kernel_size=1),
nn.PixelShuffle(2),
)
@ -232,17 +236,17 @@ class Discriminator(nn.Module):
super().__init__()
d = max(depth - 3, 3)
layers = [
nn.utils.spectral_norm(nn.Conv2d(c_in, c_hidden // (2 ** d), kernel_size=3, stride=2, padding=1)),
nn.utils.spectral_norm(ops.Conv2d(c_in, c_hidden // (2 ** d), kernel_size=3, stride=2, padding=1)),
nn.LeakyReLU(0.2),
]
for i in range(depth - 1):
c_in = c_hidden // (2 ** max((d - i), 0))
c_out = c_hidden // (2 ** max((d - 1 - i), 0))
layers.append(nn.utils.spectral_norm(nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1)))
layers.append(nn.utils.spectral_norm(ops.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1)))
layers.append(nn.InstanceNorm2d(c_out))
layers.append(nn.LeakyReLU(0.2))
self.encoder = nn.Sequential(*layers)
self.shuffle = nn.Conv2d((c_hidden + c_cond) if c_cond > 0 else c_hidden, 1, kernel_size=1)
self.shuffle = ops.Conv2d((c_hidden + c_cond) if c_cond > 0 else c_hidden, 1, kernel_size=1)
self.logits = nn.Sigmoid()
def forward(self, x, cond=None):

View File

@ -19,6 +19,9 @@ import torch
import torchvision
from torch import nn
import comfy.ops
ops = comfy.ops.disable_weight_init
# EfficientNet
class EfficientNetEncoder(nn.Module):
@ -26,7 +29,7 @@ class EfficientNetEncoder(nn.Module):
super().__init__()
self.backbone = torchvision.models.efficientnet_v2_s().features.eval()
self.mapper = nn.Sequential(
nn.Conv2d(1280, c_latent, kernel_size=1, bias=False),
ops.Conv2d(1280, c_latent, kernel_size=1, bias=False),
nn.BatchNorm2d(c_latent, affine=False), # then normalize them to have mean 0 and std 1
)
self.mean = nn.Parameter(torch.tensor([0.485, 0.456, 0.406]))
@ -34,7 +37,7 @@ class EfficientNetEncoder(nn.Module):
def forward(self, x):
x = x * 0.5 + 0.5
x = (x - self.mean.view([3,1,1])) / self.std.view([3,1,1])
x = (x - self.mean.view([3,1,1]).to(device=x.device, dtype=x.dtype)) / self.std.view([3,1,1]).to(device=x.device, dtype=x.dtype)
o = self.mapper(self.backbone(x))
return o
@ -44,39 +47,39 @@ class Previewer(nn.Module):
def __init__(self, c_in=16, c_hidden=512, c_out=3):
super().__init__()
self.blocks = nn.Sequential(
nn.Conv2d(c_in, c_hidden, kernel_size=1), # 16 channels to 512 channels
ops.Conv2d(c_in, c_hidden, kernel_size=1), # 16 channels to 512 channels
nn.GELU(),
nn.BatchNorm2d(c_hidden),
nn.Conv2d(c_hidden, c_hidden, kernel_size=3, padding=1),
ops.Conv2d(c_hidden, c_hidden, kernel_size=3, padding=1),
nn.GELU(),
nn.BatchNorm2d(c_hidden),
nn.ConvTranspose2d(c_hidden, c_hidden // 2, kernel_size=2, stride=2), # 16 -> 32
ops.ConvTranspose2d(c_hidden, c_hidden // 2, kernel_size=2, stride=2), # 16 -> 32
nn.GELU(),
nn.BatchNorm2d(c_hidden // 2),
nn.Conv2d(c_hidden // 2, c_hidden // 2, kernel_size=3, padding=1),
ops.Conv2d(c_hidden // 2, c_hidden // 2, kernel_size=3, padding=1),
nn.GELU(),
nn.BatchNorm2d(c_hidden // 2),
nn.ConvTranspose2d(c_hidden // 2, c_hidden // 4, kernel_size=2, stride=2), # 32 -> 64
ops.ConvTranspose2d(c_hidden // 2, c_hidden // 4, kernel_size=2, stride=2), # 32 -> 64
nn.GELU(),
nn.BatchNorm2d(c_hidden // 4),
nn.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1),
ops.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1),
nn.GELU(),
nn.BatchNorm2d(c_hidden // 4),
nn.ConvTranspose2d(c_hidden // 4, c_hidden // 4, kernel_size=2, stride=2), # 64 -> 128
ops.ConvTranspose2d(c_hidden // 4, c_hidden // 4, kernel_size=2, stride=2), # 64 -> 128
nn.GELU(),
nn.BatchNorm2d(c_hidden // 4),
nn.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1),
ops.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1),
nn.GELU(),
nn.BatchNorm2d(c_hidden // 4),
nn.Conv2d(c_hidden // 4, c_out, kernel_size=1),
ops.Conv2d(c_hidden // 4, c_out, kernel_size=1),
)
def forward(self, x):

View File

@ -105,7 +105,9 @@ class Modulation(nn.Module):
self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device)
def forward(self, vec: Tensor) -> tuple:
out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1)
if vec.ndim == 2:
vec = vec[:, None, :]
out = self.lin(nn.functional.silu(vec)).chunk(self.multiplier, dim=-1)
return (
ModulationOut(*out[:3]),
@ -113,6 +115,20 @@ class Modulation(nn.Module):
)
def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None):
if modulation_dims is None:
if m_add is not None:
return tensor * m_mult + m_add
else:
return tensor * m_mult
else:
for d in modulation_dims:
tensor[:, d[0]:d[1]] *= m_mult[:, d[2]]
if m_add is not None:
tensor[:, d[0]:d[1]] += m_add[:, d[2]]
return tensor
class DoubleStreamBlock(nn.Module):
def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None):
super().__init__()
@ -143,20 +159,20 @@ class DoubleStreamBlock(nn.Module):
)
self.flipped_img_txt = flipped_img_txt
def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None):
def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None):
img_mod1, img_mod2 = self.img_mod(vec)
txt_mod1, txt_mod2 = self.txt_mod(vec)
# prepare image for attention
img_modulated = self.img_norm1(img)
img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims_img)
img_qkv = self.img_attn.qkv(img_modulated)
img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
# prepare txt for attention
txt_modulated = self.txt_norm1(txt)
txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
txt_modulated = apply_mod(txt_modulated, (1 + txt_mod1.scale), txt_mod1.shift, modulation_dims_txt)
txt_qkv = self.txt_attn.qkv(txt_modulated)
txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
@ -179,12 +195,12 @@ class DoubleStreamBlock(nn.Module):
txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
# calculate the img bloks
img = img + img_mod1.gate * self.img_attn.proj(img_attn)
img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
img = img + apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
img = img + apply_mod(self.img_mlp(apply_mod(self.img_norm2(img), (1 + img_mod2.scale), img_mod2.shift, modulation_dims_img)), img_mod2.gate, None, modulation_dims_img)
# calculate the txt bloks
txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)
txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
txt += apply_mod(self.txt_attn.proj(txt_attn), txt_mod1.gate, None, modulation_dims_txt)
txt += apply_mod(self.txt_mlp(apply_mod(self.txt_norm2(txt), (1 + txt_mod2.scale), txt_mod2.shift, modulation_dims_txt)), txt_mod2.gate, None, modulation_dims_txt)
if txt.dtype == torch.float16:
txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504)
@ -228,9 +244,9 @@ class SingleStreamBlock(nn.Module):
self.mlp_act = nn.GELU(approximate="tanh")
self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations)
def forward(self, x: Tensor, vec: Tensor, pe: Tensor, attn_mask=None) -> Tensor:
def forward(self, x: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims=None) -> Tensor:
mod, _ = self.modulation(vec)
qkv, mlp = torch.split(self.linear1((1 + mod.scale) * self.pre_norm(x) + mod.shift), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
q, k = self.norm(q, k, v)
@ -239,7 +255,7 @@ class SingleStreamBlock(nn.Module):
attn = attention(q, k, v, pe=pe, mask=attn_mask)
# compute activation in mlp stream, cat again and run second linear layer
output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
x += mod.gate * output
x += apply_mod(output, mod.gate, None, modulation_dims)
if x.dtype == torch.float16:
x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504)
return x
@ -252,8 +268,11 @@ class LastLayer(nn.Module):
self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device)
self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device))
def forward(self, x: Tensor, vec: Tensor) -> Tensor:
shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
def forward(self, x: Tensor, vec: Tensor, modulation_dims=None) -> Tensor:
if vec.ndim == 2:
vec = vec[:, None, :]
shift, scale = self.adaLN_modulation(vec).chunk(2, dim=-1)
x = apply_mod(self.norm_final(x), (1 + scale), shift, modulation_dims)
x = self.linear(x)
return x

View File

@ -227,6 +227,7 @@ class HunyuanVideo(nn.Module):
timesteps: Tensor,
y: Tensor,
guidance: Tensor = None,
guiding_frame_index=None,
control=None,
transformer_options={},
) -> Tensor:
@ -237,7 +238,17 @@ class HunyuanVideo(nn.Module):
img = self.img_in(img)
vec = self.time_in(timestep_embedding(timesteps, 256, time_factor=1.0).to(img.dtype))
vec = vec + self.vector_in(y[:, :self.params.vec_in_dim])
if guiding_frame_index is not None:
token_replace_vec = self.time_in(timestep_embedding(guiding_frame_index, 256, time_factor=1.0))
vec_ = self.vector_in(y[:, :self.params.vec_in_dim])
vec = torch.cat([(vec_ + token_replace_vec).unsqueeze(1), (vec_ + vec).unsqueeze(1)], dim=1)
frame_tokens = (initial_shape[-1] // self.patch_size[-1]) * (initial_shape[-2] // self.patch_size[-2])
modulation_dims = [(0, frame_tokens, 0), (frame_tokens, None, 1)]
modulation_dims_txt = [(0, None, 1)]
else:
vec = vec + self.vector_in(y[:, :self.params.vec_in_dim])
modulation_dims = None
modulation_dims_txt = None
if self.params.guidance_embed:
if guidance is not None:
@ -264,14 +275,14 @@ class HunyuanVideo(nn.Module):
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"], out["txt"] = block(img=args["img"], txt=args["txt"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"])
out["img"], out["txt"] = block(img=args["img"], txt=args["txt"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"], modulation_dims_img=args["modulation_dims_img"], modulation_dims_txt=args["modulation_dims_txt"])
return out
out = blocks_replace[("double_block", i)]({"img": img, "txt": txt, "vec": vec, "pe": pe, "attention_mask": attn_mask}, {"original_block": block_wrap})
out = blocks_replace[("double_block", i)]({"img": img, "txt": txt, "vec": vec, "pe": pe, "attention_mask": attn_mask, 'modulation_dims_img': modulation_dims, 'modulation_dims_txt': modulation_dims_txt}, {"original_block": block_wrap})
txt = out["txt"]
img = out["img"]
else:
img, txt = block(img=img, txt=txt, vec=vec, pe=pe, attn_mask=attn_mask)
img, txt = block(img=img, txt=txt, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims_img=modulation_dims, modulation_dims_txt=modulation_dims_txt)
if control is not None: # Controlnet
control_i = control.get("input")
@ -286,13 +297,13 @@ class HunyuanVideo(nn.Module):
if ("single_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"] = block(args["img"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"])
out["img"] = block(args["img"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"], modulation_dims=args["modulation_dims"])
return out
out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe, "attention_mask": attn_mask}, {"original_block": block_wrap})
out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe, "attention_mask": attn_mask, 'modulation_dims': modulation_dims}, {"original_block": block_wrap})
img = out["img"]
else:
img = block(img, vec=vec, pe=pe, attn_mask=attn_mask)
img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims=modulation_dims)
if control is not None: # Controlnet
control_o = control.get("output")
@ -303,7 +314,7 @@ class HunyuanVideo(nn.Module):
img = img[:, : img_len]
img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels)
img = self.final_layer(img, vec, modulation_dims=modulation_dims) # (N, T, patch_size ** 2 * out_channels)
shape = initial_shape[-3:]
for i in range(len(shape)):
@ -313,7 +324,7 @@ class HunyuanVideo(nn.Module):
img = img.reshape(initial_shape[0], self.out_channels, initial_shape[2], initial_shape[3], initial_shape[4])
return img
def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, control=None, transformer_options={}, **kwargs):
def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, guiding_frame_index=None, control=None, transformer_options={}, **kwargs):
bs, c, t, h, w = x.shape
patch_size = self.patch_size
t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
@ -325,5 +336,5 @@ class HunyuanVideo(nn.Module):
img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).reshape(1, 1, -1)
img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs)
txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
out = self.forward_orig(x, img_ids, context, txt_ids, attention_mask, timestep, y, guidance, control, transformer_options)
out = self.forward_orig(x, img_ids, context, txt_ids, attention_mask, timestep, y, guidance, guiding_frame_index, control, transformer_options)
return out

View File

@ -7,7 +7,7 @@ from einops import rearrange
import math
from typing import Dict, Optional, Tuple
from .symmetric_patchifier import SymmetricPatchifier
from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
def get_timestep_embedding(
@ -377,12 +377,16 @@ class LTXVModel(torch.nn.Module):
positional_embedding_theta=10000.0,
positional_embedding_max_pos=[20, 2048, 2048],
causal_temporal_positioning=False,
vae_scale_factors=(8, 32, 32),
dtype=None, device=None, operations=None, **kwargs):
super().__init__()
self.generator = None
self.vae_scale_factors = vae_scale_factors
self.dtype = dtype
self.out_channels = in_channels
self.inner_dim = num_attention_heads * attention_head_dim
self.causal_temporal_positioning = causal_temporal_positioning
self.patchify_proj = operations.Linear(in_channels, self.inner_dim, bias=True, dtype=dtype, device=device)
@ -416,42 +420,23 @@ class LTXVModel(torch.nn.Module):
self.patchifier = SymmetricPatchifier(1)
def forward(self, x, timestep, context, attention_mask, frame_rate=25, guiding_latent=None, guiding_latent_noise_scale=0, transformer_options={}, **kwargs):
def forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs):
patches_replace = transformer_options.get("patches_replace", {})
indices_grid = self.patchifier.get_grid(
orig_num_frames=x.shape[2],
orig_height=x.shape[3],
orig_width=x.shape[4],
batch_size=x.shape[0],
scale_grid=((1 / frame_rate) * 8, 32, 32),
device=x.device,
)
if guiding_latent is not None:
ts = torch.ones([x.shape[0], 1, x.shape[2], x.shape[3], x.shape[4]], device=x.device, dtype=x.dtype)
input_ts = timestep.view([timestep.shape[0]] + [1] * (x.ndim - 1))
ts *= input_ts
ts[:, :, 0] = guiding_latent_noise_scale * (input_ts[:, :, 0] ** 2)
timestep = self.patchifier.patchify(ts)
input_x = x.clone()
x[:, :, 0] = guiding_latent[:, :, 0]
if guiding_latent_noise_scale > 0:
if self.generator is None:
self.generator = torch.Generator(device=x.device).manual_seed(42)
elif self.generator.device != x.device:
self.generator = torch.Generator(device=x.device).set_state(self.generator.get_state())
noise_shape = [guiding_latent.shape[0], guiding_latent.shape[1], 1, guiding_latent.shape[3], guiding_latent.shape[4]]
scale = guiding_latent_noise_scale * (input_ts ** 2)
guiding_noise = scale * torch.randn(size=noise_shape, device=x.device, generator=self.generator)
x[:, :, 0] = guiding_noise[:, :, 0] + x[:, :, 0] * (1.0 - scale[:, :, 0])
orig_shape = list(x.shape)
x = self.patchifier.patchify(x)
x, latent_coords = self.patchifier.patchify(x)
pixel_coords = latent_to_pixel_coords(
latent_coords=latent_coords,
scale_factors=self.vae_scale_factors,
causal_fix=self.causal_temporal_positioning,
)
if keyframe_idxs is not None:
pixel_coords[:, :, -keyframe_idxs.shape[2]:] = keyframe_idxs
fractional_coords = pixel_coords.to(torch.float32)
fractional_coords[:, 0] = fractional_coords[:, 0] * (1.0 / frame_rate)
x = self.patchify_proj(x)
timestep = timestep * 1000.0
@ -459,7 +444,7 @@ class LTXVModel(torch.nn.Module):
if attention_mask is not None and not torch.is_floating_point(attention_mask):
attention_mask = (attention_mask - 1).to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])) * torch.finfo(x.dtype).max
pe = precompute_freqs_cis(indices_grid, dim=self.inner_dim, out_dtype=x.dtype)
pe = precompute_freqs_cis(fractional_coords, dim=self.inner_dim, out_dtype=x.dtype)
batch_size = x.shape[0]
timestep, embedded_timestep = self.adaln_single(
@ -519,8 +504,4 @@ class LTXVModel(torch.nn.Module):
out_channels=orig_shape[1] // math.prod(self.patchifier.patch_size),
)
if guiding_latent is not None:
x[:, :, 0] = (input_x[:, :, 0] - guiding_latent[:, :, 0]) / input_ts[:, :, 0]
# print("res", x)
return x

View File

@ -6,16 +6,29 @@ from einops import rearrange
from torch import Tensor
def append_dims(x: torch.Tensor, target_dims: int) -> torch.Tensor:
"""Appends dimensions to the end of a tensor until it has target_dims dimensions."""
dims_to_append = target_dims - x.ndim
if dims_to_append < 0:
raise ValueError(
f"input has {x.ndim} dims but target_dims is {target_dims}, which is less"
)
elif dims_to_append == 0:
return x
return x[(...,) + (None,) * dims_to_append]
def latent_to_pixel_coords(
latent_coords: Tensor, scale_factors: Tuple[int, int, int], causal_fix: bool = False
) -> Tensor:
"""
Converts latent coordinates to pixel coordinates by scaling them according to the VAE's
configuration.
Args:
latent_coords (Tensor): A tensor of shape [batch_size, 3, num_latents]
containing the latent corner coordinates of each token.
scale_factors (Tuple[int, int, int]): The scale factors of the VAE's latent space.
causal_fix (bool): Whether to take into account the different temporal scale
of the first frame. Default = False for backwards compatibility.
Returns:
Tensor: A tensor of pixel coordinates corresponding to the input latent coordinates.
"""
pixel_coords = (
latent_coords
* torch.tensor(scale_factors, device=latent_coords.device)[None, :, None]
)
if causal_fix:
# Fix temporal scale for first frame to 1 due to causality
pixel_coords[:, 0] = (pixel_coords[:, 0] + 1 - scale_factors[0]).clamp(min=0)
return pixel_coords
class Patchifier(ABC):
@ -44,29 +57,26 @@ class Patchifier(ABC):
def patch_size(self):
return self._patch_size
def get_grid(
self, orig_num_frames, orig_height, orig_width, batch_size, scale_grid, device
def get_latent_coords(
self, latent_num_frames, latent_height, latent_width, batch_size, device
):
f = orig_num_frames // self._patch_size[0]
h = orig_height // self._patch_size[1]
w = orig_width // self._patch_size[2]
grid_h = torch.arange(h, dtype=torch.float32, device=device)
grid_w = torch.arange(w, dtype=torch.float32, device=device)
grid_f = torch.arange(f, dtype=torch.float32, device=device)
grid = torch.meshgrid(grid_f, grid_h, grid_w, indexing='ij')
grid = torch.stack(grid, dim=0)
grid = grid.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
if scale_grid is not None:
for i in range(3):
if isinstance(scale_grid[i], Tensor):
scale = append_dims(scale_grid[i], grid.ndim - 1)
else:
scale = scale_grid[i]
grid[:, i, ...] = grid[:, i, ...] * scale * self._patch_size[i]
grid = rearrange(grid, "b c f h w -> b c (f h w)", b=batch_size)
return grid
"""
Return a tensor of shape [batch_size, 3, num_patches] containing the
top-left corner latent coordinates of each latent patch.
The tensor is repeated for each batch element.
"""
latent_sample_coords = torch.meshgrid(
torch.arange(0, latent_num_frames, self._patch_size[0], device=device),
torch.arange(0, latent_height, self._patch_size[1], device=device),
torch.arange(0, latent_width, self._patch_size[2], device=device),
indexing="ij",
)
latent_sample_coords = torch.stack(latent_sample_coords, dim=0)
latent_coords = latent_sample_coords.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
latent_coords = rearrange(
latent_coords, "b c f h w -> b c (f h w)", b=batch_size
)
return latent_coords
class SymmetricPatchifier(Patchifier):
@ -74,6 +84,8 @@ class SymmetricPatchifier(Patchifier):
self,
latents: Tensor,
) -> Tuple[Tensor, Tensor]:
b, _, f, h, w = latents.shape
latent_coords = self.get_latent_coords(f, h, w, b, latents.device)
latents = rearrange(
latents,
"b c (f p1) (h p2) (w p3) -> b (f h w) (c p1 p2 p3)",
@ -81,7 +93,7 @@ class SymmetricPatchifier(Patchifier):
p2=self._patch_size[1],
p3=self._patch_size[2],
)
return latents
return latents, latent_coords
def unpatchify(
self,

View File

@ -15,6 +15,7 @@ class CausalConv3d(nn.Module):
stride: Union[int, Tuple[int]] = 1,
dilation: int = 1,
groups: int = 1,
spatial_padding_mode: str = "zeros",
**kwargs,
):
super().__init__()
@ -38,7 +39,7 @@ class CausalConv3d(nn.Module):
stride=stride,
dilation=dilation,
padding=padding,
padding_mode="zeros",
padding_mode=spatial_padding_mode,
groups=groups,
)

View File

@ -1,13 +1,15 @@
from __future__ import annotations
import torch
from torch import nn
from functools import partial
import math
from einops import rearrange
from typing import Optional, Tuple, Union
from typing import List, Optional, Tuple, Union
from .conv_nd_factory import make_conv_nd, make_linear_nd
from .pixel_norm import PixelNorm
from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
import comfy.ops
ops = comfy.ops.disable_weight_init
class Encoder(nn.Module):
@ -32,7 +34,7 @@ class Encoder(nn.Module):
norm_layer (`str`, *optional*, defaults to `group_norm`):
The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
latent_log_var (`str`, *optional*, defaults to `per_channel`):
The number of channels for the log variance. Can be either `per_channel`, `uniform`, or `none`.
The number of channels for the log variance. Can be either `per_channel`, `uniform`, `constant` or `none`.
"""
def __init__(
@ -40,12 +42,13 @@ class Encoder(nn.Module):
dims: Union[int, Tuple[int, int]] = 3,
in_channels: int = 3,
out_channels: int = 3,
blocks=[("res_x", 1)],
blocks: List[Tuple[str, int | dict]] = [("res_x", 1)],
base_channels: int = 128,
norm_num_groups: int = 32,
patch_size: Union[int, Tuple[int]] = 1,
norm_layer: str = "group_norm", # group_norm, pixel_norm
latent_log_var: str = "per_channel",
spatial_padding_mode: str = "zeros",
):
super().__init__()
self.patch_size = patch_size
@ -65,6 +68,7 @@ class Encoder(nn.Module):
stride=1,
padding=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
self.down_blocks = nn.ModuleList([])
@ -82,6 +86,7 @@ class Encoder(nn.Module):
resnet_eps=1e-6,
resnet_groups=norm_num_groups,
norm_layer=norm_layer,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "res_x_y":
output_channel = block_params.get("multiplier", 2) * output_channel
@ -92,6 +97,7 @@ class Encoder(nn.Module):
eps=1e-6,
groups=norm_num_groups,
norm_layer=norm_layer,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_time":
block = make_conv_nd(
@ -101,6 +107,7 @@ class Encoder(nn.Module):
kernel_size=3,
stride=(2, 1, 1),
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_space":
block = make_conv_nd(
@ -110,6 +117,7 @@ class Encoder(nn.Module):
kernel_size=3,
stride=(1, 2, 2),
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_all":
block = make_conv_nd(
@ -119,6 +127,7 @@ class Encoder(nn.Module):
kernel_size=3,
stride=(2, 2, 2),
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_all_x_y":
output_channel = block_params.get("multiplier", 2) * output_channel
@ -129,6 +138,34 @@ class Encoder(nn.Module):
kernel_size=3,
stride=(2, 2, 2),
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_all_res":
output_channel = block_params.get("multiplier", 2) * output_channel
block = SpaceToDepthDownsample(
dims=dims,
in_channels=input_channel,
out_channels=output_channel,
stride=(2, 2, 2),
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_space_res":
output_channel = block_params.get("multiplier", 2) * output_channel
block = SpaceToDepthDownsample(
dims=dims,
in_channels=input_channel,
out_channels=output_channel,
stride=(1, 2, 2),
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_time_res":
output_channel = block_params.get("multiplier", 2) * output_channel
block = SpaceToDepthDownsample(
dims=dims,
in_channels=input_channel,
out_channels=output_channel,
stride=(2, 1, 1),
spatial_padding_mode=spatial_padding_mode,
)
else:
raise ValueError(f"unknown block: {block_name}")
@ -152,10 +189,18 @@ class Encoder(nn.Module):
conv_out_channels *= 2
elif latent_log_var == "uniform":
conv_out_channels += 1
elif latent_log_var == "constant":
conv_out_channels += 1
elif latent_log_var != "none":
raise ValueError(f"Invalid latent_log_var: {latent_log_var}")
self.conv_out = make_conv_nd(
dims, output_channel, conv_out_channels, 3, padding=1, causal=True
dims,
output_channel,
conv_out_channels,
3,
padding=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
self.gradient_checkpointing = False
@ -197,6 +242,15 @@ class Encoder(nn.Module):
sample = torch.cat([sample, repeated_last_channel], dim=1)
else:
raise ValueError(f"Invalid input shape: {sample.shape}")
elif self.latent_log_var == "constant":
sample = sample[:, :-1, ...]
approx_ln_0 = (
-30
) # this is the minimal clamp value in DiagonalGaussianDistribution objects
sample = torch.cat(
[sample, torch.ones_like(sample, device=sample.device) * approx_ln_0],
dim=1,
)
return sample
@ -231,7 +285,7 @@ class Decoder(nn.Module):
dims,
in_channels: int = 3,
out_channels: int = 3,
blocks=[("res_x", 1)],
blocks: List[Tuple[str, int | dict]] = [("res_x", 1)],
base_channels: int = 128,
layers_per_block: int = 2,
norm_num_groups: int = 32,
@ -239,6 +293,7 @@ class Decoder(nn.Module):
norm_layer: str = "group_norm",
causal: bool = True,
timestep_conditioning: bool = False,
spatial_padding_mode: str = "zeros",
):
super().__init__()
self.patch_size = patch_size
@ -264,6 +319,7 @@ class Decoder(nn.Module):
stride=1,
padding=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
self.up_blocks = nn.ModuleList([])
@ -283,6 +339,7 @@ class Decoder(nn.Module):
norm_layer=norm_layer,
inject_noise=block_params.get("inject_noise", False),
timestep_conditioning=timestep_conditioning,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "attn_res_x":
block = UNetMidBlock3D(
@ -294,6 +351,7 @@ class Decoder(nn.Module):
inject_noise=block_params.get("inject_noise", False),
timestep_conditioning=timestep_conditioning,
attention_head_dim=block_params["attention_head_dim"],
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "res_x_y":
output_channel = output_channel // block_params.get("multiplier", 2)
@ -306,14 +364,21 @@ class Decoder(nn.Module):
norm_layer=norm_layer,
inject_noise=block_params.get("inject_noise", False),
timestep_conditioning=False,
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_time":
block = DepthToSpaceUpsample(
dims=dims, in_channels=input_channel, stride=(2, 1, 1)
dims=dims,
in_channels=input_channel,
stride=(2, 1, 1),
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_space":
block = DepthToSpaceUpsample(
dims=dims, in_channels=input_channel, stride=(1, 2, 2)
dims=dims,
in_channels=input_channel,
stride=(1, 2, 2),
spatial_padding_mode=spatial_padding_mode,
)
elif block_name == "compress_all":
output_channel = output_channel // block_params.get("multiplier", 1)
@ -323,6 +388,7 @@ class Decoder(nn.Module):
stride=(2, 2, 2),
residual=block_params.get("residual", False),
out_channels_reduction_factor=block_params.get("multiplier", 1),
spatial_padding_mode=spatial_padding_mode,
)
else:
raise ValueError(f"unknown layer: {block_name}")
@ -340,7 +406,13 @@ class Decoder(nn.Module):
self.conv_act = nn.SiLU()
self.conv_out = make_conv_nd(
dims, output_channel, out_channels, 3, padding=1, causal=True
dims,
output_channel,
out_channels,
3,
padding=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
self.gradient_checkpointing = False
@ -433,6 +505,12 @@ class UNetMidBlock3D(nn.Module):
resnet_eps (`float`, *optional*, 1e-6 ): The epsilon value for the resnet blocks.
resnet_groups (`int`, *optional*, defaults to 32):
The number of groups to use in the group normalization layers of the resnet blocks.
norm_layer (`str`, *optional*, defaults to `group_norm`):
The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
inject_noise (`bool`, *optional*, defaults to `False`):
Whether to inject noise into the hidden states.
timestep_conditioning (`bool`, *optional*, defaults to `False`):
Whether to condition the hidden states on the timestep.
Returns:
`torch.FloatTensor`: The output of the last residual block, which is a tensor of shape `(batch_size,
@ -451,6 +529,7 @@ class UNetMidBlock3D(nn.Module):
norm_layer: str = "group_norm",
inject_noise: bool = False,
timestep_conditioning: bool = False,
spatial_padding_mode: str = "zeros",
):
super().__init__()
resnet_groups = (
@ -476,13 +555,17 @@ class UNetMidBlock3D(nn.Module):
norm_layer=norm_layer,
inject_noise=inject_noise,
timestep_conditioning=timestep_conditioning,
spatial_padding_mode=spatial_padding_mode,
)
for _ in range(num_layers)
]
)
def forward(
self, hidden_states: torch.FloatTensor, causal: bool = True, timestep: Optional[torch.Tensor] = None
self,
hidden_states: torch.FloatTensor,
causal: bool = True,
timestep: Optional[torch.Tensor] = None,
) -> torch.FloatTensor:
timestep_embed = None
if self.timestep_conditioning:
@ -507,9 +590,62 @@ class UNetMidBlock3D(nn.Module):
return hidden_states
class SpaceToDepthDownsample(nn.Module):
def __init__(self, dims, in_channels, out_channels, stride, spatial_padding_mode):
super().__init__()
self.stride = stride
self.group_size = in_channels * math.prod(stride) // out_channels
self.conv = make_conv_nd(
dims=dims,
in_channels=in_channels,
out_channels=out_channels // math.prod(stride),
kernel_size=3,
stride=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
def forward(self, x, causal: bool = True):
if self.stride[0] == 2:
x = torch.cat(
[x[:, :, :1, :, :], x], dim=2
) # duplicate first frames for padding
# skip connection
x_in = rearrange(
x,
"b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
p1=self.stride[0],
p2=self.stride[1],
p3=self.stride[2],
)
x_in = rearrange(x_in, "b (c g) d h w -> b c g d h w", g=self.group_size)
x_in = x_in.mean(dim=2)
# conv
x = self.conv(x, causal=causal)
x = rearrange(
x,
"b c (d p1) (h p2) (w p3) -> b (c p1 p2 p3) d h w",
p1=self.stride[0],
p2=self.stride[1],
p3=self.stride[2],
)
x = x + x_in
return x
class DepthToSpaceUpsample(nn.Module):
def __init__(
self, dims, in_channels, stride, residual=False, out_channels_reduction_factor=1
self,
dims,
in_channels,
stride,
residual=False,
out_channels_reduction_factor=1,
spatial_padding_mode="zeros",
):
super().__init__()
self.stride = stride
@ -523,6 +659,7 @@ class DepthToSpaceUpsample(nn.Module):
kernel_size=3,
stride=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
self.residual = residual
self.out_channels_reduction_factor = out_channels_reduction_factor
@ -558,7 +695,7 @@ class DepthToSpaceUpsample(nn.Module):
class LayerNorm(nn.Module):
def __init__(self, dim, eps, elementwise_affine=True) -> None:
super().__init__()
self.norm = nn.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
self.norm = ops.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
def forward(self, x):
x = rearrange(x, "b c d h w -> b d h w c")
@ -591,6 +728,7 @@ class ResnetBlock3D(nn.Module):
norm_layer: str = "group_norm",
inject_noise: bool = False,
timestep_conditioning: bool = False,
spatial_padding_mode: str = "zeros",
):
super().__init__()
self.in_channels = in_channels
@ -617,6 +755,7 @@ class ResnetBlock3D(nn.Module):
stride=1,
padding=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
if inject_noise:
@ -641,6 +780,7 @@ class ResnetBlock3D(nn.Module):
stride=1,
padding=1,
causal=True,
spatial_padding_mode=spatial_padding_mode,
)
if inject_noise:
@ -801,9 +941,44 @@ class processor(nn.Module):
return (x - self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)) / self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)
class VideoVAE(nn.Module):
def __init__(self, version=0):
def __init__(self, version=0, config=None):
super().__init__()
if config is None:
config = self.guess_config(version)
self.timestep_conditioning = config.get("timestep_conditioning", False)
double_z = config.get("double_z", True)
latent_log_var = config.get(
"latent_log_var", "per_channel" if double_z else "none"
)
self.encoder = Encoder(
dims=config["dims"],
in_channels=config.get("in_channels", 3),
out_channels=config["latent_channels"],
blocks=config.get("encoder_blocks", config.get("encoder_blocks", config.get("blocks"))),
patch_size=config.get("patch_size", 1),
latent_log_var=latent_log_var,
norm_layer=config.get("norm_layer", "group_norm"),
spatial_padding_mode=config.get("spatial_padding_mode", "zeros"),
)
self.decoder = Decoder(
dims=config["dims"],
in_channels=config["latent_channels"],
out_channels=config.get("out_channels", 3),
blocks=config.get("decoder_blocks", config.get("decoder_blocks", config.get("blocks"))),
patch_size=config.get("patch_size", 1),
norm_layer=config.get("norm_layer", "group_norm"),
causal=config.get("causal_decoder", False),
timestep_conditioning=self.timestep_conditioning,
spatial_padding_mode=config.get("spatial_padding_mode", "zeros"),
)
self.per_channel_statistics = processor()
def guess_config(self, version):
if version == 0:
config = {
"_class_name": "CausalVideoAutoencoder",
@ -830,7 +1005,7 @@ class VideoVAE(nn.Module):
"use_quant_conv": False,
"causal_decoder": False,
}
else:
elif version == 1:
config = {
"_class_name": "CausalVideoAutoencoder",
"dims": 3,
@ -866,37 +1041,47 @@ class VideoVAE(nn.Module):
"causal_decoder": False,
"timestep_conditioning": True,
}
double_z = config.get("double_z", True)
latent_log_var = config.get(
"latent_log_var", "per_channel" if double_z else "none"
)
self.encoder = Encoder(
dims=config["dims"],
in_channels=config.get("in_channels", 3),
out_channels=config["latent_channels"],
blocks=config.get("encoder_blocks", config.get("encoder_blocks", config.get("blocks"))),
patch_size=config.get("patch_size", 1),
latent_log_var=latent_log_var,
norm_layer=config.get("norm_layer", "group_norm"),
)
self.decoder = Decoder(
dims=config["dims"],
in_channels=config["latent_channels"],
out_channels=config.get("out_channels", 3),
blocks=config.get("decoder_blocks", config.get("decoder_blocks", config.get("blocks"))),
patch_size=config.get("patch_size", 1),
norm_layer=config.get("norm_layer", "group_norm"),
causal=config.get("causal_decoder", False),
timestep_conditioning=config.get("timestep_conditioning", False),
)
self.timestep_conditioning = config.get("timestep_conditioning", False)
self.per_channel_statistics = processor()
else:
config = {
"_class_name": "CausalVideoAutoencoder",
"dims": 3,
"in_channels": 3,
"out_channels": 3,
"latent_channels": 128,
"encoder_blocks": [
["res_x", {"num_layers": 4}],
["compress_space_res", {"multiplier": 2}],
["res_x", {"num_layers": 6}],
["compress_time_res", {"multiplier": 2}],
["res_x", {"num_layers": 6}],
["compress_all_res", {"multiplier": 2}],
["res_x", {"num_layers": 2}],
["compress_all_res", {"multiplier": 2}],
["res_x", {"num_layers": 2}]
],
"decoder_blocks": [
["res_x", {"num_layers": 5, "inject_noise": False}],
["compress_all", {"residual": True, "multiplier": 2}],
["res_x", {"num_layers": 5, "inject_noise": False}],
["compress_all", {"residual": True, "multiplier": 2}],
["res_x", {"num_layers": 5, "inject_noise": False}],
["compress_all", {"residual": True, "multiplier": 2}],
["res_x", {"num_layers": 5, "inject_noise": False}]
],
"scaling_factor": 1.0,
"norm_layer": "pixel_norm",
"patch_size": 4,
"latent_log_var": "uniform",
"use_quant_conv": False,
"causal_decoder": False,
"timestep_conditioning": True
}
return config
def encode(self, x):
frames_count = x.shape[2]
if ((frames_count - 1) % 8) != 0:
raise ValueError("Invalid number of frames: Encode input must have 1 + 8 * x frames (e.g., 1, 9, 17, ...). Please check your input.")
means, logvar = torch.chunk(self.encoder(x), 2, dim=1)
return self.per_channel_statistics.normalize(means)

View File

@ -17,7 +17,11 @@ def make_conv_nd(
groups=1,
bias=True,
causal=False,
spatial_padding_mode="zeros",
temporal_padding_mode="zeros",
):
if not (spatial_padding_mode == temporal_padding_mode or causal):
raise NotImplementedError("spatial and temporal padding modes must be equal")
if dims == 2:
return ops.Conv2d(
in_channels=in_channels,
@ -28,6 +32,7 @@ def make_conv_nd(
dilation=dilation,
groups=groups,
bias=bias,
padding_mode=spatial_padding_mode,
)
elif dims == 3:
if causal:
@ -40,6 +45,7 @@ def make_conv_nd(
dilation=dilation,
groups=groups,
bias=bias,
spatial_padding_mode=spatial_padding_mode,
)
return ops.Conv3d(
in_channels=in_channels,
@ -50,6 +56,7 @@ def make_conv_nd(
dilation=dilation,
groups=groups,
bias=bias,
padding_mode=spatial_padding_mode,
)
elif dims == (2, 1):
return DualConv3d(
@ -59,6 +66,7 @@ def make_conv_nd(
stride=stride,
padding=padding,
bias=bias,
padding_mode=spatial_padding_mode,
)
else:
raise ValueError(f"unsupported dimensions: {dims}")

View File

@ -18,11 +18,13 @@ class DualConv3d(nn.Module):
dilation: Union[int, Tuple[int, int, int]] = 1,
groups=1,
bias=True,
padding_mode="zeros",
):
super(DualConv3d, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.padding_mode = padding_mode
# Ensure kernel_size, stride, padding, and dilation are tuples of length 3
if isinstance(kernel_size, int):
kernel_size = (kernel_size, kernel_size, kernel_size)
@ -108,6 +110,7 @@ class DualConv3d(nn.Module):
self.padding1,
self.dilation1,
self.groups,
padding_mode=self.padding_mode,
)
if skip_time_conv:
@ -122,6 +125,7 @@ class DualConv3d(nn.Module):
self.padding2,
self.dilation2,
self.groups,
padding_mode=self.padding_mode,
)
return x
@ -137,7 +141,16 @@ class DualConv3d(nn.Module):
stride1 = (self.stride1[1], self.stride1[2])
padding1 = (self.padding1[1], self.padding1[2])
dilation1 = (self.dilation1[1], self.dilation1[2])
x = F.conv2d(x, weight1, self.bias1, stride1, padding1, dilation1, self.groups)
x = F.conv2d(
x,
weight1,
self.bias1,
stride1,
padding1,
dilation1,
self.groups,
padding_mode=self.padding_mode,
)
_, _, h, w = x.shape
@ -154,7 +167,16 @@ class DualConv3d(nn.Module):
stride2 = self.stride2[0]
padding2 = self.padding2[0]
dilation2 = self.dilation2[0]
x = F.conv1d(x, weight2, self.bias2, stride2, padding2, dilation2, self.groups)
x = F.conv1d(
x,
weight2,
self.bias2,
stride2,
padding2,
dilation2,
self.groups,
padding_mode=self.padding_mode,
)
x = rearrange(x, "(b h w) c d -> b c d h w", b=b, h=h, w=w)
return x

View File

@ -24,6 +24,13 @@ if model_management.sage_attention_enabled():
logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention")
exit(-1)
if model_management.flash_attention_enabled():
try:
from flash_attn import flash_attn_func
except ModuleNotFoundError:
logging.error(f"\n\nTo use the `--use-flash-attention` feature, the `flash-attn` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install flash-attn")
exit(-1)
from comfy.cli_args import args
import comfy.ops
ops = comfy.ops.disable_weight_init
@ -496,6 +503,63 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=
return out
try:
@torch.library.custom_op("flash_attention::flash_attn", mutates_args=())
def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor:
return flash_attn_func(q, k, v, dropout_p=dropout_p, causal=causal)
@flash_attn_wrapper.register_fake
def flash_attn_fake(q, k, v, dropout_p=0.0, causal=False):
# Output shape is the same as q
return q.new_empty(q.shape)
except AttributeError as error:
FLASH_ATTN_ERROR = error
def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor:
assert False, f"Could not define flash_attn_wrapper: {FLASH_ATTN_ERROR}"
def attention_flash(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False):
if skip_reshape:
b, _, _, dim_head = q.shape
else:
b, _, dim_head = q.shape
dim_head //= heads
q, k, v = map(
lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2),
(q, k, v),
)
if mask is not None:
# add a batch dimension if there isn't already one
if mask.ndim == 2:
mask = mask.unsqueeze(0)
# add a heads dimension if there isn't already one
if mask.ndim == 3:
mask = mask.unsqueeze(1)
try:
assert mask is None
out = flash_attn_wrapper(
q.transpose(1, 2),
k.transpose(1, 2),
v.transpose(1, 2),
dropout_p=0.0,
causal=False,
).transpose(1, 2)
except Exception as e:
logging.warning(f"Flash Attention failed, using default SDPA: {e}")
out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False)
if not skip_output_reshape:
out = (
out.transpose(1, 2).reshape(b, -1, heads * dim_head)
)
return out
optimized_attention = attention_basic
if model_management.sage_attention_enabled():
@ -504,6 +568,9 @@ if model_management.sage_attention_enabled():
elif model_management.xformers_enabled():
logging.info("Using xformers attention")
optimized_attention = attention_xformers
elif model_management.flash_attention_enabled():
logging.info("Using Flash Attention")
optimized_attention = attention_flash
elif model_management.pytorch_attention_enabled():
logging.info("Using pytorch attention")
optimized_attention = attention_pytorch

485
comfy/ldm/wan/model.py Normal file
View File

@ -0,0 +1,485 @@
# original version: https://github.com/Wan-Video/Wan2.1/blob/main/wan/modules/model.py
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import math
import torch
import torch.nn as nn
from einops import repeat
from comfy.ldm.modules.attention import optimized_attention
from comfy.ldm.flux.layers import EmbedND
from comfy.ldm.flux.math import apply_rope
from comfy.ldm.modules.diffusionmodules.mmdit import RMSNorm
import comfy.ldm.common_dit
import comfy.model_management
def sinusoidal_embedding_1d(dim, position):
# preprocess
assert dim % 2 == 0
half = dim // 2
position = position.type(torch.float32)
# calculation
sinusoid = torch.outer(
position, torch.pow(10000, -torch.arange(half).to(position).div(half)))
x = torch.cat([torch.cos(sinusoid), torch.sin(sinusoid)], dim=1)
return x
class WanSelfAttention(nn.Module):
def __init__(self,
dim,
num_heads,
window_size=(-1, -1),
qk_norm=True,
eps=1e-6, operation_settings={}):
assert dim % num_heads == 0
super().__init__()
self.dim = dim
self.num_heads = num_heads
self.head_dim = dim // num_heads
self.window_size = window_size
self.qk_norm = qk_norm
self.eps = eps
# layers
self.q = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.k = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.v = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.o = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.norm_q = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity()
self.norm_k = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity()
def forward(self, x, freqs):
r"""
Args:
x(Tensor): Shape [B, L, num_heads, C / num_heads]
freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
"""
b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim
# query, key, value function
def qkv_fn(x):
q = self.norm_q(self.q(x)).view(b, s, n, d)
k = self.norm_k(self.k(x)).view(b, s, n, d)
v = self.v(x).view(b, s, n * d)
return q, k, v
q, k, v = qkv_fn(x)
q, k = apply_rope(q, k, freqs)
x = optimized_attention(
q.view(b, s, n * d),
k.view(b, s, n * d),
v,
heads=self.num_heads,
)
x = self.o(x)
return x
class WanT2VCrossAttention(WanSelfAttention):
def forward(self, x, context):
r"""
Args:
x(Tensor): Shape [B, L1, C]
context(Tensor): Shape [B, L2, C]
"""
# compute query, key, value
q = self.norm_q(self.q(x))
k = self.norm_k(self.k(context))
v = self.v(context)
# compute attention
x = optimized_attention(q, k, v, heads=self.num_heads)
x = self.o(x)
return x
class WanI2VCrossAttention(WanSelfAttention):
def __init__(self,
dim,
num_heads,
window_size=(-1, -1),
qk_norm=True,
eps=1e-6, operation_settings={}):
super().__init__(dim, num_heads, window_size, qk_norm, eps, operation_settings=operation_settings)
self.k_img = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.v_img = operation_settings.get("operations").Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
# self.alpha = nn.Parameter(torch.zeros((1, )))
self.norm_k_img = RMSNorm(dim, eps=eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if qk_norm else nn.Identity()
def forward(self, x, context):
r"""
Args:
x(Tensor): Shape [B, L1, C]
context(Tensor): Shape [B, L2, C]
"""
context_img = context[:, :257]
context = context[:, 257:]
# compute query, key, value
q = self.norm_q(self.q(x))
k = self.norm_k(self.k(context))
v = self.v(context)
k_img = self.norm_k_img(self.k_img(context_img))
v_img = self.v_img(context_img)
img_x = optimized_attention(q, k_img, v_img, heads=self.num_heads)
# compute attention
x = optimized_attention(q, k, v, heads=self.num_heads)
# output
x = x + img_x
x = self.o(x)
return x
WAN_CROSSATTENTION_CLASSES = {
't2v_cross_attn': WanT2VCrossAttention,
'i2v_cross_attn': WanI2VCrossAttention,
}
class WanAttentionBlock(nn.Module):
def __init__(self,
cross_attn_type,
dim,
ffn_dim,
num_heads,
window_size=(-1, -1),
qk_norm=True,
cross_attn_norm=False,
eps=1e-6, operation_settings={}):
super().__init__()
self.dim = dim
self.ffn_dim = ffn_dim
self.num_heads = num_heads
self.window_size = window_size
self.qk_norm = qk_norm
self.cross_attn_norm = cross_attn_norm
self.eps = eps
# layers
self.norm1 = operation_settings.get("operations").LayerNorm(dim, eps, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.self_attn = WanSelfAttention(dim, num_heads, window_size, qk_norm,
eps, operation_settings=operation_settings)
self.norm3 = operation_settings.get("operations").LayerNorm(
dim, eps,
elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")) if cross_attn_norm else nn.Identity()
self.cross_attn = WAN_CROSSATTENTION_CLASSES[cross_attn_type](dim,
num_heads,
(-1, -1),
qk_norm,
eps, operation_settings=operation_settings)
self.norm2 = operation_settings.get("operations").LayerNorm(dim, eps, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.ffn = nn.Sequential(
operation_settings.get("operations").Linear(dim, ffn_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.GELU(approximate='tanh'),
operation_settings.get("operations").Linear(ffn_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
# modulation
self.modulation = nn.Parameter(torch.empty(1, 6, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
def forward(
self,
x,
e,
freqs,
context,
):
r"""
Args:
x(Tensor): Shape [B, L, C]
e(Tensor): Shape [B, 6, C]
freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
"""
# assert e.dtype == torch.float32
e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
# assert e[0].dtype == torch.float32
# self-attention
y = self.self_attn(
self.norm1(x) * (1 + e[1]) + e[0],
freqs)
x = x + y * e[2]
# cross-attention & ffn
x = x + self.cross_attn(self.norm3(x), context)
y = self.ffn(self.norm2(x) * (1 + e[4]) + e[3])
x = x + y * e[5]
return x
class Head(nn.Module):
def __init__(self, dim, out_dim, patch_size, eps=1e-6, operation_settings={}):
super().__init__()
self.dim = dim
self.out_dim = out_dim
self.patch_size = patch_size
self.eps = eps
# layers
out_dim = math.prod(patch_size) * out_dim
self.norm = operation_settings.get("operations").LayerNorm(dim, eps, elementwise_affine=False, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
self.head = operation_settings.get("operations").Linear(dim, out_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
# modulation
self.modulation = nn.Parameter(torch.empty(1, 2, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
def forward(self, x, e):
r"""
Args:
x(Tensor): Shape [B, L1, C]
e(Tensor): Shape [B, C]
"""
# assert e.dtype == torch.float32
e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e.unsqueeze(1)).chunk(2, dim=1)
x = (self.head(self.norm(x) * (1 + e[1]) + e[0]))
return x
class MLPProj(torch.nn.Module):
def __init__(self, in_dim, out_dim, operation_settings={}):
super().__init__()
self.proj = torch.nn.Sequential(
operation_settings.get("operations").LayerNorm(in_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), operation_settings.get("operations").Linear(in_dim, in_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
torch.nn.GELU(), operation_settings.get("operations").Linear(in_dim, out_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
operation_settings.get("operations").LayerNorm(out_dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
def forward(self, image_embeds):
clip_extra_context_tokens = self.proj(image_embeds)
return clip_extra_context_tokens
class WanModel(torch.nn.Module):
r"""
Wan diffusion backbone supporting both text-to-video and image-to-video.
"""
def __init__(self,
model_type='t2v',
patch_size=(1, 2, 2),
text_len=512,
in_dim=16,
dim=2048,
ffn_dim=8192,
freq_dim=256,
text_dim=4096,
out_dim=16,
num_heads=16,
num_layers=32,
window_size=(-1, -1),
qk_norm=True,
cross_attn_norm=True,
eps=1e-6,
image_model=None,
device=None,
dtype=None,
operations=None,
):
r"""
Initialize the diffusion model backbone.
Args:
model_type (`str`, *optional*, defaults to 't2v'):
Model variant - 't2v' (text-to-video) or 'i2v' (image-to-video)
patch_size (`tuple`, *optional*, defaults to (1, 2, 2)):
3D patch dimensions for video embedding (t_patch, h_patch, w_patch)
text_len (`int`, *optional*, defaults to 512):
Fixed length for text embeddings
in_dim (`int`, *optional*, defaults to 16):
Input video channels (C_in)
dim (`int`, *optional*, defaults to 2048):
Hidden dimension of the transformer
ffn_dim (`int`, *optional*, defaults to 8192):
Intermediate dimension in feed-forward network
freq_dim (`int`, *optional*, defaults to 256):
Dimension for sinusoidal time embeddings
text_dim (`int`, *optional*, defaults to 4096):
Input dimension for text embeddings
out_dim (`int`, *optional*, defaults to 16):
Output video channels (C_out)
num_heads (`int`, *optional*, defaults to 16):
Number of attention heads
num_layers (`int`, *optional*, defaults to 32):
Number of transformer blocks
window_size (`tuple`, *optional*, defaults to (-1, -1)):
Window size for local attention (-1 indicates global attention)
qk_norm (`bool`, *optional*, defaults to True):
Enable query/key normalization
cross_attn_norm (`bool`, *optional*, defaults to False):
Enable cross-attention normalization
eps (`float`, *optional*, defaults to 1e-6):
Epsilon value for normalization layers
"""
super().__init__()
self.dtype = dtype
operation_settings = {"operations": operations, "device": device, "dtype": dtype}
assert model_type in ['t2v', 'i2v']
self.model_type = model_type
self.patch_size = patch_size
self.text_len = text_len
self.in_dim = in_dim
self.dim = dim
self.ffn_dim = ffn_dim
self.freq_dim = freq_dim
self.text_dim = text_dim
self.out_dim = out_dim
self.num_heads = num_heads
self.num_layers = num_layers
self.window_size = window_size
self.qk_norm = qk_norm
self.cross_attn_norm = cross_attn_norm
self.eps = eps
# embeddings
self.patch_embedding = operations.Conv3d(
in_dim, dim, kernel_size=patch_size, stride=patch_size, device=operation_settings.get("device"), dtype=torch.float32)
self.text_embedding = nn.Sequential(
operations.Linear(text_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.GELU(approximate='tanh'),
operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
self.time_embedding = nn.Sequential(
operations.Linear(freq_dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")), nn.SiLU(), operations.Linear(dim, dim, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
self.time_projection = nn.Sequential(nn.SiLU(), operations.Linear(dim, dim * 6, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")))
# blocks
cross_attn_type = 't2v_cross_attn' if model_type == 't2v' else 'i2v_cross_attn'
self.blocks = nn.ModuleList([
WanAttentionBlock(cross_attn_type, dim, ffn_dim, num_heads,
window_size, qk_norm, cross_attn_norm, eps, operation_settings=operation_settings)
for _ in range(num_layers)
])
# head
self.head = Head(dim, out_dim, patch_size, eps, operation_settings=operation_settings)
d = dim // num_heads
self.rope_embedder = EmbedND(dim=d, theta=10000.0, axes_dim=[d - 4 * (d // 6), 2 * (d // 6), 2 * (d // 6)])
if model_type == 'i2v':
self.img_emb = MLPProj(1280, dim, operation_settings=operation_settings)
else:
self.img_emb = None
def forward_orig(
self,
x,
t,
context,
clip_fea=None,
freqs=None,
transformer_options={},
):
r"""
Forward pass through the diffusion model
Args:
x (Tensor):
List of input video tensors with shape [B, C_in, F, H, W]
t (Tensor):
Diffusion timesteps tensor of shape [B]
context (List[Tensor]):
List of text embeddings each with shape [B, L, C]
seq_len (`int`):
Maximum sequence length for positional encoding
clip_fea (Tensor, *optional*):
CLIP image features for image-to-video mode
y (List[Tensor], *optional*):
Conditional video inputs for image-to-video mode, same shape as x
Returns:
List[Tensor]:
List of denoised video tensors with original input shapes [C_out, F, H / 8, W / 8]
"""
# embeddings
x = self.patch_embedding(x.float()).to(x.dtype)
grid_sizes = x.shape[2:]
x = x.flatten(2).transpose(1, 2)
# time embeddings
e = self.time_embedding(
sinusoidal_embedding_1d(self.freq_dim, t).to(dtype=x[0].dtype))
e0 = self.time_projection(e).unflatten(1, (6, self.dim))
# context
context = self.text_embedding(context)
if clip_fea is not None and self.img_emb is not None:
context_clip = self.img_emb(clip_fea) # bs x 257 x dim
context = torch.concat([context_clip, context], dim=1)
patches_replace = transformer_options.get("patches_replace", {})
blocks_replace = patches_replace.get("dit", {})
for i, block in enumerate(self.blocks):
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"])
return out
out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs}, {"original_block": block_wrap})
x = out["img"]
else:
x = block(x, e=e0, freqs=freqs, context=context)
# head
x = self.head(x, e)
# unpatchify
x = self.unpatchify(x, grid_sizes)
return x
def forward(self, x, timestep, context, clip_fea=None, transformer_options={},**kwargs):
bs, c, t, h, w = x.shape
x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
patch_size = self.patch_size
t_len = ((t + (patch_size[0] // 2)) // patch_size[0])
h_len = ((h + (patch_size[1] // 2)) // patch_size[1])
w_len = ((w + (patch_size[2] // 2)) // patch_size[2])
img_ids = torch.zeros((t_len, h_len, w_len, 3), device=x.device, dtype=x.dtype)
img_ids[:, :, :, 0] = img_ids[:, :, :, 0] + torch.linspace(0, t_len - 1, steps=t_len, device=x.device, dtype=x.dtype).reshape(-1, 1, 1)
img_ids[:, :, :, 1] = img_ids[:, :, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).reshape(1, -1, 1)
img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).reshape(1, 1, -1)
img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs)
freqs = self.rope_embedder(img_ids).movedim(1, 2)
return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs, transformer_options=transformer_options)[:, :, :t, :h, :w]
def unpatchify(self, x, grid_sizes):
r"""
Reconstruct video tensors from patch embeddings.
Args:
x (List[Tensor]):
List of patchified features, each with shape [L, C_out * prod(patch_size)]
grid_sizes (Tensor):
Original spatial-temporal grid dimensions before patching,
shape [B, 3] (3 dimensions correspond to F_patches, H_patches, W_patches)
Returns:
List[Tensor]:
Reconstructed video tensors with shape [L, C_out, F, H / 8, W / 8]
"""
c = self.out_dim
u = x
b = u.shape[0]
u = u[:, :math.prod(grid_sizes)].view(b, *grid_sizes, *self.patch_size, c)
u = torch.einsum('bfhwpqrc->bcfphqwr', u)
u = u.reshape(b, c, *[i * j for i, j in zip(grid_sizes, self.patch_size)])
return u

567
comfy/ldm/wan/vae.py Normal file
View File

@ -0,0 +1,567 @@
# original version: https://github.com/Wan-Video/Wan2.1/blob/main/wan/modules/vae.py
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
from comfy.ldm.modules.diffusionmodules.model import vae_attention
import comfy.ops
ops = comfy.ops.disable_weight_init
CACHE_T = 2
class CausalConv3d(ops.Conv3d):
"""
Causal 3d convolusion.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._padding = (self.padding[2], self.padding[2], self.padding[1],
self.padding[1], 2 * self.padding[0], 0)
self.padding = (0, 0, 0)
def forward(self, x, cache_x=None):
padding = list(self._padding)
if cache_x is not None and self._padding[4] > 0:
cache_x = cache_x.to(x.device)
x = torch.cat([cache_x, x], dim=2)
padding[4] -= cache_x.shape[2]
x = F.pad(x, padding)
return super().forward(x)
class RMS_norm(nn.Module):
def __init__(self, dim, channel_first=True, images=True, bias=False):
super().__init__()
broadcastable_dims = (1, 1, 1) if not images else (1, 1)
shape = (dim, *broadcastable_dims) if channel_first else (dim,)
self.channel_first = channel_first
self.scale = dim**0.5
self.gamma = nn.Parameter(torch.ones(shape))
self.bias = nn.Parameter(torch.zeros(shape)) if bias else None
def forward(self, x):
return F.normalize(
x, dim=(1 if self.channel_first else -1)) * self.scale * self.gamma.to(x) + (self.bias.to(x) if self.bias is not None else 0)
class Upsample(nn.Upsample):
def forward(self, x):
"""
Fix bfloat16 support for nearest neighbor interpolation.
"""
return super().forward(x.float()).type_as(x)
class Resample(nn.Module):
def __init__(self, dim, mode):
assert mode in ('none', 'upsample2d', 'upsample3d', 'downsample2d',
'downsample3d')
super().__init__()
self.dim = dim
self.mode = mode
# layers
if mode == 'upsample2d':
self.resample = nn.Sequential(
Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
ops.Conv2d(dim, dim // 2, 3, padding=1))
elif mode == 'upsample3d':
self.resample = nn.Sequential(
Upsample(scale_factor=(2., 2.), mode='nearest-exact'),
ops.Conv2d(dim, dim // 2, 3, padding=1))
self.time_conv = CausalConv3d(
dim, dim * 2, (3, 1, 1), padding=(1, 0, 0))
elif mode == 'downsample2d':
self.resample = nn.Sequential(
nn.ZeroPad2d((0, 1, 0, 1)),
ops.Conv2d(dim, dim, 3, stride=(2, 2)))
elif mode == 'downsample3d':
self.resample = nn.Sequential(
nn.ZeroPad2d((0, 1, 0, 1)),
ops.Conv2d(dim, dim, 3, stride=(2, 2)))
self.time_conv = CausalConv3d(
dim, dim, (3, 1, 1), stride=(2, 1, 1), padding=(0, 0, 0))
else:
self.resample = nn.Identity()
def forward(self, x, feat_cache=None, feat_idx=[0]):
b, c, t, h, w = x.size()
if self.mode == 'upsample3d':
if feat_cache is not None:
idx = feat_idx[0]
if feat_cache[idx] is None:
feat_cache[idx] = 'Rep'
feat_idx[0] += 1
else:
cache_x = x[:, :, -CACHE_T:, :, :].clone()
if cache_x.shape[2] < 2 and feat_cache[
idx] is not None and feat_cache[idx] != 'Rep':
# cache last frame of last two chunk
cache_x = torch.cat([
feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
cache_x.device), cache_x
],
dim=2)
if cache_x.shape[2] < 2 and feat_cache[
idx] is not None and feat_cache[idx] == 'Rep':
cache_x = torch.cat([
torch.zeros_like(cache_x).to(cache_x.device),
cache_x
],
dim=2)
if feat_cache[idx] == 'Rep':
x = self.time_conv(x)
else:
x = self.time_conv(x, feat_cache[idx])
feat_cache[idx] = cache_x
feat_idx[0] += 1
x = x.reshape(b, 2, c, t, h, w)
x = torch.stack((x[:, 0, :, :, :, :], x[:, 1, :, :, :, :]),
3)
x = x.reshape(b, c, t * 2, h, w)
t = x.shape[2]
x = rearrange(x, 'b c t h w -> (b t) c h w')
x = self.resample(x)
x = rearrange(x, '(b t) c h w -> b c t h w', t=t)
if self.mode == 'downsample3d':
if feat_cache is not None:
idx = feat_idx[0]
if feat_cache[idx] is None:
feat_cache[idx] = x.clone()
feat_idx[0] += 1
else:
cache_x = x[:, :, -1:, :, :].clone()
# if cache_x.shape[2] < 2 and feat_cache[idx] is not None and feat_cache[idx]!='Rep':
# # cache last frame of last two chunk
# cache_x = torch.cat([feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(cache_x.device), cache_x], dim=2)
x = self.time_conv(
torch.cat([feat_cache[idx][:, :, -1:, :, :], x], 2))
feat_cache[idx] = cache_x
feat_idx[0] += 1
return x
def init_weight(self, conv):
conv_weight = conv.weight
nn.init.zeros_(conv_weight)
c1, c2, t, h, w = conv_weight.size()
one_matrix = torch.eye(c1, c2)
init_matrix = one_matrix
nn.init.zeros_(conv_weight)
#conv_weight.data[:,:,-1,1,1] = init_matrix * 0.5
conv_weight.data[:, :, 1, 0, 0] = init_matrix #* 0.5
conv.weight.data.copy_(conv_weight)
nn.init.zeros_(conv.bias.data)
def init_weight2(self, conv):
conv_weight = conv.weight.data
nn.init.zeros_(conv_weight)
c1, c2, t, h, w = conv_weight.size()
init_matrix = torch.eye(c1 // 2, c2)
#init_matrix = repeat(init_matrix, 'o ... -> (o 2) ...').permute(1,0,2).contiguous().reshape(c1,c2)
conv_weight[:c1 // 2, :, -1, 0, 0] = init_matrix
conv_weight[c1 // 2:, :, -1, 0, 0] = init_matrix
conv.weight.data.copy_(conv_weight)
nn.init.zeros_(conv.bias.data)
class ResidualBlock(nn.Module):
def __init__(self, in_dim, out_dim, dropout=0.0):
super().__init__()
self.in_dim = in_dim
self.out_dim = out_dim
# layers
self.residual = nn.Sequential(
RMS_norm(in_dim, images=False), nn.SiLU(),
CausalConv3d(in_dim, out_dim, 3, padding=1),
RMS_norm(out_dim, images=False), nn.SiLU(), nn.Dropout(dropout),
CausalConv3d(out_dim, out_dim, 3, padding=1))
self.shortcut = CausalConv3d(in_dim, out_dim, 1) \
if in_dim != out_dim else nn.Identity()
def forward(self, x, feat_cache=None, feat_idx=[0]):
h = self.shortcut(x)
for layer in self.residual:
if isinstance(layer, CausalConv3d) and feat_cache is not None:
idx = feat_idx[0]
cache_x = x[:, :, -CACHE_T:, :, :].clone()
if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
# cache last frame of last two chunk
cache_x = torch.cat([
feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
cache_x.device), cache_x
],
dim=2)
x = layer(x, feat_cache[idx])
feat_cache[idx] = cache_x
feat_idx[0] += 1
else:
x = layer(x)
return x + h
class AttentionBlock(nn.Module):
"""
Causal self-attention with a single head.
"""
def __init__(self, dim):
super().__init__()
self.dim = dim
# layers
self.norm = RMS_norm(dim)
self.to_qkv = ops.Conv2d(dim, dim * 3, 1)
self.proj = ops.Conv2d(dim, dim, 1)
self.optimized_attention = vae_attention()
def forward(self, x):
identity = x
b, c, t, h, w = x.size()
x = rearrange(x, 'b c t h w -> (b t) c h w')
x = self.norm(x)
# compute query, key, value
q, k, v = self.to_qkv(x).chunk(3, dim=1)
x = self.optimized_attention(q, k, v)
# output
x = self.proj(x)
x = rearrange(x, '(b t) c h w-> b c t h w', t=t)
return x + identity
class Encoder3d(nn.Module):
def __init__(self,
dim=128,
z_dim=4,
dim_mult=[1, 2, 4, 4],
num_res_blocks=2,
attn_scales=[],
temperal_downsample=[True, True, False],
dropout=0.0):
super().__init__()
self.dim = dim
self.z_dim = z_dim
self.dim_mult = dim_mult
self.num_res_blocks = num_res_blocks
self.attn_scales = attn_scales
self.temperal_downsample = temperal_downsample
# dimensions
dims = [dim * u for u in [1] + dim_mult]
scale = 1.0
# init block
self.conv1 = CausalConv3d(3, dims[0], 3, padding=1)
# downsample blocks
downsamples = []
for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
# residual (+attention) blocks
for _ in range(num_res_blocks):
downsamples.append(ResidualBlock(in_dim, out_dim, dropout))
if scale in attn_scales:
downsamples.append(AttentionBlock(out_dim))
in_dim = out_dim
# downsample block
if i != len(dim_mult) - 1:
mode = 'downsample3d' if temperal_downsample[
i] else 'downsample2d'
downsamples.append(Resample(out_dim, mode=mode))
scale /= 2.0
self.downsamples = nn.Sequential(*downsamples)
# middle blocks
self.middle = nn.Sequential(
ResidualBlock(out_dim, out_dim, dropout), AttentionBlock(out_dim),
ResidualBlock(out_dim, out_dim, dropout))
# output blocks
self.head = nn.Sequential(
RMS_norm(out_dim, images=False), nn.SiLU(),
CausalConv3d(out_dim, z_dim, 3, padding=1))
def forward(self, x, feat_cache=None, feat_idx=[0]):
if feat_cache is not None:
idx = feat_idx[0]
cache_x = x[:, :, -CACHE_T:, :, :].clone()
if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
# cache last frame of last two chunk
cache_x = torch.cat([
feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
cache_x.device), cache_x
],
dim=2)
x = self.conv1(x, feat_cache[idx])
feat_cache[idx] = cache_x
feat_idx[0] += 1
else:
x = self.conv1(x)
## downsamples
for layer in self.downsamples:
if feat_cache is not None:
x = layer(x, feat_cache, feat_idx)
else:
x = layer(x)
## middle
for layer in self.middle:
if isinstance(layer, ResidualBlock) and feat_cache is not None:
x = layer(x, feat_cache, feat_idx)
else:
x = layer(x)
## head
for layer in self.head:
if isinstance(layer, CausalConv3d) and feat_cache is not None:
idx = feat_idx[0]
cache_x = x[:, :, -CACHE_T:, :, :].clone()
if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
# cache last frame of last two chunk
cache_x = torch.cat([
feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
cache_x.device), cache_x
],
dim=2)
x = layer(x, feat_cache[idx])
feat_cache[idx] = cache_x
feat_idx[0] += 1
else:
x = layer(x)
return x
class Decoder3d(nn.Module):
def __init__(self,
dim=128,
z_dim=4,
dim_mult=[1, 2, 4, 4],
num_res_blocks=2,
attn_scales=[],
temperal_upsample=[False, True, True],
dropout=0.0):
super().__init__()
self.dim = dim
self.z_dim = z_dim
self.dim_mult = dim_mult
self.num_res_blocks = num_res_blocks
self.attn_scales = attn_scales
self.temperal_upsample = temperal_upsample
# dimensions
dims = [dim * u for u in [dim_mult[-1]] + dim_mult[::-1]]
scale = 1.0 / 2**(len(dim_mult) - 2)
# init block
self.conv1 = CausalConv3d(z_dim, dims[0], 3, padding=1)
# middle blocks
self.middle = nn.Sequential(
ResidualBlock(dims[0], dims[0], dropout), AttentionBlock(dims[0]),
ResidualBlock(dims[0], dims[0], dropout))
# upsample blocks
upsamples = []
for i, (in_dim, out_dim) in enumerate(zip(dims[:-1], dims[1:])):
# residual (+attention) blocks
if i == 1 or i == 2 or i == 3:
in_dim = in_dim // 2
for _ in range(num_res_blocks + 1):
upsamples.append(ResidualBlock(in_dim, out_dim, dropout))
if scale in attn_scales:
upsamples.append(AttentionBlock(out_dim))
in_dim = out_dim
# upsample block
if i != len(dim_mult) - 1:
mode = 'upsample3d' if temperal_upsample[i] else 'upsample2d'
upsamples.append(Resample(out_dim, mode=mode))
scale *= 2.0
self.upsamples = nn.Sequential(*upsamples)
# output blocks
self.head = nn.Sequential(
RMS_norm(out_dim, images=False), nn.SiLU(),
CausalConv3d(out_dim, 3, 3, padding=1))
def forward(self, x, feat_cache=None, feat_idx=[0]):
## conv1
if feat_cache is not None:
idx = feat_idx[0]
cache_x = x[:, :, -CACHE_T:, :, :].clone()
if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
# cache last frame of last two chunk
cache_x = torch.cat([
feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
cache_x.device), cache_x
],
dim=2)
x = self.conv1(x, feat_cache[idx])
feat_cache[idx] = cache_x
feat_idx[0] += 1
else:
x = self.conv1(x)
## middle
for layer in self.middle:
if isinstance(layer, ResidualBlock) and feat_cache is not None:
x = layer(x, feat_cache, feat_idx)
else:
x = layer(x)
## upsamples
for layer in self.upsamples:
if feat_cache is not None:
x = layer(x, feat_cache, feat_idx)
else:
x = layer(x)
## head
for layer in self.head:
if isinstance(layer, CausalConv3d) and feat_cache is not None:
idx = feat_idx[0]
cache_x = x[:, :, -CACHE_T:, :, :].clone()
if cache_x.shape[2] < 2 and feat_cache[idx] is not None:
# cache last frame of last two chunk
cache_x = torch.cat([
feat_cache[idx][:, :, -1, :, :].unsqueeze(2).to(
cache_x.device), cache_x
],
dim=2)
x = layer(x, feat_cache[idx])
feat_cache[idx] = cache_x
feat_idx[0] += 1
else:
x = layer(x)
return x
def count_conv3d(model):
count = 0
for m in model.modules():
if isinstance(m, CausalConv3d):
count += 1
return count
class WanVAE(nn.Module):
def __init__(self,
dim=128,
z_dim=4,
dim_mult=[1, 2, 4, 4],
num_res_blocks=2,
attn_scales=[],
temperal_downsample=[True, True, False],
dropout=0.0):
super().__init__()
self.dim = dim
self.z_dim = z_dim
self.dim_mult = dim_mult
self.num_res_blocks = num_res_blocks
self.attn_scales = attn_scales
self.temperal_downsample = temperal_downsample
self.temperal_upsample = temperal_downsample[::-1]
# modules
self.encoder = Encoder3d(dim, z_dim * 2, dim_mult, num_res_blocks,
attn_scales, self.temperal_downsample, dropout)
self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
self.conv2 = CausalConv3d(z_dim, z_dim, 1)
self.decoder = Decoder3d(dim, z_dim, dim_mult, num_res_blocks,
attn_scales, self.temperal_upsample, dropout)
def forward(self, x):
mu, log_var = self.encode(x)
z = self.reparameterize(mu, log_var)
x_recon = self.decode(z)
return x_recon, mu, log_var
def encode(self, x):
self.clear_cache()
## cache
t = x.shape[2]
iter_ = 1 + (t - 1) // 4
## 对encode输入的x按时间拆分为1、4、4、4....
for i in range(iter_):
self._enc_conv_idx = [0]
if i == 0:
out = self.encoder(
x[:, :, :1, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx)
else:
out_ = self.encoder(
x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
feat_cache=self._enc_feat_map,
feat_idx=self._enc_conv_idx)
out = torch.cat([out, out_], 2)
mu, log_var = self.conv1(out).chunk(2, dim=1)
self.clear_cache()
return mu
def decode(self, z):
self.clear_cache()
# z: [b,c,t,h,w]
iter_ = z.shape[2]
x = self.conv2(z)
for i in range(iter_):
self._conv_idx = [0]
if i == 0:
out = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx)
else:
out_ = self.decoder(
x[:, :, i:i + 1, :, :],
feat_cache=self._feat_map,
feat_idx=self._conv_idx)
out = torch.cat([out, out_], 2)
self.clear_cache()
return out
def reparameterize(self, mu, log_var):
std = torch.exp(0.5 * log_var)
eps = torch.randn_like(std)
return eps * std + mu
def sample(self, imgs, deterministic=False):
mu, log_var = self.encode(imgs)
if deterministic:
return mu
std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
return mu + std * torch.randn_like(std)
def clear_cache(self):
self._conv_num = count_conv3d(self.decoder)
self._conv_idx = [0]
self._feat_map = [None] * self._conv_num
#cache encode
self._enc_conv_num = count_conv3d(self.encoder)
self._enc_conv_idx = [0]
self._enc_feat_map = [None] * self._enc_conv_num

View File

@ -35,6 +35,7 @@ import comfy.ldm.lightricks.model
import comfy.ldm.hunyuan_video.model
import comfy.ldm.cosmos.model
import comfy.ldm.lumina.model
import comfy.ldm.wan.model
import comfy.model_management
import comfy.patcher_extension
@ -107,7 +108,7 @@ class BaseModel(torch.nn.Module):
if not unet_config.get("disable_unet_model_creation", False):
if model_config.custom_operations is None:
fp8 = model_config.optimizations.get("fp8", model_config.scaled_fp8 is not None)
fp8 = model_config.optimizations.get("fp8", False)
operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8)
else:
operations = model_config.custom_operations
@ -160,9 +161,13 @@ class BaseModel(torch.nn.Module):
extra = extra.to(dtype)
extra_conds[o] = extra
t = self.process_timestep(t, x=x, **extra_conds)
model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds).float()
return self.model_sampling.calculate_denoised(sigma, model_output, x)
def process_timestep(self, timestep, **kwargs):
return timestep
def get_dtype(self):
return self.diffusion_model.dtype
@ -184,6 +189,11 @@ class BaseModel(torch.nn.Module):
if concat_latent_image.shape[1:] != noise.shape[1:]:
concat_latent_image = utils.common_upscale(concat_latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center")
if noise.ndim == 5:
if concat_latent_image.shape[-3] < noise.shape[-3]:
concat_latent_image = torch.nn.functional.pad(concat_latent_image, (0, 0, 0, 0, 0, noise.shape[-3] - concat_latent_image.shape[-3]), "constant", 0)
else:
concat_latent_image = concat_latent_image[:, :, :noise.shape[-3]]
concat_latent_image = utils.resize_to_batch_size(concat_latent_image, noise.shape[0])
@ -212,6 +222,11 @@ class BaseModel(torch.nn.Module):
cond_concat.append(self.blank_inpaint_image_like(noise))
elif ck == "mask_inverted":
cond_concat.append(torch.zeros_like(noise)[:, :1])
if ck == "concat_image":
if concat_latent_image is not None:
cond_concat.append(concat_latent_image.to(device))
else:
cond_concat.append(torch.zeros_like(noise))
data = torch.cat(cond_concat, dim=1)
return data
return None
@ -844,17 +859,26 @@ class LTXV(BaseModel):
if cross_attn is not None:
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
guiding_latent = kwargs.get("guiding_latent", None)
if guiding_latent is not None:
out['guiding_latent'] = comfy.conds.CONDRegular(guiding_latent)
guiding_latent_noise_scale = kwargs.get("guiding_latent_noise_scale", None)
if guiding_latent_noise_scale is not None:
out["guiding_latent_noise_scale"] = comfy.conds.CONDConstant(guiding_latent_noise_scale)
out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))
denoise_mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
if denoise_mask is not None:
out["denoise_mask"] = comfy.conds.CONDRegular(denoise_mask)
keyframe_idxs = kwargs.get("keyframe_idxs", None)
if keyframe_idxs is not None:
out['keyframe_idxs'] = comfy.conds.CONDRegular(keyframe_idxs)
return out
def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
if denoise_mask is None:
return timestep
return self.diffusion_model.patchifier.patchify(((denoise_mask) * timestep.view([timestep.shape[0]] + [1] * (denoise_mask.ndim - 1)))[:, :1])[0]
def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
return latent_image
class HunyuanVideo(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan_video.model.HunyuanVideo)
@ -871,20 +895,35 @@ class HunyuanVideo(BaseModel):
if cross_attn is not None:
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
image = kwargs.get("concat_latent_image", None)
noise = kwargs.get("noise", None)
if image is not None:
padding_shape = (noise.shape[0], 16, noise.shape[2] - 1, noise.shape[3], noise.shape[4])
latent_padding = torch.zeros(padding_shape, device=noise.device, dtype=noise.dtype)
image_latents = torch.cat([image.to(noise), latent_padding], dim=2)
out['c_concat'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_latents))
guidance = kwargs.get("guidance", 6.0)
if guidance is not None:
out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))
guiding_frame_index = kwargs.get("guiding_frame_index", None)
if guiding_frame_index is not None:
out['guiding_frame_index'] = comfy.conds.CONDRegular(torch.FloatTensor([guiding_frame_index]))
return out
def scale_latent_inpaint(self, latent_image, **kwargs):
return latent_image
class HunyuanVideoI2V(HunyuanVideo):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
super().__init__(model_config, model_type, device=device)
self.concat_keys = ("concat_image", "mask_inverted")
def scale_latent_inpaint(self, latent_image, **kwargs):
return super().scale_latent_inpaint(latent_image=latent_image, **kwargs)
class HunyuanVideoSkyreelsI2V(HunyuanVideo):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
super().__init__(model_config, model_type, device=device)
self.concat_keys = ("concat_image",)
def scale_latent_inpaint(self, latent_image, **kwargs):
return super().scale_latent_inpaint(latent_image=latent_image, **kwargs)
class CosmosVideo(BaseModel):
def __init__(self, model_config, model_type=ModelType.EDM, image_to_video=False, device=None):
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.cosmos.model.GeneralDIT)
@ -927,3 +966,50 @@ class Lumina2(BaseModel):
if cross_attn is not None:
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
return out
class WAN21(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
self.image_to_video = image_to_video
def concat_cond(self, **kwargs):
noise = kwargs.get("noise", None)
if self.diffusion_model.patch_embedding.weight.shape[1] == noise.shape[1]:
return None
image = kwargs.get("concat_latent_image", None)
device = kwargs["device"]
if image is None:
image = torch.zeros_like(noise)
image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
image = self.process_latent_in(image)
image = utils.resize_to_batch_size(image, noise.shape[0])
if not self.image_to_video:
return image
mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
if mask is None:
mask = torch.zeros_like(noise)[:, :4]
else:
mask = 1.0 - torch.mean(mask, dim=1, keepdim=True)
mask = utils.common_upscale(mask.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
if mask.shape[-3] < noise.shape[-3]:
mask = torch.nn.functional.pad(mask, (0, 0, 0, 0, 0, noise.shape[-3] - mask.shape[-3]), mode='constant', value=0)
mask = mask.repeat(1, 4, 1, 1, 1)
mask = utils.resize_to_batch_size(mask, noise.shape[0])
return torch.cat((mask, image), dim=1)
def extra_conds(self, **kwargs):
out = super().extra_conds(**kwargs)
cross_attn = kwargs.get("cross_attn", None)
if cross_attn is not None:
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
clip_vision_output = kwargs.get("clip_vision_output", None)
if clip_vision_output is not None:
out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states)
return out

View File

@ -1,3 +1,4 @@
import json
import comfy.supported_models
import comfy.supported_models_base
import comfy.utils
@ -33,7 +34,7 @@ def calculate_transformer_depth(prefix, state_dict_keys, state_dict):
return last_transformer_depth, context_dim, use_linear_in_transformer, time_stack, time_stack_cross
return None
def detect_unet_config(state_dict, key_prefix):
def detect_unet_config(state_dict, key_prefix, metadata=None):
state_dict_keys = list(state_dict.keys())
if '{}joint_blocks.0.context_block.attn.qkv.weight'.format(key_prefix) in state_dict_keys: #mmdit model
@ -210,6 +211,8 @@ def detect_unet_config(state_dict, key_prefix):
if '{}adaln_single.emb.timestep_embedder.linear_1.bias'.format(key_prefix) in state_dict_keys: #Lightricks ltxv
dit_config = {}
dit_config["image_model"] = "ltxv"
if metadata is not None and "config" in metadata:
dit_config.update(json.loads(metadata["config"]).get("transformer", {}))
return dit_config
if '{}t_block.1.weight'.format(key_prefix) in state_dict_keys: # PixArt
@ -299,6 +302,27 @@ def detect_unet_config(state_dict, key_prefix):
dit_config["axes_lens"] = [300, 512, 512]
return dit_config
if '{}head.modulation'.format(key_prefix) in state_dict_keys: # Wan 2.1
dit_config = {}
dit_config["image_model"] = "wan2.1"
dim = state_dict['{}head.modulation'.format(key_prefix)].shape[-1]
dit_config["dim"] = dim
dit_config["num_heads"] = dim // 128
dit_config["ffn_dim"] = state_dict['{}blocks.0.ffn.0.weight'.format(key_prefix)].shape[0]
dit_config["num_layers"] = count_blocks(state_dict_keys, '{}blocks.'.format(key_prefix) + '{}.')
dit_config["patch_size"] = (1, 2, 2)
dit_config["freq_dim"] = 256
dit_config["window_size"] = (-1, -1)
dit_config["qk_norm"] = True
dit_config["cross_attn_norm"] = True
dit_config["eps"] = 1e-6
dit_config["in_dim"] = state_dict['{}patch_embedding.weight'.format(key_prefix)].shape[1]
if '{}img_emb.proj.0.bias'.format(key_prefix) in state_dict_keys:
dit_config["model_type"] = "i2v"
else:
dit_config["model_type"] = "t2v"
return dit_config
if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys:
return None
@ -433,8 +457,8 @@ def model_config_from_unet_config(unet_config, state_dict=None):
logging.error("no match {}".format(unet_config))
return None
def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=False):
unet_config = detect_unet_config(state_dict, unet_key_prefix)
def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=False, metadata=None):
unet_config = detect_unet_config(state_dict, unet_key_prefix, metadata=metadata)
if unet_config is None:
return None
model_config = model_config_from_unet_config(unet_config, state_dict)
@ -447,6 +471,10 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal
model_config.scaled_fp8 = scaled_fp8_weight.dtype
if model_config.scaled_fp8 == torch.float32:
model_config.scaled_fp8 = torch.float8_e4m3fn
if scaled_fp8_weight.nelement() == 2:
model_config.optimizations["fp8"] = False
else:
model_config.optimizations["fp8"] = True
return model_config

View File

@ -19,7 +19,7 @@
import psutil
import logging
from enum import Enum
from comfy.cli_args import args
from comfy.cli_args import args, PerformanceFeature
import torch
import sys
import platform
@ -95,6 +95,13 @@ try:
except:
npu_available = False
try:
import torch_mlu # noqa: F401
_ = torch.mlu.device_count()
mlu_available = torch.mlu.is_available()
except:
mlu_available = False
if args.cpu:
cpu_state = CPUState.CPU
@ -112,6 +119,12 @@ def is_ascend_npu():
return True
return False
def is_mlu():
global mlu_available
if mlu_available:
return True
return False
def get_torch_device():
global directml_enabled
global cpu_state
@ -127,6 +140,8 @@ def get_torch_device():
return torch.device("xpu", torch.xpu.current_device())
elif is_ascend_npu():
return torch.device("npu", torch.npu.current_device())
elif is_mlu():
return torch.device("mlu", torch.mlu.current_device())
else:
return torch.device(torch.cuda.current_device())
@ -153,6 +168,12 @@ def get_total_memory(dev=None, torch_total_too=False):
_, mem_total_npu = torch.npu.mem_get_info(dev)
mem_total_torch = mem_reserved
mem_total = mem_total_npu
elif is_mlu():
stats = torch.mlu.memory_stats(dev)
mem_reserved = stats['reserved_bytes.all.current']
_, mem_total_mlu = torch.mlu.mem_get_info(dev)
mem_total_torch = mem_reserved
mem_total = mem_total_mlu
else:
stats = torch.cuda.memory_stats(dev)
mem_reserved = stats['reserved_bytes.all.current']
@ -165,12 +186,21 @@ def get_total_memory(dev=None, torch_total_too=False):
else:
return mem_total
def mac_version():
try:
return tuple(int(n) for n in platform.mac_ver()[0].split("."))
except:
return None
total_vram = get_total_memory(get_torch_device()) / (1024 * 1024)
total_ram = psutil.virtual_memory().total / (1024 * 1024)
logging.info("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram))
try:
logging.info("pytorch version: {}".format(torch_version))
mac_ver = mac_version()
if mac_ver is not None:
logging.info("Mac Version {}".format(mac_ver))
except:
pass
@ -232,7 +262,7 @@ try:
if torch_version_numeric[0] >= 2:
if ENABLE_PYTORCH_ATTENTION == False and args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
ENABLE_PYTORCH_ATTENTION = True
if is_intel_xpu() or is_ascend_npu():
if is_intel_xpu() or is_ascend_npu() or is_mlu():
if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
ENABLE_PYTORCH_ATTENTION = True
except:
@ -259,9 +289,10 @@ if ENABLE_PYTORCH_ATTENTION:
PRIORITIZE_FP16 = False # TODO: remove and replace with something that shows exactly which dtype is faster than the other
try:
if is_nvidia() and args.fast:
if is_nvidia() and PerformanceFeature.Fp16Accumulation in args.fast:
torch.backends.cuda.matmul.allow_fp16_accumulation = True
PRIORITIZE_FP16 = True # TODO: limit to cards where it actually boosts performance
logging.info("Enabled fp16 accumulation.")
except:
pass
@ -316,6 +347,8 @@ def get_torch_device_name(device):
return "{} {}".format(device, torch.xpu.get_device_name(device))
elif is_ascend_npu():
return "{} {}".format(device, torch.npu.get_device_name(device))
elif is_mlu():
return "{} {}".format(device, torch.mlu.get_device_name(device))
else:
return "CUDA {}: {}".format(device, torch.cuda.get_device_name(device))
@ -557,7 +590,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
loaded_memory = loaded_model.model_loaded_memory()
current_free_mem = get_free_memory(torch_dev) + loaded_memory
lowvram_model_memory = max(64 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory()))
lowvram_model_memory = max(128 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory()))
lowvram_model_memory = max(0.1, lowvram_model_memory - loaded_memory)
if vram_set_state == VRAMState.NO_VRAM:
@ -651,7 +684,7 @@ def unet_inital_load_device(parameters, dtype):
def maximum_vram_for_weights(device=None):
return (get_total_memory(device) * 0.88 - minimum_inference_memory())
def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, torch.bfloat16, torch.float32]):
def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, torch.bfloat16, torch.float32], weight_dtype=None):
if model_params < 0:
model_params = 1000000000000000000000
if args.fp32_unet:
@ -669,10 +702,8 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor
fp8_dtype = None
try:
for dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
if dtype in supported_dtypes:
fp8_dtype = dtype
break
if weight_dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
fp8_dtype = weight_dtype
except:
pass
@ -684,7 +715,7 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor
if model_params * 2 > free_model_memory:
return fp8_dtype
if PRIORITIZE_FP16:
if PRIORITIZE_FP16 or weight_dtype == torch.float16:
if torch.float16 in supported_dtypes and should_use_fp16(device=device, model_params=model_params):
return torch.float16
@ -720,6 +751,9 @@ def unet_manual_cast(weight_dtype, inference_device, supported_dtypes=[torch.flo
return None
fp16_supported = should_use_fp16(inference_device, prioritize_performance=True)
if PRIORITIZE_FP16 and fp16_supported and torch.float16 in supported_dtypes:
return torch.float16
for dt in supported_dtypes:
if dt == torch.float16 and fp16_supported:
return torch.float16
@ -896,6 +930,9 @@ def cast_to_device(tensor, device, dtype, copy=False):
def sage_attention_enabled():
return args.use_sage_attention
def flash_attention_enabled():
return args.use_flash_attention
def xformers_enabled():
global directml_enabled
global cpu_state
@ -905,6 +942,8 @@ def xformers_enabled():
return False
if is_ascend_npu():
return False
if is_mlu():
return False
if directml_enabled:
return False
return XFORMERS_IS_AVAILABLE
@ -936,16 +975,12 @@ def pytorch_attention_flash_attention():
return True
if is_ascend_npu():
return True
if is_mlu():
return True
if is_amd():
return True #if you have pytorch attention enabled on AMD it probably supports at least mem efficient attention
return False
def mac_version():
try:
return tuple(int(n) for n in platform.mac_ver()[0].split("."))
except:
return None
def force_upcast_attention_dtype():
upcast = args.force_upcast_attention
@ -984,6 +1019,13 @@ def get_free_memory(dev=None, torch_free_too=False):
mem_free_npu, _ = torch.npu.mem_get_info(dev)
mem_free_torch = mem_reserved - mem_active
mem_free_total = mem_free_npu + mem_free_torch
elif is_mlu():
stats = torch.mlu.memory_stats(dev)
mem_active = stats['active_bytes.all.current']
mem_reserved = stats['reserved_bytes.all.current']
mem_free_mlu, _ = torch.mlu.mem_get_info(dev)
mem_free_torch = mem_reserved - mem_active
mem_free_total = mem_free_mlu + mem_free_torch
else:
stats = torch.cuda.memory_stats(dev)
mem_active = stats['active_bytes.all.current']
@ -1053,6 +1095,9 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
if is_ascend_npu():
return True
if is_mlu():
return True
if torch.version.hip:
return True
@ -1121,6 +1166,11 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma
return False
props = torch.cuda.get_device_properties(device)
if is_mlu():
if props.major > 3:
return True
if props.major >= 8:
return True

View File

@ -639,7 +639,7 @@ class ModelPatcher:
mem_counter += module_mem
load_completely.append((module_mem, n, m, params))
if cast_weight:
if cast_weight and hasattr(m, "comfy_cast_weights"):
m.prev_comfy_cast_weights = m.comfy_cast_weights
m.comfy_cast_weights = True
@ -1089,7 +1089,6 @@ class ModelPatcher:
def patch_hooks(self, hooks: comfy.hooks.HookGroup):
with self.use_ejected():
self.unpatch_hooks()
if hooks is not None:
model_sd_keys = list(self.model_state_dict().keys())
memory_counter = None
@ -1100,12 +1099,16 @@ class ModelPatcher:
# if have cached weights for hooks, use it
cached_weights = self.cached_hook_patches.get(hooks, None)
if cached_weights is not None:
model_sd_keys_set = set(model_sd_keys)
for key in cached_weights:
if key not in model_sd_keys:
logging.warning(f"Cached hook could not patch. Key does not exist in model: {key}")
continue
self.patch_cached_hook_weights(cached_weights=cached_weights, key=key, memory_counter=memory_counter)
model_sd_keys_set.remove(key)
self.unpatch_hooks(model_sd_keys_set)
else:
self.unpatch_hooks()
relevant_patches = self.get_combined_hook_patches(hooks=hooks)
original_weights = None
if len(relevant_patches) > 0:
@ -1116,6 +1119,8 @@ class ModelPatcher:
continue
self.patch_hook_weight_to_device(hooks=hooks, combined_patches=relevant_patches, key=key, original_weights=original_weights,
memory_counter=memory_counter)
else:
self.unpatch_hooks()
self.current_hooks = hooks
def patch_cached_hook_weights(self, cached_weights: dict, key: str, memory_counter: MemoryCounter):
@ -1172,17 +1177,23 @@ class ModelPatcher:
del out_weight
del weight
def unpatch_hooks(self) -> None:
def unpatch_hooks(self, whitelist_keys_set: set[str]=None) -> None:
with self.use_ejected():
if len(self.hook_backup) == 0:
self.current_hooks = None
return
keys = list(self.hook_backup.keys())
for k in keys:
comfy.utils.copy_to_param(self.model, k, self.hook_backup[k][0].to(device=self.hook_backup[k][1]))
if whitelist_keys_set:
for k in keys:
if k in whitelist_keys_set:
comfy.utils.copy_to_param(self.model, k, self.hook_backup[k][0].to(device=self.hook_backup[k][1]))
self.hook_backup.pop(k)
else:
for k in keys:
comfy.utils.copy_to_param(self.model, k, self.hook_backup[k][0].to(device=self.hook_backup[k][1]))
self.hook_backup.clear()
self.current_hooks = None
self.hook_backup.clear()
self.current_hooks = None
def clean_hooks(self):
self.unpatch_hooks()

View File

@ -17,8 +17,9 @@
"""
import torch
import logging
import comfy.model_management
from comfy.cli_args import args
from comfy.cli_args import args, PerformanceFeature
import comfy.float
cast_to = comfy.model_management.cast_to #TODO: remove once no more references
@ -308,6 +309,7 @@ class fp8_ops(manual_cast):
return torch.nn.functional.linear(input, weight, bias)
def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None):
logging.info("Using scaled fp8: fp8 matrix mult: {}, scale input: {}".format(fp8_matrix_mult, scale_input))
class scaled_fp8_op(manual_cast):
class Linear(manual_cast.Linear):
def __init__(self, *args, **kwargs):
@ -358,9 +360,13 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None
def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None):
fp8_compute = comfy.model_management.supports_fp8_compute(load_device)
if scaled_fp8 is not None:
return scaled_fp8_ops(fp8_matrix_mult=fp8_compute, scale_input=True, override_dtype=scaled_fp8)
return scaled_fp8_ops(fp8_matrix_mult=fp8_compute and fp8_optimizations, scale_input=fp8_optimizations, override_dtype=scaled_fp8)
if fp8_compute and (fp8_optimizations or args.fast) and not disable_fast_fp8:
if (
fp8_compute and
(fp8_optimizations or PerformanceFeature.Fp8MatrixMultiplication in args.fast) and
not disable_fast_fp8
):
return fp8_ops
if compute_dtype is None or weight_dtype == compute_dtype:

View File

@ -19,6 +19,12 @@ import comfy.hooks
import scipy.stats
import numpy
def add_area_dims(area, num_dims):
while (len(area) // 2) < num_dims:
area = [2147483648] + area[:len(area) // 2] + [0] + area[len(area) // 2:]
return area
def get_area_and_mult(conds, x_in, timestep_in):
dims = tuple(x_in.shape[2:])
area = None
@ -34,6 +40,10 @@ def get_area_and_mult(conds, x_in, timestep_in):
return None
if 'area' in conds:
area = list(conds['area'])
area = add_area_dims(area, len(dims))
if (len(area) // 2) > len(dims):
area = area[:len(dims)] + area[len(area) // 2:(len(area) // 2) + len(dims)]
if 'strength' in conds:
strength = conds['strength']
@ -50,7 +60,7 @@ def get_area_and_mult(conds, x_in, timestep_in):
if "mask_strength" in conds:
mask_strength = conds["mask_strength"]
mask = conds['mask']
assert(mask.shape[1:] == x_in.shape[2:])
assert (mask.shape[1:] == x_in.shape[2:])
mask = mask[:input_x.shape[0]]
if area is not None:
@ -64,16 +74,17 @@ def get_area_and_mult(conds, x_in, timestep_in):
mult = mask * strength
if 'mask' not in conds and area is not None:
rr = 8
fuzz = 8
for i in range(len(dims)):
rr = min(fuzz, mult.shape[2 + i] // 4)
if area[len(dims) + i] != 0:
for t in range(rr):
m = mult.narrow(i + 2, t, 1)
m *= ((1.0/rr) * (t + 1))
m *= ((1.0 / rr) * (t + 1))
if (area[i] + area[len(dims) + i]) < x_in.shape[i + 2]:
for t in range(rr):
m = mult.narrow(i + 2, area[i] - 1 - t, 1)
m *= ((1.0/rr) * (t + 1))
m *= ((1.0 / rr) * (t + 1))
conditioning = {}
model_conds = conds["model_conds"]
@ -548,25 +559,37 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
logging.warning("WARNING: The comfy.samplers.resolve_areas_and_cond_masks function is deprecated please use the resolve_areas_and_cond_masks_multidim one instead.")
return resolve_areas_and_cond_masks_multidim(conditions, [h, w], device)
def create_cond_with_same_area_if_none(conds, c): #TODO: handle dim != 2
def create_cond_with_same_area_if_none(conds, c):
if 'area' not in c:
return
def area_inside(a, area_cmp):
a = add_area_dims(a, len(area_cmp) // 2)
area_cmp = add_area_dims(area_cmp, len(a) // 2)
a_l = len(a) // 2
area_cmp_l = len(area_cmp) // 2
for i in range(min(a_l, area_cmp_l)):
if a[a_l + i] < area_cmp[area_cmp_l + i]:
return False
for i in range(min(a_l, area_cmp_l)):
if (a[i] + a[a_l + i]) > (area_cmp[i] + area_cmp[area_cmp_l + i]):
return False
return True
c_area = c['area']
smallest = None
for x in conds:
if 'area' in x:
a = x['area']
if c_area[2] >= a[2] and c_area[3] >= a[3]:
if a[0] + a[2] >= c_area[0] + c_area[2]:
if a[1] + a[3] >= c_area[1] + c_area[3]:
if smallest is None:
smallest = x
elif 'area' not in smallest:
smallest = x
else:
if smallest['area'][0] * smallest['area'][1] > a[0] * a[1]:
smallest = x
if area_inside(c_area, a):
if smallest is None:
smallest = x
elif 'area' not in smallest:
smallest = x
else:
if math.prod(smallest['area'][:len(smallest['area']) // 2]) > math.prod(a[:len(a) // 2]):
smallest = x
else:
if smallest is None:
smallest = x
@ -687,7 +710,7 @@ KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_c
"lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu",
"dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm",
"ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp",
"gradient_estimation"]
"gradient_estimation", "er_sde"]
class KSAMPLER(Sampler):
def __init__(self, sampler_function, extra_options={}, inpaint_options={}):

View File

@ -1,4 +1,5 @@
from __future__ import annotations
import json
import torch
from enum import Enum
import logging
@ -12,6 +13,7 @@ from .ldm.audio.autoencoder import AudioOobleckVAE
import comfy.ldm.genmo.vae.model
import comfy.ldm.lightricks.vae.causal_video_autoencoder
import comfy.ldm.cosmos.vae
import comfy.ldm.wan.vae
import yaml
import math
@ -37,6 +39,7 @@ import comfy.text_encoders.lt
import comfy.text_encoders.hunyuan_video
import comfy.text_encoders.cosmos
import comfy.text_encoders.lumina2
import comfy.text_encoders.wan
import comfy.model_patcher
import comfy.lora
@ -132,8 +135,8 @@ class CLIP:
def clip_layer(self, layer_idx):
self.layer_idx = layer_idx
def tokenize(self, text, return_word_ids=False):
return self.tokenizer.tokenize_with_weights(text, return_word_ids)
def tokenize(self, text, return_word_ids=False, **kwargs):
return self.tokenizer.tokenize_with_weights(text, return_word_ids, **kwargs)
def add_hooks_to_dict(self, pooled_dict: dict[str]):
if self.apply_hooks_to_conds:
@ -247,7 +250,7 @@ class CLIP:
return self.patcher.get_key_patches()
class VAE:
def __init__(self, sd=None, device=None, config=None, dtype=None):
def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None):
if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
sd = diffusers_convert.convert_vae_state_dict(sd)
@ -355,7 +358,12 @@ class VAE:
version = 0
elif tensor_conv1.shape[0] == 1024:
version = 1
self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version)
if "encoder.down_blocks.1.conv.conv.bias" in sd:
version = 2
vae_config = None
if metadata is not None and "config" in metadata:
vae_config = json.loads(metadata["config"]).get("vae", None)
self.first_stage_model = comfy.ldm.lightricks.vae.causal_video_autoencoder.VideoVAE(version=version, config=vae_config)
self.latent_channels = 128
self.latent_dim = 3
self.memory_used_decode = lambda shape, dtype: (900 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
@ -392,6 +400,18 @@ class VAE:
self.memory_used_decode = lambda shape, dtype: (50 * shape[2] * shape[3] * shape[4] * (8 * 8 * 8)) * model_management.dtype_size(dtype)
self.memory_used_encode = lambda shape, dtype: (50 * (round((shape[2] + 7) / 8) * 8) * shape[3] * shape[4]) * model_management.dtype_size(dtype)
self.working_dtypes = [torch.bfloat16, torch.float32]
elif "decoder.middle.0.residual.0.gamma" in sd:
self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8)
self.upscale_index_formula = (4, 8, 8)
self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8)
self.downscale_index_formula = (4, 8, 8)
self.latent_dim = 3
self.latent_channels = 16
ddconfig = {"dim": 96, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "dropout": 0.0}
self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig)
self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
self.memory_used_encode = lambda shape, dtype: 6000 * shape[3] * shape[4] * model_management.dtype_size(dtype)
self.memory_used_decode = lambda shape, dtype: 7000 * shape[3] * shape[4] * (8 * 8) * model_management.dtype_size(dtype)
else:
logging.warning("WARNING: No VAE weights detected, VAE not initalized.")
self.first_stage_model = None
@ -659,6 +679,7 @@ class CLIPType(Enum):
PIXART = 10
COSMOS = 11
LUMINA2 = 12
WAN = 13
def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
@ -763,6 +784,10 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
elif clip_type == CLIPType.PIXART:
clip_target.clip = comfy.text_encoders.pixart_t5.pixart_te(**t5xxl_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.pixart_t5.PixArtTokenizer
elif clip_type == CLIPType.WAN:
clip_target.clip = comfy.text_encoders.wan.te(**t5xxl_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.wan.WanT5Tokenizer
tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
else: #CLIPType.MOCHI
clip_target.clip = comfy.text_encoders.genmo.mochi_te(**t5xxl_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.genmo.MochiT5Tokenizer
@ -854,13 +879,13 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
return (model, clip, vae)
def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}):
sd = comfy.utils.load_torch_file(ckpt_path)
out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options)
sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True)
out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata)
if out is None:
raise RuntimeError("ERROR: Could not detect model type of: {}".format(ckpt_path))
return out
def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}):
def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None):
clip = None
clipvision = None
vae = None
@ -872,19 +897,19 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
weight_dtype = comfy.utils.weight_dtype(sd, diffusion_model_prefix)
load_device = model_management.get_torch_device()
model_config = model_detection.model_config_from_unet(sd, diffusion_model_prefix)
model_config = model_detection.model_config_from_unet(sd, diffusion_model_prefix, metadata=metadata)
if model_config is None:
return None
unet_weight_dtype = list(model_config.supported_inference_dtypes)
if weight_dtype is not None and model_config.scaled_fp8 is None:
unet_weight_dtype.append(weight_dtype)
if model_config.scaled_fp8 is not None:
weight_dtype = None
model_config.custom_operations = model_options.get("custom_operations", None)
unet_dtype = model_options.get("dtype", model_options.get("weight_dtype", None))
if unet_dtype is None:
unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype)
unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype, weight_dtype=weight_dtype)
manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes)
model_config.set_inference_dtype(unet_dtype, manual_cast_dtype)
@ -901,7 +926,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
if output_vae:
vae_sd = comfy.utils.state_dict_prefix_replace(sd, {k: "" for k in model_config.vae_key_prefix}, filter_keys=True)
vae_sd = model_config.process_vae_state_dict(vae_sd)
vae = VAE(sd=vae_sd)
vae = VAE(sd=vae_sd, metadata=metadata)
if output_clip:
clip_target = model_config.clip_target(state_dict=sd)
@ -975,11 +1000,11 @@ def load_diffusion_model_state_dict(sd, model_options={}): #load unet in diffuse
offload_device = model_management.unet_offload_device()
unet_weight_dtype = list(model_config.supported_inference_dtypes)
if weight_dtype is not None and model_config.scaled_fp8 is None:
unet_weight_dtype.append(weight_dtype)
if model_config.scaled_fp8 is not None:
weight_dtype = None
if dtype is None:
unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype)
unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype, weight_dtype=weight_dtype)
else:
unet_dtype = dtype

View File

@ -158,71 +158,93 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
self.layer_idx = self.options_default[1]
self.return_projected_pooled = self.options_default[2]
def set_up_textual_embeddings(self, tokens, current_embeds):
out_tokens = []
next_new_token = token_dict_size = current_embeds.weight.shape[0]
embedding_weights = []
def process_tokens(self, tokens, device):
end_token = self.special_tokens.get("end", None)
if end_token is None:
cmp_token = self.special_tokens.get("pad", -1)
else:
cmp_token = end_token
embeds_out = []
attention_masks = []
num_tokens = []
for x in tokens:
attention_mask = []
tokens_temp = []
other_embeds = []
eos = False
index = 0
for y in x:
if isinstance(y, numbers.Integral):
tokens_temp += [int(y)]
else:
if y.shape[0] == current_embeds.weight.shape[1]:
embedding_weights += [y]
tokens_temp += [next_new_token]
next_new_token += 1
if eos:
attention_mask.append(0)
else:
logging.warning("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored {} != {}".format(y.shape[0], current_embeds.weight.shape[1]))
while len(tokens_temp) < len(x):
tokens_temp += [self.special_tokens["pad"]]
out_tokens += [tokens_temp]
attention_mask.append(1)
token = int(y)
tokens_temp += [token]
if not eos and token == cmp_token:
if end_token is None:
attention_mask[-1] = 0
eos = True
else:
other_embeds.append((index, y))
index += 1
n = token_dict_size
if len(embedding_weights) > 0:
new_embedding = self.operations.Embedding(next_new_token + 1, current_embeds.weight.shape[1], device=current_embeds.weight.device, dtype=current_embeds.weight.dtype)
new_embedding.weight[:token_dict_size] = current_embeds.weight
for x in embedding_weights:
new_embedding.weight[n] = x
n += 1
self.transformer.set_input_embeddings(new_embedding)
tokens_embed = torch.tensor([tokens_temp], device=device, dtype=torch.long)
tokens_embed = self.transformer.get_input_embeddings()(tokens_embed, out_dtype=torch.float32)
index = 0
pad_extra = 0
for o in other_embeds:
emb = o[1]
if torch.is_tensor(emb):
emb = {"type": "embedding", "data": emb}
processed_tokens = []
for x in out_tokens:
processed_tokens += [list(map(lambda a: n if a == -1 else a, x))] #The EOS token should always be the largest one
emb_type = emb.get("type", None)
if emb_type == "embedding":
emb = emb.get("data", None)
else:
if hasattr(self.transformer, "preprocess_embed"):
emb = self.transformer.preprocess_embed(emb, device=device)
else:
emb = None
return processed_tokens
if emb is None:
index += -1
continue
ind = index + o[0]
emb = emb.view(1, -1, emb.shape[-1]).to(device=device, dtype=torch.float32)
emb_shape = emb.shape[1]
if emb.shape[-1] == tokens_embed.shape[-1]:
tokens_embed = torch.cat([tokens_embed[:, :ind], emb, tokens_embed[:, ind:]], dim=1)
attention_mask = attention_mask[:ind] + [1] * emb_shape + attention_mask[ind:]
index += emb_shape - 1
else:
index += -1
pad_extra += emb_shape
logging.warning("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored {} != {}".format(emb.shape[-1], tokens_embed.shape[-1]))
if pad_extra > 0:
padd_embed = self.transformer.get_input_embeddings()(torch.tensor([[self.special_tokens["pad"]] * pad_extra], device=device, dtype=torch.long), out_dtype=torch.float32)
tokens_embed = torch.cat([tokens_embed, padd_embed], dim=1)
attention_mask = attention_mask + [0] * pad_extra
embeds_out.append(tokens_embed)
attention_masks.append(attention_mask)
num_tokens.append(sum(attention_mask))
return torch.cat(embeds_out), torch.tensor(attention_masks, device=device, dtype=torch.long), num_tokens
def forward(self, tokens):
backup_embeds = self.transformer.get_input_embeddings()
device = backup_embeds.weight.device
tokens = self.set_up_textual_embeddings(tokens, backup_embeds)
tokens = torch.LongTensor(tokens).to(device)
attention_mask = None
if self.enable_attention_masks or self.zero_out_masked or self.return_attention_masks:
attention_mask = torch.zeros_like(tokens)
end_token = self.special_tokens.get("end", None)
if end_token is None:
cmp_token = self.special_tokens.get("pad", -1)
else:
cmp_token = end_token
for x in range(attention_mask.shape[0]):
for y in range(attention_mask.shape[1]):
attention_mask[x, y] = 1
if tokens[x, y] == cmp_token:
if end_token is None:
attention_mask[x, y] = 0
break
device = self.transformer.get_input_embeddings().weight.device
embeds, attention_mask, num_tokens = self.process_tokens(tokens, device)
attention_mask_model = None
if self.enable_attention_masks:
attention_mask_model = attention_mask
outputs = self.transformer(tokens, attention_mask_model, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state, dtype=torch.float32)
self.transformer.set_input_embeddings(backup_embeds)
outputs = self.transformer(None, attention_mask_model, embeds=embeds, num_tokens=num_tokens, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state, dtype=torch.float32)
if self.layer == "last":
z = outputs[0].float()
@ -482,7 +504,7 @@ class SDTokenizer:
return (embed, leftover)
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
'''
Takes a prompt and converts it to a list of (token, weight, word id) elements.
Tokens can both be integer tokens and pre computed CLIP tensors.
@ -596,7 +618,7 @@ class SD1Tokenizer:
tokenizer = tokenizer_data.get("{}_tokenizer_class".format(self.clip), tokenizer)
setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data))
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
out = {}
out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids)
return out

View File

@ -26,7 +26,7 @@ class SDXLTokenizer:
self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory)
self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory)
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
out = {}
out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids)
out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)

View File

@ -16,6 +16,7 @@ import comfy.text_encoders.lt
import comfy.text_encoders.hunyuan_video
import comfy.text_encoders.cosmos
import comfy.text_encoders.lumina2
import comfy.text_encoders.wan
from . import supported_models_base
from . import latent_formats
@ -761,7 +762,7 @@ class LTXV(supported_models_base.BASE):
unet_extra_config = {}
latent_format = latent_formats.LTXV
memory_usage_factor = 2.7
memory_usage_factor = 5.5 # TODO: img2vid is about 2x vs txt2vid
supported_inference_dtypes = [torch.bfloat16, torch.float32]
@ -825,6 +826,26 @@ class HunyuanVideo(supported_models_base.BASE):
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}llama.transformer.".format(pref))
return supported_models_base.ClipTarget(comfy.text_encoders.hunyuan_video.HunyuanVideoTokenizer, comfy.text_encoders.hunyuan_video.hunyuan_video_clip(**hunyuan_detect))
class HunyuanVideoI2V(HunyuanVideo):
unet_config = {
"image_model": "hunyuan_video",
"in_channels": 33,
}
def get_model(self, state_dict, prefix="", device=None):
out = model_base.HunyuanVideoI2V(self, device=device)
return out
class HunyuanVideoSkyreelsI2V(HunyuanVideo):
unet_config = {
"image_model": "hunyuan_video",
"in_channels": 32,
}
def get_model(self, state_dict, prefix="", device=None):
out = model_base.HunyuanVideoSkyreelsI2V(self, device=device)
return out
class CosmosT2V(supported_models_base.BASE):
unet_config = {
"image_model": "cosmos",
@ -895,6 +916,49 @@ class Lumina2(supported_models_base.BASE):
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}gemma2_2b.transformer.".format(pref))
return supported_models_base.ClipTarget(comfy.text_encoders.lumina2.LuminaTokenizer, comfy.text_encoders.lumina2.te(**hunyuan_detect))
models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2]
class WAN21_T2V(supported_models_base.BASE):
unet_config = {
"image_model": "wan2.1",
"model_type": "t2v",
}
sampling_settings = {
"shift": 8.0,
}
unet_extra_config = {}
latent_format = latent_formats.Wan21
memory_usage_factor = 1.0
supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32]
vae_key_prefix = ["vae."]
text_encoder_key_prefix = ["text_encoders."]
def __init__(self, unet_config):
super().__init__(unet_config)
self.memory_usage_factor = self.unet_config.get("dim", 2000) / 2000
def get_model(self, state_dict, prefix="", device=None):
out = model_base.WAN21(self, device=device)
return out
def clip_target(self, state_dict={}):
pref = self.text_encoder_key_prefix[0]
t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}umt5xxl.transformer.".format(pref))
return supported_models_base.ClipTarget(comfy.text_encoders.wan.WanT5Tokenizer, comfy.text_encoders.wan.te(**t5_detect))
class WAN21_I2V(WAN21_T2V):
unet_config = {
"image_model": "wan2.1",
"model_type": "i2v",
}
def get_model(self, state_dict, prefix="", device=None):
out = model_base.WAN21(self, image_to_video=True, device=device)
return out
models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V]
models += [SVD_img2vid]

View File

@ -93,8 +93,11 @@ class BertEmbeddings(torch.nn.Module):
self.LayerNorm = operations.LayerNorm(embed_dim, eps=layer_norm_eps, dtype=dtype, device=device)
def forward(self, input_tokens, token_type_ids=None, dtype=None):
x = self.word_embeddings(input_tokens, out_dtype=dtype)
def forward(self, input_tokens, embeds=None, token_type_ids=None, dtype=None):
if embeds is not None:
x = embeds
else:
x = self.word_embeddings(input_tokens, out_dtype=dtype)
x += comfy.ops.cast_to_input(self.position_embeddings.weight[:x.shape[1]], x)
if token_type_ids is not None:
x += self.token_type_embeddings(token_type_ids, out_dtype=x.dtype)
@ -113,8 +116,8 @@ class BertModel_(torch.nn.Module):
self.embeddings = BertEmbeddings(config_dict["vocab_size"], config_dict["max_position_embeddings"], config_dict["type_vocab_size"], config_dict["pad_token_id"], embed_dim, layer_norm_eps, dtype, device, operations)
self.encoder = BertEncoder(config_dict["num_hidden_layers"], embed_dim, config_dict["intermediate_size"], config_dict["num_attention_heads"], layer_norm_eps, dtype, device, operations)
def forward(self, input_tokens, attention_mask=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None):
x = self.embeddings(input_tokens, dtype=dtype)
def forward(self, input_tokens, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None):
x = self.embeddings(input_tokens, embeds=embeds, dtype=dtype)
mask = None
if attention_mask is not None:
mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, attention_mask.shape[-1], attention_mask.shape[-1])

View File

@ -18,7 +18,7 @@ class FluxTokenizer:
self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory)
self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory)
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
out = {}
out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)
out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids)

View File

@ -4,6 +4,7 @@ import comfy.text_encoders.llama
from transformers import LlamaTokenizerFast
import torch
import os
import numbers
def llama_detect(state_dict, prefix=""):
@ -22,7 +23,7 @@ def llama_detect(state_dict, prefix=""):
class LLAMA3Tokenizer(sd1_clip.SDTokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}, min_length=256):
tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "llama_tokenizer")
super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='llama', tokenizer_class=LlamaTokenizerFast, has_start_token=True, has_end_token=False, pad_to_max_length=False, max_length=99999999, pad_token=128258, end_token=128009, min_length=min_length)
super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='llama', tokenizer_class=LlamaTokenizerFast, has_start_token=True, has_end_token=False, pad_to_max_length=False, max_length=99999999, pad_token=128258, min_length=min_length)
class LLAMAModel(sd1_clip.SDClipModel):
def __init__(self, device="cpu", layer="hidden", layer_idx=-3, dtype=None, attention_mask=True, model_options={}):
@ -38,15 +39,26 @@ class HunyuanVideoTokenizer:
def __init__(self, embedding_directory=None, tokenizer_data={}):
clip_l_tokenizer_class = tokenizer_data.get("clip_l_tokenizer_class", sd1_clip.SDTokenizer)
self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory)
self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n""" # 95 tokens
self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>""" # 95 tokens
self.llama = LLAMA3Tokenizer(embedding_directory=embedding_directory, min_length=1)
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, image_embeds=None, image_interleave=1, **kwargs):
out = {}
out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)
llama_text = "{}{}".format(self.llama_template, text)
out["llama"] = self.llama.tokenize_with_weights(llama_text, return_word_ids)
if llama_template is None:
llama_text = self.llama_template.format(text)
else:
llama_text = llama_template.format(text)
llama_text_tokens = self.llama.tokenize_with_weights(llama_text, return_word_ids)
embed_count = 0
for r in llama_text_tokens:
for i in range(len(r)):
if r[i][0] == 128257:
if image_embeds is not None and embed_count < image_embeds.shape[0]:
r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image", "image_interleave": image_interleave},) + r[i][1:]
embed_count += 1
out["llama"] = llama_text_tokens
return out
def untokenize(self, token_weight_pair):
@ -80,20 +92,51 @@ class HunyuanVideoClipModel(torch.nn.Module):
llama_out, llama_pooled, llama_extra_out = self.llama.encode_token_weights(token_weight_pairs_llama)
template_end = 0
for i, v in enumerate(token_weight_pairs_llama[0]):
if v[0] == 128007: # <|end_header_id|>
template_end = i
extra_template_end = 0
extra_sizes = 0
user_end = 9999999999999
images = []
tok_pairs = token_weight_pairs_llama[0]
for i, v in enumerate(tok_pairs):
elem = v[0]
if not torch.is_tensor(elem):
if isinstance(elem, numbers.Integral):
if elem == 128006:
if tok_pairs[i + 1][0] == 882:
if tok_pairs[i + 2][0] == 128007:
template_end = i + 2
user_end = -1
if elem == 128009 and user_end == -1:
user_end = i + 1
else:
if elem.get("original_type") == "image":
elem_size = elem.get("data").shape[0]
if template_end > 0:
if user_end == -1:
extra_template_end += elem_size - 1
else:
image_start = i + extra_sizes
image_end = i + elem_size + extra_sizes
images.append((image_start, image_end, elem.get("image_interleave", 1)))
extra_sizes += elem_size - 1
if llama_out.shape[1] > (template_end + 2):
if token_weight_pairs_llama[0][template_end + 1][0] == 271:
if tok_pairs[template_end + 1][0] == 271:
template_end += 2
llama_out = llama_out[:, template_end:]
llama_extra_out["attention_mask"] = llama_extra_out["attention_mask"][:, template_end:]
llama_output = llama_out[:, template_end + extra_sizes:user_end + extra_sizes + extra_template_end]
llama_extra_out["attention_mask"] = llama_extra_out["attention_mask"][:, template_end + extra_sizes:user_end + extra_sizes + extra_template_end]
if llama_extra_out["attention_mask"].sum() == torch.numel(llama_extra_out["attention_mask"]):
llama_extra_out.pop("attention_mask") # attention mask is useless if no masked elements
if len(images) > 0:
out = []
for i in images:
out.append(llama_out[:, i[0]: i[1]: i[2]])
llama_output = torch.cat(out + [llama_output], dim=1)
l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l)
return llama_out, l_pooled, llama_extra_out
return llama_output, l_pooled, llama_extra_out
def load_sd(self, sd):
if "text_model.encoder.layers.1.mlp.fc1.weight" in sd:

View File

@ -37,7 +37,7 @@ class HyditTokenizer:
self.hydit_clip = HyditBertTokenizer(embedding_directory=embedding_directory)
self.mt5xl = MT5XLTokenizer(tokenizer_data={"spiece_model": mt5_tokenizer_data}, embedding_directory=embedding_directory)
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
out = {}
out["hydit_clip"] = self.hydit_clip.tokenize_with_weights(text, return_word_ids)
out["mt5xl"] = self.mt5xl.tokenize_with_weights(text, return_word_ids)

View File

@ -241,8 +241,11 @@ class Llama2_(nn.Module):
self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, add=config.rms_norm_add, device=device, dtype=dtype)
# self.lm_head = ops.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)
def forward(self, x, attention_mask=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None):
x = self.embed_tokens(x, out_dtype=dtype)
def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None):
if embeds is not None:
x = embeds
else:
x = self.embed_tokens(x, out_dtype=dtype)
if self.normalize_in:
x *= self.config.hidden_size ** 0.5

View File

@ -19,11 +19,6 @@ class LuminaTokenizer(sd1_clip.SD1Tokenizer):
class Gemma2_2BModel(sd1_clip.SDClipModel):
def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
llama_scaled_fp8 = model_options.get("llama_scaled_fp8", None)
if llama_scaled_fp8 is not None:
model_options = model_options.copy()
model_options["scaled_fp8"] = llama_scaled_fp8
super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma2_2B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
@ -35,10 +30,10 @@ class LuminaModel(sd1_clip.SD1ClipModel):
def te(dtype_llama=None, llama_scaled_fp8=None):
class LuminaTEModel_(LuminaModel):
def __init__(self, device="cpu", dtype=None, model_options={}):
if llama_scaled_fp8 is not None and "llama_scaled_fp8" not in model_options:
if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
model_options = model_options.copy()
model_options["llama_scaled_fp8"] = llama_scaled_fp8
if dtype_llama is not None:
dtype = dtype_llama
model_options["scaled_fp8"] = llama_scaled_fp8
if dtype_llama is not None:
dtype = dtype_llama
super().__init__(device=device, dtype=dtype, model_options=model_options)
return LuminaTEModel_

View File

@ -43,7 +43,7 @@ class SD3Tokenizer:
self.clip_g = sdxl_clip.SDXLClipGTokenizer(embedding_directory=embedding_directory)
self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory)
def tokenize_with_weights(self, text:str, return_word_ids=False):
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
out = {}
out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids)
out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)

View File

@ -239,8 +239,11 @@ class T5(torch.nn.Module):
def set_input_embeddings(self, embeddings):
self.shared = embeddings
def forward(self, input_ids, *args, **kwargs):
x = self.shared(input_ids, out_dtype=kwargs.get("dtype", torch.float32))
def forward(self, input_ids, attention_mask, embeds=None, num_tokens=None, **kwargs):
if input_ids is None:
x = embeds
else:
x = self.shared(input_ids, out_dtype=kwargs.get("dtype", torch.float32))
if self.dtype not in [torch.float32, torch.float16, torch.bfloat16]:
x = torch.nan_to_num(x) #Fix for fp8 T5 base
return self.encoder(x, *args, **kwargs)
return self.encoder(x, attention_mask=attention_mask, **kwargs)

View File

@ -0,0 +1,22 @@
{
"d_ff": 10240,
"d_kv": 64,
"d_model": 4096,
"decoder_start_token_id": 0,
"dropout_rate": 0.1,
"eos_token_id": 1,
"dense_act_fn": "gelu_pytorch_tanh",
"initializer_factor": 1.0,
"is_encoder_decoder": true,
"is_gated_act": true,
"layer_norm_epsilon": 1e-06,
"model_type": "umt5",
"num_decoder_layers": 24,
"num_heads": 64,
"num_layers": 24,
"output_past": true,
"pad_token_id": 0,
"relative_attention_num_buckets": 32,
"tie_word_embeddings": false,
"vocab_size": 256384
}

View File

@ -0,0 +1,37 @@
from comfy import sd1_clip
from .spiece_tokenizer import SPieceTokenizer
import comfy.text_encoders.t5
import os
class UMT5XXlModel(sd1_clip.SDClipModel):
def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, model_options={}):
textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "umt5_config_xxl.json")
super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=True, zero_out_masked=True, model_options=model_options)
class UMT5XXlTokenizer(sd1_clip.SDTokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}):
tokenizer = tokenizer_data.get("spiece_model", None)
super().__init__(tokenizer, pad_with_end=False, embedding_size=4096, embedding_key='umt5xxl', tokenizer_class=SPieceTokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=0)
def state_dict(self):
return {"spiece_model": self.tokenizer.serialize_model()}
class WanT5Tokenizer(sd1_clip.SD1Tokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}):
super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="umt5xxl", tokenizer=UMT5XXlTokenizer)
class WanT5Model(sd1_clip.SD1ClipModel):
def __init__(self, device="cpu", dtype=None, model_options={}, **kwargs):
super().__init__(device=device, dtype=dtype, model_options=model_options, name="umt5xxl", clip_model=UMT5XXlModel, **kwargs)
def te(dtype_t5=None, t5xxl_scaled_fp8=None):
class WanTEModel(WanT5Model):
def __init__(self, device="cpu", dtype=None, model_options={}):
if t5xxl_scaled_fp8 is not None and "scaled_fp8" not in model_options:
model_options = model_options.copy()
model_options["scaled_fp8"] = t5xxl_scaled_fp8
if dtype_t5 is not None:
dtype = dtype_t5
super().__init__(device=device, dtype=dtype, model_options=model_options)
return WanTEModel

View File

@ -46,12 +46,18 @@ if hasattr(torch.serialization, "add_safe_globals"): # TODO: this was added in
else:
logging.info("Warning, you are using an old pytorch version and some ckpt/pt files might be loaded unsafely. Upgrading to 2.4 or above is recommended.")
def load_torch_file(ckpt, safe_load=False, device=None):
def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
if device is None:
device = torch.device("cpu")
metadata = None
if ckpt.lower().endswith(".safetensors") or ckpt.lower().endswith(".sft"):
try:
sd = safetensors.torch.load_file(ckpt, device=device.type)
with safetensors.safe_open(ckpt, framework="pt", device=device.type) as f:
sd = {}
for k in f.keys():
sd[k] = f.get_tensor(k)
if return_metadata:
metadata = f.metadata()
except Exception as e:
if len(e.args) > 0:
message = e.args[0]
@ -77,7 +83,7 @@ def load_torch_file(ckpt, safe_load=False, device=None):
sd = pl_sd
else:
sd = pl_sd
return sd
return (sd, metadata) if return_metadata else sd
def save_torch_file(sd, ckpt, metadata=None):
if metadata is not None:

View File

@ -1,3 +1,5 @@
from __future__ import annotations
import torchaudio
import torch
import comfy.model_management
@ -10,6 +12,7 @@ import random
import hashlib
import node_helpers
from comfy.cli_args import args
from comfy.comfy_types import FileLocator
class EmptyLatentAudio:
def __init__(self):
@ -164,7 +167,7 @@ class SaveAudio:
def save_audio(self, audio, filename_prefix="ComfyUI", prompt=None, extra_pnginfo=None):
filename_prefix += self.prefix_append
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir)
results = list()
results: list[FileLocator] = []
metadata = {}
if not args.disable_metadata:

View File

@ -454,7 +454,7 @@ class SamplerCustom:
return {"required":
{"model": ("MODEL",),
"add_noise": ("BOOLEAN", {"default": True}),
"noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
"noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True}),
"cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
@ -605,10 +605,16 @@ class DisableNoise:
class RandomNoise(DisableNoise):
@classmethod
def INPUT_TYPES(s):
return {"required":{
"noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
}
}
return {
"required": {
"noise_seed": ("INT", {
"default": 0,
"min": 0,
"max": 0xffffffffffffffff,
"control_after_generate": True,
}),
}
}
def get_noise(self, noise_seed):
return (Noise_RandomNoise(noise_seed),)

View File

@ -1,4 +1,5 @@
import nodes
import node_helpers
import torch
import comfy.model_management
@ -38,7 +39,83 @@ class EmptyHunyuanLatentVideo:
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
return ({"samples":latent}, )
PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = (
"<|start_header_id|>system<|end_header_id|>\n\n<image>\nDescribe the video by detailing the following aspects according to the reference image: "
"1. The main content and theme of the video."
"2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects."
"3. Actions, events, behaviors temporal relationships, physical movement changes of the objects."
"4. background environment, light, style and atmosphere."
"5. camera angles, movements, and transitions used in the video:<|eot_id|>\n\n"
"<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>"
"<|start_header_id|>assistant<|end_header_id|>\n\n"
)
class TextEncodeHunyuanVideo_ImageToVideo:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"clip_vision_output": ("CLIP_VISION_OUTPUT", ),
"prompt": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"image_interleave": ("INT", {"default": 2, "min": 1, "max": 512, "tooltip": "How much the image influences things vs the text prompt. Higher number means more influence from the text prompt."}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
CATEGORY = "advanced/conditioning"
def encode(self, clip, clip_vision_output, prompt, image_interleave):
tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave)
return (clip.encode_from_tokens_scheduled(tokens), )
class HunyuanImageToVideo:
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"vae": ("VAE", ),
"width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"length": ("INT", {"default": 53, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"guidance_type": (["v1 (concat)", "v2 (replace)"], )
},
"optional": {"start_image": ("IMAGE", ),
}}
RETURN_TYPES = ("CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "latent")
FUNCTION = "encode"
CATEGORY = "conditioning/video_models"
def encode(self, positive, vae, width, height, length, batch_size, guidance_type, start_image=None):
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
out_latent = {}
if start_image is not None:
start_image = comfy.utils.common_upscale(start_image[:length, :, :, :3].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
concat_latent_image = vae.encode(start_image)
mask = torch.ones((1, 1, latent.shape[2], concat_latent_image.shape[-2], concat_latent_image.shape[-1]), device=start_image.device, dtype=start_image.dtype)
mask[:, :, :((start_image.shape[0] - 1) // 4) + 1] = 0.0
if guidance_type == "v1 (concat)":
cond = {"concat_latent_image": concat_latent_image, "concat_mask": mask}
else:
cond = {'guiding_frame_index': 0}
latent[:, :, :concat_latent_image.shape[2]] = concat_latent_image
out_latent["noise_mask"] = mask
positive = node_helpers.conditioning_set_values(positive, cond)
out_latent["samples"] = latent
return (positive, out_latent)
NODE_CLASS_MAPPINGS = {
"CLIPTextEncodeHunyuanDiT": CLIPTextEncodeHunyuanDiT,
"TextEncodeHunyuanVideo_ImageToVideo": TextEncodeHunyuanVideo_ImageToVideo,
"EmptyHunyuanLatentVideo": EmptyHunyuanLatentVideo,
"HunyuanImageToVideo": HunyuanImageToVideo,
}

View File

@ -1,3 +1,5 @@
from __future__ import annotations
import nodes
import folder_paths
from comfy.cli_args import args
@ -9,6 +11,8 @@ import numpy as np
import json
import os
from comfy.comfy_types import FileLocator
MAX_RESOLUTION = nodes.MAX_RESOLUTION
class ImageCrop:
@ -99,7 +103,7 @@ class SaveAnimatedWEBP:
method = self.methods.get(method)
filename_prefix += self.prefix_append
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0])
results = list()
results: list[FileLocator] = []
pil_images = []
for image in images:
i = 255. * image.cpu().numpy()

View File

@ -19,8 +19,6 @@ class Load3D():
"image": ("LOAD_3D", {}),
"width": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
"height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
"material": (["original", "normal", "wireframe", "depth"],),
"up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],),
}}
RETURN_TYPES = ("IMAGE", "MASK", "STRING")
@ -55,8 +53,6 @@ class Load3DAnimation():
"image": ("LOAD_3D_ANIMATION", {}),
"width": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
"height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
"material": (["original", "normal", "wireframe", "depth"],),
"up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],),
}}
RETURN_TYPES = ("IMAGE", "MASK", "STRING")
@ -82,8 +78,6 @@ class Preview3D():
def INPUT_TYPES(s):
return {"required": {
"model_file": ("STRING", {"default": "", "multiline": False}),
"material": (["original", "normal", "wireframe", "depth"],),
"up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],),
}}
OUTPUT_NODE = True
@ -102,8 +96,6 @@ class Preview3DAnimation():
def INPUT_TYPES(s):
return {"required": {
"model_file": ("STRING", {"default": "", "multiline": False}),
"material": (["original", "normal", "wireframe", "depth"],),
"up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],),
}}
OUTPUT_NODE = True

View File

@ -1,9 +1,14 @@
import io
import nodes
import node_helpers
import torch
import comfy.model_management
import comfy.model_sampling
import comfy.utils
import math
import numpy as np
import av
from comfy.ldm.lightricks.symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords
class EmptyLTXVLatentVideo:
@classmethod
@ -33,7 +38,6 @@ class LTXVImgToVideo:
"height": ("INT", {"default": 512, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
"length": ("INT", {"default": 97, "min": 9, "max": nodes.MAX_RESOLUTION, "step": 8}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
"image_noise_scale": ("FLOAT", {"default": 0.15, "min": 0, "max": 1.0, "step": 0.01, "tooltip": "Amount of noise to apply on conditioning image latent."})
}}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
@ -42,16 +46,219 @@ class LTXVImgToVideo:
CATEGORY = "conditioning/video_models"
FUNCTION = "generate"
def generate(self, positive, negative, image, vae, width, height, length, batch_size, image_noise_scale):
def generate(self, positive, negative, image, vae, width, height, length, batch_size):
pixels = comfy.utils.common_upscale(image.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
encode_pixels = pixels[:, :, :, :3]
t = vae.encode(encode_pixels)
positive = node_helpers.conditioning_set_values(positive, {"guiding_latent": t, "guiding_latent_noise_scale": image_noise_scale})
negative = node_helpers.conditioning_set_values(negative, {"guiding_latent": t, "guiding_latent_noise_scale": image_noise_scale})
latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
latent[:, :, :t.shape[2]] = t
return (positive, negative, {"samples": latent}, )
conditioning_latent_frames_mask = torch.ones(
(batch_size, 1, latent.shape[2], 1, 1),
dtype=torch.float32,
device=latent.device,
)
conditioning_latent_frames_mask[:, :, :t.shape[2]] = 0
return (positive, negative, {"samples": latent, "noise_mask": conditioning_latent_frames_mask}, )
def conditioning_get_any_value(conditioning, key, default=None):
for t in conditioning:
if key in t[1]:
return t[1][key]
return default
def get_noise_mask(latent):
noise_mask = latent.get("noise_mask", None)
latent_image = latent["samples"]
if noise_mask is None:
batch_size, _, latent_length, _, _ = latent_image.shape
noise_mask = torch.ones(
(batch_size, 1, latent_length, 1, 1),
dtype=torch.float32,
device=latent_image.device,
)
else:
noise_mask = noise_mask.clone()
return noise_mask
def get_keyframe_idxs(cond):
keyframe_idxs = conditioning_get_any_value(cond, "keyframe_idxs", None)
if keyframe_idxs is None:
return None, 0
num_keyframes = torch.unique(keyframe_idxs[:, 0]).shape[0]
return keyframe_idxs, num_keyframes
class LTXVAddGuide:
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"vae": ("VAE",),
"latent": ("LATENT",),
"image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames."
"If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}),
"frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999,
"tooltip": "Frame index to start the conditioning at. For single-frame images or "
"videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ "
"frames, frame_idx must be divisible by 8, otherwise it will be rounded down to "
"the nearest multiple of 8. Negative values are counted from the end of the video."}),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
}
}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
CATEGORY = "conditioning/video_models"
FUNCTION = "generate"
def __init__(self):
self._num_prefix_frames = 2
self._patchifier = SymmetricPatchifier(1)
def encode(self, vae, latent_width, latent_height, images, scale_factors):
time_scale_factor, width_scale_factor, height_scale_factor = scale_factors
images = images[:(images.shape[0] - 1) // time_scale_factor * time_scale_factor + 1]
pixels = comfy.utils.common_upscale(images.movedim(-1, 1), latent_width * width_scale_factor, latent_height * height_scale_factor, "bilinear", crop="disabled").movedim(1, -1)
encode_pixels = pixels[:, :, :, :3]
t = vae.encode(encode_pixels)
return encode_pixels, t
def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors):
time_scale_factor, _, _ = scale_factors
_, num_keyframes = get_keyframe_idxs(cond)
latent_count = latent_length - num_keyframes
frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * time_scale_factor + 1 + frame_idx, 0)
if guide_length > 1:
frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8
latent_idx = (frame_idx + time_scale_factor - 1) // time_scale_factor
return frame_idx, latent_idx
def add_keyframe_index(self, cond, frame_idx, guiding_latent, scale_factors):
keyframe_idxs, _ = get_keyframe_idxs(cond)
_, latent_coords = self._patchifier.patchify(guiding_latent)
pixel_coords = latent_to_pixel_coords(latent_coords, scale_factors, True)
pixel_coords[:, 0] += frame_idx
if keyframe_idxs is None:
keyframe_idxs = pixel_coords
else:
keyframe_idxs = torch.cat([keyframe_idxs, pixel_coords], dim=2)
return node_helpers.conditioning_set_values(cond, {"keyframe_idxs": keyframe_idxs})
def append_keyframe(self, positive, negative, frame_idx, latent_image, noise_mask, guiding_latent, strength, scale_factors):
positive = self.add_keyframe_index(positive, frame_idx, guiding_latent, scale_factors)
negative = self.add_keyframe_index(negative, frame_idx, guiding_latent, scale_factors)
mask = torch.full(
(noise_mask.shape[0], 1, guiding_latent.shape[2], 1, 1),
1.0 - strength,
dtype=noise_mask.dtype,
device=noise_mask.device,
)
latent_image = torch.cat([latent_image, guiding_latent], dim=2)
noise_mask = torch.cat([noise_mask, mask], dim=2)
return positive, negative, latent_image, noise_mask
def replace_latent_frames(self, latent_image, noise_mask, guiding_latent, latent_idx, strength):
cond_length = guiding_latent.shape[2]
assert latent_image.shape[2] >= latent_idx + cond_length, "Conditioning frames exceed the length of the latent sequence."
mask = torch.full(
(noise_mask.shape[0], 1, cond_length, 1, 1),
1.0 - strength,
dtype=noise_mask.dtype,
device=noise_mask.device,
)
latent_image = latent_image.clone()
noise_mask = noise_mask.clone()
latent_image[:, :, latent_idx : latent_idx + cond_length] = guiding_latent
noise_mask[:, :, latent_idx : latent_idx + cond_length] = mask
return latent_image, noise_mask
def generate(self, positive, negative, vae, latent, image, frame_idx, strength):
scale_factors = vae.downscale_index_formula
latent_image = latent["samples"]
noise_mask = get_noise_mask(latent)
_, _, latent_length, latent_height, latent_width = latent_image.shape
image, t = self.encode(vae, latent_width, latent_height, image, scale_factors)
frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors)
assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
positive, negative, latent_image, noise_mask = self.append_keyframe(
positive,
negative,
frame_idx,
latent_image,
noise_mask,
t[:, :, :num_prefix_frames],
strength,
scale_factors,
)
latent_idx += num_prefix_frames
t = t[:, :, num_prefix_frames:]
if t.shape[2] == 0:
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
latent_image, noise_mask = self.replace_latent_frames(
latent_image,
noise_mask,
t,
latent_idx,
strength,
)
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
class LTXVCropGuides:
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"latent": ("LATENT",),
}
}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
CATEGORY = "conditioning/video_models"
FUNCTION = "crop"
def __init__(self):
self._patchifier = SymmetricPatchifier(1)
def crop(self, positive, negative, latent):
latent_image = latent["samples"].clone()
noise_mask = get_noise_mask(latent)
_, num_keyframes = get_keyframe_idxs(positive)
if num_keyframes == 0:
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
latent_image = latent_image[:, :, :-num_keyframes]
noise_mask = noise_mask[:, :, :-num_keyframes]
positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None})
negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None})
return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
class LTXVConditioning:
@ -174,6 +381,77 @@ class LTXVScheduler:
return (sigmas,)
def encode_single_frame(output_file, image_array: np.ndarray, crf):
container = av.open(output_file, "w", format="mp4")
try:
stream = container.add_stream(
"h264", rate=1, options={"crf": str(crf), "preset": "veryfast"}
)
stream.height = image_array.shape[0]
stream.width = image_array.shape[1]
av_frame = av.VideoFrame.from_ndarray(image_array, format="rgb24").reformat(
format="yuv420p"
)
container.mux(stream.encode(av_frame))
container.mux(stream.encode())
finally:
container.close()
def decode_single_frame(video_file):
container = av.open(video_file)
try:
stream = next(s for s in container.streams if s.type == "video")
frame = next(container.decode(stream))
finally:
container.close()
return frame.to_ndarray(format="rgb24")
def preprocess(image: torch.Tensor, crf=29):
if crf == 0:
return image
image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
with io.BytesIO() as output_file:
encode_single_frame(output_file, image_array, crf)
video_bytes = output_file.getvalue()
with io.BytesIO(video_bytes) as video_file:
image_array = decode_single_frame(video_file)
tensor = torch.tensor(image_array, dtype=image.dtype, device=image.device) / 255.0
return tensor
class LTXVPreprocess:
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": ("IMAGE",),
"img_compression": (
"INT",
{
"default": 35,
"min": 0,
"max": 100,
"tooltip": "Amount of compression to apply on image.",
},
),
}
}
FUNCTION = "preprocess"
RETURN_TYPES = ("IMAGE",)
RETURN_NAMES = ("output_image",)
CATEGORY = "image"
def preprocess(self, image, img_compression):
if img_compression > 0:
output_images = []
for i in range(image.shape[0]):
output_images.append(preprocess(image[i], img_compression))
return (torch.stack(output_images),)
NODE_CLASS_MAPPINGS = {
"EmptyLTXVLatentVideo": EmptyLTXVLatentVideo,
@ -181,4 +459,7 @@ NODE_CLASS_MAPPINGS = {
"ModelSamplingLTXV": ModelSamplingLTXV,
"LTXVConditioning": LTXVConditioning,
"LTXVScheduler": LTXVScheduler,
"LTXVAddGuide": LTXVAddGuide,
"LTXVPreprocess": LTXVPreprocess,
"LTXVCropGuides": LTXVCropGuides,
}

View File

@ -1,9 +1,12 @@
from __future__ import annotations
import os
import av
import torch
import folder_paths
import json
from fractions import Fraction
from comfy.comfy_types import FileLocator
class SaveWEBM:
@ -59,9 +62,10 @@ class SaveWEBM:
frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :3] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgb24")
for packet in stream.encode(frame):
container.mux(packet)
container.mux(stream.encode())
container.close()
results = [{
results: list[FileLocator] = [{
"filename": file,
"subfolder": subfolder,
"type": self.type

View File

@ -4,6 +4,7 @@ import comfy.utils
import comfy.sd
import folder_paths
import comfy_extras.nodes_model_merging
import node_helpers
class ImageOnlyCheckpointLoader:
@ -121,12 +122,38 @@ class ImageOnlyCheckpointSave(comfy_extras.nodes_model_merging.CheckpointSave):
comfy_extras.nodes_model_merging.save_checkpoint(model, clip_vision=clip_vision, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo)
return {}
class ConditioningSetAreaPercentageVideo:
@classmethod
def INPUT_TYPES(s):
return {"required": {"conditioning": ("CONDITIONING", ),
"width": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
"height": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
"temporal": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
"x": ("FLOAT", {"default": 0, "min": 0, "max": 1.0, "step": 0.01}),
"y": ("FLOAT", {"default": 0, "min": 0, "max": 1.0, "step": 0.01}),
"z": ("FLOAT", {"default": 0, "min": 0, "max": 1.0, "step": 0.01}),
"strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
CATEGORY = "conditioning"
def append(self, conditioning, width, height, temporal, x, y, z, strength):
c = node_helpers.conditioning_set_values(conditioning, {"area": ("percentage", temporal, height, width, z, y, x),
"strength": strength,
"set_area_to_bounds": False})
return (c, )
NODE_CLASS_MAPPINGS = {
"ImageOnlyCheckpointLoader": ImageOnlyCheckpointLoader,
"SVD_img2vid_Conditioning": SVD_img2vid_Conditioning,
"VideoLinearCFGGuidance": VideoLinearCFGGuidance,
"VideoTriangleCFGGuidance": VideoTriangleCFGGuidance,
"ImageOnlyCheckpointSave": ImageOnlyCheckpointSave,
"ConditioningSetAreaPercentageVideo": ConditioningSetAreaPercentageVideo,
}
NODE_DISPLAY_NAME_MAPPINGS = {

54
comfy_extras/nodes_wan.py Normal file
View File

@ -0,0 +1,54 @@
import nodes
import node_helpers
import torch
import comfy.model_management
import comfy.utils
class WanImageToVideo:
@classmethod
def INPUT_TYPES(s):
return {"required": {"positive": ("CONDITIONING", ),
"negative": ("CONDITIONING", ),
"vae": ("VAE", ),
"width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
"length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
},
"optional": {"clip_vision_output": ("CLIP_VISION_OUTPUT", ),
"start_image": ("IMAGE", ),
}}
RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
RETURN_NAMES = ("positive", "negative", "latent")
FUNCTION = "encode"
CATEGORY = "conditioning/video_models"
def encode(self, positive, negative, vae, width, height, length, batch_size, start_image=None, clip_vision_output=None):
latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
if start_image is not None:
start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
image = torch.ones((length, height, width, start_image.shape[-1]), device=start_image.device, dtype=start_image.dtype) * 0.5
image[:start_image.shape[0]] = start_image
concat_latent_image = vae.encode(image[:, :, :, :3])
mask = torch.ones((1, 1, latent.shape[2], concat_latent_image.shape[-2], concat_latent_image.shape[-1]), device=start_image.device, dtype=start_image.dtype)
mask[:, :, :((start_image.shape[0] - 1) // 4) + 1] = 0.0
positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": concat_latent_image, "concat_mask": mask})
negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": concat_latent_image, "concat_mask": mask})
if clip_vision_output is not None:
positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
out_latent = {}
out_latent["samples"] = latent
return (positive, negative, out_latent)
NODE_CLASS_MAPPINGS = {
"WanImageToVideo": WanImageToVideo,
}

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.3.15"
__version__ = "0.3.26"

View File

@ -634,6 +634,13 @@ def validate_inputs(prompt, item, validated):
continue
else:
try:
# Unwraps values wrapped in __value__ key. This is used to pass
# list widget value to execution, as by default list value is
# reserved to represent the connection between nodes.
if isinstance(val, dict) and "__value__" in val:
val = val["__value__"]
inputs[x] = val
if type_input == "INT":
val = int(val)
inputs[x] = val

View File

@ -139,6 +139,7 @@ from server import BinaryEventTypes
import nodes
import comfy.model_management
import comfyui_version
import app.logger
def cuda_malloc_warning():
@ -295,9 +296,12 @@ def start_comfyui(asyncio_loop=None):
if __name__ == "__main__":
# Running directly, just start ComfyUI.
logging.info("ComfyUI version: {}".format(comfyui_version.__version__))
event_loop, _, start_all_func = start_comfyui()
try:
event_loop.run_until_complete(start_all_func())
x = start_all_func()
app.logger.print_startup_warnings()
event_loop.run_until_complete(x)
except KeyboardInterrupt:
logging.info("\nStopped server")

View File

@ -25,7 +25,7 @@ import comfy.sample
import comfy.sd
import comfy.utils
import comfy.controlnet
from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict, FileLocator
import comfy.clip_vision
@ -479,7 +479,7 @@ class SaveLatent:
file = f"{filename}_{counter:05}_.latent"
results = list()
results: list[FileLocator] = []
results.append({
"filename": file,
"subfolder": subfolder,
@ -489,7 +489,7 @@ class SaveLatent:
file = os.path.join(full_output_folder, file)
output = {}
output["latent_tensor"] = samples["samples"]
output["latent_tensor"] = samples["samples"].contiguous()
output["latent_format_version_0"] = torch.tensor([])
comfy.utils.save_torch_file(output, file, metadata=metadata)
@ -914,7 +914,7 @@ class CLIPLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2"], ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan"], ),
},
"optional": {
"device": (["default", "cpu"], {"advanced": True}),
@ -924,7 +924,7 @@ class CLIPLoader:
CATEGORY = "advanced/loaders"
DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 / clip-g / clip-l\nstable_audio: t5\nmochi: t5\ncosmos: old t5 xxl\nlumina2: gemma 2 2B"
DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl"
def load_clip(self, clip_name, type="stable_diffusion", device="default"):
if type == "stable_cascade":
@ -943,6 +943,8 @@ class CLIPLoader:
clip_type = comfy.sd.CLIPType.COSMOS
elif type == "lumina2":
clip_type = comfy.sd.CLIPType.LUMINA2
elif type == "wan":
clip_type = comfy.sd.CLIPType.WAN
else:
clip_type = comfy.sd.CLIPType.STABLE_DIFFUSION
@ -1517,7 +1519,7 @@ class KSampler:
return {
"required": {
"model": ("MODEL", {"tooltip": "The model used for denoising the input latent."}),
"seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "tooltip": "The random seed used for creating the noise."}),
"seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True, "tooltip": "The random seed used for creating the noise."}),
"steps": ("INT", {"default": 20, "min": 1, "max": 10000, "tooltip": "The number of steps used in the denoising process."}),
"cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01, "tooltip": "The Classifier-Free Guidance scale balances creativity and adherence to the prompt. Higher values result in images more closely matching the prompt however too high values will negatively impact quality."}),
"sampler_name": (comfy.samplers.KSampler.SAMPLERS, {"tooltip": "The algorithm used when sampling, this can affect the quality, speed, and style of the generated output."}),
@ -1545,7 +1547,7 @@ class KSamplerAdvanced:
return {"required":
{"model": ("MODEL",),
"add_noise": (["enable", "disable"], ),
"noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
"noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True}),
"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
"cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
"sampler_name": (comfy.samplers.KSampler.SAMPLERS, ),
@ -1783,14 +1785,7 @@ class LoadImageOutput(LoadImage):
DESCRIPTION = "Load an image from the output folder. When the refresh button is clicked, the node will update the image list and automatically select the first image, allowing for easy iteration."
EXPERIMENTAL = True
FUNCTION = "load_image_output"
def load_image_output(self, image):
return self.load_image(f"{image} [output]")
@classmethod
def VALIDATE_INPUTS(s, image):
return True
FUNCTION = "load_image"
class ImageScale:
@ -2267,6 +2262,7 @@ def init_builtin_extra_nodes():
"nodes_cosmos.py",
"nodes_video.py",
"nodes_lumina2.py",
"nodes_wan.py",
]
import_failed = []

View File

@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.3.15"
version = "0.3.26"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.9"

View File

@ -1,3 +1,4 @@
comfyui-frontend-package==1.12.11
torch
torchsde
torchvision

View File

@ -70,7 +70,7 @@ def test_get_release_invalid_version(mock_provider):
def test_init_frontend_default():
version_string = DEFAULT_VERSION_STRING
frontend_path = FrontendManager.init_frontend(version_string)
assert frontend_path == FrontendManager.DEFAULT_FRONTEND_PATH
assert frontend_path == FrontendManager.default_frontend_path()
def test_init_frontend_invalid_version():
@ -84,24 +84,29 @@ def test_init_frontend_invalid_provider():
with pytest.raises(HTTPError):
FrontendManager.init_frontend_unsafe(version_string)
@pytest.fixture
def mock_os_functions():
with patch('app.frontend_management.os.makedirs') as mock_makedirs, \
patch('app.frontend_management.os.listdir') as mock_listdir, \
patch('app.frontend_management.os.rmdir') as mock_rmdir:
with (
patch("app.frontend_management.os.makedirs") as mock_makedirs,
patch("app.frontend_management.os.listdir") as mock_listdir,
patch("app.frontend_management.os.rmdir") as mock_rmdir,
):
mock_listdir.return_value = [] # Simulate empty directory
yield mock_makedirs, mock_listdir, mock_rmdir
@pytest.fixture
def mock_download():
with patch('app.frontend_management.download_release_asset_zip') as mock:
with patch("app.frontend_management.download_release_asset_zip") as mock:
mock.side_effect = Exception("Download failed") # Simulate download failure
yield mock
def test_finally_block(mock_os_functions, mock_download, mock_provider):
# Arrange
mock_makedirs, mock_listdir, mock_rmdir = mock_os_functions
version_string = 'test-owner/test-repo@1.0.0'
version_string = "test-owner/test-repo@1.0.0"
# Act & Assert
with pytest.raises(Exception):
@ -128,3 +133,42 @@ def test_parse_version_string_invalid():
version_string = "invalid"
with pytest.raises(argparse.ArgumentTypeError):
FrontendManager.parse_version_string(version_string)
def test_init_frontend_default_with_mocks():
# Arrange
version_string = DEFAULT_VERSION_STRING
# Act
with (
patch("app.frontend_management.check_frontend_version") as mock_check,
patch.object(
FrontendManager, "default_frontend_path", return_value="/mocked/path"
),
):
frontend_path = FrontendManager.init_frontend(version_string)
# Assert
assert frontend_path == "/mocked/path"
mock_check.assert_called_once()
def test_init_frontend_fallback_on_error():
# Arrange
version_string = "test-owner/test-repo@1.0.0"
# Act
with (
patch.object(
FrontendManager, "init_frontend_unsafe", side_effect=Exception("Test error")
),
patch("app.frontend_management.check_frontend_version") as mock_check,
patch.object(
FrontendManager, "default_frontend_path", return_value="/default/path"
),
):
frontend_path = FrontendManager.init_frontend(version_string)
# Assert
assert frontend_path == "/default/path"
mock_check.assert_called_once()

View File

@ -1,51 +0,0 @@
import { d as defineComponent, T as ref, p as onMounted, b8 as isElectron, V as nextTick, b9 as electronAPI, o as openBlock, f as createElementBlock, i as withDirectives, v as vShow, j as unref, ba as isNativeWindow, m as createBaseVNode, A as renderSlot, aj as normalizeClass } from "./index-Bv0b06LE.js";
const _hoisted_1 = { class: "flex-grow w-full flex items-center justify-center overflow-auto" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "BaseViewTemplate",
props: {
dark: { type: Boolean, default: false }
},
setup(__props) {
const props = __props;
const darkTheme = {
color: "rgba(0, 0, 0, 0)",
symbolColor: "#d4d4d4"
};
const lightTheme = {
color: "rgba(0, 0, 0, 0)",
symbolColor: "#171717"
};
const topMenuRef = ref(null);
onMounted(async () => {
if (isElectron()) {
await nextTick();
electronAPI().changeTheme({
...props.dark ? darkTheme : lightTheme,
height: topMenuRef.value.getBoundingClientRect().height
});
}
});
return (_ctx, _cache) => {
return openBlock(), createElementBlock("div", {
class: normalizeClass(["font-sans w-screen h-screen flex flex-col", [
props.dark ? "text-neutral-300 bg-neutral-900 dark-theme" : "text-neutral-900 bg-neutral-300"
]])
}, [
withDirectives(createBaseVNode("div", {
ref_key: "topMenuRef",
ref: topMenuRef,
class: "app-drag w-full h-[var(--comfy-topbar-height)]"
}, null, 512), [
[vShow, unref(isNativeWindow)()]
]),
createBaseVNode("div", _hoisted_1, [
renderSlot(_ctx.$slots, "default")
])
], 2);
};
}
});
export {
_sfc_main as _
};
//# sourceMappingURL=BaseViewTemplate-BTbuZf5t.js.map

1
web/assets/CREDIT.txt generated vendored
View File

@ -1 +0,0 @@
Thanks to OpenArt (https://openart.ai) for providing the sorted-custom-node-map data, captured in September 2024.

View File

@ -1,19 +0,0 @@
import { d as defineComponent, o as openBlock, y as createBlock, z as withCtx, k as createVNode, j as unref, bE as script } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "DesktopStartView",
setup(__props) {
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, { dark: "" }, {
default: withCtx(() => [
createVNode(unref(script), { class: "m-8 w-48 h-48" })
]),
_: 1
});
};
}
});
export {
_sfc_main as default
};
//# sourceMappingURL=DesktopStartView-D9r53Bue.js.map

View File

@ -1,58 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, T as ref, d8 as onUnmounted, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, j as unref, bg as t, k as createVNode, bE as script, l as script$1, b9 as electronAPI, _ as _export_sfc } from "./index-Bv0b06LE.js";
import { s as script$2 } from "./index-A_bXPJCN.js";
import { _ as _sfc_main$1 } from "./TerminalOutputDrawer-CKr7Br7O.js";
import { _ as _sfc_main$2 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1 = { class: "h-screen w-screen grid items-center justify-around overflow-y-auto" };
const _hoisted_2 = { class: "relative m-8 text-center" };
const _hoisted_3 = { class: "download-bg pi-download text-4xl font-bold" };
const _hoisted_4 = { class: "m-8" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "DesktopUpdateView",
setup(__props) {
const electron = electronAPI();
const terminalVisible = ref(false);
const toggleConsoleDrawer = /* @__PURE__ */ __name(() => {
terminalVisible.value = !terminalVisible.value;
}, "toggleConsoleDrawer");
onUnmounted(() => electron.Validation.dispose());
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$2, { dark: "" }, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
createBaseVNode("div", _hoisted_2, [
createBaseVNode("h1", _hoisted_3, toDisplayString(unref(t)("desktopUpdate.title")), 1),
createBaseVNode("div", _hoisted_4, [
createBaseVNode("span", null, toDisplayString(unref(t)("desktopUpdate.description")), 1)
]),
createVNode(unref(script), { class: "m-8 w-48 h-48" }),
createVNode(unref(script$1), {
style: { "transform": "translateX(-50%)" },
class: "fixed bottom-0 left-1/2 my-8",
label: unref(t)("maintenance.consoleLogs"),
icon: "pi pi-desktop",
"icon-pos": "left",
severity: "secondary",
onClick: toggleConsoleDrawer
}, null, 8, ["label"]),
createVNode(_sfc_main$1, {
modelValue: terminalVisible.value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => terminalVisible.value = $event),
header: unref(t)("g.terminal"),
"default-message": unref(t)("desktopUpdate.terminalDefaultMessage")
}, null, 8, ["modelValue", "header", "default-message"])
])
]),
createVNode(unref(script$2))
]),
_: 1
});
};
}
});
const DesktopUpdateView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-8d77828d"]]);
export {
DesktopUpdateView as default
};
//# sourceMappingURL=DesktopUpdateView-C-R0415K.js.map

View File

@ -1,20 +0,0 @@
.download-bg[data-v-8d77828d]::before {
position: absolute;
margin: 0px;
color: var(--p-text-muted-color);
font-family: 'primeicons';
top: -2rem;
right: 2rem;
speak: none;
font-style: normal;
font-weight: normal;
font-variant: normal;
text-transform: none;
line-height: 1;
display: inline-block;
-webkit-font-smoothing: antialiased;
opacity: 0.02;
font-size: min(14rem, 90vw);
z-index: 0
}

View File

@ -1,58 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, k as createVNode, j as unref, l as script, bi as useRouter } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1 = { class: "max-w-screen-sm flex flex-col gap-8 p-8 bg-[url('/assets/images/Git-Logo-White.svg')] bg-no-repeat bg-right-top bg-origin-padding" };
const _hoisted_2 = { class: "mt-24 text-4xl font-bold text-red-500" };
const _hoisted_3 = { class: "space-y-4" };
const _hoisted_4 = { class: "text-xl" };
const _hoisted_5 = { class: "text-xl" };
const _hoisted_6 = { class: "text-m" };
const _hoisted_7 = { class: "flex gap-4 flex-row-reverse" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "DownloadGitView",
setup(__props) {
const openGitDownloads = /* @__PURE__ */ __name(() => {
window.open("https://git-scm.com/downloads/", "_blank");
}, "openGitDownloads");
const skipGit = /* @__PURE__ */ __name(() => {
console.warn("pushing");
const router = useRouter();
router.push("install");
}, "skipGit");
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, null, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
createBaseVNode("h1", _hoisted_2, toDisplayString(_ctx.$t("downloadGit.title")), 1),
createBaseVNode("div", _hoisted_3, [
createBaseVNode("p", _hoisted_4, toDisplayString(_ctx.$t("downloadGit.message")), 1),
createBaseVNode("p", _hoisted_5, toDisplayString(_ctx.$t("downloadGit.instructions")), 1),
createBaseVNode("p", _hoisted_6, toDisplayString(_ctx.$t("downloadGit.warning")), 1)
]),
createBaseVNode("div", _hoisted_7, [
createVNode(unref(script), {
label: _ctx.$t("downloadGit.gitWebsite"),
icon: "pi pi-external-link",
"icon-pos": "right",
onClick: openGitDownloads,
severity: "primary"
}, null, 8, ["label"]),
createVNode(unref(script), {
label: _ctx.$t("downloadGit.skip"),
icon: "pi pi-exclamation-triangle",
onClick: skipGit,
severity: "secondary"
}, null, 8, ["label"])
])
])
]),
_: 1
});
};
}
});
export {
_sfc_main as default
};
//# sourceMappingURL=DownloadGitView-PWqK5ke4.js.map

182
web/assets/ExtensionPanel-Ba57xrmg.js generated vendored
View File

@ -1,182 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, T as ref, dx as FilterMatchMode, dC as useExtensionStore, a as useSettingStore, p as onMounted, c as computed, o as openBlock, y as createBlock, z as withCtx, k as createVNode, dy as SearchBox, j as unref, bn as script, m as createBaseVNode, f as createElementBlock, D as renderList, E as toDisplayString, a8 as createTextVNode, F as Fragment, l as script$1, B as createCommentVNode, a5 as script$3, ay as script$4, br as script$5, dz as _sfc_main$1 } from "./index-Bv0b06LE.js";
import { g as script$2, h as script$6 } from "./index-CgMyWf7n.js";
import "./index-Dzu9WL4p.js";
const _hoisted_1 = { class: "flex justify-end" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "ExtensionPanel",
setup(__props) {
const filters = ref({
global: { value: "", matchMode: FilterMatchMode.CONTAINS }
});
const extensionStore = useExtensionStore();
const settingStore = useSettingStore();
const editingEnabledExtensions = ref({});
onMounted(() => {
extensionStore.extensions.forEach((ext) => {
editingEnabledExtensions.value[ext.name] = extensionStore.isExtensionEnabled(ext.name);
});
});
const changedExtensions = computed(() => {
return extensionStore.extensions.filter(
(ext) => editingEnabledExtensions.value[ext.name] !== extensionStore.isExtensionEnabled(ext.name)
);
});
const hasChanges = computed(() => {
return changedExtensions.value.length > 0;
});
const updateExtensionStatus = /* @__PURE__ */ __name(() => {
const editingDisabledExtensionNames = Object.entries(
editingEnabledExtensions.value
).filter(([_, enabled]) => !enabled).map(([name]) => name);
settingStore.set("Comfy.Extension.Disabled", [
...extensionStore.inactiveDisabledExtensionNames,
...editingDisabledExtensionNames
]);
}, "updateExtensionStatus");
const enableAllExtensions = /* @__PURE__ */ __name(() => {
extensionStore.extensions.forEach((ext) => {
if (extensionStore.isExtensionReadOnly(ext.name)) return;
editingEnabledExtensions.value[ext.name] = true;
});
updateExtensionStatus();
}, "enableAllExtensions");
const disableAllExtensions = /* @__PURE__ */ __name(() => {
extensionStore.extensions.forEach((ext) => {
if (extensionStore.isExtensionReadOnly(ext.name)) return;
editingEnabledExtensions.value[ext.name] = false;
});
updateExtensionStatus();
}, "disableAllExtensions");
const disableThirdPartyExtensions = /* @__PURE__ */ __name(() => {
extensionStore.extensions.forEach((ext) => {
if (extensionStore.isCoreExtension(ext.name)) return;
editingEnabledExtensions.value[ext.name] = false;
});
updateExtensionStatus();
}, "disableThirdPartyExtensions");
const applyChanges = /* @__PURE__ */ __name(() => {
window.location.reload();
}, "applyChanges");
const menu = ref();
const contextMenuItems = [
{
label: "Enable All",
icon: "pi pi-check",
command: enableAllExtensions
},
{
label: "Disable All",
icon: "pi pi-times",
command: disableAllExtensions
},
{
label: "Disable 3rd Party",
icon: "pi pi-times",
command: disableThirdPartyExtensions,
disabled: !extensionStore.hasThirdPartyExtensions
}
];
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, {
value: "Extension",
class: "extension-panel"
}, {
header: withCtx(() => [
createVNode(SearchBox, {
modelValue: filters.value["global"].value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => filters.value["global"].value = $event),
placeholder: _ctx.$t("g.searchExtensions") + "..."
}, null, 8, ["modelValue", "placeholder"]),
hasChanges.value ? (openBlock(), createBlock(unref(script), {
key: 0,
severity: "info",
"pt:text": "w-full",
class: "max-h-96 overflow-y-auto"
}, {
default: withCtx(() => [
createBaseVNode("ul", null, [
(openBlock(true), createElementBlock(Fragment, null, renderList(changedExtensions.value, (ext) => {
return openBlock(), createElementBlock("li", {
key: ext.name
}, [
createBaseVNode("span", null, toDisplayString(unref(extensionStore).isExtensionEnabled(ext.name) ? "[-]" : "[+]"), 1),
createTextVNode(" " + toDisplayString(ext.name), 1)
]);
}), 128))
]),
createBaseVNode("div", _hoisted_1, [
createVNode(unref(script$1), {
label: _ctx.$t("g.reloadToApplyChanges"),
onClick: applyChanges,
outlined: "",
severity: "danger"
}, null, 8, ["label"])
])
]),
_: 1
})) : createCommentVNode("", true)
]),
default: withCtx(() => [
createVNode(unref(script$6), {
value: unref(extensionStore).extensions,
stripedRows: "",
size: "small",
filters: filters.value
}, {
default: withCtx(() => [
createVNode(unref(script$2), {
header: _ctx.$t("g.extensionName"),
sortable: "",
field: "name"
}, {
body: withCtx((slotProps) => [
createTextVNode(toDisplayString(slotProps.data.name) + " ", 1),
unref(extensionStore).isCoreExtension(slotProps.data.name) ? (openBlock(), createBlock(unref(script$3), {
key: 0,
value: "Core"
})) : createCommentVNode("", true)
]),
_: 1
}, 8, ["header"]),
createVNode(unref(script$2), { pt: {
headerCell: "flex items-center justify-end",
bodyCell: "flex items-center justify-end"
} }, {
header: withCtx(() => [
createVNode(unref(script$1), {
icon: "pi pi-ellipsis-h",
text: "",
severity: "secondary",
onClick: _cache[1] || (_cache[1] = ($event) => menu.value.show($event))
}),
createVNode(unref(script$4), {
ref_key: "menu",
ref: menu,
model: contextMenuItems
}, null, 512)
]),
body: withCtx((slotProps) => [
createVNode(unref(script$5), {
disabled: unref(extensionStore).isExtensionReadOnly(slotProps.data.name),
modelValue: editingEnabledExtensions.value[slotProps.data.name],
"onUpdate:modelValue": /* @__PURE__ */ __name(($event) => editingEnabledExtensions.value[slotProps.data.name] = $event, "onUpdate:modelValue"),
onChange: updateExtensionStatus
}, null, 8, ["disabled", "modelValue", "onUpdate:modelValue"])
]),
_: 1
})
]),
_: 1
}, 8, ["value", "filters"])
]),
_: 1
});
};
}
});
export {
_sfc_main as default
};
//# sourceMappingURL=ExtensionPanel-Ba57xrmg.js.map

4919
web/assets/GraphView-B_UDZi95.js generated vendored

File diff suppressed because it is too large Load Diff

383
web/assets/GraphView-Bo28XDd0.css generated vendored
View File

@ -1,383 +0,0 @@
.comfy-menu-hamburger[data-v-82120b51] {
position: fixed;
z-index: 9999;
display: flex;
flex-direction: row
}
[data-v-e50caa15] .p-splitter-gutter {
pointer-events: auto;
}
[data-v-e50caa15] .p-splitter-gutter:hover,[data-v-e50caa15] .p-splitter-gutter[data-p-gutter-resizing='true'] {
transition: background-color 0.2s ease 300ms;
background-color: var(--p-primary-color);
}
.side-bar-panel[data-v-e50caa15] {
background-color: var(--bg-color);
pointer-events: auto;
}
.bottom-panel[data-v-e50caa15] {
background-color: var(--bg-color);
pointer-events: auto;
}
.splitter-overlay[data-v-e50caa15] {
pointer-events: none;
border-style: none;
background-color: transparent;
}
.splitter-overlay-root[data-v-e50caa15] {
position: absolute;
top: 0px;
left: 0px;
height: 100%;
width: 100%;
/* Set it the same as the ComfyUI menu */
/* Note: Lite-graph DOM widgets have the same z-index as the node id, so
999 should be sufficient to make sure splitter overlays on node's DOM
widgets */
z-index: 999;
}
.p-buttongroup-vertical[data-v-27a9500c] {
display: flex;
flex-direction: column;
border-radius: var(--p-button-border-radius);
overflow: hidden;
border: 1px solid var(--p-panel-border-color);
}
.p-buttongroup-vertical .p-button[data-v-27a9500c] {
margin: 0;
border-radius: 0;
}
.node-tooltip[data-v-f03142eb] {
background: var(--comfy-input-bg);
border-radius: 5px;
box-shadow: 0 0 5px rgba(0, 0, 0, 0.4);
color: var(--input-text);
font-family: sans-serif;
left: 0;
max-width: 30vw;
padding: 4px 8px;
position: absolute;
top: 0;
transform: translate(5px, calc(-100% - 5px));
white-space: pre-wrap;
z-index: 99999;
}
.group-title-editor.node-title-editor[data-v-12d3fd12] {
z-index: 9999;
padding: 0.25rem;
}
[data-v-12d3fd12] .editable-text {
width: 100%;
height: 100%;
}
[data-v-12d3fd12] .editable-text input {
width: 100%;
height: 100%;
/* Override the default font size */
font-size: inherit;
}
[data-v-fd0a74bd] .highlight {
background-color: var(--p-primary-color);
color: var(--p-primary-contrast-color);
font-weight: bold;
border-radius: 0.25rem;
padding: 0rem 0.125rem;
margin: -0.125rem 0.125rem;
}
.invisible-dialog-root {
width: 60%;
min-width: 24rem;
max-width: 48rem;
border: 0 !important;
background-color: transparent !important;
margin-top: 25vh;
margin-left: 400px;
}
@media all and (max-width: 768px) {
.invisible-dialog-root {
margin-left: 0px;
}
}
.node-search-box-dialog-mask {
align-items: flex-start !important;
}
.side-bar-button-icon {
font-size: var(--sidebar-icon-size) !important;
}
.side-bar-button-selected .side-bar-button-icon {
font-size: var(--sidebar-icon-size) !important;
font-weight: bold;
}
.side-bar-button[data-v-6ab4daa6] {
width: var(--sidebar-width);
height: var(--sidebar-width);
border-radius: 0;
}
.comfyui-body-left .side-bar-button.side-bar-button-selected[data-v-6ab4daa6],
.comfyui-body-left .side-bar-button.side-bar-button-selected[data-v-6ab4daa6]:hover {
border-left: 4px solid var(--p-button-text-primary-color);
}
.comfyui-body-right .side-bar-button.side-bar-button-selected[data-v-6ab4daa6],
.comfyui-body-right .side-bar-button.side-bar-button-selected[data-v-6ab4daa6]:hover {
border-right: 4px solid var(--p-button-text-primary-color);
}
.side-tool-bar-container[data-v-04875455] {
display: flex;
flex-direction: column;
align-items: center;
width: var(--sidebar-width);
height: 100%;
background-color: var(--comfy-menu-secondary-bg);
color: var(--fg-color);
box-shadow: var(--bar-shadow);
--sidebar-width: 4rem;
--sidebar-icon-size: 1.5rem;
}
.side-tool-bar-container.small-sidebar[data-v-04875455] {
--sidebar-width: 2.5rem;
--sidebar-icon-size: 1rem;
}
.side-tool-bar-end[data-v-04875455] {
align-self: flex-end;
margin-top: auto;
}
.status-indicator[data-v-fd6ae3af] {
position: absolute;
font-weight: 700;
font-size: 1.5rem;
top: 50%;
left: 50%;
transform: translate(-50%, -50%)
}
[data-v-54fadc45] .p-togglebutton {
position: relative;
flex-shrink: 0;
border-radius: 0px;
border-width: 0px;
border-right-width: 1px;
border-style: solid;
background-color: transparent;
padding: 0px;
border-right-color: var(--border-color)
}
[data-v-54fadc45] .p-togglebutton::before {
display: none
}
[data-v-54fadc45] .p-togglebutton:first-child {
border-left-width: 1px;
border-style: solid;
border-left-color: var(--border-color)
}
[data-v-54fadc45] .p-togglebutton:not(:first-child) {
border-left-width: 0px
}
[data-v-54fadc45] .p-togglebutton.p-togglebutton-checked {
height: 100%;
border-bottom-width: 1px;
border-style: solid;
border-bottom-color: var(--p-button-text-primary-color)
}
[data-v-54fadc45] .p-togglebutton:not(.p-togglebutton-checked) {
opacity: 0.75
}
[data-v-54fadc45] .p-togglebutton-checked .close-button,[data-v-54fadc45] .p-togglebutton:hover .close-button {
visibility: visible
}
[data-v-54fadc45] .p-togglebutton:hover .status-indicator {
display: none
}
[data-v-54fadc45] .p-togglebutton .close-button {
visibility: hidden
}
[data-v-54fadc45] .p-scrollpanel-content {
height: 100%
}
/* Scrollbar half opacity to avoid blocking the active tab bottom border */
[data-v-54fadc45] .p-scrollpanel:hover .p-scrollpanel-bar,[data-v-54fadc45] .p-scrollpanel:active .p-scrollpanel-bar {
opacity: 0.5
}
[data-v-54fadc45] .p-selectbutton {
height: 100%;
border-radius: 0px
}
[data-v-6ab68035] .workflow-tabs {
background-color: var(--comfy-menu-bg);
}
[data-v-26957f1f] .p-inputtext {
border-top-left-radius: 0;
border-bottom-left-radius: 0;
}
.comfyui-queue-button[data-v-91a628af] .p-splitbutton-dropdown {
border-top-right-radius: 0;
border-bottom-right-radius: 0;
}
.actionbar[data-v-ebd56d51] {
pointer-events: all;
position: fixed;
z-index: 1000;
}
.actionbar.is-docked[data-v-ebd56d51] {
position: static;
border-style: none;
background-color: transparent;
padding: 0px;
}
.actionbar.is-dragging[data-v-ebd56d51] {
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
}
[data-v-ebd56d51] .p-panel-content {
padding: 0.25rem;
}
.is-docked[data-v-ebd56d51] .p-panel-content {
padding: 0px;
}
[data-v-ebd56d51] .p-panel-header {
display: none;
}
.drag-handle[data-v-ebd56d51] {
height: -moz-max-content;
height: max-content;
width: 0.75rem;
}
.top-menubar[data-v-56df69d2] .p-menubar-item-link svg {
display: none;
}
[data-v-56df69d2] .p-menubar-submenu.dropdown-direction-up {
top: auto;
bottom: 100%;
flex-direction: column-reverse;
}
.keybinding-tag[data-v-56df69d2] {
background: var(--p-content-hover-background);
border-color: var(--p-content-border-color);
border-style: solid;
}
.comfyui-menu[data-v-68d3b5b9] {
width: 100vw;
height: var(--comfy-topbar-height);
background: var(--comfy-menu-bg);
color: var(--fg-color);
box-shadow: var(--bar-shadow);
font-family: Arial, Helvetica, sans-serif;
font-size: 0.8em;
box-sizing: border-box;
z-index: 1000;
order: 0;
grid-column: 1/-1;
}
.comfyui-menu.dropzone[data-v-68d3b5b9] {
background: var(--p-highlight-background);
}
.comfyui-menu.dropzone-active[data-v-68d3b5b9] {
background: var(--p-highlight-background-focus);
}
[data-v-68d3b5b9] .p-menubar-item-label {
line-height: revert;
}
.comfyui-logo[data-v-68d3b5b9] {
font-size: 1.2em;
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
cursor: default;
}
.comfyui-body[data-v-e89d9273] {
grid-template-columns: auto 1fr auto;
grid-template-rows: auto 1fr auto;
}
/**
+------------------+------------------+------------------+
| |
| .comfyui-body- |
| top |
| (spans all cols) |
| |
+------------------+------------------+------------------+
| | | |
| .comfyui-body- | #graph-canvas | .comfyui-body- |
| left | | right |
| | | |
| | | |
+------------------+------------------+------------------+
| |
| .comfyui-body- |
| bottom |
| (spans all cols) |
| |
+------------------+------------------+------------------+
*/
.comfyui-body-top[data-v-e89d9273] {
order: -5;
/* Span across all columns */
grid-column: 1/-1;
/* Position at the first row */
grid-row: 1;
/* Top menu bar dropdown needs to be above of graph canvas splitter overlay which is z-index: 999 */
/* Top menu bar z-index needs to be higher than bottom menu bar z-index as by default
pysssss's image feed is located at body-bottom, and it can overlap with the queue button, which
is located in body-top. */
z-index: 1001;
display: flex;
flex-direction: column;
}
.comfyui-body-left[data-v-e89d9273] {
order: -4;
/* Position in the first column */
grid-column: 1;
/* Position below the top element */
grid-row: 2;
z-index: 10;
display: flex;
}
.graph-canvas-container[data-v-e89d9273] {
width: 100%;
height: 100%;
order: -3;
grid-column: 2;
grid-row: 2;
position: relative;
overflow: hidden;
}
.comfyui-body-right[data-v-e89d9273] {
order: -2;
z-index: 10;
grid-column: 3;
grid-row: 2;
}
.comfyui-body-bottom[data-v-e89d9273] {
order: 4;
/* Span across all columns */
grid-column: 1/-1;
grid-row: 3;
/* Bottom menu bar dropdown needs to be above of graph canvas splitter overlay which is z-index: 999 */
z-index: 1000;
display: flex;
flex-direction: column;
}

971
web/assets/InstallView-DW9xwU_F.js generated vendored
View File

@ -1,971 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, T as ref, bq as useModel, o as openBlock, f as createElementBlock, m as createBaseVNode, E as toDisplayString, k as createVNode, j as unref, br as script, bl as script$1, as as withModifiers, z as withCtx, ac as script$2, I as useI18n, c as computed, aj as normalizeClass, B as createCommentVNode, a5 as script$3, a8 as createTextVNode, b9 as electronAPI, _ as _export_sfc, p as onMounted, r as resolveDirective, bk as script$4, i as withDirectives, bs as script$5, bt as script$6, l as script$7, y as createBlock, bn as script$8, bu as MigrationItems, w as watchEffect, F as Fragment, D as renderList, bv as script$9, bw as mergeModels, bx as ValidationState, X as normalizeI18nKey, N as watch, by as checkMirrorReachable, bz as _sfc_main$7, bA as isInChina, bB as mergeValidationStates, bg as t, b3 as script$a, bC as CUDA_TORCH_URL, bD as NIGHTLY_CPU_TORCH_URL, bi as useRouter, ah as toRaw } from "./index-Bv0b06LE.js";
import { s as script$b, a as script$c, b as script$d, c as script$e, d as script$f } from "./index-SeIZOWJp.js";
import { P as PYTHON_MIRROR, a as PYPI_MIRROR } from "./uvMirrors-B-HKMf6X.js";
import { _ as _sfc_main$8 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1$5 = { class: "flex flex-col gap-6 w-[600px]" };
const _hoisted_2$5 = { class: "flex flex-col gap-4" };
const _hoisted_3$5 = { class: "text-2xl font-semibold text-neutral-100" };
const _hoisted_4$5 = { class: "text-neutral-400 my-0" };
const _hoisted_5$3 = { class: "flex flex-col bg-neutral-800 p-4 rounded-lg" };
const _hoisted_6$3 = { class: "flex items-center gap-4" };
const _hoisted_7$3 = { class: "flex-1" };
const _hoisted_8$3 = { class: "text-lg font-medium text-neutral-100" };
const _hoisted_9$3 = { class: "text-sm text-neutral-400 mt-1" };
const _hoisted_10$3 = { class: "flex items-center gap-4" };
const _hoisted_11$3 = { class: "flex-1" };
const _hoisted_12$3 = { class: "text-lg font-medium text-neutral-100" };
const _hoisted_13$1 = { class: "text-sm text-neutral-400 mt-1" };
const _hoisted_14$1 = { class: "text-neutral-300" };
const _hoisted_15 = { class: "font-medium mb-2" };
const _hoisted_16 = { class: "list-disc pl-6 space-y-1" };
const _hoisted_17 = { class: "font-medium mt-4 mb-2" };
const _hoisted_18 = { class: "list-disc pl-6 space-y-1" };
const _hoisted_19 = { class: "mt-4" };
const _hoisted_20 = {
href: "https://comfy.org/privacy",
target: "_blank",
class: "text-blue-400 hover:text-blue-300 underline"
};
const _sfc_main$6 = /* @__PURE__ */ defineComponent({
__name: "DesktopSettingsConfiguration",
props: {
"autoUpdate": { type: Boolean, ...{ required: true } },
"autoUpdateModifiers": {},
"allowMetrics": { type: Boolean, ...{ required: true } },
"allowMetricsModifiers": {}
},
emits: ["update:autoUpdate", "update:allowMetrics"],
setup(__props) {
const showDialog = ref(false);
const autoUpdate = useModel(__props, "autoUpdate");
const allowMetrics = useModel(__props, "allowMetrics");
const showMetricsInfo = /* @__PURE__ */ __name(() => {
showDialog.value = true;
}, "showMetricsInfo");
return (_ctx, _cache) => {
return openBlock(), createElementBlock("div", _hoisted_1$5, [
createBaseVNode("div", _hoisted_2$5, [
createBaseVNode("h2", _hoisted_3$5, toDisplayString(_ctx.$t("install.desktopAppSettings")), 1),
createBaseVNode("p", _hoisted_4$5, toDisplayString(_ctx.$t("install.desktopAppSettingsDescription")), 1)
]),
createBaseVNode("div", _hoisted_5$3, [
createBaseVNode("div", _hoisted_6$3, [
createBaseVNode("div", _hoisted_7$3, [
createBaseVNode("h3", _hoisted_8$3, toDisplayString(_ctx.$t("install.settings.autoUpdate")), 1),
createBaseVNode("p", _hoisted_9$3, toDisplayString(_ctx.$t("install.settings.autoUpdateDescription")), 1)
]),
createVNode(unref(script), {
modelValue: autoUpdate.value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => autoUpdate.value = $event)
}, null, 8, ["modelValue"])
]),
createVNode(unref(script$1)),
createBaseVNode("div", _hoisted_10$3, [
createBaseVNode("div", _hoisted_11$3, [
createBaseVNode("h3", _hoisted_12$3, toDisplayString(_ctx.$t("install.settings.allowMetrics")), 1),
createBaseVNode("p", _hoisted_13$1, toDisplayString(_ctx.$t("install.settings.allowMetricsDescription")), 1),
createBaseVNode("a", {
href: "#",
class: "text-sm text-blue-400 hover:text-blue-300 mt-1 inline-block",
onClick: withModifiers(showMetricsInfo, ["prevent"])
}, toDisplayString(_ctx.$t("install.settings.learnMoreAboutData")), 1)
]),
createVNode(unref(script), {
modelValue: allowMetrics.value,
"onUpdate:modelValue": _cache[1] || (_cache[1] = ($event) => allowMetrics.value = $event)
}, null, 8, ["modelValue"])
])
]),
createVNode(unref(script$2), {
visible: showDialog.value,
"onUpdate:visible": _cache[2] || (_cache[2] = ($event) => showDialog.value = $event),
modal: "",
header: _ctx.$t("install.settings.dataCollectionDialog.title")
}, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_14$1, [
createBaseVNode("h4", _hoisted_15, toDisplayString(_ctx.$t("install.settings.dataCollectionDialog.whatWeCollect")), 1),
createBaseVNode("ul", _hoisted_16, [
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.settings.dataCollectionDialog.collect.errorReports")), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.settings.dataCollectionDialog.collect.systemInfo")), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t(
"install.settings.dataCollectionDialog.collect.userJourneyEvents"
)), 1)
]),
createBaseVNode("h4", _hoisted_17, toDisplayString(_ctx.$t("install.settings.dataCollectionDialog.whatWeDoNotCollect")), 1),
createBaseVNode("ul", _hoisted_18, [
createBaseVNode("li", null, toDisplayString(_ctx.$t(
"install.settings.dataCollectionDialog.doNotCollect.personalInformation"
)), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t(
"install.settings.dataCollectionDialog.doNotCollect.workflowContents"
)), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t(
"install.settings.dataCollectionDialog.doNotCollect.fileSystemInformation"
)), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t(
"install.settings.dataCollectionDialog.doNotCollect.customNodeConfigurations"
)), 1)
]),
createBaseVNode("div", _hoisted_19, [
createBaseVNode("a", _hoisted_20, toDisplayString(_ctx.$t("install.settings.dataCollectionDialog.viewFullPolicy")), 1)
])
])
]),
_: 1
}, 8, ["visible", "header"])
]);
};
}
});
const _imports_0 = "" + new URL("images/nvidia-logo.svg", import.meta.url).href;
const _imports_1 = "" + new URL("images/apple-mps-logo.png", import.meta.url).href;
const _imports_2 = "" + new URL("images/manual-configuration.svg", import.meta.url).href;
const _hoisted_1$4 = { class: "flex flex-col gap-6 w-[600px] h-[30rem] select-none" };
const _hoisted_2$4 = { class: "grow flex flex-col gap-4 text-neutral-300" };
const _hoisted_3$4 = { class: "text-2xl font-semibold text-neutral-100" };
const _hoisted_4$4 = { class: "m-1 text-neutral-400" };
const _hoisted_5$2 = {
key: 0,
class: "m-1"
};
const _hoisted_6$2 = {
key: 1,
class: "m-1"
};
const _hoisted_7$2 = {
key: 2,
class: "text-neutral-300"
};
const _hoisted_8$2 = { class: "m-1" };
const _hoisted_9$2 = { key: 3 };
const _hoisted_10$2 = { class: "m-1" };
const _hoisted_11$2 = { class: "m-1" };
const _hoisted_12$2 = {
for: "cpu-mode",
class: "select-none"
};
const _sfc_main$5 = /* @__PURE__ */ defineComponent({
__name: "GpuPicker",
props: {
"device": {
required: true
},
"deviceModifiers": {}
},
emits: ["update:device"],
setup(__props) {
const { t: t2 } = useI18n();
const cpuMode = computed({
get: /* @__PURE__ */ __name(() => selected.value === "cpu", "get"),
set: /* @__PURE__ */ __name((value) => {
selected.value = value ? "cpu" : null;
}, "set")
});
const selected = useModel(__props, "device");
const electron = electronAPI();
const platform = electron.getPlatform();
const pickGpu = /* @__PURE__ */ __name((value) => {
const newValue = selected.value === value ? null : value;
selected.value = newValue;
}, "pickGpu");
return (_ctx, _cache) => {
return openBlock(), createElementBlock("div", _hoisted_1$4, [
createBaseVNode("div", _hoisted_2$4, [
createBaseVNode("h2", _hoisted_3$4, toDisplayString(_ctx.$t("install.gpuSelection.selectGpu")), 1),
createBaseVNode("p", _hoisted_4$4, toDisplayString(_ctx.$t("install.gpuSelection.selectGpuDescription")) + ": ", 1),
createBaseVNode("div", {
class: normalizeClass(["flex gap-2 text-center transition-opacity", { selected: selected.value }])
}, [
unref(platform) !== "darwin" ? (openBlock(), createElementBlock("div", {
key: 0,
class: normalizeClass(["gpu-button", { selected: selected.value === "nvidia" }]),
role: "button",
onClick: _cache[0] || (_cache[0] = ($event) => pickGpu("nvidia"))
}, _cache[4] || (_cache[4] = [
createBaseVNode("img", {
class: "m-12",
alt: "NVIDIA logo",
width: "196",
height: "32",
src: _imports_0
}, null, -1)
]), 2)) : createCommentVNode("", true),
unref(platform) === "darwin" ? (openBlock(), createElementBlock("div", {
key: 1,
class: normalizeClass(["gpu-button", { selected: selected.value === "mps" }]),
role: "button",
onClick: _cache[1] || (_cache[1] = ($event) => pickGpu("mps"))
}, _cache[5] || (_cache[5] = [
createBaseVNode("img", {
class: "rounded-lg hover-brighten",
alt: "Apple Metal Performance Shaders Logo",
width: "292",
ratio: "",
src: _imports_1
}, null, -1)
]), 2)) : createCommentVNode("", true),
createBaseVNode("div", {
class: normalizeClass(["gpu-button", { selected: selected.value === "unsupported" }]),
role: "button",
onClick: _cache[2] || (_cache[2] = ($event) => pickGpu("unsupported"))
}, _cache[6] || (_cache[6] = [
createBaseVNode("img", {
class: "m-12",
alt: "Manual configuration",
width: "196",
src: _imports_2
}, null, -1)
]), 2)
], 2),
selected.value === "nvidia" ? (openBlock(), createElementBlock("p", _hoisted_5$2, [
createVNode(unref(script$3), {
icon: "pi pi-check",
severity: "success",
value: "CUDA"
}),
createTextVNode(" " + toDisplayString(_ctx.$t("install.gpuSelection.nvidiaDescription")), 1)
])) : createCommentVNode("", true),
selected.value === "mps" ? (openBlock(), createElementBlock("p", _hoisted_6$2, [
createVNode(unref(script$3), {
icon: "pi pi-check",
severity: "success",
value: "MPS"
}),
createTextVNode(" " + toDisplayString(_ctx.$t("install.gpuSelection.mpsDescription")), 1)
])) : createCommentVNode("", true),
selected.value === "unsupported" ? (openBlock(), createElementBlock("div", _hoisted_7$2, [
createBaseVNode("p", _hoisted_8$2, [
createVNode(unref(script$3), {
icon: "pi pi-exclamation-triangle",
severity: "warn",
value: unref(t2)("icon.exclamation-triangle")
}, null, 8, ["value"]),
createTextVNode(" " + toDisplayString(_ctx.$t("install.gpuSelection.customSkipsPython")), 1)
]),
createBaseVNode("ul", null, [
createBaseVNode("li", null, [
createBaseVNode("strong", null, toDisplayString(_ctx.$t("install.gpuSelection.customComfyNeedsPython")), 1)
]),
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.gpuSelection.customManualVenv")), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.gpuSelection.customInstallRequirements")), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.gpuSelection.customMayNotWork")), 1)
])
])) : createCommentVNode("", true),
selected.value === "cpu" ? (openBlock(), createElementBlock("div", _hoisted_9$2, [
createBaseVNode("p", _hoisted_10$2, [
createVNode(unref(script$3), {
icon: "pi pi-exclamation-triangle",
severity: "warn",
value: unref(t2)("icon.exclamation-triangle")
}, null, 8, ["value"]),
createTextVNode(" " + toDisplayString(_ctx.$t("install.gpuSelection.cpuModeDescription")), 1)
]),
createBaseVNode("p", _hoisted_11$2, toDisplayString(_ctx.$t("install.gpuSelection.cpuModeDescription2")), 1)
])) : createCommentVNode("", true)
]),
createBaseVNode("div", {
class: normalizeClass(["transition-opacity flex gap-3 h-0", {
"opacity-40": selected.value && selected.value !== "cpu"
}])
}, [
createVNode(unref(script), {
modelValue: cpuMode.value,
"onUpdate:modelValue": _cache[3] || (_cache[3] = ($event) => cpuMode.value = $event),
inputId: "cpu-mode",
class: "-translate-y-40"
}, null, 8, ["modelValue"]),
createBaseVNode("label", _hoisted_12$2, toDisplayString(_ctx.$t("install.gpuSelection.enableCpuMode")), 1)
], 2)
]);
};
}
});
const GpuPicker = /* @__PURE__ */ _export_sfc(_sfc_main$5, [["__scopeId", "data-v-79125ff6"]]);
const _hoisted_1$3 = { class: "flex flex-col gap-6 w-[600px]" };
const _hoisted_2$3 = { class: "flex flex-col gap-4" };
const _hoisted_3$3 = { class: "text-2xl font-semibold text-neutral-100" };
const _hoisted_4$3 = { class: "text-neutral-400 my-0" };
const _hoisted_5$1 = { class: "flex gap-2" };
const _hoisted_6$1 = { class: "bg-neutral-800 p-4 rounded-lg" };
const _hoisted_7$1 = { class: "text-lg font-medium mt-0 mb-3 text-neutral-100" };
const _hoisted_8$1 = { class: "flex flex-col gap-2" };
const _hoisted_9$1 = { class: "flex items-center gap-2" };
const _hoisted_10$1 = { class: "text-neutral-200" };
const _hoisted_11$1 = { class: "pi pi-info-circle" };
const _hoisted_12$1 = { class: "flex items-center gap-2" };
const _hoisted_13 = { class: "text-neutral-200" };
const _hoisted_14 = { class: "pi pi-info-circle" };
const _sfc_main$4 = /* @__PURE__ */ defineComponent({
__name: "InstallLocationPicker",
props: {
"installPath": { required: true },
"installPathModifiers": {},
"pathError": { required: true },
"pathErrorModifiers": {}
},
emits: ["update:installPath", "update:pathError"],
setup(__props) {
const { t: t2 } = useI18n();
const installPath = useModel(__props, "installPath");
const pathError = useModel(__props, "pathError");
const pathExists = ref(false);
const appData = ref("");
const appPath = ref("");
const inputTouched = ref(false);
const electron = electronAPI();
onMounted(async () => {
const paths = await electron.getSystemPaths();
appData.value = paths.appData;
appPath.value = paths.appPath;
installPath.value = paths.defaultInstallPath;
await validatePath(paths.defaultInstallPath);
});
const validatePath = /* @__PURE__ */ __name(async (path) => {
try {
pathError.value = "";
pathExists.value = false;
const validation = await electron.validateInstallPath(path);
if (!validation.isValid) {
const errors = [];
if (validation.cannotWrite) errors.push(t2("install.cannotWrite"));
if (validation.freeSpace < validation.requiredSpace) {
const requiredGB = validation.requiredSpace / 1024 / 1024 / 1024;
errors.push(`${t2("install.insufficientFreeSpace")}: ${requiredGB} GB`);
}
if (validation.parentMissing) errors.push(t2("install.parentMissing"));
if (validation.error)
errors.push(`${t2("install.unhandledError")}: ${validation.error}`);
pathError.value = errors.join("\n");
}
if (validation.exists) pathExists.value = true;
} catch (error) {
pathError.value = t2("install.pathValidationFailed");
}
}, "validatePath");
const browsePath = /* @__PURE__ */ __name(async () => {
try {
const result = await electron.showDirectoryPicker();
if (result) {
installPath.value = result;
await validatePath(result);
}
} catch (error) {
pathError.value = t2("install.failedToSelectDirectory");
}
}, "browsePath");
const onFocus = /* @__PURE__ */ __name(() => {
if (!inputTouched.value) {
inputTouched.value = true;
return;
}
validatePath(installPath.value);
}, "onFocus");
return (_ctx, _cache) => {
const _directive_tooltip = resolveDirective("tooltip");
return openBlock(), createElementBlock("div", _hoisted_1$3, [
createBaseVNode("div", _hoisted_2$3, [
createBaseVNode("h2", _hoisted_3$3, toDisplayString(_ctx.$t("install.chooseInstallationLocation")), 1),
createBaseVNode("p", _hoisted_4$3, toDisplayString(_ctx.$t("install.installLocationDescription")), 1),
createBaseVNode("div", _hoisted_5$1, [
createVNode(unref(script$6), { class: "flex-1" }, {
default: withCtx(() => [
createVNode(unref(script$4), {
modelValue: installPath.value,
"onUpdate:modelValue": [
_cache[0] || (_cache[0] = ($event) => installPath.value = $event),
validatePath
],
class: normalizeClass(["w-full", { "p-invalid": pathError.value }]),
onFocus
}, null, 8, ["modelValue", "class"]),
withDirectives(createVNode(unref(script$5), { class: "pi pi-info-circle" }, null, 512), [
[
_directive_tooltip,
_ctx.$t("install.installLocationTooltip"),
void 0,
{ top: true }
]
])
]),
_: 1
}),
createVNode(unref(script$7), {
icon: "pi pi-folder",
onClick: browsePath,
class: "w-12"
})
]),
pathError.value ? (openBlock(), createBlock(unref(script$8), {
key: 0,
severity: "error",
class: "whitespace-pre-line"
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(pathError.value), 1)
]),
_: 1
})) : createCommentVNode("", true),
pathExists.value ? (openBlock(), createBlock(unref(script$8), {
key: 1,
severity: "warn"
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(_ctx.$t("install.pathExists")), 1)
]),
_: 1
})) : createCommentVNode("", true)
]),
createBaseVNode("div", _hoisted_6$1, [
createBaseVNode("h3", _hoisted_7$1, toDisplayString(_ctx.$t("install.systemLocations")), 1),
createBaseVNode("div", _hoisted_8$1, [
createBaseVNode("div", _hoisted_9$1, [
_cache[1] || (_cache[1] = createBaseVNode("i", { class: "pi pi-folder text-neutral-400" }, null, -1)),
_cache[2] || (_cache[2] = createBaseVNode("span", { class: "text-neutral-400" }, "App Data:", -1)),
createBaseVNode("span", _hoisted_10$1, toDisplayString(appData.value), 1),
withDirectives(createBaseVNode("span", _hoisted_11$1, null, 512), [
[_directive_tooltip, _ctx.$t("install.appDataLocationTooltip")]
])
]),
createBaseVNode("div", _hoisted_12$1, [
_cache[3] || (_cache[3] = createBaseVNode("i", { class: "pi pi-desktop text-neutral-400" }, null, -1)),
_cache[4] || (_cache[4] = createBaseVNode("span", { class: "text-neutral-400" }, "App Path:", -1)),
createBaseVNode("span", _hoisted_13, toDisplayString(appPath.value), 1),
withDirectives(createBaseVNode("span", _hoisted_14, null, 512), [
[_directive_tooltip, _ctx.$t("install.appPathLocationTooltip")]
])
])
])
])
]);
};
}
});
const _hoisted_1$2 = { class: "flex flex-col gap-6 w-[600px]" };
const _hoisted_2$2 = { class: "flex flex-col gap-4" };
const _hoisted_3$2 = { class: "text-2xl font-semibold text-neutral-100" };
const _hoisted_4$2 = { class: "text-neutral-400 my-0" };
const _hoisted_5 = { class: "flex gap-2" };
const _hoisted_6 = {
key: 0,
class: "flex flex-col gap-4 bg-neutral-800 p-4 rounded-lg"
};
const _hoisted_7 = { class: "text-lg mt-0 font-medium text-neutral-100" };
const _hoisted_8 = { class: "flex flex-col gap-3" };
const _hoisted_9 = ["onClick"];
const _hoisted_10 = ["for"];
const _hoisted_11 = { class: "text-sm text-neutral-400 my-1" };
const _hoisted_12 = {
key: 1,
class: "text-neutral-400 italic"
};
const _sfc_main$3 = /* @__PURE__ */ defineComponent({
__name: "MigrationPicker",
props: {
"sourcePath": { required: false },
"sourcePathModifiers": {},
"migrationItemIds": {
required: false
},
"migrationItemIdsModifiers": {}
},
emits: ["update:sourcePath", "update:migrationItemIds"],
setup(__props) {
const { t: t2 } = useI18n();
const electron = electronAPI();
const sourcePath = useModel(__props, "sourcePath");
const migrationItemIds = useModel(__props, "migrationItemIds");
const migrationItems = ref(
MigrationItems.map((item) => ({
...item,
selected: true
}))
);
const pathError = ref("");
const isValidSource = computed(
() => sourcePath.value !== "" && pathError.value === ""
);
const validateSource = /* @__PURE__ */ __name(async (sourcePath2) => {
if (!sourcePath2) {
pathError.value = "";
return;
}
try {
pathError.value = "";
const validation = await electron.validateComfyUISource(sourcePath2);
if (!validation.isValid) pathError.value = validation.error;
} catch (error) {
console.error(error);
pathError.value = t2("install.pathValidationFailed");
}
}, "validateSource");
const browsePath = /* @__PURE__ */ __name(async () => {
try {
const result = await electron.showDirectoryPicker();
if (result) {
sourcePath.value = result;
await validateSource(result);
}
} catch (error) {
console.error(error);
pathError.value = t2("install.failedToSelectDirectory");
}
}, "browsePath");
watchEffect(() => {
migrationItemIds.value = migrationItems.value.filter((item) => item.selected).map((item) => item.id);
});
return (_ctx, _cache) => {
return openBlock(), createElementBlock("div", _hoisted_1$2, [
createBaseVNode("div", _hoisted_2$2, [
createBaseVNode("h2", _hoisted_3$2, toDisplayString(_ctx.$t("install.migrateFromExistingInstallation")), 1),
createBaseVNode("p", _hoisted_4$2, toDisplayString(_ctx.$t("install.migrationSourcePathDescription")), 1),
createBaseVNode("div", _hoisted_5, [
createVNode(unref(script$4), {
modelValue: sourcePath.value,
"onUpdate:modelValue": [
_cache[0] || (_cache[0] = ($event) => sourcePath.value = $event),
validateSource
],
placeholder: "Select existing ComfyUI installation (optional)",
class: normalizeClass(["flex-1", { "p-invalid": pathError.value }])
}, null, 8, ["modelValue", "class"]),
createVNode(unref(script$7), {
icon: "pi pi-folder",
onClick: browsePath,
class: "w-12"
})
]),
pathError.value ? (openBlock(), createBlock(unref(script$8), {
key: 0,
severity: "error"
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(pathError.value), 1)
]),
_: 1
})) : createCommentVNode("", true)
]),
isValidSource.value ? (openBlock(), createElementBlock("div", _hoisted_6, [
createBaseVNode("h3", _hoisted_7, toDisplayString(_ctx.$t("install.selectItemsToMigrate")), 1),
createBaseVNode("div", _hoisted_8, [
(openBlock(true), createElementBlock(Fragment, null, renderList(migrationItems.value, (item) => {
return openBlock(), createElementBlock("div", {
key: item.id,
class: "flex items-center gap-3 p-2 hover:bg-neutral-700 rounded",
onClick: /* @__PURE__ */ __name(($event) => item.selected = !item.selected, "onClick")
}, [
createVNode(unref(script$9), {
modelValue: item.selected,
"onUpdate:modelValue": /* @__PURE__ */ __name(($event) => item.selected = $event, "onUpdate:modelValue"),
inputId: item.id,
binary: true,
onClick: _cache[1] || (_cache[1] = withModifiers(() => {
}, ["stop"]))
}, null, 8, ["modelValue", "onUpdate:modelValue", "inputId"]),
createBaseVNode("div", null, [
createBaseVNode("label", {
for: item.id,
class: "text-neutral-200 font-medium"
}, toDisplayString(item.label), 9, _hoisted_10),
createBaseVNode("p", _hoisted_11, toDisplayString(item.description), 1)
])
], 8, _hoisted_9);
}), 128))
])
])) : (openBlock(), createElementBlock("div", _hoisted_12, toDisplayString(_ctx.$t("install.migrationOptional")), 1))
]);
};
}
});
const _hoisted_1$1 = { class: "flex flex-col items-center gap-4" };
const _hoisted_2$1 = { class: "w-full" };
const _hoisted_3$1 = { class: "text-lg font-medium text-neutral-100" };
const _hoisted_4$1 = { class: "text-sm text-neutral-400 mt-1" };
const _sfc_main$2 = /* @__PURE__ */ defineComponent({
__name: "MirrorItem",
props: /* @__PURE__ */ mergeModels({
item: {}
}, {
"modelValue": { required: true },
"modelModifiers": {}
}),
emits: /* @__PURE__ */ mergeModels(["state-change"], ["update:modelValue"]),
setup(__props, { emit: __emit }) {
const emit = __emit;
const modelValue = useModel(__props, "modelValue");
const validationState = ref(ValidationState.IDLE);
const normalizedSettingId = computed(() => {
return normalizeI18nKey(__props.item.settingId);
});
onMounted(() => {
modelValue.value = __props.item.mirror;
});
watch(validationState, (newState) => {
emit("state-change", newState);
if (newState === ValidationState.INVALID && modelValue.value === __props.item.mirror) {
modelValue.value = __props.item.fallbackMirror;
}
});
return (_ctx, _cache) => {
return openBlock(), createElementBlock("div", _hoisted_1$1, [
createBaseVNode("div", _hoisted_2$1, [
createBaseVNode("h3", _hoisted_3$1, toDisplayString(_ctx.$t(`settings.${normalizedSettingId.value}.name`)), 1),
createBaseVNode("p", _hoisted_4$1, toDisplayString(_ctx.$t(`settings.${normalizedSettingId.value}.tooltip`)), 1)
]),
createVNode(_sfc_main$7, {
modelValue: modelValue.value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => modelValue.value = $event),
"validate-url-fn": /* @__PURE__ */ __name((mirror) => unref(checkMirrorReachable)(mirror + (_ctx.item.validationPathSuffix ?? "")), "validate-url-fn"),
onStateChange: _cache[1] || (_cache[1] = ($event) => validationState.value = $event)
}, null, 8, ["modelValue", "validate-url-fn"])
]);
};
}
});
const _sfc_main$1 = /* @__PURE__ */ defineComponent({
__name: "MirrorsConfiguration",
props: /* @__PURE__ */ mergeModels({
device: {}
}, {
"pythonMirror": { required: true },
"pythonMirrorModifiers": {},
"pypiMirror": { required: true },
"pypiMirrorModifiers": {},
"torchMirror": { required: true },
"torchMirrorModifiers": {}
}),
emits: ["update:pythonMirror", "update:pypiMirror", "update:torchMirror"],
setup(__props) {
const showMirrorInputs = ref(false);
const pythonMirror = useModel(__props, "pythonMirror");
const pypiMirror = useModel(__props, "pypiMirror");
const torchMirror = useModel(__props, "torchMirror");
const getTorchMirrorItem = /* @__PURE__ */ __name((device) => {
const settingId = "Comfy-Desktop.UV.TorchInstallMirror";
switch (device) {
case "mps":
return {
settingId,
mirror: NIGHTLY_CPU_TORCH_URL,
fallbackMirror: NIGHTLY_CPU_TORCH_URL
};
case "nvidia":
return {
settingId,
mirror: CUDA_TORCH_URL,
fallbackMirror: CUDA_TORCH_URL
};
case "cpu":
default:
return {
settingId,
mirror: PYPI_MIRROR.mirror,
fallbackMirror: PYPI_MIRROR.fallbackMirror
};
}
}, "getTorchMirrorItem");
const userIsInChina = ref(false);
onMounted(async () => {
userIsInChina.value = await isInChina();
});
const useFallbackMirror = /* @__PURE__ */ __name((mirror) => ({
...mirror,
mirror: mirror.fallbackMirror
}), "useFallbackMirror");
const mirrors = computed(
() => [
[PYTHON_MIRROR, pythonMirror],
[PYPI_MIRROR, pypiMirror],
[getTorchMirrorItem(__props.device), torchMirror]
].map(([item, modelValue]) => [
userIsInChina.value ? useFallbackMirror(item) : item,
modelValue
])
);
const validationStates = ref(
mirrors.value.map(() => ValidationState.IDLE)
);
const validationState = computed(() => {
return mergeValidationStates(validationStates.value);
});
const validationStateTooltip = computed(() => {
switch (validationState.value) {
case ValidationState.INVALID:
return t("install.settings.mirrorsUnreachable");
case ValidationState.VALID:
return t("install.settings.mirrorsReachable");
default:
return t("install.settings.checkingMirrors");
}
});
return (_ctx, _cache) => {
const _directive_tooltip = resolveDirective("tooltip");
return openBlock(), createBlock(unref(script$a), {
header: _ctx.$t("install.settings.mirrorSettings"),
toggleable: "",
collapsed: !showMirrorInputs.value,
"pt:root": "bg-neutral-800 border-none w-[600px]"
}, {
icons: withCtx(() => [
withDirectives(createBaseVNode("i", {
class: normalizeClass({
"pi pi-spin pi-spinner text-neutral-400": validationState.value === unref(ValidationState).LOADING,
"pi pi-check text-green-500": validationState.value === unref(ValidationState).VALID,
"pi pi-times text-red-500": validationState.value === unref(ValidationState).INVALID
})
}, null, 2), [
[_directive_tooltip, validationStateTooltip.value]
])
]),
default: withCtx(() => [
(openBlock(true), createElementBlock(Fragment, null, renderList(mirrors.value, ([item, modelValue], index) => {
return openBlock(), createElementBlock(Fragment, {
key: item.settingId + item.mirror
}, [
index > 0 ? (openBlock(), createBlock(unref(script$1), { key: 0 })) : createCommentVNode("", true),
createVNode(_sfc_main$2, {
item,
modelValue: modelValue.value,
"onUpdate:modelValue": /* @__PURE__ */ __name(($event) => modelValue.value = $event, "onUpdate:modelValue"),
onStateChange: /* @__PURE__ */ __name(($event) => validationStates.value[index] = $event, "onStateChange")
}, null, 8, ["item", "modelValue", "onUpdate:modelValue", "onStateChange"])
], 64);
}), 128))
]),
_: 1
}, 8, ["header", "collapsed"]);
};
}
});
const _hoisted_1 = { class: "flex pt-6 justify-end" };
const _hoisted_2 = { class: "flex pt-6 justify-between" };
const _hoisted_3 = { class: "flex pt-6 justify-between" };
const _hoisted_4 = { class: "flex mt-6 justify-between" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "InstallView",
setup(__props) {
const device = ref(null);
const installPath = ref("");
const pathError = ref("");
const migrationSourcePath = ref("");
const migrationItemIds = ref([]);
const autoUpdate = ref(true);
const allowMetrics = ref(true);
const pythonMirror = ref("");
const pypiMirror = ref("");
const torchMirror = ref("");
const highestStep = ref(0);
const handleStepChange = /* @__PURE__ */ __name((value) => {
setHighestStep(value);
electronAPI().Events.trackEvent("install_stepper_change", {
step: value
});
}, "handleStepChange");
const setHighestStep = /* @__PURE__ */ __name((value) => {
const int = typeof value === "number" ? value : parseInt(value, 10);
if (!isNaN(int) && int > highestStep.value) highestStep.value = int;
}, "setHighestStep");
const hasError = computed(() => pathError.value !== "");
const noGpu = computed(() => typeof device.value !== "string");
const electron = electronAPI();
const router = useRouter();
const install = /* @__PURE__ */ __name(() => {
const options = {
installPath: installPath.value,
autoUpdate: autoUpdate.value,
allowMetrics: allowMetrics.value,
migrationSourcePath: migrationSourcePath.value,
migrationItemIds: toRaw(migrationItemIds.value),
pythonMirror: pythonMirror.value,
pypiMirror: pypiMirror.value,
torchMirror: torchMirror.value,
device: device.value
};
electron.installComfyUI(options);
const nextPage = options.device === "unsupported" ? "/manual-configuration" : "/server-start";
router.push(nextPage);
}, "install");
onMounted(async () => {
if (!electron) return;
const detectedGpu = await electron.Config.getDetectedGpu();
if (detectedGpu === "mps" || detectedGpu === "nvidia") {
device.value = detectedGpu;
}
electronAPI().Events.trackEvent("install_stepper_change", {
step: "0",
gpu: detectedGpu
});
});
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$8, { dark: "" }, {
default: withCtx(() => [
createVNode(unref(script$f), {
class: "h-full p-8 2xl:p-16",
value: "0",
"onUpdate:value": handleStepChange
}, {
default: withCtx(() => [
createVNode(unref(script$b), { class: "select-none" }, {
default: withCtx(() => [
createVNode(unref(script$c), { value: "0" }, {
default: withCtx(() => [
createTextVNode(toDisplayString(_ctx.$t("install.gpu")), 1)
]),
_: 1
}),
createVNode(unref(script$c), {
value: "1",
disabled: noGpu.value
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(_ctx.$t("install.installLocation")), 1)
]),
_: 1
}, 8, ["disabled"]),
createVNode(unref(script$c), {
value: "2",
disabled: noGpu.value || hasError.value || highestStep.value < 1
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(_ctx.$t("install.migration")), 1)
]),
_: 1
}, 8, ["disabled"]),
createVNode(unref(script$c), {
value: "3",
disabled: noGpu.value || hasError.value || highestStep.value < 2
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(_ctx.$t("install.desktopSettings")), 1)
]),
_: 1
}, 8, ["disabled"])
]),
_: 1
}),
createVNode(unref(script$d), null, {
default: withCtx(() => [
createVNode(unref(script$e), { value: "0" }, {
default: withCtx(({ activateCallback }) => [
createVNode(GpuPicker, {
device: device.value,
"onUpdate:device": _cache[0] || (_cache[0] = ($event) => device.value = $event)
}, null, 8, ["device"]),
createBaseVNode("div", _hoisted_1, [
createVNode(unref(script$7), {
label: _ctx.$t("g.next"),
icon: "pi pi-arrow-right",
iconPos: "right",
onClick: /* @__PURE__ */ __name(($event) => activateCallback("1"), "onClick"),
disabled: typeof device.value !== "string"
}, null, 8, ["label", "onClick", "disabled"])
])
]),
_: 1
}),
createVNode(unref(script$e), { value: "1" }, {
default: withCtx(({ activateCallback }) => [
createVNode(_sfc_main$4, {
installPath: installPath.value,
"onUpdate:installPath": _cache[1] || (_cache[1] = ($event) => installPath.value = $event),
pathError: pathError.value,
"onUpdate:pathError": _cache[2] || (_cache[2] = ($event) => pathError.value = $event)
}, null, 8, ["installPath", "pathError"]),
createBaseVNode("div", _hoisted_2, [
createVNode(unref(script$7), {
label: _ctx.$t("g.back"),
severity: "secondary",
icon: "pi pi-arrow-left",
onClick: /* @__PURE__ */ __name(($event) => activateCallback("0"), "onClick")
}, null, 8, ["label", "onClick"]),
createVNode(unref(script$7), {
label: _ctx.$t("g.next"),
icon: "pi pi-arrow-right",
iconPos: "right",
onClick: /* @__PURE__ */ __name(($event) => activateCallback("2"), "onClick"),
disabled: pathError.value !== ""
}, null, 8, ["label", "onClick", "disabled"])
])
]),
_: 1
}),
createVNode(unref(script$e), { value: "2" }, {
default: withCtx(({ activateCallback }) => [
createVNode(_sfc_main$3, {
sourcePath: migrationSourcePath.value,
"onUpdate:sourcePath": _cache[3] || (_cache[3] = ($event) => migrationSourcePath.value = $event),
migrationItemIds: migrationItemIds.value,
"onUpdate:migrationItemIds": _cache[4] || (_cache[4] = ($event) => migrationItemIds.value = $event)
}, null, 8, ["sourcePath", "migrationItemIds"]),
createBaseVNode("div", _hoisted_3, [
createVNode(unref(script$7), {
label: _ctx.$t("g.back"),
severity: "secondary",
icon: "pi pi-arrow-left",
onClick: /* @__PURE__ */ __name(($event) => activateCallback("1"), "onClick")
}, null, 8, ["label", "onClick"]),
createVNode(unref(script$7), {
label: _ctx.$t("g.next"),
icon: "pi pi-arrow-right",
iconPos: "right",
onClick: /* @__PURE__ */ __name(($event) => activateCallback("3"), "onClick")
}, null, 8, ["label", "onClick"])
])
]),
_: 1
}),
createVNode(unref(script$e), { value: "3" }, {
default: withCtx(({ activateCallback }) => [
createVNode(_sfc_main$6, {
autoUpdate: autoUpdate.value,
"onUpdate:autoUpdate": _cache[5] || (_cache[5] = ($event) => autoUpdate.value = $event),
allowMetrics: allowMetrics.value,
"onUpdate:allowMetrics": _cache[6] || (_cache[6] = ($event) => allowMetrics.value = $event)
}, null, 8, ["autoUpdate", "allowMetrics"]),
createVNode(_sfc_main$1, {
device: device.value,
pythonMirror: pythonMirror.value,
"onUpdate:pythonMirror": _cache[7] || (_cache[7] = ($event) => pythonMirror.value = $event),
pypiMirror: pypiMirror.value,
"onUpdate:pypiMirror": _cache[8] || (_cache[8] = ($event) => pypiMirror.value = $event),
torchMirror: torchMirror.value,
"onUpdate:torchMirror": _cache[9] || (_cache[9] = ($event) => torchMirror.value = $event),
class: "mt-6"
}, null, 8, ["device", "pythonMirror", "pypiMirror", "torchMirror"]),
createBaseVNode("div", _hoisted_4, [
createVNode(unref(script$7), {
label: _ctx.$t("g.back"),
severity: "secondary",
icon: "pi pi-arrow-left",
onClick: /* @__PURE__ */ __name(($event) => activateCallback("2"), "onClick")
}, null, 8, ["label", "onClick"]),
createVNode(unref(script$7), {
label: _ctx.$t("g.install"),
icon: "pi pi-check",
iconPos: "right",
disabled: hasError.value,
onClick: _cache[10] || (_cache[10] = ($event) => install())
}, null, 8, ["label", "disabled"])
])
]),
_: 1
})
]),
_: 1
})
]),
_: 1
})
]),
_: 1
});
};
}
});
const InstallView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-cd6731d2"]]);
export {
InstallView as default
};
//# sourceMappingURL=InstallView-DW9xwU_F.js.map

81
web/assets/InstallView-DbJ2cGfL.css generated vendored
View File

@ -1,81 +0,0 @@
.p-tag[data-v-79125ff6] {
--p-tag-gap: 0.5rem;
}
.hover-brighten {
&[data-v-79125ff6] {
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
transition-property: filter, box-shadow;
}
&[data-v-79125ff6]:hover {
filter: brightness(107%) contrast(105%);
box-shadow: 0 0 0.25rem #ffffff79;
}
}
.p-accordioncontent-content[data-v-79125ff6] {
border-radius: 0.5rem;
--tw-bg-opacity: 1;
background-color: rgb(23 23 23 / var(--tw-bg-opacity));
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
}
div.selected {
.gpu-button[data-v-79125ff6]:not(.selected) {
opacity: 0.5;
}
.gpu-button[data-v-79125ff6]:not(.selected):hover {
opacity: 1;
}
}
.gpu-button[data-v-79125ff6] {
margin: 0px;
display: flex;
width: 50%;
cursor: pointer;
flex-direction: column;
align-items: center;
justify-content: space-around;
border-radius: 0.5rem;
background-color: rgb(38 38 38 / var(--tw-bg-opacity));
--tw-bg-opacity: 0.5;
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
}
.gpu-button[data-v-79125ff6]:hover {
--tw-bg-opacity: 0.75;
}
.gpu-button {
&.selected[data-v-79125ff6] {
--tw-bg-opacity: 1;
background-color: rgb(64 64 64 / var(--tw-bg-opacity));
}
&.selected[data-v-79125ff6] {
--tw-bg-opacity: 0.5;
}
&.selected[data-v-79125ff6] {
opacity: 1;
}
&.selected[data-v-79125ff6]:hover {
--tw-bg-opacity: 0.6;
}
}
.disabled[data-v-79125ff6] {
pointer-events: none;
opacity: 0.4;
}
.p-card-header[data-v-79125ff6] {
flex-grow: 1;
text-align: center;
}
.p-card-body[data-v-79125ff6] {
padding-top: 0px;
text-align: center;
}
[data-v-cd6731d2] .p-steppanel {
background-color: transparent
}

View File

@ -1,8 +0,0 @@
[data-v-8454e24f] .p-datatable-tbody > tr > td {
padding: 0.25rem;
min-height: 2rem
}
[data-v-8454e24f] .p-datatable-row-selected .actions,[data-v-8454e24f] .p-datatable-selectable-row:hover .actions {
visibility: visible
}

View File

@ -1,292 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, c as computed, o as openBlock, f as createElementBlock, F as Fragment, D as renderList, k as createVNode, z as withCtx, a8 as createTextVNode, E as toDisplayString, j as unref, a5 as script, B as createCommentVNode, T as ref, dx as FilterMatchMode, ao as useKeybindingStore, J as useCommandStore, I as useI18n, X as normalizeI18nKey, w as watchEffect, aV as useToast, r as resolveDirective, y as createBlock, dy as SearchBox, m as createBaseVNode, l as script$2, bk as script$4, as as withModifiers, bn as script$5, ac as script$6, i as withDirectives, dz as _sfc_main$2, dA as KeyComboImpl, dB as KeybindingImpl, _ as _export_sfc } from "./index-Bv0b06LE.js";
import { g as script$1, h as script$3 } from "./index-CgMyWf7n.js";
import { u as useKeybindingService } from "./keybindingService-DyjX-nxF.js";
import "./index-Dzu9WL4p.js";
const _hoisted_1$1 = {
key: 0,
class: "px-2"
};
const _sfc_main$1 = /* @__PURE__ */ defineComponent({
__name: "KeyComboDisplay",
props: {
keyCombo: {},
isModified: { type: Boolean, default: false }
},
setup(__props) {
const props = __props;
const keySequences = computed(() => props.keyCombo.getKeySequences());
return (_ctx, _cache) => {
return openBlock(), createElementBlock("span", null, [
(openBlock(true), createElementBlock(Fragment, null, renderList(keySequences.value, (sequence, index) => {
return openBlock(), createElementBlock(Fragment, { key: index }, [
createVNode(unref(script), {
severity: _ctx.isModified ? "info" : "secondary"
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(sequence), 1)
]),
_: 2
}, 1032, ["severity"]),
index < keySequences.value.length - 1 ? (openBlock(), createElementBlock("span", _hoisted_1$1, "+")) : createCommentVNode("", true)
], 64);
}), 128))
]);
};
}
});
const _hoisted_1 = { class: "actions invisible flex flex-row" };
const _hoisted_2 = ["title"];
const _hoisted_3 = { key: 1 };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "KeybindingPanel",
setup(__props) {
const filters = ref({
global: { value: "", matchMode: FilterMatchMode.CONTAINS }
});
const keybindingStore = useKeybindingStore();
const keybindingService = useKeybindingService();
const commandStore = useCommandStore();
const { t } = useI18n();
const commandsData = computed(() => {
return Object.values(commandStore.commands).map((command) => ({
id: command.id,
label: t(`commands.${normalizeI18nKey(command.id)}.label`, command.label),
keybinding: keybindingStore.getKeybindingByCommandId(command.id)
}));
});
const selectedCommandData = ref(null);
const editDialogVisible = ref(false);
const newBindingKeyCombo = ref(null);
const currentEditingCommand = ref(null);
const keybindingInput = ref(null);
const existingKeybindingOnCombo = computed(() => {
if (!currentEditingCommand.value) {
return null;
}
if (currentEditingCommand.value.keybinding?.combo?.equals(
newBindingKeyCombo.value
)) {
return null;
}
if (!newBindingKeyCombo.value) {
return null;
}
return keybindingStore.getKeybinding(newBindingKeyCombo.value);
});
function editKeybinding(commandData) {
currentEditingCommand.value = commandData;
newBindingKeyCombo.value = commandData.keybinding ? commandData.keybinding.combo : null;
editDialogVisible.value = true;
}
__name(editKeybinding, "editKeybinding");
watchEffect(() => {
if (editDialogVisible.value) {
setTimeout(() => {
keybindingInput.value?.$el?.focus();
}, 300);
}
});
function removeKeybinding(commandData) {
if (commandData.keybinding) {
keybindingStore.unsetKeybinding(commandData.keybinding);
keybindingService.persistUserKeybindings();
}
}
__name(removeKeybinding, "removeKeybinding");
function captureKeybinding(event) {
if (!event.shiftKey && !event.altKey && !event.ctrlKey && !event.metaKey) {
switch (event.key) {
case "Escape":
cancelEdit();
return;
case "Enter":
saveKeybinding();
return;
}
}
const keyCombo = KeyComboImpl.fromEvent(event);
newBindingKeyCombo.value = keyCombo;
}
__name(captureKeybinding, "captureKeybinding");
function cancelEdit() {
editDialogVisible.value = false;
currentEditingCommand.value = null;
newBindingKeyCombo.value = null;
}
__name(cancelEdit, "cancelEdit");
function saveKeybinding() {
if (currentEditingCommand.value && newBindingKeyCombo.value) {
const updated = keybindingStore.updateKeybindingOnCommand(
new KeybindingImpl({
commandId: currentEditingCommand.value.id,
combo: newBindingKeyCombo.value
})
);
if (updated) {
keybindingService.persistUserKeybindings();
}
}
cancelEdit();
}
__name(saveKeybinding, "saveKeybinding");
const toast = useToast();
async function resetKeybindings() {
keybindingStore.resetKeybindings();
await keybindingService.persistUserKeybindings();
toast.add({
severity: "info",
summary: "Info",
detail: "Keybindings reset",
life: 3e3
});
}
__name(resetKeybindings, "resetKeybindings");
return (_ctx, _cache) => {
const _directive_tooltip = resolveDirective("tooltip");
return openBlock(), createBlock(_sfc_main$2, {
value: "Keybinding",
class: "keybinding-panel"
}, {
header: withCtx(() => [
createVNode(SearchBox, {
modelValue: filters.value["global"].value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => filters.value["global"].value = $event),
placeholder: _ctx.$t("g.searchKeybindings") + "..."
}, null, 8, ["modelValue", "placeholder"])
]),
default: withCtx(() => [
createVNode(unref(script$3), {
value: commandsData.value,
selection: selectedCommandData.value,
"onUpdate:selection": _cache[1] || (_cache[1] = ($event) => selectedCommandData.value = $event),
"global-filter-fields": ["id", "label"],
filters: filters.value,
selectionMode: "single",
stripedRows: "",
pt: {
header: "px-0"
}
}, {
default: withCtx(() => [
createVNode(unref(script$1), {
field: "actions",
header: ""
}, {
body: withCtx((slotProps) => [
createBaseVNode("div", _hoisted_1, [
createVNode(unref(script$2), {
icon: "pi pi-pencil",
class: "p-button-text",
onClick: /* @__PURE__ */ __name(($event) => editKeybinding(slotProps.data), "onClick")
}, null, 8, ["onClick"]),
createVNode(unref(script$2), {
icon: "pi pi-trash",
class: "p-button-text p-button-danger",
onClick: /* @__PURE__ */ __name(($event) => removeKeybinding(slotProps.data), "onClick"),
disabled: !slotProps.data.keybinding
}, null, 8, ["onClick", "disabled"])
])
]),
_: 1
}),
createVNode(unref(script$1), {
field: "id",
header: _ctx.$t("g.command"),
sortable: "",
class: "max-w-64 2xl:max-w-full"
}, {
body: withCtx((slotProps) => [
createBaseVNode("div", {
class: "overflow-hidden text-ellipsis whitespace-nowrap",
title: slotProps.data.id
}, toDisplayString(slotProps.data.label), 9, _hoisted_2)
]),
_: 1
}, 8, ["header"]),
createVNode(unref(script$1), {
field: "keybinding",
header: _ctx.$t("g.keybinding")
}, {
body: withCtx((slotProps) => [
slotProps.data.keybinding ? (openBlock(), createBlock(_sfc_main$1, {
key: 0,
keyCombo: slotProps.data.keybinding.combo,
isModified: unref(keybindingStore).isCommandKeybindingModified(slotProps.data.id)
}, null, 8, ["keyCombo", "isModified"])) : (openBlock(), createElementBlock("span", _hoisted_3, "-"))
]),
_: 1
}, 8, ["header"])
]),
_: 1
}, 8, ["value", "selection", "filters"]),
createVNode(unref(script$6), {
class: "min-w-96",
visible: editDialogVisible.value,
"onUpdate:visible": _cache[2] || (_cache[2] = ($event) => editDialogVisible.value = $event),
modal: "",
header: currentEditingCommand.value?.label,
onHide: cancelEdit
}, {
footer: withCtx(() => [
createVNode(unref(script$2), {
label: "Save",
icon: "pi pi-check",
onClick: saveKeybinding,
disabled: !!existingKeybindingOnCombo.value,
autofocus: ""
}, null, 8, ["disabled"])
]),
default: withCtx(() => [
createBaseVNode("div", null, [
createVNode(unref(script$4), {
class: "mb-2 text-center",
ref_key: "keybindingInput",
ref: keybindingInput,
modelValue: newBindingKeyCombo.value?.toString() ?? "",
placeholder: "Press keys for new binding",
onKeydown: withModifiers(captureKeybinding, ["stop", "prevent"]),
autocomplete: "off",
fluid: "",
invalid: !!existingKeybindingOnCombo.value
}, null, 8, ["modelValue", "invalid"]),
existingKeybindingOnCombo.value ? (openBlock(), createBlock(unref(script$5), {
key: 0,
severity: "error"
}, {
default: withCtx(() => [
_cache[3] || (_cache[3] = createTextVNode(" Keybinding already exists on ")),
createVNode(unref(script), {
severity: "secondary",
value: existingKeybindingOnCombo.value.commandId
}, null, 8, ["value"])
]),
_: 1
})) : createCommentVNode("", true)
])
]),
_: 1
}, 8, ["visible", "header"]),
withDirectives(createVNode(unref(script$2), {
class: "mt-4",
label: _ctx.$t("g.reset"),
icon: "pi pi-trash",
severity: "danger",
fluid: "",
text: "",
onClick: resetKeybindings
}, null, 8, ["label"]), [
[_directive_tooltip, _ctx.$t("g.resetKeybindingsTooltip")]
])
]),
_: 1
});
};
}
});
const KeybindingPanel = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-8454e24f"]]);
export {
KeybindingPanel as default
};
//# sourceMappingURL=KeybindingPanel-oavhFdkz.js.map

25635
web/assets/MaintenanceView-Bh8OZpgl.js generated vendored

File diff suppressed because one or more lines are too long

View File

@ -1,87 +0,0 @@
.task-card-ok[data-v-c3bd7658] {
position: absolute;
right: -1rem;
bottom: -1rem;
grid-column: 1 / -1;
grid-row: 1 / -1;
--tw-text-opacity: 1;
color: rgb(150 206 76 / var(--tw-text-opacity));
opacity: 1;
transition-property: opacity;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
font-size: 4rem;
text-shadow: 0.25rem 0 0.5rem black;
z-index: 10;
}
.p-card {
&[data-v-c3bd7658] {
transition-property: opacity;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
--p-card-background: var(--p-button-secondary-background);
opacity: 0.9;
}
&.opacity-65[data-v-c3bd7658] {
opacity: 0.4;
}
&[data-v-c3bd7658]:hover {
opacity: 1;
}
}
[data-v-c3bd7658] .p-card-header {
z-index: 0;
}
[data-v-c3bd7658] .p-card-body {
z-index: 1;
flex-grow: 1;
justify-content: space-between;
}
.task-div {
> i[data-v-c3bd7658] {
pointer-events: none;
}
&:hover > i[data-v-c3bd7658] {
opacity: 0.2;
}
}
[data-v-dd50a7dd] .p-tag {
--p-tag-gap: 0.375rem;
}
.backspan[data-v-dd50a7dd]::before {
position: absolute;
margin: 0px;
color: var(--p-text-muted-color);
font-family: 'primeicons';
top: -2rem;
right: -2rem;
speak: none;
font-style: normal;
font-weight: normal;
font-variant: normal;
text-transform: none;
line-height: 1;
display: inline-block;
-webkit-font-smoothing: antialiased;
opacity: 0.02;
font-size: min(14rem, 90vw);
z-index: 0;
}

View File

@ -1,7 +0,0 @@
.p-tag[data-v-dc169863] {
--p-tag-gap: 0.5rem;
}
.comfy-installer[data-v-dc169863] {
margin-top: max(1rem, max(0px, calc((100vh - 42rem) * 0.5)));
}

View File

@ -1,74 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, I as useI18n, T as ref, p as onMounted, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, k as createVNode, j as unref, a5 as script, b3 as script$1, l as script$2, b9 as electronAPI, _ as _export_sfc } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1 = { class: "comfy-installer grow flex flex-col gap-4 text-neutral-300 max-w-110" };
const _hoisted_2 = { class: "text-2xl font-semibold text-neutral-100" };
const _hoisted_3 = { class: "m-1 text-neutral-300" };
const _hoisted_4 = { class: "ml-2" };
const _hoisted_5 = { class: "m-1 mb-4" };
const _hoisted_6 = { class: "m-0" };
const _hoisted_7 = { class: "m-1" };
const _hoisted_8 = { class: "font-mono" };
const _hoisted_9 = { class: "m-1" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "ManualConfigurationView",
setup(__props) {
const { t } = useI18n();
const electron = electronAPI();
const basePath = ref(null);
const sep = ref("/");
const restartApp = /* @__PURE__ */ __name((message) => electron.restartApp(message), "restartApp");
onMounted(async () => {
basePath.value = await electron.getBasePath();
if (basePath.value.indexOf("/") === -1) sep.value = "\\";
});
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, { dark: "" }, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
createBaseVNode("h2", _hoisted_2, toDisplayString(_ctx.$t("install.manualConfiguration.title")), 1),
createBaseVNode("p", _hoisted_3, [
createVNode(unref(script), {
icon: "pi pi-exclamation-triangle",
severity: "warn",
value: unref(t)("icon.exclamation-triangle")
}, null, 8, ["value"]),
createBaseVNode("strong", _hoisted_4, toDisplayString(_ctx.$t("install.gpuSelection.customComfyNeedsPython")), 1)
]),
createBaseVNode("div", null, [
createBaseVNode("p", _hoisted_5, toDisplayString(_ctx.$t("install.manualConfiguration.requirements")) + ": ", 1),
createBaseVNode("ul", _hoisted_6, [
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.gpuSelection.customManualVenv")), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t("install.gpuSelection.customInstallRequirements")), 1)
])
]),
createBaseVNode("p", _hoisted_7, toDisplayString(_ctx.$t("install.manualConfiguration.createVenv")) + ":", 1),
createVNode(unref(script$1), {
header: unref(t)("install.manualConfiguration.virtualEnvironmentPath")
}, {
default: withCtx(() => [
createBaseVNode("span", _hoisted_8, toDisplayString(`${basePath.value}${sep.value}.venv${sep.value}`), 1)
]),
_: 1
}, 8, ["header"]),
createBaseVNode("p", _hoisted_9, toDisplayString(_ctx.$t("install.manualConfiguration.restartWhenFinished")), 1),
createVNode(unref(script$2), {
class: "place-self-end",
label: unref(t)("menuLabels.Restart"),
severity: "warn",
icon: "pi pi-refresh",
onClick: _cache[0] || (_cache[0] = ($event) => restartApp("Manual configuration complete"))
}, null, 8, ["label"])
])
]),
_: 1
});
};
}
});
const ManualConfigurationView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-dc169863"]]);
export {
ManualConfigurationView as default
};
//# sourceMappingURL=ManualConfigurationView-DTLyJ3VG.js.map

View File

@ -1,86 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
import { d as defineComponent, aV as useToast, I as useI18n, T as ref, bi as useRouter, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, a8 as createTextVNode, k as createVNode, j as unref, br as script, l as script$1, b9 as electronAPI } from "./index-Bv0b06LE.js";
const _hoisted_1 = { class: "h-full p-8 2xl:p-16 flex flex-col items-center justify-center" };
const _hoisted_2 = { class: "bg-neutral-800 rounded-lg shadow-lg p-6 w-full max-w-[600px] flex flex-col gap-6" };
const _hoisted_3 = { class: "text-3xl font-semibold text-neutral-100" };
const _hoisted_4 = { class: "text-neutral-400" };
const _hoisted_5 = { class: "text-neutral-400" };
const _hoisted_6 = {
href: "https://comfy.org/privacy",
target: "_blank",
class: "text-blue-400 hover:text-blue-300 underline"
};
const _hoisted_7 = { class: "flex items-center gap-4" };
const _hoisted_8 = {
id: "metricsDescription",
class: "text-neutral-100"
};
const _hoisted_9 = { class: "flex pt-6 justify-end" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "MetricsConsentView",
setup(__props) {
const toast = useToast();
const { t } = useI18n();
const allowMetrics = ref(true);
const router = useRouter();
const isUpdating = ref(false);
const updateConsent = /* @__PURE__ */ __name(async () => {
isUpdating.value = true;
try {
await electronAPI().setMetricsConsent(allowMetrics.value);
} catch (error) {
toast.add({
severity: "error",
summary: t("install.errorUpdatingConsent"),
detail: t("install.errorUpdatingConsentDetail"),
life: 3e3
});
} finally {
isUpdating.value = false;
}
router.push("/");
}, "updateConsent");
return (_ctx, _cache) => {
const _component_BaseViewTemplate = _sfc_main$1;
return openBlock(), createBlock(_component_BaseViewTemplate, { dark: "" }, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
createBaseVNode("div", _hoisted_2, [
createBaseVNode("h2", _hoisted_3, toDisplayString(_ctx.$t("install.helpImprove")), 1),
createBaseVNode("p", _hoisted_4, toDisplayString(_ctx.$t("install.updateConsent")), 1),
createBaseVNode("p", _hoisted_5, [
createTextVNode(toDisplayString(_ctx.$t("install.moreInfo")) + " ", 1),
createBaseVNode("a", _hoisted_6, toDisplayString(_ctx.$t("install.privacyPolicy")), 1),
_cache[1] || (_cache[1] = createTextVNode(". "))
]),
createBaseVNode("div", _hoisted_7, [
createVNode(unref(script), {
modelValue: allowMetrics.value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => allowMetrics.value = $event),
"aria-describedby": "metricsDescription"
}, null, 8, ["modelValue"]),
createBaseVNode("span", _hoisted_8, toDisplayString(allowMetrics.value ? _ctx.$t("install.metricsEnabled") : _ctx.$t("install.metricsDisabled")), 1)
]),
createBaseVNode("div", _hoisted_9, [
createVNode(unref(script$1), {
label: _ctx.$t("g.ok"),
icon: "pi pi-check",
loading: isUpdating.value,
iconPos: "right",
onClick: updateConsent
}, null, 8, ["label", "loading"])
])
])
])
]),
_: 1
});
};
}
});
export {
_sfc_main as default
};
//# sourceMappingURL=MetricsConsentView-C80fk2cl.js.map

View File

@ -1,86 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, bi as useRouter, r as resolveDirective, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, k as createVNode, j as unref, l as script, i as withDirectives, _ as _export_sfc } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _imports_0 = "" + new URL("images/sad_girl.png", import.meta.url).href;
const _hoisted_1 = { class: "sad-container" };
const _hoisted_2 = { class: "no-drag sad-text flex items-center" };
const _hoisted_3 = { class: "flex flex-col gap-8 p-8 min-w-110" };
const _hoisted_4 = { class: "text-4xl font-bold text-red-500" };
const _hoisted_5 = { class: "space-y-4" };
const _hoisted_6 = { class: "text-xl" };
const _hoisted_7 = { class: "list-disc list-inside space-y-1 text-neutral-800" };
const _hoisted_8 = { class: "flex gap-4" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "NotSupportedView",
setup(__props) {
const openDocs = /* @__PURE__ */ __name(() => {
window.open(
"https://github.com/Comfy-Org/desktop#currently-supported-platforms",
"_blank"
);
}, "openDocs");
const reportIssue = /* @__PURE__ */ __name(() => {
window.open("https://forum.comfy.org/c/v1-feedback/", "_blank");
}, "reportIssue");
const router = useRouter();
const continueToInstall = /* @__PURE__ */ __name(() => {
router.push("/install");
}, "continueToInstall");
return (_ctx, _cache) => {
const _directive_tooltip = resolveDirective("tooltip");
return openBlock(), createBlock(_sfc_main$1, null, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
_cache[0] || (_cache[0] = createBaseVNode("img", {
class: "sad-girl",
src: _imports_0,
alt: "Sad girl illustration"
}, null, -1)),
createBaseVNode("div", _hoisted_2, [
createBaseVNode("div", _hoisted_3, [
createBaseVNode("h1", _hoisted_4, toDisplayString(_ctx.$t("notSupported.title")), 1),
createBaseVNode("div", _hoisted_5, [
createBaseVNode("p", _hoisted_6, toDisplayString(_ctx.$t("notSupported.message")), 1),
createBaseVNode("ul", _hoisted_7, [
createBaseVNode("li", null, toDisplayString(_ctx.$t("notSupported.supportedDevices.macos")), 1),
createBaseVNode("li", null, toDisplayString(_ctx.$t("notSupported.supportedDevices.windows")), 1)
])
]),
createBaseVNode("div", _hoisted_8, [
createVNode(unref(script), {
label: _ctx.$t("notSupported.learnMore"),
icon: "pi pi-github",
onClick: openDocs,
severity: "secondary"
}, null, 8, ["label"]),
createVNode(unref(script), {
label: _ctx.$t("notSupported.reportIssue"),
icon: "pi pi-flag",
onClick: reportIssue,
severity: "secondary"
}, null, 8, ["label"]),
withDirectives(createVNode(unref(script), {
label: _ctx.$t("notSupported.continue"),
icon: "pi pi-arrow-right",
iconPos: "right",
onClick: continueToInstall,
severity: "danger"
}, null, 8, ["label"]), [
[_directive_tooltip, _ctx.$t("notSupported.continueTooltip")]
])
])
])
])
])
]),
_: 1
});
};
}
});
const NotSupportedView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-ebb20958"]]);
export {
NotSupportedView as default
};
//# sourceMappingURL=NotSupportedView-B78ZVR9Z.js.map

View File

@ -1,19 +0,0 @@
.sad-container {
&[data-v-ebb20958] {
display: grid;
align-items: center;
justify-content: space-evenly;
grid-template-columns: 25rem 1fr;
}
&[data-v-ebb20958] > * {
grid-row: 1;
}
}
.sad-text[data-v-ebb20958] {
grid-column: 1/3;
}
.sad-girl[data-v-ebb20958] {
grid-column: 2/3;
width: min(75vw, 100vh);
}

View File

@ -1,156 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { o as openBlock, f as createElementBlock, m as createBaseVNode, H as markRaw, d as defineComponent, a as useSettingStore, af as storeToRefs, N as watch, dJ as useCopyToClipboard, I as useI18n, y as createBlock, z as withCtx, j as unref, bn as script, E as toDisplayString, D as renderList, F as Fragment, k as createVNode, l as script$1, B as createCommentVNode, bl as script$2, dK as FormItem, dz as _sfc_main$1, b9 as electronAPI } from "./index-Bv0b06LE.js";
import { u as useServerConfigStore } from "./serverConfigStore-D2Vr0L0h.js";
const _hoisted_1$1 = {
viewBox: "0 0 24 24",
width: "1.2em",
height: "1.2em"
};
function render(_ctx, _cache) {
return openBlock(), createElementBlock("svg", _hoisted_1$1, _cache[0] || (_cache[0] = [
createBaseVNode("path", {
fill: "none",
stroke: "currentColor",
"stroke-linecap": "round",
"stroke-linejoin": "round",
"stroke-width": "2",
d: "m4 17l6-6l-6-6m8 14h8"
}, null, -1)
]));
}
__name(render, "render");
const __unplugin_components_0 = markRaw({ name: "lucide-terminal", render });
const _hoisted_1 = { class: "flex flex-col gap-2" };
const _hoisted_2 = { class: "flex justify-end gap-2" };
const _hoisted_3 = { class: "flex items-center justify-between" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "ServerConfigPanel",
setup(__props) {
const settingStore = useSettingStore();
const serverConfigStore = useServerConfigStore();
const {
serverConfigsByCategory,
serverConfigValues,
launchArgs,
commandLineArgs,
modifiedConfigs
} = storeToRefs(serverConfigStore);
const revertChanges = /* @__PURE__ */ __name(() => {
serverConfigStore.revertChanges();
}, "revertChanges");
const restartApp = /* @__PURE__ */ __name(() => {
electronAPI().restartApp();
}, "restartApp");
watch(launchArgs, (newVal) => {
settingStore.set("Comfy.Server.LaunchArgs", newVal);
});
watch(serverConfigValues, (newVal) => {
settingStore.set("Comfy.Server.ServerConfigValues", newVal);
});
const { copyToClipboard } = useCopyToClipboard();
const copyCommandLineArgs = /* @__PURE__ */ __name(async () => {
await copyToClipboard(commandLineArgs.value);
}, "copyCommandLineArgs");
const { t } = useI18n();
const translateItem = /* @__PURE__ */ __name((item) => {
return {
...item,
name: t(`serverConfigItems.${item.id}.name`, item.name),
tooltip: item.tooltip ? t(`serverConfigItems.${item.id}.tooltip`, item.tooltip) : void 0
};
}, "translateItem");
return (_ctx, _cache) => {
const _component_i_lucide58terminal = __unplugin_components_0;
return openBlock(), createBlock(_sfc_main$1, {
value: "Server-Config",
class: "server-config-panel"
}, {
header: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
unref(modifiedConfigs).length > 0 ? (openBlock(), createBlock(unref(script), {
key: 0,
severity: "info",
"pt:text": "w-full"
}, {
default: withCtx(() => [
createBaseVNode("p", null, toDisplayString(_ctx.$t("serverConfig.modifiedConfigs")), 1),
createBaseVNode("ul", null, [
(openBlock(true), createElementBlock(Fragment, null, renderList(unref(modifiedConfigs), (config) => {
return openBlock(), createElementBlock("li", {
key: config.id
}, toDisplayString(config.name) + ": " + toDisplayString(config.initialValue) + " → " + toDisplayString(config.value), 1);
}), 128))
]),
createBaseVNode("div", _hoisted_2, [
createVNode(unref(script$1), {
label: _ctx.$t("serverConfig.revertChanges"),
onClick: revertChanges,
outlined: ""
}, null, 8, ["label"]),
createVNode(unref(script$1), {
label: _ctx.$t("serverConfig.restart"),
onClick: restartApp,
outlined: "",
severity: "danger"
}, null, 8, ["label"])
])
]),
_: 1
})) : createCommentVNode("", true),
unref(commandLineArgs) ? (openBlock(), createBlock(unref(script), {
key: 1,
severity: "secondary",
"pt:text": "w-full"
}, {
icon: withCtx(() => [
createVNode(_component_i_lucide58terminal, { class: "text-xl font-bold" })
]),
default: withCtx(() => [
createBaseVNode("div", _hoisted_3, [
createBaseVNode("p", null, toDisplayString(unref(commandLineArgs)), 1),
createVNode(unref(script$1), {
icon: "pi pi-clipboard",
onClick: copyCommandLineArgs,
severity: "secondary",
text: ""
})
])
]),
_: 1
})) : createCommentVNode("", true)
])
]),
default: withCtx(() => [
(openBlock(true), createElementBlock(Fragment, null, renderList(Object.entries(unref(serverConfigsByCategory)), ([label, items], i) => {
return openBlock(), createElementBlock("div", { key: label }, [
i > 0 ? (openBlock(), createBlock(unref(script$2), { key: 0 })) : createCommentVNode("", true),
createBaseVNode("h3", null, toDisplayString(_ctx.$t(`serverConfigCategories.${label}`, label)), 1),
(openBlock(true), createElementBlock(Fragment, null, renderList(items, (item) => {
return openBlock(), createElementBlock("div", {
key: item.name,
class: "mb-4"
}, [
createVNode(FormItem, {
item: translateItem(item),
formValue: item.value,
"onUpdate:formValue": /* @__PURE__ */ __name(($event) => item.value = $event, "onUpdate:formValue"),
id: item.id,
labelClass: {
"text-highlight": item.initialValue !== item.value
}
}, null, 8, ["item", "formValue", "onUpdate:formValue", "id", "labelClass"])
]);
}), 128))
]);
}), 128))
]),
_: 1
});
};
}
});
export {
_sfc_main as default
};
//# sourceMappingURL=ServerConfigPanel-BYrt6wyr.js.map

View File

@ -1,100 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, I as useI18n, T as ref, bo as ProgressStatus, p as onMounted, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, a8 as createTextVNode, E as toDisplayString, j as unref, f as createElementBlock, B as createCommentVNode, k as createVNode, l as script, i as withDirectives, v as vShow, bp as BaseTerminal, b9 as electronAPI, _ as _export_sfc } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1 = { class: "flex flex-col w-full h-full items-center" };
const _hoisted_2 = { class: "text-2xl font-bold" };
const _hoisted_3 = { key: 0 };
const _hoisted_4 = {
key: 0,
class: "flex flex-col items-center gap-4"
};
const _hoisted_5 = { class: "flex items-center my-4 gap-2" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "ServerStartView",
setup(__props) {
const electron = electronAPI();
const { t } = useI18n();
const status = ref(ProgressStatus.INITIAL_STATE);
const electronVersion = ref("");
let xterm;
const terminalVisible = ref(true);
const updateProgress = /* @__PURE__ */ __name(({ status: newStatus }) => {
status.value = newStatus;
if (newStatus === ProgressStatus.ERROR) terminalVisible.value = false;
else xterm?.clear();
}, "updateProgress");
const terminalCreated = /* @__PURE__ */ __name(({ terminal, useAutoSize }, root) => {
xterm = terminal;
useAutoSize({ root, autoRows: true, autoCols: true });
electron.onLogMessage((message) => {
terminal.write(message);
});
terminal.options.cursorBlink = false;
terminal.options.disableStdin = true;
terminal.options.cursorInactiveStyle = "block";
}, "terminalCreated");
const reinstall = /* @__PURE__ */ __name(() => electron.reinstall(), "reinstall");
const reportIssue = /* @__PURE__ */ __name(() => {
window.open("https://forum.comfy.org/c/v1-feedback/", "_blank");
}, "reportIssue");
const openLogs = /* @__PURE__ */ __name(() => electron.openLogsFolder(), "openLogs");
onMounted(async () => {
electron.sendReady();
electron.onProgressUpdate(updateProgress);
electronVersion.value = await electron.getElectronVersion();
});
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, {
dark: "",
class: "flex-col"
}, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
createBaseVNode("h2", _hoisted_2, [
createTextVNode(toDisplayString(unref(t)(`serverStart.process.${status.value}`)) + " ", 1),
status.value === unref(ProgressStatus).ERROR ? (openBlock(), createElementBlock("span", _hoisted_3, " v" + toDisplayString(electronVersion.value), 1)) : createCommentVNode("", true)
]),
status.value === unref(ProgressStatus).ERROR ? (openBlock(), createElementBlock("div", _hoisted_4, [
createBaseVNode("div", _hoisted_5, [
createVNode(unref(script), {
icon: "pi pi-flag",
severity: "secondary",
label: unref(t)("serverStart.reportIssue"),
onClick: reportIssue
}, null, 8, ["label"]),
createVNode(unref(script), {
icon: "pi pi-file",
severity: "secondary",
label: unref(t)("serverStart.openLogs"),
onClick: openLogs
}, null, 8, ["label"]),
createVNode(unref(script), {
icon: "pi pi-refresh",
label: unref(t)("serverStart.reinstall"),
onClick: reinstall
}, null, 8, ["label"])
]),
!terminalVisible.value ? (openBlock(), createBlock(unref(script), {
key: 0,
icon: "pi pi-search",
severity: "secondary",
label: unref(t)("serverStart.showTerminal"),
onClick: _cache[0] || (_cache[0] = ($event) => terminalVisible.value = true)
}, null, 8, ["label"])) : createCommentVNode("", true)
])) : createCommentVNode("", true),
withDirectives(createVNode(BaseTerminal, { onCreated: terminalCreated }, null, 512), [
[vShow, terminalVisible.value]
])
])
]),
_: 1
});
};
}
});
const ServerStartView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-e6ba9633"]]);
export {
ServerStartView as default
};
//# sourceMappingURL=ServerStartView-B7TlHxYo.js.map

View File

@ -1,5 +0,0 @@
[data-v-e6ba9633] .xterm-helper-textarea {
/* Hide this as it moves all over when uv is running */
display: none;
}

File diff suppressed because it is too large Load Diff

101
web/assets/UserSelectView-C703HOyO.js generated vendored
View File

@ -1,101 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, ak as useUserStore, bi as useRouter, T as ref, c as computed, p as onMounted, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, k as createVNode, bj as withKeys, j as unref, bk as script, bl as script$1, bm as script$2, bn as script$3, a8 as createTextVNode, B as createCommentVNode, l as script$4 } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1 = {
id: "comfy-user-selection",
class: "min-w-84 relative rounded-lg bg-[var(--comfy-menu-bg)] p-5 px-10 shadow-lg"
};
const _hoisted_2 = { class: "flex w-full flex-col items-center" };
const _hoisted_3 = { class: "flex w-full flex-col gap-2" };
const _hoisted_4 = { for: "new-user-input" };
const _hoisted_5 = { class: "flex w-full flex-col gap-2" };
const _hoisted_6 = { for: "existing-user-select" };
const _hoisted_7 = { class: "mt-5" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "UserSelectView",
setup(__props) {
const userStore = useUserStore();
const router = useRouter();
const selectedUser = ref(null);
const newUsername = ref("");
const loginError = ref("");
const createNewUser = computed(() => newUsername.value.trim() !== "");
const newUserExistsError = computed(() => {
return userStore.users.find((user) => user.username === newUsername.value) ? `User "${newUsername.value}" already exists` : "";
});
const error = computed(() => newUserExistsError.value || loginError.value);
const login = /* @__PURE__ */ __name(async () => {
try {
const user = createNewUser.value ? await userStore.createUser(newUsername.value) : selectedUser.value;
if (!user) {
throw new Error("No user selected");
}
userStore.login(user);
router.push("/");
} catch (err) {
loginError.value = err.message ?? JSON.stringify(err);
}
}, "login");
onMounted(async () => {
if (!userStore.initialized) {
await userStore.initialize();
}
});
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, { dark: "" }, {
default: withCtx(() => [
createBaseVNode("main", _hoisted_1, [
_cache[2] || (_cache[2] = createBaseVNode("h1", { class: "my-2.5 mb-7 font-normal" }, "ComfyUI", -1)),
createBaseVNode("div", _hoisted_2, [
createBaseVNode("div", _hoisted_3, [
createBaseVNode("label", _hoisted_4, toDisplayString(_ctx.$t("userSelect.newUser")) + ":", 1),
createVNode(unref(script), {
id: "new-user-input",
modelValue: newUsername.value,
"onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => newUsername.value = $event),
placeholder: _ctx.$t("userSelect.enterUsername"),
onKeyup: withKeys(login, ["enter"])
}, null, 8, ["modelValue", "placeholder"])
]),
createVNode(unref(script$1)),
createBaseVNode("div", _hoisted_5, [
createBaseVNode("label", _hoisted_6, toDisplayString(_ctx.$t("userSelect.existingUser")) + ":", 1),
createVNode(unref(script$2), {
modelValue: selectedUser.value,
"onUpdate:modelValue": _cache[1] || (_cache[1] = ($event) => selectedUser.value = $event),
class: "w-full",
inputId: "existing-user-select",
options: unref(userStore).users,
"option-label": "username",
placeholder: _ctx.$t("userSelect.selectUser"),
disabled: createNewUser.value
}, null, 8, ["modelValue", "options", "placeholder", "disabled"]),
error.value ? (openBlock(), createBlock(unref(script$3), {
key: 0,
severity: "error"
}, {
default: withCtx(() => [
createTextVNode(toDisplayString(error.value), 1)
]),
_: 1
})) : createCommentVNode("", true)
]),
createBaseVNode("footer", _hoisted_7, [
createVNode(unref(script$4), {
label: _ctx.$t("userSelect.next"),
onClick: login
}, null, 8, ["label"])
])
])
])
]),
_: 1
});
};
}
});
export {
_sfc_main as default
};
//# sourceMappingURL=UserSelectView-C703HOyO.js.map

36
web/assets/WelcomeView-Brz3-luE.css generated vendored
View File

@ -1,36 +0,0 @@
.animated-gradient-text[data-v-7dfaf74c] {
font-weight: 700;
font-size: clamp(2rem, 8vw, 4rem);
background: linear-gradient(to right, #12c2e9, #c471ed, #f64f59, #12c2e9);
background-size: 300% auto;
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
animation: gradient-7dfaf74c 8s linear infinite;
}
.text-glow[data-v-7dfaf74c] {
filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
}
@keyframes gradient-7dfaf74c {
0% {
background-position: 0% center;
}
100% {
background-position: 300% center;
}
}
.fade-in-up[data-v-7dfaf74c] {
animation: fadeInUp-7dfaf74c 1.5s ease-out;
animation-fill-mode: both;
}
@keyframes fadeInUp-7dfaf74c {
0% {
opacity: 0;
transform: translateY(20px);
}
100% {
opacity: 1;
transform: translateY(0);
}
}

39
web/assets/WelcomeView-DIFvbWc2.js generated vendored
View File

@ -1,39 +0,0 @@
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import { d as defineComponent, bi as useRouter, o as openBlock, y as createBlock, z as withCtx, m as createBaseVNode, E as toDisplayString, k as createVNode, j as unref, l as script, _ as _export_sfc } from "./index-Bv0b06LE.js";
import { _ as _sfc_main$1 } from "./BaseViewTemplate-BTbuZf5t.js";
const _hoisted_1 = { class: "flex flex-col items-center justify-center gap-8 p-8" };
const _hoisted_2 = { class: "animated-gradient-text text-glow select-none" };
const _sfc_main = /* @__PURE__ */ defineComponent({
__name: "WelcomeView",
setup(__props) {
const router = useRouter();
const navigateTo = /* @__PURE__ */ __name((path) => {
router.push(path);
}, "navigateTo");
return (_ctx, _cache) => {
return openBlock(), createBlock(_sfc_main$1, { dark: "" }, {
default: withCtx(() => [
createBaseVNode("div", _hoisted_1, [
createBaseVNode("h1", _hoisted_2, toDisplayString(_ctx.$t("welcome.title")), 1),
createVNode(unref(script), {
label: _ctx.$t("welcome.getStarted"),
icon: "pi pi-arrow-right",
iconPos: "right",
size: "large",
rounded: "",
onClick: _cache[0] || (_cache[0] = ($event) => navigateTo("/install")),
class: "p-4 text-lg fade-in-up"
}, null, 8, ["label"])
])
]),
_: 1
});
};
}
});
const WelcomeView = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-7dfaf74c"]]);
export {
WelcomeView as default
};
//# sourceMappingURL=WelcomeView-DIFvbWc2.js.map

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="292" height="92pt" viewBox="0 0 219 92"><defs><clipPath id="a"><path d="M159 .79h25V69h-25Zm0 0"/></clipPath><clipPath id="b"><path d="M183 9h35.371v60H183Zm0 0"/></clipPath><clipPath id="c"><path d="M0 .79h92V92H0Zm0 0"/></clipPath></defs><path style="stroke:none;fill-rule:nonzero;fill:#fff;fill-opacity:1" d="M130.871 31.836c-4.785 0-8.351 2.352-8.351 8.008 0 4.261 2.347 7.222 8.093 7.222 4.871 0 8.18-2.867 8.18-7.398 0-5.133-2.961-7.832-7.922-7.832Zm-9.57 39.95c-1.133 1.39-2.262 2.87-2.262 4.612 0 3.48 4.434 4.524 10.527 4.524 5.051 0 11.926-.352 11.926-5.043 0-2.793-3.308-2.965-7.488-3.227Zm25.761-39.688c1.563 2.004 3.22 4.789 3.22 8.793 0 9.656-7.571 15.316-18.536 15.316-2.789 0-5.312-.348-6.879-.785l-2.87 4.613 8.526.52c15.059.96 23.934 1.398 23.934 12.968 0 10.008-8.789 15.665-23.934 15.665-15.75 0-21.757-4.004-21.757-10.88 0-3.917 1.742-6 4.789-8.878-2.875-1.211-3.828-3.387-3.828-5.739 0-1.914.953-3.656 2.523-5.312 1.566-1.652 3.305-3.305 5.395-5.219-4.262-2.09-7.485-6.617-7.485-13.058 0-10.008 6.613-16.88 19.93-16.88 3.742 0 6.004.344 8.008.872h16.972v7.394l-8.007.61"/><g clip-path="url(#a)"><path style="stroke:none;fill-rule:nonzero;fill:#fff;fill-opacity:1" d="M170.379 16.281c-4.961 0-7.832-2.87-7.832-7.836 0-4.957 2.871-7.656 7.832-7.656 5.05 0 7.922 2.7 7.922 7.656 0 4.965-2.871 7.836-7.922 7.836Zm-11.227 52.305V61.71l4.438-.606c1.219-.175 1.394-.437 1.394-1.746V33.773c0-.953-.261-1.566-1.132-1.824l-4.7-1.656.957-7.047h18.016V59.36c0 1.399.086 1.57 1.395 1.746l4.437.606v6.875h-24.805"/></g><g clip-path="url(#b)"><path style="stroke:none;fill-rule:nonzero;fill:#fff;fill-opacity:1" d="M218.371 65.21c-3.742 1.825-9.223 3.481-14.187 3.481-10.356 0-14.27-4.175-14.27-14.015V31.879c0-.524 0-.871-.7-.871h-6.093v-7.746c7.664-.871 10.707-4.703 11.664-14.188h8.27v12.36c0 .609 0 .87.695.87h12.27v8.704h-12.965v20.797c0 5.136 1.218 7.136 5.918 7.136 2.437 0 4.96-.609 7.047-1.39l2.351 7.66"/></g><g clip-path="url(#c)"><path style="stroke:none;fill-rule:nonzero;fill:#fff;fill-opacity:1" d="M89.422 42.371 49.629 2.582a5.868 5.868 0 0 0-8.3 0l-8.263 8.262 10.48 10.484a6.965 6.965 0 0 1 7.173 1.668 6.98 6.98 0 0 1 1.656 7.215l10.102 10.105a6.963 6.963 0 0 1 7.214 1.657 6.976 6.976 0 0 1 0 9.875 6.98 6.98 0 0 1-9.879 0 6.987 6.987 0 0 1-1.519-7.594l-9.422-9.422v24.793a6.979 6.979 0 0 1 1.848 1.32 6.988 6.988 0 0 1 0 9.88c-2.73 2.726-7.153 2.726-9.875 0a6.98 6.98 0 0 1 0-9.88 6.893 6.893 0 0 1 2.285-1.523V34.398a6.893 6.893 0 0 1-2.285-1.523 6.988 6.988 0 0 1-1.508-7.637L29.004 14.902 1.719 42.187a5.868 5.868 0 0 0 0 8.301l39.793 39.793a5.868 5.868 0 0 0 8.3 0l39.61-39.605a5.873 5.873 0 0 0 0-8.305"/></g></svg>

Before

Width:  |  Height:  |  Size: 2.6 KiB

BIN
web/assets/images/apple-mps-logo.png generated vendored

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

View File

@ -1,5 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg width="21.59mm" height="6.922mm" version="1.1" viewBox="0 0 21.59 6.922" xmlns="http://www.w3.org/2000/svg">
<path d="m6.667 0.941v1.345h-0.305v-1.345h-0.699v1.345h-0.304v-1.651h0.304v0.291q3e-3 -0.06 0.027-0.113 0.024-0.054 0.065-0.093 0.041-0.04 0.096-0.062 0.054-0.023 0.116-0.023h0.393q0.06 0 0.114 0.023 0.054 0.021 0.096 0.062 0.041 0.038 0.066 0.093 0.026 0.052 0.027 0.113 3e-3 -0.06 0.026-0.113 0.024-0.054 0.065-0.093 0.041-0.04 0.096-0.062 0.054-0.023 0.116-0.023h0.393q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119v1.347h-0.298v-1.345zm1.512 0.624q0-0.063 0.023-0.117 0.024-0.055 0.065-0.097 0.041-0.041 0.097-0.065 0.055-0.024 0.117-0.024h0.787v-0.321h-0.996v-0.305h0.996q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119v1.346h-0.302v-0.279q-4e-3 0.057-0.031 0.108-0.026 0.051-0.068 0.089-0.04 0.037-0.093 0.058-0.052 0.021-0.111 0.021h-0.483q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119zm0.303 0.415h0.787v-0.415h-0.786zm3.063 0.306h-0.306v-1.345h-0.851v1.345h-0.304v-1.651h0.303v0.291q3e-3 -0.06 0.027-0.113 0.024-0.054 0.065-0.093 0.041-0.04 0.096-0.062 0.054-0.023 0.116-0.023h0.545q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119zm0.508-1.651h0.303v1.346h0.851v-1.346h0.305v1.651h-0.304v-0.279q-4e-3 0.057-0.031 0.108-0.026 0.051-0.068 0.089-0.04 0.037-0.093 0.058-0.052 0.021-0.111 0.021h-0.547q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119zm1.969 0.93q0-0.063 0.023-0.117 0.024-0.055 0.065-0.097 0.041-0.041 0.097-0.065 0.055-0.024 0.117-0.024h0.787v-0.321h-0.996v-0.305h0.996q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119v1.346h-0.302v-0.279q-4e-3 0.057-0.031 0.108-0.026 0.051-0.068 0.089-0.04 0.037-0.093 0.058-0.052 0.021-0.111 0.021h-0.483q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119zm0.303 0.415h0.787v-0.415h-0.786zm1.906-1.98v2.286h-0.304v-2.286z" fill="#fff"/>
<path d="m0.303 4.909v1.04h0.787v-0.279h0.305v0.279q0 0.063-0.024 0.119-0.023 0.055-0.065 0.097-0.04 0.04-0.096 0.065-0.055 0.023-0.119 0.023h-0.788q-0.062 0-0.117-0.023-0.056-0.023-0.098-0.063-0.04-0.042-0.065-0.098-0.023-0.056-0.023-0.119v-1.04q0-0.063 0.023-0.119 0.024-0.055 0.065-0.096t0.097-0.065q0.055-0.024 0.117-0.024h0.787q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119v0.279h-0.302v-0.279zm3.029 1.04q0 0.063-0.024 0.119-0.023 0.055-0.065 0.097-0.04 0.04-0.096 0.065-0.054 0.023-0.117 0.023h-0.821q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119v-1.04q0-0.063 0.023-0.119 0.024-0.055 0.065-0.096t0.097-0.065q0.055-0.024 0.117-0.024h0.82q0.063 0 0.117 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119zm-1.123-1.04v1.04h0.82v-1.04zm3.092 1.345h-0.305v-1.345h-0.851v1.345h-0.304v-1.651h0.303v0.291q3e-3 -0.06 0.027-0.113 0.024-0.054 0.065-0.093 0.041-0.04 0.096-0.062 0.054-0.023 0.116-0.023h0.545q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119zm1.12-1.981v0.33h0.542v0.305h-0.541v1.345h-0.305v-1.344h-0.403v-0.305h0.403v-0.33q0-0.063 0.023-0.117 0.024-0.055 0.066-0.097 0.041-0.041 0.097-0.065 0.055-0.024 0.117-0.024h0.542v0.305zm1.277 0.33v1.651h-0.305v-1.651zm-0.32-0.635h0.336v0.317h-0.336zm0.844 0.941q0-0.063 0.023-0.119 0.024-0.055 0.065-0.096t0.097-0.065q0.055-0.024 0.117-0.024h0.547q0.06 0 0.114 0.023 0.054 0.021 0.094 0.062 0.041 0.038 0.066 0.093 0.026 0.052 0.027 0.113v-0.291h0.305v2.012q0 0.063-0.024 0.119-0.023 0.055-0.065 0.096-0.04 0.041-0.096 0.065-0.055 0.024-0.119 0.024h-0.964v-0.305h0.964v-0.424h-0.851q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119zm1.154 0.976v-0.976h-0.851v0.976zm0.813-1.282h0.303v1.345h0.851v-1.345h0.305v1.651h-0.305v-0.279q-4e-3 0.057-0.031 0.108-0.026 0.051-0.068 0.089-0.04 0.037-0.093 0.058-0.052 0.021-0.111 0.021h-0.547q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119zm2.272 0.305v1.345h-0.303v-1.651h0.303v0.291q3e-3 -0.06 0.027-0.113 0.024-0.054 0.065-0.093 0.041-0.04 0.096-0.062 0.054-0.023 0.116-0.023h0.324q0.063 0 0.117 0.024 0.055 0.023 0.097 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119v0.279h-0.305v-0.279zm1.314 0.624q0-0.063 0.023-0.117 0.024-0.055 0.065-0.097 0.041-0.041 0.097-0.065 0.055-0.024 0.117-0.024h0.787v-0.319h-0.996v-0.305h0.996q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119v1.345h-0.305v-0.279q-4e-3 0.057-0.031 0.108-0.026 0.051-0.068 0.089-0.04 0.037-0.093 0.058-0.052 0.021-0.111 0.021h-0.483q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119zm0.303 0.415h0.787v-0.415h-0.787zm1.505-1.345h0.403v-0.508h0.305v0.508h0.542v0.305h-0.542v1.04h0.542v0.305h-0.542q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.041-0.041-0.066-0.097-0.023-0.055-0.023-0.119v-1.04h-0.403zm2.08 0v1.651h-0.299v-1.649zm-0.314-0.634h0.336v0.317h-0.336zm2.272 1.981q0 0.063-0.024 0.119-0.023 0.055-0.065 0.097-0.04 0.04-0.096 0.065-0.054 0.023-0.117 0.023h-0.82q-0.062 0-0.117-0.023-0.055-0.024-0.097-0.065-0.04-0.041-0.065-0.097-0.023-0.055-0.023-0.119v-1.04q0-0.063 0.023-0.119 0.024-0.055 0.065-0.096t0.097-0.065q0.055-0.024 0.117-0.024h0.82q0.063 0 0.117 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119zm-1.123-1.04v1.04h0.82v-1.04zm3.092 1.345h-0.305v-1.345h-0.851v1.345h-0.303v-1.651h0.303v0.291q3e-3 -0.06 0.027-0.113 0.024-0.054 0.065-0.093 0.041-0.04 0.096-0.062 0.054-0.023 0.116-0.023h0.545q0.063 0 0.119 0.024 0.055 0.023 0.096 0.065 0.041 0.04 0.065 0.096 0.024 0.055 0.024 0.119z" fill="#fff"/>
</svg>

Before

Width:  |  Height:  |  Size: 5.8 KiB

6
web/assets/images/nvidia-logo.svg generated vendored
View File

@ -1,6 +0,0 @@
<svg enable-background="new 0 0 974.7 179.7" version="1.1" viewBox="0 0 974.7 179.7" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" width="110" height="44"><title> Artificial Intelligence Computing Leadership from NVIDIA</title>
<path fill="#FFFFFF" d="m962.1 144.1v-2.7h1.7c0.9 0 2.2 0.1 2.2 1.2s-0.7 1.5-1.8 1.5h-2.1m0 1.9h1.2l2.7 4.7h2.9l-3-4.9c1.5 0.1 2.7-1 2.8-2.5v-0.4c0-2.6-1.8-3.4-4.8-3.4h-4.3v11.2h2.5v-4.7m12.6-0.9c0-6.6-5.1-10.4-10.8-10.4s-10.8 3.8-10.8 10.4 5.1 10.4 10.8 10.4 10.8-3.8 10.8-10.4m-3.2 0c0.2 4.2-3.1 7.8-7.3 8h-0.3c-4.4 0.2-8.1-3.3-8.3-7.7s3.3-8.1 7.7-8.3 8.1 3.3 8.3 7.7c-0.1 0.1-0.1 0.2-0.1 0.3z"></path>
<path fill="#FFFFFF" d="m578.2 34v118h33.3v-118h-33.3zm-262-0.2v118.1h33.6v-91.7l26.2 0.1c8.6 0 14.6 2.1 18.7 6.5 5.3 5.6 7.4 14.7 7.4 31.2v53.9h32.6v-65.2c0-46.6-29.7-52.9-58.7-52.9h-59.8zm315.7 0.2v118h54c28.8 0 38.2-4.8 48.3-15.5 7.2-7.5 11.8-24.1 11.8-42.2 0-16.6-3.9-31.4-10.8-40.6-12.2-16.5-30-19.7-56.6-19.7h-46.7zm33 25.6h14.3c20.8 0 34.2 9.3 34.2 33.5s-13.4 33.6-34.2 33.6h-14.3v-67.1zm-134.7-25.6l-27.8 93.5-26.6-93.5h-36l38 118h48l38.4-118h-34zm231.4 118h33.3v-118h-33.3v118zm93.4-118l-46.5 117.9h32.8l7.4-20.9h55l7 20.8h35.7l-46.9-117.8h-44.5zm21.6 21.5l20.2 55.2h-41l20.8-55.2z">
</path>
<path fill="#76B900" d="m101.3 53.6v-16.2c1.6-0.1 3.2-0.2 4.8-0.2 44.4-1.4 73.5 38.2 73.5 38.2s-31.4 43.6-65.1 43.6c-4.5 0-8.9-0.7-13.1-2.1v-49.2c17.3 2.1 20.8 9.7 31.1 27l23.1-19.4s-16.9-22.1-45.3-22.1c-3-0.1-6 0.1-9 0.4m0-53.6v24.2l4.8-0.3c61.7-2.1 102 50.6 102 50.6s-46.2 56.2-94.3 56.2c-4.2 0-8.3-0.4-12.4-1.1v15c3.4 0.4 6.9 0.7 10.3 0.7 44.8 0 77.2-22.9 108.6-49.9 5.2 4.2 26.5 14.3 30.9 18.7-29.8 25-99.3 45.1-138.7 45.1-3.8 0-7.4-0.2-11-0.6v21.1h170.2v-179.7h-170.4zm0 116.9v12.8c-41.4-7.4-52.9-50.5-52.9-50.5s19.9-22 52.9-25.6v14h-0.1c-17.3-2.1-30.9 14.1-30.9 14.1s7.7 27.3 31 35.2m-73.5-39.5s24.5-36.2 73.6-40v-13.2c-54.4 4.4-101.4 50.4-101.4 50.4s26.6 77 101.3 84v-14c-54.8-6.8-73.5-67.2-73.5-67.2z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.9 KiB

Some files were not shown because too many files have changed in this diff Show More