Compare commits

...

14 Commits

Author SHA1 Message Date
Ethan Yang
481fae8aaa
Merge e14b8dfec5b49f691aedb1e5139e5d8d9015bbef into d8e5662822168101afb5e08a8ba75b6eefff6e02 2025-05-18 11:16:41 +02:00
comfyanonymous
d8e5662822
Remove default delimiter. (#8183) 2025-05-18 04:12:12 -04:00
LaVie024
3d44a09812
Update nodes_string.py (#8173) 2025-05-18 04:11:11 -04:00
comfyanonymous
62690eddec
Node to add pixel space noise to an image. (#8182) 2025-05-18 04:09:56 -04:00
Christian Byrne
05eb10b43a
Validate video inputs (#8133)
* validate kling lip sync input video

* add tooltips

* update duration estimates

* decrease epsilon

* fix rebase error
2025-05-18 04:08:47 -04:00
Silver
f5e4e976f4
Add missing category for T5TokenizerOption (#8177)
Change it if you need to but it should at least have a category.
2025-05-18 02:59:06 -04:00
Ethan Yang
e14b8dfec5
Update nodes_torch_compile.py 2025-03-07 13:23:19 +08:00
ethan
bc9eb9dfdb fix the memory leakage issue 2025-03-05 18:28:47 -08:00
Ethan Yang
8558803f44
Merge pull request #2 from comfyanonymous/master
rebase
2025-03-03 14:36:03 +08:00
ethan
ebfe7a5679 fix the issue for model first inference with lora 2025-02-10 19:54:44 -08:00
ethan
77e9294c08 add Query Device 2025-01-30 00:20:58 -08:00
ethan
317af7201f remove history commit
remove history commit

remove history commit
2025-01-29 07:11:24 -08:00
ethan
d1f61cca5e add openvino to torch compile 2025-01-29 07:03:31 -08:00
ethan
33e71e0e79 update openvino backend 2025-01-24 01:37:44 -08:00
8 changed files with 209 additions and 35 deletions

View File

@ -498,12 +498,20 @@ class ModelPatcher:
key = k[0]
if len(k) > 2:
function = k[2]
org_key=key.replace("diffusion_model", "diffusion_model._orig_mod")
if key in model_sd:
p.add(k)
current_patches = self.patches.get(key, [])
current_patches.append((strength_patch, patches[k], strength_model, offset, function))
self.patches[key] = current_patches
self.patches[org_key] = current_patches
elif org_key in model_sd:
if key in self.patches:
self.patches.pop(key)
p.add(k)
current_patches = self.patches.get(org_key, [])
current_patches.append((strength_patch, patches[k], strength_model, offset, function))
self.patches[org_key] = current_patches
self.patches_uuid = uuid.uuid4()
return list(p)

View File

@ -65,6 +65,12 @@ from comfy_api_nodes.apinode_utils import (
download_url_to_image_tensor,
)
from comfy_api_nodes.mapper_utils import model_field_to_node_input
from comfy_api_nodes.util.validation_utils import (
validate_image_dimensions,
validate_image_aspect_ratio,
validate_video_dimensions,
validate_video_duration,
)
from comfy_api.input.basic_types import AudioInput
from comfy_api.input.video_types import VideoInput
from comfy_api.input_impl import VideoFromFile
@ -80,18 +86,16 @@ PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on"
PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
MAX_PROMPT_LENGTH_T2V = 2500
MAX_PROMPT_LENGTH_I2V = 500
MAX_PROMPT_LENGTH_IMAGE_GEN = 500
MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200
MAX_PROMPT_LENGTH_LIP_SYNC = 120
# TODO: adjust based on tests
AVERAGE_DURATION_T2V = 319 # 319,
AVERAGE_DURATION_I2V = 164 # 164,
AVERAGE_DURATION_LIP_SYNC = 120
AVERAGE_DURATION_VIRTUAL_TRY_ON = 19 # 19,
AVERAGE_DURATION_T2V = 319
AVERAGE_DURATION_I2V = 164
AVERAGE_DURATION_LIP_SYNC = 455
AVERAGE_DURATION_VIRTUAL_TRY_ON = 19
AVERAGE_DURATION_IMAGE_GEN = 32
AVERAGE_DURATION_VIDEO_EFFECTS = 320
AVERAGE_DURATION_VIDEO_EXTEND = 320
@ -211,23 +215,8 @@ def validate_input_image(image: torch.Tensor) -> None:
See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo
"""
if len(image.shape) == 4:
height, width = image.shape[1], image.shape[2]
elif len(image.shape) == 3:
height, width = image.shape[0], image.shape[1]
else:
raise ValueError("Invalid image tensor shape.")
# Ensure minimum resolution is met
if height < 300:
raise ValueError("Image height must be at least 300px")
if width < 300:
raise ValueError("Image width must be at least 300px")
# Ensure aspect ratio is within acceptable range
aspect_ratio = width / height
if aspect_ratio < 1 / 2.5 or aspect_ratio > 2.5:
raise ValueError("Image aspect ratio must be between 1:2.5 and 2.5:1")
validate_image_dimensions(image, min_width=300, min_height=300)
validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5)
def get_camera_control_input_config(
@ -1243,6 +1232,17 @@ class KlingLipSyncBase(KlingNodeBase):
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
def validate_lip_sync_video(self, video: VideoInput):
"""
Validates the input video adheres to the expectations of the Kling Lip Sync API:
- Video length does not exceed 10s and is not shorter than 2s
- Length and width dimensions should both be between 720px and 1920px
See: https://app.klingai.com/global/dev/document-api/apiReference/model/videoTolip
"""
validate_video_dimensions(video, 720, 1920)
validate_video_duration(video, 2, 10)
def validate_text(self, text: str):
if not text:
raise ValueError("Text is required")
@ -1282,6 +1282,7 @@ class KlingLipSyncBase(KlingNodeBase):
) -> tuple[VideoFromFile, str, str]:
if text:
self.validate_text(text)
self.validate_lip_sync_video(video)
# Upload video to Comfy API and get download URL
video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs)
@ -1352,7 +1353,7 @@ class KlingLipSyncAudioToVideoNode(KlingLipSyncBase):
},
}
DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file."
DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length."
def api_call(
self,
@ -1464,7 +1465,7 @@ class KlingLipSyncTextToVideoNode(KlingLipSyncBase):
},
}
DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt."
DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length."
def api_call(
self,

View File

View File

@ -0,0 +1,100 @@
import logging
from typing import Optional
import torch
from comfy_api.input.video_types import VideoInput
def get_image_dimensions(image: torch.Tensor) -> tuple[int, int]:
if len(image.shape) == 4:
return image.shape[1], image.shape[2]
elif len(image.shape) == 3:
return image.shape[0], image.shape[1]
else:
raise ValueError("Invalid image tensor shape.")
def validate_image_dimensions(
image: torch.Tensor,
min_width: Optional[int] = None,
max_width: Optional[int] = None,
min_height: Optional[int] = None,
max_height: Optional[int] = None,
):
height, width = get_image_dimensions(image)
if min_width is not None and width < min_width:
raise ValueError(f"Image width must be at least {min_width}px, got {width}px")
if max_width is not None and width > max_width:
raise ValueError(f"Image width must be at most {max_width}px, got {width}px")
if min_height is not None and height < min_height:
raise ValueError(
f"Image height must be at least {min_height}px, got {height}px"
)
if max_height is not None and height > max_height:
raise ValueError(f"Image height must be at most {max_height}px, got {height}px")
def validate_image_aspect_ratio(
image: torch.Tensor,
min_aspect_ratio: Optional[float] = None,
max_aspect_ratio: Optional[float] = None,
):
width, height = get_image_dimensions(image)
aspect_ratio = width / height
if min_aspect_ratio is not None and aspect_ratio < min_aspect_ratio:
raise ValueError(
f"Image aspect ratio must be at least {min_aspect_ratio}, got {aspect_ratio}"
)
if max_aspect_ratio is not None and aspect_ratio > max_aspect_ratio:
raise ValueError(
f"Image aspect ratio must be at most {max_aspect_ratio}, got {aspect_ratio}"
)
def validate_video_dimensions(
video: VideoInput,
min_width: Optional[int] = None,
max_width: Optional[int] = None,
min_height: Optional[int] = None,
max_height: Optional[int] = None,
):
try:
width, height = video.get_dimensions()
except Exception as e:
logging.error("Error getting dimensions of video: %s", e)
return
if min_width is not None and width < min_width:
raise ValueError(f"Video width must be at least {min_width}px, got {width}px")
if max_width is not None and width > max_width:
raise ValueError(f"Video width must be at most {max_width}px, got {width}px")
if min_height is not None and height < min_height:
raise ValueError(
f"Video height must be at least {min_height}px, got {height}px"
)
if max_height is not None and height > max_height:
raise ValueError(f"Video height must be at most {max_height}px, got {height}px")
def validate_video_duration(
video: VideoInput,
min_duration: Optional[float] = None,
max_duration: Optional[float] = None,
):
try:
duration = video.get_duration()
except Exception as e:
logging.error("Error getting duration of video: %s", e)
return
epsilon = 0.0001
if min_duration is not None and min_duration - epsilon > duration:
raise ValueError(
f"Video duration must be at least {min_duration}s, got {duration}s"
)
if max_duration is not None and duration > max_duration + epsilon:
raise ValueError(
f"Video duration must be at most {max_duration}s, got {duration}s"
)

View File

@ -31,6 +31,7 @@ class T5TokenizerOptions:
}
}
CATEGORY = "_for_testing/conditioning"
RETURN_TYPES = ("CLIP",)
FUNCTION = "set_options"

View File

@ -13,6 +13,7 @@ import os
import re
from io import BytesIO
from inspect import cleandoc
import torch
from comfy.comfy_types import FileLocator
@ -74,6 +75,24 @@ class ImageFromBatch:
s = s_in[batch_index:batch_index + length].clone()
return (s,)
class ImageAddNoise:
@classmethod
def INPUT_TYPES(s):
return {"required": { "image": ("IMAGE",),
"seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True, "tooltip": "The random seed used for creating the noise."}),
"strength": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
}}
RETURN_TYPES = ("IMAGE",)
FUNCTION = "repeat"
CATEGORY = "image"
def repeat(self, image, seed, strength):
generator = torch.manual_seed(seed)
s = torch.clip((image + strength * torch.randn(image.size(), generator=generator, device="cpu").to(image)), min=0.0, max=1.0)
return (s,)
class SaveAnimatedWEBP:
def __init__(self):
self.output_dir = folder_paths.get_output_directory()
@ -295,6 +314,7 @@ NODE_CLASS_MAPPINGS = {
"ImageCrop": ImageCrop,
"RepeatImageBatch": RepeatImageBatch,
"ImageFromBatch": ImageFromBatch,
"ImageAddNoise": ImageAddNoise,
"SaveAnimatedWEBP": SaveAnimatedWEBP,
"SaveAnimatedPNG": SaveAnimatedPNG,
"SaveSVGNode": SaveSVGNode,

View File

@ -8,7 +8,8 @@ class StringConcatenate():
return {
"required": {
"string_a": (IO.STRING, {"multiline": True}),
"string_b": (IO.STRING, {"multiline": True})
"string_b": (IO.STRING, {"multiline": True}),
"delimiter": (IO.STRING, {"multiline": False, "default": ""})
}
}
@ -16,8 +17,8 @@ class StringConcatenate():
FUNCTION = "execute"
CATEGORY = "utils/string"
def execute(self, string_a, string_b, **kwargs):
return string_a + string_b,
def execute(self, string_a, string_b, delimiter, **kwargs):
return delimiter.join((string_a, string_b)),
class StringSubstring():
@classmethod

View File

@ -1,21 +1,64 @@
import torch
import importlib
class TorchCompileModel:
@classmethod
def INPUT_TYPES(s):
return {"required": { "model": ("MODEL",),
"backend": (["inductor", "cudagraphs"],),
}}
if importlib.util.find_spec("openvino") is not None:
import openvino as ov
core = ov.Core()
available_devices = core.available_devices
else:
available_devices = []
return {
"required": {
"model": ("MODEL",),
"backend": (["inductor", "cudagraphs", "openvino"],),
},
"optional": {
"openvino_device": (available_devices,),
},
}
RETURN_TYPES = ("MODEL",)
FUNCTION = "patch"
CATEGORY = "_for_testing"
EXPERIMENTAL = True
def patch(self, model, backend):
def patch(self, model, backend, openvino_device):
print(model.__class__.__name__)
if backend == "openvino":
options = {"device": openvino_device}
try:
import openvino.torch
except ImportError:
raise ImportError(
"Could not import openvino python package. "
"Please install it with `pip install openvino`."
)
import openvino.frontend.pytorch.torchdynamo.execute as ov_ex
torch._dynamo.reset()
ov_ex.compiled_cache.clear()
ov_ex.req_cache.clear()
ov_ex.partitioned_modules.clear()
else:
options = None
m = model.clone()
m.add_object_patch("diffusion_model", torch.compile(model=m.get_model_object("diffusion_model"), backend=backend))
return (m, )
m.add_object_patch(
"diffusion_model",
torch.compile(
model=m.get_model_object("diffusion_model"),
backend=backend,
options=options,
),
)
return (m,)
NODE_CLASS_MAPPINGS = {
"TorchCompileModel": TorchCompileModel,