Compare commits

...

10 Commits

Author SHA1 Message Date
Zac
295647dd0e
Merge 49da9c325a8bdc4f2d18faeb384654bf0acbf470 into d8e5662822168101afb5e08a8ba75b6eefff6e02 2025-05-18 11:16:48 +02:00
comfyanonymous
d8e5662822
Remove default delimiter. (#8183) 2025-05-18 04:12:12 -04:00
LaVie024
3d44a09812
Update nodes_string.py (#8173) 2025-05-18 04:11:11 -04:00
comfyanonymous
62690eddec
Node to add pixel space noise to an image. (#8182) 2025-05-18 04:09:56 -04:00
Christian Byrne
05eb10b43a
Validate video inputs (#8133)
* validate kling lip sync input video

* add tooltips

* update duration estimates

* decrease epsilon

* fix rebase error
2025-05-18 04:08:47 -04:00
Silver
f5e4e976f4
Add missing category for T5TokenizerOption (#8177)
Change it if you need to, but it should at least have a category.
2025-05-18 02:59:06 -04:00
comfyanonymous
aee2908d03
Remove useless log. (#8166) 2025-05-17 06:27:34 -04:00
comfyanonymous
dc46db7aa4
Make ImagePadForOutpaint return a 3 channel mask. (#8157) 2025-05-16 15:15:55 -04:00
filtered
7046983d95
Remove Desktop versioning claim from README (#8155) 2025-05-16 10:45:36 -07:00
ZacharyACoon
49da9c325a . 2023-04-20 22:50:36 -07:00
12 changed files with 248 additions and 32 deletions

.dockerignore (new file)

@@ -0,0 +1,6 @@
__pycache__/
*.py[cod]
input
models
notebooks
output

Dockerfile (new file)

@@ -0,0 +1,70 @@
# 3.10.11-bullseye, has python, git, but relatively small (<250MB)
ARG BASE_IMAGE="python@sha256:88fb365ea5d52ec8f5799f40a4742b9fb3c91dac92f7048eabaae194a25ccc28"
ARG GPU_MAKE="nvidia"
ARG UID=1000
ARG GID=1000
FROM ${BASE_IMAGE}
ARG GPU_MAKE
ARG UID
ARG GID
SHELL [ "/bin/bash", "-uec"]
RUN \
--mount=target=/var/lib/apt/lists,type=cache,sharing=locked \
--mount=target=/var/cache/apt,type=cache,sharing=locked \
<<'EOF'
apt-get update
apt-get install -yq git-lfs
echo "comfyui" >> /etc/hostname
EOF
# run instructions as user
USER ${UID}:${GID}
# run python from future venv
ENV PATH="/app/venv/bin:${PATH}"
# copy context to obvious location
COPY --chown=${UID}:${GID} ./ /app
# create cache directory *with user permissions*
WORKDIR /app/.cache
# default to app directory
WORKDIR /app
# set pip cache location
ENV XDG_CACHE_HOME="/app/.cache/pip"
# run with mounted cache
RUN --mount=type=cache,target=/app/.cache,uid=${UID},gid=${GID} <<'EOF'
mkdir -p /app/.cache/transformers
# choose package index based on chosen hardware
if [ "${GPU_MAKE}" = "nvidia" ]; then
    EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu118"
    EXTRAS="xformers"
elif [ "${GPU_MAKE}" = "amd" ]; then
    EXTRA_INDEX_URL="https://download.pytorch.org/whl/rocm5.4.2"
    EXTRAS=""
elif [ "${GPU_MAKE}" = "cpu" ]; then
    EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
    EXTRAS=""
else
    echo "Unknown GPU_MAKE provided as docker build arg."
    exit 2
fi
# create virtual environment to manage packages
python -m venv venv
# install framework packages
pip install \
    --extra-index-url "${EXTRA_INDEX_URL}" \
    torch \
    torchvision \
    torchaudio \
    ${EXTRAS}
pip install -r requirements.txt
EOF
# default environment variables
ENV COMFYUI_ADDRESS=0.0.0.0
ENV COMFYUI_PORT=8188
ENV COMFYUI_EXTRA_ARGS=""
ENV TRANSFORMERS_CACHE="/app/.cache/transformers"
# default start command
CMD bash -c "python -u main.py --listen ${COMFYUI_ADDRESS} --port ${COMFYUI_PORT} ${COMFYUI_EXTRA_ARGS}"

README.md

@@ -110,7 +110,6 @@ ComfyUI follows a weekly release cycle every Friday, with three interconnected repositories:
2. **[ComfyUI Desktop](https://github.com/Comfy-Org/desktop)**
   - Builds a new release using the latest stable core version
-   - Version numbers match the core release (e.g., Desktop v1.7.0 uses Core v1.7.0)
3. **[ComfyUI Frontend](https://github.com/Comfy-Org/ComfyUI_frontend)**
   - Weekly frontend updates are merged into the core repository

comfy/utils.py

@@ -78,8 +78,6 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
            pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
        else:
            pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
-        if "global_step" in pl_sd:
-            logging.debug(f"Global Step: {pl_sd['global_step']}")
        if "state_dict" in pl_sd:
            sd = pl_sd["state_dict"]
        else:

comfy_api_nodes/nodes_kling.py

@@ -65,6 +65,12 @@ from comfy_api_nodes.apinode_utils import (
    download_url_to_image_tensor,
)
from comfy_api_nodes.mapper_utils import model_field_to_node_input
+from comfy_api_nodes.util.validation_utils import (
+    validate_image_dimensions,
+    validate_image_aspect_ratio,
+    validate_video_dimensions,
+    validate_video_duration,
+)
from comfy_api.input.basic_types import AudioInput
from comfy_api.input.video_types import VideoInput
from comfy_api.input_impl import VideoFromFile
@@ -80,18 +86,16 @@ PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on"
PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
MAX_PROMPT_LENGTH_T2V = 2500
MAX_PROMPT_LENGTH_I2V = 500
MAX_PROMPT_LENGTH_IMAGE_GEN = 500
MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200
MAX_PROMPT_LENGTH_LIP_SYNC = 120
-# TODO: adjust based on tests
-AVERAGE_DURATION_T2V = 319 # 319,
-AVERAGE_DURATION_I2V = 164 # 164,
-AVERAGE_DURATION_LIP_SYNC = 120
-AVERAGE_DURATION_VIRTUAL_TRY_ON = 19 # 19,
+AVERAGE_DURATION_T2V = 319
+AVERAGE_DURATION_I2V = 164
+AVERAGE_DURATION_LIP_SYNC = 455
+AVERAGE_DURATION_VIRTUAL_TRY_ON = 19
AVERAGE_DURATION_IMAGE_GEN = 32
AVERAGE_DURATION_VIDEO_EFFECTS = 320
AVERAGE_DURATION_VIDEO_EXTEND = 320
@@ -211,23 +215,8 @@ def validate_input_image(image: torch.Tensor) -> None:
    See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo
    """
-    if len(image.shape) == 4:
-        height, width = image.shape[1], image.shape[2]
-    elif len(image.shape) == 3:
-        height, width = image.shape[0], image.shape[1]
-    else:
-        raise ValueError("Invalid image tensor shape.")
-    # Ensure minimum resolution is met
-    if height < 300:
-        raise ValueError("Image height must be at least 300px")
-    if width < 300:
-        raise ValueError("Image width must be at least 300px")
-    # Ensure aspect ratio is within acceptable range
-    aspect_ratio = width / height
-    if aspect_ratio < 1 / 2.5 or aspect_ratio > 2.5:
-        raise ValueError("Image aspect ratio must be between 1:2.5 and 2.5:1")
+    validate_image_dimensions(image, min_width=300, min_height=300)
+    validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5)

def get_camera_control_input_config(
@@ -1243,6 +1232,17 @@ class KlingLipSyncBase(KlingNodeBase):
    RETURN_TYPES = ("VIDEO", "STRING", "STRING")
    RETURN_NAMES = ("VIDEO", "video_id", "duration")

+    def validate_lip_sync_video(self, video: VideoInput):
+        """
+        Validates the input video adheres to the expectations of the Kling Lip Sync API:
+        - Video length does not exceed 10s and is not shorter than 2s
+        - Length and width dimensions should both be between 720px and 1920px
+
+        See: https://app.klingai.com/global/dev/document-api/apiReference/model/videoTolip
+        """
+        validate_video_dimensions(video, 720, 1920)
+        validate_video_duration(video, 2, 10)
+
    def validate_text(self, text: str):
        if not text:
            raise ValueError("Text is required")
@@ -1282,6 +1282,7 @@ class KlingLipSyncBase(KlingNodeBase):
    ) -> tuple[VideoFromFile, str, str]:
        if text:
            self.validate_text(text)
+        self.validate_lip_sync_video(video)

        # Upload video to Comfy API and get download URL
        video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs)
@@ -1352,7 +1353,7 @@
        },
    }

-    DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file."
+    DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length."

    def api_call(
        self,
@@ -1464,7 +1465,7 @@
        },
    }

-    DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt."
+    DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length."

    def api_call(
        self,

comfy_api_nodes/util/__init__.py (new, empty file)

comfy_api_nodes/util/validation_utils.py (new file)

@@ -0,0 +1,100 @@
import logging
from typing import Optional

import torch

from comfy_api.input.video_types import VideoInput


def get_image_dimensions(image: torch.Tensor) -> tuple[int, int]:
    if len(image.shape) == 4:
        return image.shape[1], image.shape[2]
    elif len(image.shape) == 3:
        return image.shape[0], image.shape[1]
    else:
        raise ValueError("Invalid image tensor shape.")


def validate_image_dimensions(
    image: torch.Tensor,
    min_width: Optional[int] = None,
    max_width: Optional[int] = None,
    min_height: Optional[int] = None,
    max_height: Optional[int] = None,
):
    height, width = get_image_dimensions(image)

    if min_width is not None and width < min_width:
        raise ValueError(f"Image width must be at least {min_width}px, got {width}px")
    if max_width is not None and width > max_width:
        raise ValueError(f"Image width must be at most {max_width}px, got {width}px")
    if min_height is not None and height < min_height:
        raise ValueError(
            f"Image height must be at least {min_height}px, got {height}px"
        )
    if max_height is not None and height > max_height:
        raise ValueError(f"Image height must be at most {max_height}px, got {height}px")


def validate_image_aspect_ratio(
    image: torch.Tensor,
    min_aspect_ratio: Optional[float] = None,
    max_aspect_ratio: Optional[float] = None,
):
    height, width = get_image_dimensions(image)
    aspect_ratio = width / height

    if min_aspect_ratio is not None and aspect_ratio < min_aspect_ratio:
        raise ValueError(
            f"Image aspect ratio must be at least {min_aspect_ratio}, got {aspect_ratio}"
        )
    if max_aspect_ratio is not None and aspect_ratio > max_aspect_ratio:
        raise ValueError(
            f"Image aspect ratio must be at most {max_aspect_ratio}, got {aspect_ratio}"
        )


def validate_video_dimensions(
    video: VideoInput,
    min_width: Optional[int] = None,
    max_width: Optional[int] = None,
    min_height: Optional[int] = None,
    max_height: Optional[int] = None,
):
    try:
        width, height = video.get_dimensions()
    except Exception as e:
        logging.error("Error getting dimensions of video: %s", e)
        return

    if min_width is not None and width < min_width:
        raise ValueError(f"Video width must be at least {min_width}px, got {width}px")
    if max_width is not None and width > max_width:
        raise ValueError(f"Video width must be at most {max_width}px, got {width}px")
    if min_height is not None and height < min_height:
        raise ValueError(
            f"Video height must be at least {min_height}px, got {height}px"
        )
    if max_height is not None and height > max_height:
        raise ValueError(f"Video height must be at most {max_height}px, got {height}px")


def validate_video_duration(
    video: VideoInput,
    min_duration: Optional[float] = None,
    max_duration: Optional[float] = None,
):
    try:
        duration = video.get_duration()
    except Exception as e:
        logging.error("Error getting duration of video: %s", e)
        return

    epsilon = 0.0001
    if min_duration is not None and min_duration - epsilon > duration:
        raise ValueError(
            f"Video duration must be at least {min_duration}s, got {duration}s"
        )
    if max_duration is not None and duration > max_duration + epsilon:
        raise ValueError(
            f"Video duration must be at most {max_duration}s, got {duration}s"
        )
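
A minimal usage sketch of the validators above (standalone; the tensors follow ComfyUI's [batch, height, width, channels] image layout, and the sample sizes are illustrative):

import torch

from comfy_api_nodes.util.validation_utils import (
    validate_image_aspect_ratio,
    validate_image_dimensions,
)

# A 1x512x768x3 batch satisfies the same bounds the Kling image check uses above.
image = torch.zeros((1, 512, 768, 3))
validate_image_dimensions(image, min_width=300, min_height=300)
validate_image_aspect_ratio(image, min_aspect_ratio=1 / 2.5, max_aspect_ratio=2.5)  # 768/512 = 1.5

# Out-of-range inputs raise ValueError with a descriptive message.
try:
    validate_image_dimensions(torch.zeros((1, 512, 200, 3)), min_width=300)
except ValueError as e:
    print(e)  # Image width must be at least 300px, got 200px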

@@ -31,6 +31,7 @@ class T5TokenizerOptions:
            }
        }
+    CATEGORY = "_for_testing/conditioning"
    RETURN_TYPES = ("CLIP",)
    FUNCTION = "set_options"

comfy_extras/nodes_images.py

@@ -13,6 +13,7 @@ import os
import re
from io import BytesIO
from inspect import cleandoc
+import torch

from comfy.comfy_types import FileLocator
@@ -74,6 +75,24 @@ class ImageFromBatch:
        s = s_in[batch_index:batch_index + length].clone()
        return (s,)

+class ImageAddNoise:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image": ("IMAGE",),
+                              "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True, "tooltip": "The random seed used for creating the noise."}),
+                              "strength": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "repeat"
+
+    CATEGORY = "image"
+
+    def repeat(self, image, seed, strength):
+        generator = torch.manual_seed(seed)
+        s = torch.clip((image + strength * torch.randn(image.size(), generator=generator, device="cpu").to(image)), min=0.0, max=1.0)
+        return (s,)
+
class SaveAnimatedWEBP:
    def __init__(self):
        self.output_dir = folder_paths.get_output_directory()
@@ -295,6 +314,7 @@ NODE_CLASS_MAPPINGS = {
    "ImageCrop": ImageCrop,
    "RepeatImageBatch": RepeatImageBatch,
    "ImageFromBatch": ImageFromBatch,
+    "ImageAddNoise": ImageAddNoise,
    "SaveAnimatedWEBP": SaveAnimatedWEBP,
    "SaveAnimatedPNG": SaveAnimatedPNG,
    "SaveSVGNode": SaveSVGNode,

comfy_extras/nodes_string.py

@@ -8,7 +8,8 @@ class StringConcatenate():
        return {
            "required": {
                "string_a": (IO.STRING, {"multiline": True}),
-                "string_b": (IO.STRING, {"multiline": True})
+                "string_b": (IO.STRING, {"multiline": True}),
+                "delimiter": (IO.STRING, {"multiline": False, "default": ""})
            }
        }
@@ -16,8 +17,8 @@
    FUNCTION = "execute"
    CATEGORY = "utils/string"

-    def execute(self, string_a, string_b, **kwargs):
-        return string_a + string_b,
+    def execute(self, string_a, string_b, delimiter, **kwargs):
+        return delimiter.join((string_a, string_b)),

class StringSubstring():
    @classmethod
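
Taken together, the two string commits above (#8173 adds a delimiter input; #8183 makes its default the empty string) reduce StringConcatenate to plain str.join semantics; a quick illustration of the before/after behavior:

string_a, string_b = "hello", "world"

# Old behavior: bare concatenation.
assert string_a + string_b == "helloworld"

# New behavior: delimiter-joined; the empty-string default preserves the old output.
assert "".join((string_a, string_b)) == "helloworld"
assert ", ".join((string_a, string_b)) == "hello, world"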

docker-compose.yaml (new file)

@@ -0,0 +1,20 @@
version: "3.9"
services:
  comfyui:
    user: "1000:1000"
    build: .
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    ports:
      - "8188:8188"
    volumes:
      - "./models:/app/models"
      - "./input:/app/input"
      - "./temp:/app/output/temp"
      - "./output:/app/output"

nodes.py

@@ -1940,7 +1940,7 @@ class ImagePadForOutpaint:
        mask[top:top + d2, left:left + d3] = t

-        return (new_image, mask)
+        return (new_image, mask.unsqueeze(0))

NODE_CLASS_MAPPINGS = {
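
For context on this last change: ComfyUI moves masks through the graph as [batch, height, width] tensors, and the mask assembled inside ImagePadForOutpaint is 2-D, so the added unsqueeze(0) gives it the expected leading batch dimension (a small shape sketch; the sizes are illustrative):

import torch

mask = torch.ones((512, 512))  # 2-D mask, as built inside the node
batched = mask.unsqueeze(0)    # what the node now returns alongside the image
assert batched.shape == (1, 512, 512)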