# ComfyUI/comfy_api_nodes/nodes_kling.py
"""Kling API Nodes
For source of truth on the allowed permutations of request fields, please reference:
- [Compatibility Table](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap)
"""
from __future__ import annotations
from typing import Optional, TypeVar, Any
import math
import logging
import torch
from comfy_api_nodes.apis import (
KlingTaskStatus,
KlingCameraControl,
KlingCameraConfig,
KlingCameraControlType,
KlingVideoGenDuration,
KlingVideoGenMode,
KlingVideoGenAspectRatio,
KlingVideoGenModelName,
KlingText2VideoRequest,
KlingText2VideoResponse,
KlingImage2VideoRequest,
KlingImage2VideoResponse,
KlingVideoExtendRequest,
KlingVideoExtendResponse,
KlingLipSyncVoiceLanguage,
KlingLipSyncInputObject,
KlingLipSyncRequest,
KlingLipSyncResponse,
KlingVirtualTryOnModelName,
KlingVirtualTryOnRequest,
KlingVirtualTryOnResponse,
KlingVideoResult,
KlingImageResult,
KlingImageGenerationsRequest,
KlingImageGenerationsResponse,
KlingImageGenImageReferenceType,
KlingImageGenModelName,
KlingImageGenAspectRatio,
KlingVideoEffectsRequest,
KlingVideoEffectsResponse,
KlingDualCharacterEffectsScene,
KlingSingleImageEffectsScene,
KlingDualCharacterEffectInput,
KlingSingleImageEffectInput,
KlingCharacterEffectModelName,
KlingSingleImageEffectModelName,
)
from comfy_api_nodes.apis.client import (
ApiEndpoint,
HttpMethod,
SynchronousOperation,
PollingOperation,
EmptyRequest,
)
from comfy_api_nodes.apinode_utils import (
tensor_to_base64_string,
download_url_to_video_output,
upload_video_to_comfyapi,
upload_audio_to_comfyapi,
download_url_to_image_tensor,
)
from comfy_api_nodes.mapper_utils import model_field_to_node_input
from comfy_api.input.basic_types import AudioInput
from comfy_api.input.video_types import VideoInput
from comfy_api.input_impl import VideoFromFile
from comfy.comfy_types.node_typing import IO, InputTypeOptions, ComfyNodeABC
KLING_API_VERSION = "v1"
PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video"
PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video"
PATH_VIDEO_EXTEND = f"/proxy/kling/{KLING_API_VERSION}/videos/video-extend"
PATH_LIP_SYNC = f"/proxy/kling/{KLING_API_VERSION}/videos/lip-sync"
PATH_VIDEO_EFFECTS = f"/proxy/kling/{KLING_API_VERSION}/videos/effects"
PATH_CHARACTER_IMAGE = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
PATH_VIRTUAL_TRY_ON = f"/proxy/kling/{KLING_API_VERSION}/images/kolors-virtual-try-on"
PATH_IMAGE_GENERATIONS = f"/proxy/kling/{KLING_API_VERSION}/images/generations"
MAX_PROMPT_LENGTH_T2V = 2500
MAX_PROMPT_LENGTH_I2V = 500
MAX_PROMPT_LENGTH_IMAGE_GEN = 500
MAX_NEGATIVE_PROMPT_LENGTH_IMAGE_GEN = 200
MAX_PROMPT_LENGTH_LIP_SYNC = 120
R = TypeVar("R")
class KlingApiError(Exception):
"""Base exception for Kling API errors."""
pass
def poll_until_finished(auth_kwargs: dict[str,str], api_endpoint: ApiEndpoint[Any, R]) -> R:
"""Polls the Kling API endpoint until the task reaches a terminal state, then returns the response."""
return PollingOperation(
poll_endpoint=api_endpoint,
completed_statuses=[
KlingTaskStatus.succeed.value,
],
failed_statuses=[KlingTaskStatus.failed.value],
status_extractor=lambda response: (
response.data.task_status.value
if response.data and response.data.task_status
else None
),
auth_kwargs=auth_kwargs,
).execute()
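# Illustrative only (this mirrors the get_response methods defined on the nodes below):
#   poll_until_finished(
#       auth_kwargs,
#       ApiEndpoint(
#           path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
#           method=HttpMethod.GET,
#           request_model=EmptyRequest,
#           response_model=KlingText2VideoResponse,
#       ),
#   )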
def is_valid_camera_control_configs(configs: list[float]) -> bool:
"""Verifies that at least one camera control configuration is non-zero."""
return any(not math.isclose(value, 0.0) for value in configs)
def is_valid_prompt(prompt: str) -> bool:
"""Verifies that the prompt is not empty."""
return bool(prompt)
def is_valid_task_creation_response(response: KlingText2VideoResponse) -> bool:
"""Verifies that the initial response contains a task ID."""
    return bool(response.data and response.data.task_id)
def is_valid_video_response(response: KlingText2VideoResponse) -> bool:
"""Verifies that the response contains a task result with at least one video."""
return (
response.data is not None
and response.data.task_result is not None
and response.data.task_result.videos is not None
and len(response.data.task_result.videos) > 0
)
def is_valid_image_response(response: KlingVirtualTryOnResponse) -> bool:
"""Verifies that the response contains a task result with at least one image."""
return (
response.data is not None
and response.data.task_result is not None
and response.data.task_result.images is not None
and len(response.data.task_result.images) > 0
)
def validate_prompts(prompt: str, negative_prompt: str, max_length: int) -> bool:
"""Verifies that the positive prompt is not empty and that neither promt is too long."""
if not prompt:
raise ValueError("Positive prompt is empty")
if len(prompt) > max_length:
raise ValueError(f"Positive prompt is too long: {len(prompt)} characters")
if negative_prompt and len(negative_prompt) > max_length:
raise ValueError(
f"Negative prompt is too long: {len(negative_prompt)} characters"
)
return True
def validate_task_creation_response(response) -> None:
"""Validates that the Kling task creation request was successful."""
if not is_valid_task_creation_response(response):
error_msg = f"Kling initial request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
logging.error(error_msg)
raise KlingApiError(error_msg)
def validate_video_result_response(response) -> None:
"""Validates that the Kling task result contains a video."""
if not is_valid_video_response(response):
error_msg = f"Kling task {response.data.task_id} succeeded but no video data found in response."
logging.error(f"Error: {error_msg}.\nResponse: {response}")
raise KlingApiError(error_msg)
def validate_image_result_response(response) -> None:
"""Validates that the Kling task result contains an image."""
if not is_valid_image_response(response):
error_msg = f"Kling task {response.data.task_id} succeeded but no image data found in response."
logging.error(f"Error: {error_msg}.\nResponse: {response}")
raise KlingApiError(error_msg)
def validate_input_image(image: torch.Tensor) -> None:
"""
Validates the input image adheres to the expectations of the Kling API:
- The image resolution should not be less than 300*300px
- The aspect ratio of the image should be between 1:2.5 ~ 2.5:1
See: https://app.klingai.com/global/dev/document-api/apiReference/model/imageToVideo
"""
if len(image.shape) == 4:
height, width = image.shape[1], image.shape[2]
elif len(image.shape) == 3:
height, width = image.shape[0], image.shape[1]
else:
raise ValueError("Invalid image tensor shape.")
# Ensure minimum resolution is met
if height < 300:
raise ValueError("Image height must be at least 300px")
if width < 300:
raise ValueError("Image width must be at least 300px")
# Ensure aspect ratio is within acceptable range
aspect_ratio = width / height
if aspect_ratio < 1 / 2.5 or aspect_ratio > 2.5:
raise ValueError("Image aspect ratio must be between 1:2.5 and 2.5:1")
def get_camera_control_input_config(
tooltip: str, default: float = 0.0
) -> tuple[IO, InputTypeOptions]:
"""Returns common InputTypeOptions for Kling camera control configurations."""
input_config = {
"default": default,
"min": -10.0,
"max": 10.0,
"step": 0.25,
"display": "slider",
"tooltip": tooltip,
}
return IO.FLOAT, input_config
def get_video_from_response(response) -> KlingVideoResult:
"""Returns the first video object from the Kling video generation task result."""
video = response.data.task_result.videos[0]
logging.info(
"Kling task %s succeeded. Video URL: %s", response.data.task_id, video.url
)
return video
def get_images_from_response(response) -> list[KlingImageResult]:
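    """Returns the list of image objects from the Kling image generation task result."""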
images = response.data.task_result.images
logging.info("Kling task %s succeeded. Images: %s", response.data.task_id, images)
return images
def video_result_to_node_output(
video: KlingVideoResult,
) -> tuple[VideoFromFile, str, str]:
"""Converts a KlingVideoResult to a tuple of (VideoFromFile, str, str) to be used as a ComfyUI node output."""
return (
download_url_to_video_output(video.url),
str(video.id),
str(video.duration),
)
def image_result_to_node_output(
images: list[KlingImageResult],
) -> torch.Tensor:
"""
    Converts a list of KlingImageResult objects to a [B, H, W, C] image tensor.
If multiple images are returned, they will be stacked along the batch dimension.
"""
if len(images) == 1:
return download_url_to_image_tensor(images[0].url)
else:
return torch.cat([download_url_to_image_tensor(image.url) for image in images])
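# Note: torch.cat along the batch dimension assumes each downloaded image tensor is
# [1, H, W, C] with matching height/width; mixed resolutions returned by the API would
# raise an error here.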
class KlingNodeBase(ComfyNodeABC):
"""Base class for Kling nodes."""
FUNCTION = "api_call"
CATEGORY = "api node/video/Kling"
API_NODE = True
class KlingCameraControls(KlingNodeBase):
"""Kling Camera Controls Node"""
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"camera_control_type": model_field_to_node_input(
IO.COMBO,
KlingCameraControl,
"type",
enum_type=KlingCameraControlType,
),
"horizontal_movement": get_camera_control_input_config(
"Controls camera's movement along horizontal axis (x-axis). Negative indicates left, positive indicates right"
),
"vertical_movement": get_camera_control_input_config(
"Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward."
),
"pan": get_camera_control_input_config(
"Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.",
default=0.5,
),
"tilt": get_camera_control_input_config(
"Controls camera's rotation in horizontal plane (y-axis). Negative indicates left rotation, positive indicates right rotation.",
),
"roll": get_camera_control_input_config(
"Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.",
),
"zoom": get_camera_control_input_config(
"Controls change in camera's focal length. Negative indicates narrower field of view, positive indicates wider field of view.",
),
}
}
DESCRIPTION = "Allows specifying configuration options for Kling Camera Controls and motion control effects."
RETURN_TYPES = ("CAMERA_CONTROL",)
RETURN_NAMES = ("camera_control",)
FUNCTION = "main"
@classmethod
def VALIDATE_INPUTS(
cls,
horizontal_movement: float,
vertical_movement: float,
pan: float,
tilt: float,
roll: float,
zoom: float,
) -> bool | str:
if not is_valid_camera_control_configs(
[
horizontal_movement,
vertical_movement,
pan,
tilt,
roll,
zoom,
]
):
return "Invalid camera control configs: at least one of the values must be non-zero"
return True
def main(
self,
camera_control_type: str,
horizontal_movement: float,
vertical_movement: float,
pan: float,
tilt: float,
roll: float,
zoom: float,
) -> tuple[KlingCameraControl]:
return (
KlingCameraControl(
type=KlingCameraControlType(camera_control_type),
config=KlingCameraConfig(
horizontal=horizontal_movement,
vertical=vertical_movement,
pan=pan,
roll=roll,
tilt=tilt,
zoom=zoom,
),
),
)
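# Illustrative output (not executed): with camera_control_type="simple", zoom=5.0, and the
# remaining sliders at their defaults, main() returns
#   (KlingCameraControl(type=KlingCameraControlType.simple,
#                       config=KlingCameraConfig(horizontal=0.0, vertical=0.0, pan=0.5,
#                                                tilt=0.0, roll=0.0, zoom=5.0)),)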
class KlingTextToVideoNode(KlingNodeBase):
"""Kling Text to Video Node"""
@staticmethod
def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]:
"""
Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples.
        Only includes config combos supported by the text to video request.
See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap)
"""
return {
"standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
"standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"),
"pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
"pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"),
"standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"),
"standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"),
"pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"),
"pro mode / 10s duration / kling-v2-master": ("pro", "10", "kling-v2-master"),
"standard mode / 5s duration / kling-v2-master": ("std", "5", "kling-v2-master"),
"standard mode / 10s duration / kling-v2-master": ("std", "10", "kling-v2-master"),
}
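    # The selected combo string is translated back into its (mode, duration, model_name)
    # tuple inside api_call() below.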
@classmethod
def INPUT_TYPES(s):
modes = list(KlingTextToVideoNode.get_mode_string_mapping().keys())
return {
"required": {
"prompt": model_field_to_node_input(
IO.STRING, KlingText2VideoRequest, "prompt", multiline=True
),
"negative_prompt": model_field_to_node_input(
IO.STRING, KlingText2VideoRequest, "negative_prompt", multiline=True
),
"cfg_scale": model_field_to_node_input(
IO.FLOAT,
KlingText2VideoRequest,
"cfg_scale",
default=1.0,
min=0.0,
max=1.0,
),
"aspect_ratio": model_field_to_node_input(
IO.COMBO,
KlingText2VideoRequest,
"aspect_ratio",
enum_type=KlingVideoGenAspectRatio,
),
"mode": (
modes,
{
"default": modes[4],
"tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
DESCRIPTION = "Kling Text to Video Node"
def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingText2VideoResponse:
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_TEXT_TO_VIDEO}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingText2VideoResponse,
),
)
def api_call(
self,
prompt: str,
negative_prompt: str,
cfg_scale: float,
mode: str,
aspect_ratio: str,
camera_control: Optional[KlingCameraControl] = None,
model_name: Optional[str] = None,
duration: Optional[str] = None,
**kwargs,
) -> tuple[VideoFromFile, str, str]:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
if model_name is None:
mode, duration, model_name = self.get_mode_string_mapping()[mode]
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_TEXT_TO_VIDEO,
method=HttpMethod.POST,
request_model=KlingText2VideoRequest,
response_model=KlingText2VideoResponse,
),
request=KlingText2VideoRequest(
prompt=prompt if prompt else None,
negative_prompt=negative_prompt if negative_prompt else None,
duration=KlingVideoGenDuration(duration),
mode=KlingVideoGenMode(mode),
model_name=KlingVideoGenModelName(model_name),
cfg_scale=cfg_scale,
aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
camera_control=camera_control,
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
return video_result_to_node_output(video)
class KlingCameraControlT2VNode(KlingTextToVideoNode):
"""
Kling Text to Video Camera Control Node. This node is a text to video node, but it supports controlling the camera.
    Duration, mode, and model_name request fields are hard-coded (std mode, kling-v1, 5s duration), the only configuration this node supports for camera control as of 2025-05-02.
"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": model_field_to_node_input(
IO.STRING, KlingText2VideoRequest, "prompt", multiline=True
),
"negative_prompt": model_field_to_node_input(
IO.STRING,
KlingText2VideoRequest,
"negative_prompt",
multiline=True,
),
"cfg_scale": model_field_to_node_input(
IO.FLOAT,
KlingText2VideoRequest,
"cfg_scale",
default=0.75,
min=0.0,
max=1.0,
),
"aspect_ratio": model_field_to_node_input(
IO.COMBO,
KlingText2VideoRequest,
"aspect_ratio",
enum_type=KlingVideoGenAspectRatio,
),
"camera_control": (
"CAMERA_CONTROL",
{
"tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text."
def api_call(
self,
prompt: str,
negative_prompt: str,
cfg_scale: float,
aspect_ratio: str,
camera_control: Optional[KlingCameraControl] = None,
**kwargs,
):
return super().api_call(
model_name=KlingVideoGenModelName.kling_v1,
cfg_scale=cfg_scale,
mode=KlingVideoGenMode.std,
aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
duration=KlingVideoGenDuration.field_5,
prompt=prompt,
negative_prompt=negative_prompt,
camera_control=camera_control,
**kwargs,
)
class KlingImage2VideoNode(KlingNodeBase):
"""Kling Image to Video Node"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"start_frame": model_field_to_node_input(
IO.IMAGE,
KlingImage2VideoRequest,
"image",
tooltip="The reference image used to generate the video.",
),
"prompt": model_field_to_node_input(
IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True
),
"negative_prompt": model_field_to_node_input(
IO.STRING,
KlingImage2VideoRequest,
"negative_prompt",
multiline=True,
),
"model_name": model_field_to_node_input(
IO.COMBO,
KlingImage2VideoRequest,
"model_name",
enum_type=KlingVideoGenModelName,
),
"cfg_scale": model_field_to_node_input(
IO.FLOAT,
KlingImage2VideoRequest,
"cfg_scale",
default=0.8,
min=0.0,
max=1.0,
),
"mode": model_field_to_node_input(
IO.COMBO,
KlingImage2VideoRequest,
"mode",
enum_type=KlingVideoGenMode,
),
"aspect_ratio": model_field_to_node_input(
IO.COMBO,
KlingImage2VideoRequest,
"aspect_ratio",
enum_type=KlingVideoGenAspectRatio,
),
"duration": model_field_to_node_input(
IO.COMBO,
KlingImage2VideoRequest,
"duration",
enum_type=KlingVideoGenDuration,
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
DESCRIPTION = "Kling Image to Video Node"
def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingImage2VideoResponse:
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}",
method=HttpMethod.GET,
request_model=KlingImage2VideoRequest,
response_model=KlingImage2VideoResponse,
),
)
def api_call(
self,
start_frame: torch.Tensor,
prompt: str,
negative_prompt: str,
model_name: str,
cfg_scale: float,
mode: str,
aspect_ratio: str,
duration: str,
camera_control: Optional[KlingCameraControl] = None,
end_frame: Optional[torch.Tensor] = None,
**kwargs,
) -> tuple[VideoFromFile]:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_I2V)
validate_input_image(start_frame)
if camera_control is not None:
# Camera control type for image 2 video is always `simple`
camera_control.type = KlingCameraControlType.simple
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_IMAGE_TO_VIDEO,
method=HttpMethod.POST,
request_model=KlingImage2VideoRequest,
response_model=KlingImage2VideoResponse,
),
request=KlingImage2VideoRequest(
model_name=KlingVideoGenModelName(model_name),
image=tensor_to_base64_string(start_frame),
image_tail=(
tensor_to_base64_string(end_frame)
if end_frame is not None
else None
),
prompt=prompt,
negative_prompt=negative_prompt if negative_prompt else None,
cfg_scale=cfg_scale,
mode=KlingVideoGenMode(mode),
duration=KlingVideoGenDuration(duration),
camera_control=camera_control,
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
return video_result_to_node_output(video)
class KlingCameraControlI2VNode(KlingImage2VideoNode):
"""
    Kling Image to Video Camera Control Node. This node is an image to video node, but it supports controlling the camera.
Duration, mode, and model_name request fields are hard-coded because camera control is only supported in pro mode with the kling-v1-5 model at 5s duration as of 2025-05-02.
"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"start_frame": model_field_to_node_input(
IO.IMAGE, KlingImage2VideoRequest, "image"
),
"prompt": model_field_to_node_input(
IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True
),
"negative_prompt": model_field_to_node_input(
IO.STRING,
KlingImage2VideoRequest,
"negative_prompt",
multiline=True,
),
"cfg_scale": model_field_to_node_input(
IO.FLOAT,
KlingImage2VideoRequest,
"cfg_scale",
default=0.75,
min=0.0,
max=1.0,
),
"aspect_ratio": model_field_to_node_input(
IO.COMBO,
KlingImage2VideoRequest,
"aspect_ratio",
enum_type=KlingVideoGenAspectRatio,
),
"camera_control": (
"CAMERA_CONTROL",
{
"tooltip": "Can be created using the Kling Camera Controls node. Controls the camera movement and motion during the video generation.",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image."
def api_call(
self,
start_frame: torch.Tensor,
prompt: str,
negative_prompt: str,
cfg_scale: float,
aspect_ratio: str,
camera_control: KlingCameraControl,
**kwargs,
):
return super().api_call(
model_name=KlingVideoGenModelName.kling_v1_5,
start_frame=start_frame,
cfg_scale=cfg_scale,
mode=KlingVideoGenMode.pro,
aspect_ratio=KlingVideoGenAspectRatio(aspect_ratio),
duration=KlingVideoGenDuration.field_5,
prompt=prompt,
negative_prompt=negative_prompt,
camera_control=camera_control,
**kwargs,
)
class KlingStartEndFrameNode(KlingImage2VideoNode):
"""
Kling First Last Frame Node. This node allows creation of a video from a first and last frame. It calls the normal image to video endpoint, but only allows the subset of input options that support the `image_tail` request field.
"""
@staticmethod
def get_mode_string_mapping() -> dict[str, tuple[str, str, str]]:
"""
Returns a mapping of mode strings to their corresponding (mode, duration, model_name) tuples.
Only includes config combos that support the `image_tail` request field.
See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document-api/apiReference/model/skillsMap)
"""
return {
"standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
"pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
"pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"),
"pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"),
"pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"),
"pro mode / 10s duration / kling-v1-6": ("pro", "10", "kling-v1-6"),
}
@classmethod
def INPUT_TYPES(s):
modes = list(KlingStartEndFrameNode.get_mode_string_mapping().keys())
return {
"required": {
"start_frame": model_field_to_node_input(
IO.IMAGE, KlingImage2VideoRequest, "image"
),
"end_frame": model_field_to_node_input(
IO.IMAGE, KlingImage2VideoRequest, "image_tail"
),
"prompt": model_field_to_node_input(
IO.STRING, KlingImage2VideoRequest, "prompt", multiline=True
),
"negative_prompt": model_field_to_node_input(
IO.STRING,
KlingImage2VideoRequest,
"negative_prompt",
multiline=True,
),
"cfg_scale": model_field_to_node_input(
IO.FLOAT,
KlingImage2VideoRequest,
"cfg_scale",
default=0.5,
min=0.0,
max=1.0,
),
"aspect_ratio": model_field_to_node_input(
IO.COMBO,
KlingImage2VideoRequest,
"aspect_ratio",
enum_type=KlingVideoGenAspectRatio,
),
"mode": (
modes,
{
"default": modes[2],
"tooltip": "The configuration to use for the video generation following the format: mode / duration / model_name.",
},
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last."
def api_call(
self,
start_frame: torch.Tensor,
end_frame: torch.Tensor,
prompt: str,
negative_prompt: str,
cfg_scale: float,
aspect_ratio: str,
mode: str,
**kwargs,
):
mode, duration, model_name = KlingStartEndFrameNode.get_mode_string_mapping()[
mode
]
return super().api_call(
prompt=prompt,
negative_prompt=negative_prompt,
model_name=model_name,
start_frame=start_frame,
cfg_scale=cfg_scale,
mode=mode,
aspect_ratio=aspect_ratio,
duration=duration,
end_frame=end_frame,
**kwargs,
)
class KlingVideoExtendNode(KlingNodeBase):
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": model_field_to_node_input(
IO.STRING, KlingVideoExtendRequest, "prompt", multiline=True
),
"negative_prompt": model_field_to_node_input(
IO.STRING,
KlingVideoExtendRequest,
"negative_prompt",
multiline=True,
),
"cfg_scale": model_field_to_node_input(
IO.FLOAT,
KlingVideoExtendRequest,
"cfg_scale",
default=0.5,
min=0.0,
max=1.0,
),
"video_id": model_field_to_node_input(
IO.STRING, KlingVideoExtendRequest, "video_id", forceInput=True
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
DESCRIPTION = "Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes."
def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoExtendResponse:
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIDEO_EXTEND}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingVideoExtendResponse,
),
)
def api_call(
self,
prompt: str,
negative_prompt: str,
cfg_scale: float,
video_id: str,
**kwargs,
) -> tuple[VideoFromFile, str, str]:
validate_prompts(prompt, negative_prompt, MAX_PROMPT_LENGTH_T2V)
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_VIDEO_EXTEND,
method=HttpMethod.POST,
request_model=KlingVideoExtendRequest,
response_model=KlingVideoExtendResponse,
),
request=KlingVideoExtendRequest(
prompt=prompt if prompt else None,
negative_prompt=negative_prompt if negative_prompt else None,
cfg_scale=cfg_scale,
video_id=video_id,
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
return video_result_to_node_output(video)
class KlingVideoEffectsBase(KlingNodeBase):
"""Kling Video Effects Base"""
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingVideoEffectsResponse:
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIDEO_EFFECTS}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingVideoEffectsResponse,
),
)
def api_call(
self,
dual_character: bool,
effect_scene: KlingDualCharacterEffectsScene | KlingSingleImageEffectsScene,
model_name: str,
duration: KlingVideoGenDuration,
image_1: torch.Tensor,
image_2: Optional[torch.Tensor] = None,
mode: Optional[KlingVideoGenMode] = None,
**kwargs,
):
if dual_character:
request_input_field = KlingDualCharacterEffectInput(
model_name=model_name,
mode=mode,
images=[
tensor_to_base64_string(image_1),
tensor_to_base64_string(image_2),
],
duration=duration,
)
else:
request_input_field = KlingSingleImageEffectInput(
model_name=model_name,
image=tensor_to_base64_string(image_1),
duration=duration,
)
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_VIDEO_EFFECTS,
method=HttpMethod.POST,
request_model=KlingVideoEffectsRequest,
response_model=KlingVideoEffectsResponse,
),
request=KlingVideoEffectsRequest(
effect_scene=effect_scene,
input=request_input_field,
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
return video_result_to_node_output(video)
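# The effects request body differs by scene type: dual-character scenes send two base64
# images plus a mode via KlingDualCharacterEffectInput, while single-image scenes send a
# single image via KlingSingleImageEffectInput (see api_call above).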
class KlingDualCharacterVideoEffectNode(KlingVideoEffectsBase):
"""Kling Dual Character Video Effect Node"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image_left": (IO.IMAGE, {"tooltip": "Left side image"}),
"image_right": (IO.IMAGE, {"tooltip": "Right side image"}),
"effect_scene": model_field_to_node_input(
IO.COMBO,
KlingVideoEffectsRequest,
"effect_scene",
enum_type=KlingDualCharacterEffectsScene,
),
"model_name": model_field_to_node_input(
IO.COMBO,
KlingDualCharacterEffectInput,
"model_name",
enum_type=KlingCharacterEffectModelName,
),
"mode": model_field_to_node_input(
IO.COMBO,
KlingDualCharacterEffectInput,
"mode",
enum_type=KlingVideoGenMode,
),
"duration": model_field_to_node_input(
IO.COMBO,
KlingDualCharacterEffectInput,
"duration",
enum_type=KlingVideoGenDuration,
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite."
RETURN_TYPES = ("VIDEO", "STRING")
RETURN_NAMES = ("VIDEO", "duration")
def api_call(
self,
image_left: torch.Tensor,
image_right: torch.Tensor,
effect_scene: KlingDualCharacterEffectsScene,
model_name: KlingCharacterEffectModelName,
mode: KlingVideoGenMode,
duration: KlingVideoGenDuration,
**kwargs,
):
video, _, duration = super().api_call(
dual_character=True,
effect_scene=effect_scene,
model_name=model_name,
mode=mode,
duration=duration,
image_1=image_left,
image_2=image_right,
**kwargs,
)
return video, duration
class KlingSingleImageVideoEffectNode(KlingVideoEffectsBase):
"""Kling Single Image Video Effect Node"""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"image": (
IO.IMAGE,
{
"tooltip": " Reference Image. URL or Base64 encoded string (without data:image prefix). File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1"
},
),
"effect_scene": model_field_to_node_input(
IO.COMBO,
KlingVideoEffectsRequest,
"effect_scene",
enum_type=KlingSingleImageEffectsScene,
),
"model_name": model_field_to_node_input(
IO.COMBO,
KlingSingleImageEffectInput,
"model_name",
enum_type=KlingSingleImageEffectModelName,
),
"duration": model_field_to_node_input(
IO.COMBO,
KlingSingleImageEffectInput,
"duration",
enum_type=KlingVideoGenDuration,
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Achieve different special effects when generating a video based on the effect_scene."
def api_call(
self,
image: torch.Tensor,
effect_scene: KlingSingleImageEffectsScene,
model_name: KlingSingleImageEffectModelName,
duration: KlingVideoGenDuration,
**kwargs,
):
return super().api_call(
dual_character=False,
effect_scene=effect_scene,
model_name=model_name,
duration=duration,
image_1=image,
**kwargs,
)
class KlingLipSyncBase(KlingNodeBase):
"""Kling Lip Sync Base"""
RETURN_TYPES = ("VIDEO", "STRING", "STRING")
RETURN_NAMES = ("VIDEO", "video_id", "duration")
def validate_text(self, text: str):
if not text:
raise ValueError("Text is required")
if len(text) > MAX_PROMPT_LENGTH_LIP_SYNC:
raise ValueError(
f"Text is too long. Maximum length is {MAX_PROMPT_LENGTH_LIP_SYNC} characters."
)
def get_response(self, task_id: str, auth_kwargs: dict[str,str]) -> KlingLipSyncResponse:
"""Polls the Kling API endpoint until the task reaches a terminal state."""
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_LIP_SYNC}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingLipSyncResponse,
),
)
def api_call(
self,
video: VideoInput,
audio: Optional[AudioInput] = None,
voice_language: Optional[str] = None,
mode: Optional[str] = None,
text: Optional[str] = None,
voice_speed: Optional[float] = None,
voice_id: Optional[str] = None,
**kwargs
) -> tuple[VideoFromFile, str, str]:
if text:
self.validate_text(text)
# Upload video to Comfy API and get download URL
video_url = upload_video_to_comfyapi(video, auth_kwargs=kwargs)
logging.info("Uploaded video to Comfy API. URL: %s", video_url)
# Upload the audio file to Comfy API and get download URL
if audio:
audio_url = upload_audio_to_comfyapi(audio, auth_kwargs=kwargs)
logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
else:
audio_url = None
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_LIP_SYNC,
method=HttpMethod.POST,
request_model=KlingLipSyncRequest,
response_model=KlingLipSyncResponse,
),
request=KlingLipSyncRequest(
input=KlingLipSyncInputObject(
video_url=video_url,
mode=mode,
text=text,
voice_language=voice_language,
voice_speed=voice_speed,
audio_type="url",
audio_url=audio_url,
voice_id=voice_id,
),
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_video_result_response(final_response)
video = get_video_from_response(final_response)
return video_result_to_node_output(video)
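# Lip sync inputs are not sent inline: the source video (and optional audio) is first
# uploaded via upload_*_to_comfyapi(), and the resulting URLs are passed to the Kling
# request with audio_type="url".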
class KlingLipSyncAudioToVideoNode(KlingLipSyncBase):
"""Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file."""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"video": (IO.VIDEO, {}),
"audio": (IO.AUDIO, {}),
"voice_language": model_field_to_node_input(
IO.COMBO,
KlingLipSyncInputObject,
"voice_language",
enum_type=KlingLipSyncVoiceLanguage,
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file."
def api_call(
self,
video: VideoInput,
audio: AudioInput,
voice_language: str,
**kwargs,
):
return super().api_call(
video=video,
audio=audio,
voice_language=voice_language,
mode="audio2video",
**kwargs,
)
class KlingLipSyncTextToVideoNode(KlingLipSyncBase):
"""Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt."""
@staticmethod
def get_voice_config() -> dict[str, tuple[str, str]]:
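        """Returns a mapping of voice display names to (voice_id, voice_language) tuples used by api_call."""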
return {
# English voices
"Melody": ("girlfriend_4_speech02", "en"),
"Sunny": ("genshin_vindi2", "en"),
"Sage": ("zhinen_xuesheng", "en"),
"Ace": ("AOT", "en"),
"Blossom": ("ai_shatang", "en"),
"Peppy": ("genshin_klee2", "en"),
"Dove": ("genshin_kirara", "en"),
"Shine": ("ai_kaiya", "en"),
"Anchor": ("oversea_male1", "en"),
"Lyric": ("ai_chenjiahao_712", "en"),
"Tender": ("chat1_female_new-3", "en"),
"Siren": ("chat_0407_5-1", "en"),
"Zippy": ("cartoon-boy-07", "en"),
"Bud": ("uk_boy1", "en"),
"Sprite": ("cartoon-girl-01", "en"),
"Candy": ("PeppaPig_platform", "en"),
"Beacon": ("ai_huangzhong_712", "en"),
"Rock": ("ai_huangyaoshi_712", "en"),
"Titan": ("ai_laoguowang_712", "en"),
"Grace": ("chengshu_jiejie", "en"),
"Helen": ("you_pingjing", "en"),
"Lore": ("calm_story1", "en"),
"Crag": ("uk_man2", "en"),
"Prattle": ("laopopo_speech02", "en"),
"Hearth": ("heainainai_speech02", "en"),
"The Reader": ("reader_en_m-v1", "en"),
"Commercial Lady": ("commercial_lady_en_f-v1", "en"),
# Chinese voices
"阳光少年": ("genshin_vindi2", "zh"),
"懂事小弟": ("zhinen_xuesheng", "zh"),
"运动少年": ("tiyuxi_xuedi", "zh"),
"青春少女": ("ai_shatang", "zh"),
"温柔小妹": ("genshin_klee2", "zh"),
"元气少女": ("genshin_kirara", "zh"),
"阳光男生": ("ai_kaiya", "zh"),
"幽默小哥": ("tiexin_nanyou", "zh"),
"文艺小哥": ("ai_chenjiahao_712", "zh"),
"甜美邻家": ("girlfriend_1_speech02", "zh"),
"温柔姐姐": ("chat1_female_new-3", "zh"),
"职场女青": ("girlfriend_2_speech02", "zh"),
"活泼男童": ("cartoon-boy-07", "zh"),
"俏皮女童": ("cartoon-girl-01", "zh"),
"稳重老爸": ("ai_huangyaoshi_712", "zh"),
"温柔妈妈": ("you_pingjing", "zh"),
"严肃上司": ("ai_laoguowang_712", "zh"),
"优雅贵妇": ("chengshu_jiejie", "zh"),
"慈祥爷爷": ("zhuxi_speech02", "zh"),
"唠叨爷爷": ("uk_oldman3", "zh"),
"唠叨奶奶": ("laopopo_speech02", "zh"),
"和蔼奶奶": ("heainainai_speech02", "zh"),
"东北老铁": ("dongbeilaotie_speech02", "zh"),
"重庆小伙": ("chongqingxiaohuo_speech02", "zh"),
"四川妹子": ("chuanmeizi_speech02", "zh"),
"潮汕大叔": ("chaoshandashu_speech02", "zh"),
"台湾男生": ("ai_taiwan_man2_speech02", "zh"),
"西安掌柜": ("xianzhanggui_speech02", "zh"),
"天津姐姐": ("tianjinjiejie_speech02", "zh"),
"新闻播报男": ("diyinnansang_DB_CN_M_04-v2", "zh"),
"译制片男": ("yizhipiannan-v1", "zh"),
"撒娇女友": ("tianmeixuemei-v1", "zh"),
"刀片烟嗓": ("daopianyansang-v1", "zh"),
"乖巧正太": ("mengwa-v1", "zh"),
}
@classmethod
def INPUT_TYPES(s):
voice_options = list(s.get_voice_config().keys())
return {
"required": {
"video": (IO.VIDEO, {}),
"text": model_field_to_node_input(
IO.STRING, KlingLipSyncInputObject, "text", multiline=True
),
"voice": (voice_options, {"default": voice_options[0]}),
"voice_speed": model_field_to_node_input(
IO.FLOAT, KlingLipSyncInputObject, "voice_speed", slider=True
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt."
def api_call(
self,
video: VideoInput,
text: str,
voice: str,
voice_speed: float,
**kwargs,
):
voice_id, voice_language = KlingLipSyncTextToVideoNode.get_voice_config()[voice]
return super().api_call(
video=video,
text=text,
voice_language=voice_language,
voice_id=voice_id,
voice_speed=voice_speed,
mode="text2video",
**kwargs,
)
class KlingImageGenerationBase(KlingNodeBase):
"""Kling Image Generation Base Node."""
RETURN_TYPES = ("IMAGE",)
CATEGORY = "api node/image/Kling"
def validate_prompt(self, prompt: str, negative_prompt: Optional[str] = None):
if not prompt or len(prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN:
            raise ValueError(
                f"Prompt must be non-empty and at most {MAX_PROMPT_LENGTH_IMAGE_GEN} characters"
            )
if negative_prompt and len(negative_prompt) > MAX_PROMPT_LENGTH_IMAGE_GEN:
raise ValueError(
f"Negative prompt must be less than {MAX_PROMPT_LENGTH_IMAGE_GEN} characters"
)
class KlingVirtualTryOnNode(KlingImageGenerationBase):
"""Kling Virtual Try On Node."""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"human_image": (IO.IMAGE, {}),
"cloth_image": (IO.IMAGE, {}),
"model_name": model_field_to_node_input(
IO.COMBO,
KlingVirtualTryOnRequest,
"model_name",
enum_type=KlingVirtualTryOnModelName,
),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human."
def get_response(
        self, task_id: str, auth_kwargs: Optional[dict[str, str]] = None
) -> KlingVirtualTryOnResponse:
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_VIRTUAL_TRY_ON}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingVirtualTryOnResponse,
),
)
def api_call(
self,
human_image: torch.Tensor,
cloth_image: torch.Tensor,
model_name: KlingVirtualTryOnModelName,
**kwargs,
):
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_VIRTUAL_TRY_ON,
method=HttpMethod.POST,
request_model=KlingVirtualTryOnRequest,
response_model=KlingVirtualTryOnResponse,
),
request=KlingVirtualTryOnRequest(
human_image=tensor_to_base64_string(human_image),
cloth_image=tensor_to_base64_string(cloth_image),
model_name=model_name,
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_image_result_response(final_response)
images = get_images_from_response(final_response)
return (image_result_to_node_output(images),)
class KlingImageGenerationNode(KlingImageGenerationBase):
"""Kling Image Generation Node. Generate an image from a text prompt with an optional reference image."""
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"prompt": model_field_to_node_input(
IO.STRING,
KlingImageGenerationsRequest,
"prompt",
multiline=True,
max_length=MAX_PROMPT_LENGTH_IMAGE_GEN,
),
"negative_prompt": model_field_to_node_input(
IO.STRING,
KlingImageGenerationsRequest,
"negative_prompt",
multiline=True,
),
"image_type": model_field_to_node_input(
IO.COMBO,
KlingImageGenerationsRequest,
"image_reference",
enum_type=KlingImageGenImageReferenceType,
),
"image_fidelity": model_field_to_node_input(
IO.FLOAT,
KlingImageGenerationsRequest,
"image_fidelity",
slider=True,
step=0.01,
),
"human_fidelity": model_field_to_node_input(
IO.FLOAT,
KlingImageGenerationsRequest,
"human_fidelity",
slider=True,
step=0.01,
),
"model_name": model_field_to_node_input(
IO.COMBO,
KlingImageGenerationsRequest,
"model_name",
enum_type=KlingImageGenModelName,
),
"aspect_ratio": model_field_to_node_input(
IO.COMBO,
KlingImageGenerationsRequest,
"aspect_ratio",
enum_type=KlingImageGenAspectRatio,
),
"n": model_field_to_node_input(
IO.INT,
KlingImageGenerationsRequest,
"n",
),
},
"optional": {
"image": (IO.IMAGE, {}),
},
"hidden": {
"auth_token": "AUTH_TOKEN_COMFY_ORG",
"comfy_api_key": "API_KEY_COMFY_ORG",
},
}
DESCRIPTION = "Kling Image Generation Node. Generate an image from a text prompt with an optional reference image."
def get_response(
self, task_id: str, auth_kwargs: Optional[dict[str,str]] = None
) -> KlingImageGenerationsResponse:
return poll_until_finished(
auth_kwargs,
ApiEndpoint(
path=f"{PATH_IMAGE_GENERATIONS}/{task_id}",
method=HttpMethod.GET,
request_model=EmptyRequest,
response_model=KlingImageGenerationsResponse,
),
)
def api_call(
self,
model_name: KlingImageGenModelName,
prompt: str,
negative_prompt: str,
image_type: KlingImageGenImageReferenceType,
image_fidelity: float,
human_fidelity: float,
n: int,
aspect_ratio: KlingImageGenAspectRatio,
image: Optional[torch.Tensor] = None,
**kwargs,
):
self.validate_prompt(prompt, negative_prompt)
if image is not None:
image = tensor_to_base64_string(image)
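        # When a reference image is provided, it is sent base64-encoded along with
        # image_reference (how the reference is applied) and the fidelity settings.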
initial_operation = SynchronousOperation(
endpoint=ApiEndpoint(
path=PATH_IMAGE_GENERATIONS,
method=HttpMethod.POST,
request_model=KlingImageGenerationsRequest,
response_model=KlingImageGenerationsResponse,
),
request=KlingImageGenerationsRequest(
model_name=model_name,
prompt=prompt,
negative_prompt=negative_prompt,
image=image,
image_reference=image_type,
image_fidelity=image_fidelity,
human_fidelity=human_fidelity,
n=n,
aspect_ratio=aspect_ratio,
),
auth_kwargs=kwargs,
)
task_creation_response = initial_operation.execute()
validate_task_creation_response(task_creation_response)
task_id = task_creation_response.data.task_id
final_response = self.get_response(task_id, auth_kwargs=kwargs)
validate_image_result_response(final_response)
images = get_images_from_response(final_response)
return (image_result_to_node_output(images),)
NODE_CLASS_MAPPINGS = {
"KlingCameraControls": KlingCameraControls,
"KlingTextToVideoNode": KlingTextToVideoNode,
"KlingImage2VideoNode": KlingImage2VideoNode,
"KlingCameraControlI2VNode": KlingCameraControlI2VNode,
"KlingCameraControlT2VNode": KlingCameraControlT2VNode,
"KlingStartEndFrameNode": KlingStartEndFrameNode,
"KlingVideoExtendNode": KlingVideoExtendNode,
"KlingLipSyncAudioToVideoNode": KlingLipSyncAudioToVideoNode,
"KlingLipSyncTextToVideoNode": KlingLipSyncTextToVideoNode,
"KlingVirtualTryOnNode": KlingVirtualTryOnNode,
"KlingImageGenerationNode": KlingImageGenerationNode,
"KlingSingleImageVideoEffectNode": KlingSingleImageVideoEffectNode,
"KlingDualCharacterVideoEffectNode": KlingDualCharacterVideoEffectNode,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"KlingCameraControls": "Kling Camera Controls",
"KlingTextToVideoNode": "Kling Text to Video",
"KlingImage2VideoNode": "Kling Image to Video",
"KlingCameraControlI2VNode": "Kling Image to Video (Camera Control)",
"KlingCameraControlT2VNode": "Kling Text to Video (Camera Control)",
"KlingStartEndFrameNode": "Kling Start-End Frame to Video",
"KlingVideoExtendNode": "Kling Video Extend",
"KlingLipSyncAudioToVideoNode": "Kling Lip Sync Video with Audio",
"KlingLipSyncTextToVideoNode": "Kling Lip Sync Video with Text",
"KlingVirtualTryOnNode": "Kling Virtual Try On",
"KlingImageGenerationNode": "Kling Image Generation",
"KlingSingleImageVideoEffectNode": "Kling Video Effects",
"KlingDualCharacterVideoEffectNode": "Kling Dual Character Video Effects",
}