ComfyUI/comfy_extras/nodes_ace.py
2025-05-07 19:22:07 -04:00

50 lines
1.7 KiB
Python

import torch
import comfy.model_management
import node_helpers
class TextEncodeAceStepAudio:
@classmethod
def INPUT_TYPES(s):
return {"required": {
"clip": ("CLIP", ),
"tags": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"lyrics": ("STRING", {"multiline": True, "dynamicPrompts": True}),
"lyrics_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "encode"
CATEGORY = "conditioning"
def encode(self, clip, tags, lyrics, lyrics_strength):
tokens = clip.tokenize(tags, lyrics=lyrics)
conditioning = clip.encode_from_tokens_scheduled(tokens)
conditioning = node_helpers.conditioning_set_values(conditioning, {"lyrics_strength": lyrics_strength})
return (conditioning, )
class EmptyAceStepLatentAudio:
def __init__(self):
self.device = comfy.model_management.intermediate_device()
@classmethod
def INPUT_TYPES(s):
return {"required": {"seconds": ("FLOAT", {"default": 120.0, "min": 1.0, "max": 1000.0, "step": 0.1}),
"batch_size": ("INT", {"default": 1, "min": 1, "max": 4096, "tooltip": "The number of latent images in the batch."}),
}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "generate"
CATEGORY = "latent/audio"
def generate(self, seconds, batch_size):
length = int(seconds * 44100 / 512 / 8)
latent = torch.zeros([batch_size, 8, 16, length], device=self.device)
return ({"samples": latent, "type": "audio"}, )
NODE_CLASS_MAPPINGS = {
"TextEncodeAceStepAudio": TextEncodeAceStepAudio,
"EmptyAceStepLatentAudio": EmptyAceStepLatentAudio,
}