mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-01-25 15:55:18 +00:00
Automatically reshape 2D latents to 3D for single-image generation on video models.
This commit is contained in:
parent
e1dec3c792
commit
a618f768e0
@ -3,6 +3,7 @@ import torch
|
|||||||
class LatentFormat:
|
class LatentFormat:
|
||||||
scale_factor = 1.0
|
scale_factor = 1.0
|
||||||
latent_channels = 4
|
latent_channels = 4
|
||||||
|
latent_dimensions = 2
|
||||||
latent_rgb_factors = None
|
latent_rgb_factors = None
|
||||||
latent_rgb_factors_bias = None
|
latent_rgb_factors_bias = None
|
||||||
taesd_decoder_name = None
|
taesd_decoder_name = None
|
||||||
@ -143,6 +144,7 @@ class SD3(LatentFormat):
|
|||||||
|
|
||||||
class StableAudio1(LatentFormat):
|
class StableAudio1(LatentFormat):
|
||||||
latent_channels = 64
|
latent_channels = 64
|
||||||
|
latent_dimensions = 1
|
||||||
|
|
||||||
class Flux(SD3):
|
class Flux(SD3):
|
||||||
latent_channels = 16
|
latent_channels = 16
|
||||||
@ -178,6 +180,7 @@ class Flux(SD3):
|
|||||||
|
|
||||||
class Mochi(LatentFormat):
|
class Mochi(LatentFormat):
|
||||||
latent_channels = 12
|
latent_channels = 12
|
||||||
|
latent_dimensions = 3
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.scale_factor = 1.0
|
self.scale_factor = 1.0
|
||||||
@ -219,6 +222,8 @@ class Mochi(LatentFormat):
|
|||||||
|
|
||||||
class LTXV(LatentFormat):
|
class LTXV(LatentFormat):
|
||||||
latent_channels = 128
|
latent_channels = 128
|
||||||
|
latent_dimensions = 3
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.latent_rgb_factors = [
|
self.latent_rgb_factors = [
|
||||||
[ 1.1202e-02, -6.3815e-04, -1.0021e-02],
|
[ 1.1202e-02, -6.3815e-04, -1.0021e-02],
|
||||||
@ -355,6 +360,7 @@ class LTXV(LatentFormat):
|
|||||||
|
|
||||||
class HunyuanVideo(LatentFormat):
|
class HunyuanVideo(LatentFormat):
|
||||||
latent_channels = 16
|
latent_channels = 16
|
||||||
|
latent_dimensions = 3
|
||||||
scale_factor = 0.476986
|
scale_factor = 0.476986
|
||||||
latent_rgb_factors = [
|
latent_rgb_factors = [
|
||||||
[-0.0395, -0.0331, 0.0445],
|
[-0.0395, -0.0331, 0.0445],
|
||||||
|
@ -25,9 +25,11 @@ def prepare_noise(latent_image, seed, noise_inds=None):
|
|||||||
return noises
|
return noises
|
||||||
|
|
||||||
def fix_empty_latent_channels(model, latent_image):
|
def fix_empty_latent_channels(model, latent_image):
|
||||||
latent_channels = model.get_model_object("latent_format").latent_channels #Resize the empty latent image so it has the right number of channels
|
latent_format = model.get_model_object("latent_format") #Resize the empty latent image so it has the right number of channels
|
||||||
if latent_channels != latent_image.shape[1] and torch.count_nonzero(latent_image) == 0:
|
if latent_format.latent_channels != latent_image.shape[1] and torch.count_nonzero(latent_image) == 0:
|
||||||
latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_channels, dim=1)
|
latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_format.latent_channels, dim=1)
|
||||||
|
if latent_format.latent_dimensions == 3 and latent_image.ndim == 4:
|
||||||
|
latent_image = latent_image.unsqueeze(2)
|
||||||
return latent_image
|
return latent_image
|
||||||
|
|
||||||
def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
|
def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
|
||||||
|
Loading…
Reference in New Issue
Block a user