Add a shared pooled-embedding helper for the SDXL models.

sdxl_pooled(args, noise_augmentor) returns args["pooled_output"] unless
args contains "unclip_conditioning"; in that case it returns the first 1280
entries along the second axis of the unclip_adm(...) result. SDXLRefiner and
SDXL each construct a CLIPEmbeddingNoiseAugmentation in __init__ and their
encode_adm methods now obtain clip_pooled through the helper.

diff --git a/comfy/model_base.py b/comfy/model_base.py
index ad661ec7..979e2c65 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -148,13 +148,20 @@ class SDInpaint(BaseModel):
         super().__init__(model_config, model_type, device=device)
         self.concat_keys = ("mask", "masked_image")
 
+def sdxl_pooled(args, noise_augmentor):
+    if "unclip_conditioning" in args:
+        return unclip_adm(args.get("unclip_conditioning", None), args["device"], noise_augmentor)[:,:1280]
+    else:
+        return args["pooled_output"]
+
 class SDXLRefiner(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
         self.embedder = Timestep(256)
+        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280})
 
     def encode_adm(self, **kwargs):
-        clip_pooled = kwargs["pooled_output"]
+        clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)
         width = kwargs.get("width", 768)
         height = kwargs.get("height", 768)
         crop_w = kwargs.get("crop_w", 0)
@@ -178,9 +185,10 @@ class SDXL(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
         super().__init__(model_config, model_type, device=device)
         self.embedder = Timestep(256)
+        self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**{"noise_schedule_config": {"timesteps": 1000, "beta_schedule": "squaredcos_cap_v2"}, "timestep_dim": 1280})
 
     def encode_adm(self, **kwargs):
-        clip_pooled = kwargs["pooled_output"]
+        clip_pooled = sdxl_pooled(kwargs, self.noise_augmentor)
         width = kwargs.get("width", 768)
         height = kwargs.get("height", 768)
         crop_w = kwargs.get("crop_w", 0)