automatic CRF detection, automation of preprocessing

This commit is contained in:
drunkplato 2024-12-19 20:34:54 +00:00 committed by Ubuntu
parent e512458a79
commit 20833211f0
8 changed files with 1433 additions and 76 deletions

View File

@ -16,23 +16,52 @@ class GenesisModifiedCrossAttention(nn.Module):
total_steps = transformer_options.get('total_steps', 0)
attn_bank = transformer_options.get('attn_bank', None)
sample_mode = transformer_options.get('sample_mode', None)
if attn_bank is not None and self.idx in attn_bank['block_map']:
    len_conds = len(transformer_options['cond_or_uncond'])
    pred_order = transformer_options['pred_order']
    block_map_entry = attn_bank['block_map'][self.idx]  # pre-compute the per-block lookup
    if sample_mode == 'forward' and total_steps - step - 1 < attn_bank['save_steps']:
        step_idx = f'{pred_order}_{total_steps - step - 1}'
        block_map_entry[step_idx] = x.cpu()
    elif sample_mode == 'reverse' and step < attn_bank['inject_steps']:
        step_idx = f'{pred_order}_{step}'
        inject_settings = attn_bank.get('inject_settings', {})
        if inject_settings:
            inj = block_map_entry[step_idx].to(x.device).repeat(len_conds, 1, 1)
            if 'q' in inject_settings:
                x = inj
            if 'k' in inject_settings:
                context = inj
            if 'v' in inject_settings:
                context_v = inj
q = self.to_q(x)
k = self.to_k(context)
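# A minimal sketch (assumption, not part of the commit) of the attn_bank
# structure this block reads and writes: 'block_map' maps a block index to
# {step_idx: cached activation}, filled on the forward pass and consumed on
# the reverse pass.
# attn_bank = {
#     "save_steps": 10,        # cache the last N forward steps
#     "inject_steps": 5,       # overwrite the first N reverse steps
#     "inject_settings": {"q", "k", "v"},  # which projections to replace
#     "block_map": {self.idx: {}},         # per-block step_idx -> tensor
# }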

View File

@ -112,6 +112,7 @@ class MD_ImgToVideo:
}),
"terminal": ("FLOAT", {
"default": 0.1,
"step": 0.01,
"description": "The terminal values of the sigmas after stretching."
}),
# ATTENTION OVERRIDE INPUTS
@ -192,7 +193,16 @@ class MD_ImgToVideo:
image_latent = image_latent['samples']
latent = latent['samples'].clone()
# Convert negative index to positive
if insert:
# Handle insertion
if index == 0:
@ -227,13 +237,19 @@ class MD_ImgToVideo:
return False
def attention_override(self, layers: str = "14"):
    try:
        return set(map(int, layers.split(',')))
    except ValueError:
        return set()
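# Example (hypothetical instance name): the parser now returns integer block
# indices, and any malformed entry falls back to an empty set.
# node.attention_override("14, 19, 25")  -> {14, 19, 25}
# node.attention_override("14, abc")     -> set()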
def apply_attention_override(self, model, scale, rescale, cfg, attention_override: set):
m = model.clone()

View File

@ -1,12 +1,27 @@
from io import BytesIO
import time
import folder_paths
from comfy.cli_args import args
import torch
from PIL import Image
from PIL.PngImagePlugin import PngInfo
import cairosvg
from lxml import etree
import numpy as np
import json
import os
import logging
# setup logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
WATERMARK = """
<svg width="256" height="256" viewBox="0 0 256 256" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M60.0859 196.8C65.9526 179.067 71.5526 161.667 76.8859 144.6C79.1526 137.4 81.4859 129.867 83.8859 122C86.2859 114.133 88.6859 106.333 91.0859 98.6C93.4859 90.8667 95.6859 83.4 97.6859 76.2C99.8193 69 101.686 62.3333 103.286 56.2C110.619 56.2 117.553 55.8 124.086 55C130.619 54.2 137.686 53.4667 145.286 52.8C144.886 55.7333 144.419 59.0667 143.886 62.8C143.486 66.4 142.953 70.2 142.286 74.2C141.753 78.2 141.153 82.3333 140.486 86.6C139.819 90.8667 139.019 96.3333 138.086 103C137.153 109.667 135.886 118 134.286 128H136.886C140.753 117.867 143.953 109.467 146.486 102.8C149.019 96 151.086 90.4667 152.686 86.2C154.286 81.9333 155.886 77.8 157.486 73.8C159.219 69.6667 160.819 65.8 162.286 62.2C163.886 58.4667 165.353 55.2 166.686 52.4C170.019 52.1333 173.153 51.8 176.086 51.4C179.019 51 181.953 50.6 184.886 50.2C187.819 49.6667 190.753 49.2 193.686 48.8C196.753 48.2667 200.086 47.6667 203.686 47C202.353 54.7333 201.086 62.6667 199.886 70.8C198.686 78.9333 197.619 87.0667 196.686 95.2C195.753 103.2 194.819 111.133 193.886 119C193.086 126.867 192.353 134.333 191.686 141.4C190.086 157.933 188.686 174.067 187.486 189.8L152.686 196C152.686 195.333 152.753 193.533 152.886 190.6C153.153 187.667 153.419 184.067 153.686 179.8C154.086 175.533 154.553 170.8 155.086 165.6C155.753 160.4 156.353 155.2 156.886 150C157.553 144.8 158.219 139.8 158.886 135C159.553 130.067 160.219 125.867 160.886 122.4H159.086C157.219 128 155.153 133.933 152.886 140.2C150.619 146.333 148.286 152.6 145.886 159C143.619 165.4 141.353 171.667 139.086 177.8C136.819 183.933 134.819 189.8 133.086 195.4C128.419 195.533 124.419 195.733 121.086 196C117.753 196.133 113.886 196.333 109.486 196.6L115.886 122.4H112.886C112.619 124.133 111.953 127.067 110.886 131.2C109.819 135.2 108.553 139.867 107.086 145.2C105.753 150.4 104.286 155.867 102.686 161.6C101.086 167.2 99.5526 172.467 98.0859 177.4C96.7526 182.2 95.6193 186.2 94.6859 189.4C93.7526 192.467 93.2193 194.2 93.0859 194.6L60.0859 196.8Z" fill="white"/>
</svg>
"""
WATERMARK_SIZE = 32
class MD_SaveAnimatedWEBP:
def __init__(self):
@ -40,44 +55,291 @@ class MD_SaveAnimatedWEBP:
OUTPUT_NODE = True
CATEGORY = "MemeDeck"
def save_images(self, images, fps, filename_prefix, lossless, quality, method, crf=None, motion_prompt=None, negative_prompt=None, img2vid_metadata=None, sampler_metadata=None):
    start_time = time.time()
    method = self.methods.get(method)
    filename_prefix += self.prefix_append
    full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0])
    results = []
    # Vectorized conversion to PIL images
    pil_images = [Image.fromarray(np.clip(255. * image.cpu().numpy(), 0, 255).astype(np.uint8)) for image in images]
    # Sample the watermark region once on the first frame and reuse the
    # measured brightness for the whole sequence
    first_image = pil_images[0]
    padding = 12
    x = first_image.width - WATERMARK_SIZE - padding
    y = first_image.height - WATERMARK_SIZE - padding
    first_image_background_brightness = self.analyze_background_brightness(first_image, x, y, WATERMARK_SIZE)
    watermarked_images = [self.add_watermark_to_image(img, first_image_background_brightness) for img in pil_images]
    metadata = pil_images[0].getexif()
    num_frames = len(pil_images)
    json_metadata = {
        "crf": crf,
        "motion_prompt": motion_prompt,
        "negative_prompt": negative_prompt,
        "img2vid_metadata": json.loads(img2vid_metadata),
        "sampler_metadata": json.loads(sampler_metadata),
    }
    # Optimized saving logic
    file = f"{filename}_{counter:05}_.webp"
    if num_frames == 1:  # single image, save once
        watermarked_images[0].save(os.path.join(full_output_folder, file), exif=metadata, lossless=lossless, quality=quality, method=method)
    else:  # multiple images, save as an animation
        watermarked_images[0].save(os.path.join(full_output_folder, file), save_all=True, duration=int(1000.0 / fps), append_images=watermarked_images[1:], exif=metadata, lossless=lossless, quality=quality, method=method)
    results.append({
        "filename": file,
        "subfolder": subfolder,
        "type": self.type,
    })
    counter += 1
    animated = num_frames != 1
    end_time = time.time()
    logger.info(f"Save images took: {end_time - start_time} seconds")
    # serialize so the worker can json.loads(metadata[0])
    return {"ui": {"images": results, "animated": (animated,), "metadata": (json.dumps(json_metadata),)}}
def add_watermark_to_image(self, img, background_brightness=None):
    """
    Adds a watermark to a single PIL Image.
    Args:
        img: A PIL Image object.
        background_brightness: Optional precomputed brightness; measured from
            the watermark region when omitted.
    Returns:
        A PIL Image object with the watermark added.
    """
    padding = 12
    x = img.width - WATERMARK_SIZE - padding
    y = img.height - WATERMARK_SIZE - padding
    if background_brightness is None:
        background_brightness = self.analyze_background_brightness(img, x, y, WATERMARK_SIZE)
    # Generate the watermark tinted for the measured background brightness
    watermark = self.generate_watermark(WATERMARK_SIZE, background_brightness)
    # Overlay the watermark (its alpha channel doubles as the paste mask)
    img.paste(watermark, (x, y), watermark)
    return img
def analyze_background_brightness(self, img, x, y, size):
    """
    Analyzes the average brightness of a region in the image.
    Args:
        img: A PIL Image object.
        x: The x-coordinate of the top-left corner of the region.
        y: The y-coordinate of the top-left corner of the region.
        size: The size of the region (square).
    Returns:
        The average brightness of the region, clamped to 0-255.
    """
    region = img.crop((x, y, x + size, y + size))
    pixels = np.array(region)
    # Rec. 601 luma, summed; dividing by 1000 approximates the mean over the
    # default 32x32 = 1024-pixel region (overshoots by ~2.4%, hence the clamp)
    total_brightness = np.sum(
        0.299 * pixels[:, :, 0] + 0.587 * pixels[:, :, 1] + 0.114 * pixels[:, :, 2]
    ) / 1000
    logger.debug(f"total_brightness: {total_brightness}")
    return max(0, min(255, total_brightness))
def generate_watermark(self, size, background_brightness):
    """
    Generates a watermark image from an SVG string.
    Args:
        size: The size of the watermark (square).
        background_brightness: The background brightness at the watermark position.
    Returns:
        A PIL Image object representing the watermark.
    """
    # Determine watermark color based on background brightness
    watermark_color = (0, 0, 0, 165) if background_brightness > 128 else (255, 255, 255, 165)
    # Parse the SVG string
    svg_tree = etree.fromstring(WATERMARK)
    # Find the path element and set its fill attribute
    path_element = svg_tree.find(".//{http://www.w3.org/2000/svg}path")
    if path_element is not None:
        r, g, b, a = watermark_color
        fill_color = f"rgba({r},{g},{b},{a/255})"  # convert to an rgba() string
        path_element.set("fill", fill_color)
    # Convert the modified SVG tree back to a string
    modified_svg = etree.tostring(svg_tree, encoding="unicode")
    # Render the modified SVG to a PNG image with a transparent background
    png_data = cairosvg.svg2png(
        bytestring=modified_svg,
        output_width=size,
        output_height=size,
        background_color="transparent"
    )
    # Convert the watermark to RGBA to handle transparency
    watermark_img = Image.open(BytesIO(png_data)).convert("RGBA")
    return watermark_img
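# Hypothetical usage (assumes a node instance): dark backgrounds get the
# white watermark at ~65% opacity, bright ones the black variant.
# wm = node.generate_watermark(WATERMARK_SIZE, background_brightness=40)   # white fill
# wm = node.generate_watermark(WATERMARK_SIZE, background_brightness=200)  # black fill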
class MD_VAEDecode:
@classmethod
def INPUT_TYPES(s):
return {
"required": {
"samples": ("LATENT", {"tooltip": "The latent to be decoded."}),
"vae": ("VAE", {"tooltip": "The VAE model used for decoding the latent."})
}
}
RETURN_TYPES = ("IMAGE",)
OUTPUT_TOOLTIPS = ("The decoded image.",)
FUNCTION = "decode"
CATEGORY = "latent"
DESCRIPTION = "Decodes latent images back into pixel space images."
def decode(self, vae, samples):
    start_time = time.time()
    # NOTE: profiling every decode adds measurable overhead; useful while
    # tuning, but worth gating behind a debug flag in production.
    with torch.profiler.profile(
        activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA],
        record_shapes=True,
        profile_memory=True,
    ) as prof:
        images = vae.decode(samples["samples"])
    print(prof.key_averages().table(sort_by="cuda_time_total"))  # print profiling results
    if len(images.shape) == 5:
        # flatten a 5D video batch into a flat batch of frames
        images = images.reshape(-1, images.shape[-3], images.shape[-2], images.shape[-1])
    end_time = time.time()
    print(f"VAE decoding time: {end_time - start_time:.4f} seconds")
    return (images,)
class MD_SaveMP4:

View File

@ -15,7 +15,7 @@ import torch
import subprocess
import torchvision.transforms as transforms
from .lib import image, utils
from .lib.image import pil2tensor
from .lib.image import pil2tensor, tensor2pil
import os
import logging
@ -32,9 +32,6 @@ class MD_LoadImageFromUrl:
"required": {
"url": (
"STRING",
{
"default": "https://media.memedeck.xyz/memes/user:08bdc8ed_6015_44f2_9808_7cb54051c666/35c95dfd_b186_4a40_9ef1_ac770f453706.jpeg"
},
),
}
}
@ -48,6 +45,9 @@ class MD_LoadImageFromUrl:
url = url.replace("'", "")
url = url.replace('"', '')
if url is None:
raise ValueError("URL is required")
img = Image.open(requests.get(url, stream=True).raw)
img = ImageOps.exif_transpose(img)
return (pil2tensor(img),)
@ -127,19 +127,38 @@ class MD_CompressAdjustNode:
"required": {
"image": ("IMAGE",),
"desired_crf": ("INT", {
"default": 25,
"default": 28,
"min": 0,
"max": 51,
"step": 1
}),
"width": ("INT", {
"default": 640,
"description": "The width of the video."
}),
"height": ("INT", {
"default": 640,
"description": "The height of the video."
}),
},
}
RETURN_TYPES = ("IMAGE", "INT")
RETURN_NAMES = ("adjusted_image", "crf")
RETURN_TYPES = ("IMAGE", "INT", "INT", "INT")
RETURN_NAMES = ("adjusted_image", "crf", "width", "height")
FUNCTION = "tensor_to_video_and_back"
CATEGORY = "MemeDeck"
def __init__(self):
# baseline values
self.ideal_blockiness = 600
self.ideal_edge_density = 12
self.ideal_color_variation = 10000
# weights
self.blockiness_weight = -0.006
self.edge_density_weight = 0.32
self.color_variation_weight = -0.00005
def tensor_to_int(self, tensor, bits):
    # scale to the 2**bits - 1 range; note np.clip returns a float array,
    # so callers must still cast (e.g. .astype(np.uint8) for bits=8)
    tensor = tensor.cpu().numpy() * (2**bits - 1)
    return np.clip(tensor, 0, 2**bits - 1)
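# Hypothetical usage sketch: quantize a [0, 1] float tensor to 8-bit levels,
# then cast, since tensor_to_int itself returns floats:
# frame8 = self.tensor_to_int(img_tensor, 8).astype(np.uint8)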
@ -176,8 +195,57 @@ class MD_CompressAdjustNode:
# 0 is the lowest clarity
# 100 is the highest clarity
return 100
def analyze_compression_artifacts(self, img, width=640, height=640):
    """
    Analyzes an image for potential compression artifacts.
    Args:
        img: A BGR image array (OpenCV format).
        width: Analysis width; the image is resized before scoring.
        height: Analysis height.
    Returns:
        dict: A dictionary containing metrics related to compression artifacts.
    """
    img = cv2.resize(img, (width, height))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Calculate blockiness (common in high compression)
    blockiness = cv2.Laplacian(gray, cv2.CV_64F).var()
    # Edge detection (blurring can indicate compression)
    edges = cv2.Canny(gray, 50, 150)
    edge_density = np.sum(edges) / (gray.shape[0] * gray.shape[1])
    # Color histogram analysis (color banding in low bitrate compression)
    hist = cv2.calcHist([img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
    color_variation = np.std(hist)
    return {
        "blockiness": blockiness,
        "edge_density": edge_density,
        "color_variation": color_variation
    }
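# Quick sanity sketch (hypothetical input): a flat gray frame has near-zero
# Laplacian variance and edge density, so its metrics sit far from the
# baselines and the derived CRF moves accordingly before clamping.
# flat = np.full((640, 640, 3), 127, dtype=np.uint8)
# metrics = node.analyze_compression_artifacts(flat)
# metrics["blockiness"] ~ 0.0, metrics["edge_density"] ~ 0.0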
def calculate_crf(self, analysis_results, ideal_blockiness, ideal_edge_density,
                  ideal_color_variation, blockiness_weight,
                  edge_density_weight, color_variation_weight):
    """
    Calculates the target CRF based on analysis results and weights.
    """
    target_crf = 28 + (blockiness_weight * (analysis_results["blockiness"] - ideal_blockiness)) \
        + (edge_density_weight * (analysis_results["edge_density"] - ideal_edge_density)) \
        + (color_variation_weight * (analysis_results["color_variation"] - ideal_color_variation))
    # Clamp CRF to a reasonable range
    target_crf = max(18, min(35, target_crf))
    return round(target_crf, 2)
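# Worked example (hypothetical inputs) against the baselines set in __init__
# (600, 12, 10000) and weights (-0.006, 0.32, -0.00005):
#   blockiness=800, edge_density=10, color_variation=12000
#   28 + (-0.006 * 200) + (0.32 * -2) + (-0.00005 * 2000)
#     = 28 - 1.2 - 0.64 - 0.1 = 26.06, within the [18, 35] clamp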
def tensor_to_video_and_back(self, image, desired_crf=28, width=832, height=832):
temp_dir = "temp_video"
filename = f"frame_{time.time()}".split('.')[0]
os.makedirs(temp_dir, exist_ok=True)
@ -219,6 +287,15 @@ class MD_CompressAdjustNode:
# default bitrate and frame rate
frame_rate = 25
image_cv2 = cv2.cvtColor(np.array(tensor2pil(image)), cv2.COLOR_RGB2BGR)
# calculate the crf based on the image
analysis_results = self.analyze_compression_artifacts(image_cv2, width=width, height=height)
desired_crf = self.calculate_crf(analysis_results, self.ideal_blockiness, self.ideal_edge_density,
self.ideal_color_variation, self.blockiness_weight,
self.edge_density_weight, self.color_variation_weight)
logger.info(f"detected crf: {desired_crf}")
args = [
utils.ffmpeg_path,
"-v", "error",
@ -231,7 +308,6 @@ class MD_CompressAdjustNode:
"-c:v", "libx264",
"-pix_fmt", "yuv420p",
"-crf", str(desired_crf),
]
video_path = os.path.abspath(str(Path(temp_dir) / f"{filename}.mp4"))
@ -273,4 +349,5 @@ class MD_CompressAdjustNode:
# convert the frame to a PIL image for ComfyUI
frame = Image.fromarray(frame)
frame_tensor = pil2tensor(frame)
return (frame_tensor, desired_crf, width, height)
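# Round-trip summary (sketch): the node measures artifact metrics on the
# input frame, derives a CRF via calculate_crf, encodes the frame to H.264
# at that CRF with ffmpeg, decodes it back, and returns the re-compressed
# frame together with (crf, width, height).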

View File

@ -60,6 +60,7 @@ class MemedeckWorker:
self.queue_name = os.getenv('QUEUE_NAME') or 'generic-queue'
self.api_url = os.getenv('API_ADDRESS') or 'http://0.0.0.0:8079/v2'
self.api_key = os.getenv('API_KEY') or 'eb46e20a-cc25-4ed4-a39b-f47ca8ff3383'
self.is_dev = os.getenv('IS_DEV') or False
self.training_only = os.getenv('TRAINING_ONLY') or False
self.video_gen_only = False
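# NOTE (assumption, not in the commit): os.getenv returns a string, so
# IS_DEV="false" or TRAINING_ONLY="false" would still be truthy here; a
# stricter parse would be e.g.
# self.is_dev = os.getenv('IS_DEV', '').lower() in ('1', 'true', 'yes')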
@ -163,6 +164,7 @@ class MemedeckWorker:
end_node_id = None
video_source_image_id = None
output_asset_id = None # output asset id
# Find the end_node_id
if not self.video_gen_only and not self.training_only:
@ -173,6 +175,7 @@ class MemedeckWorker:
elif self.video_gen_only:
end_node_id = payload['end_node_id']
video_source_image_id = payload['image_id']
output_asset_id = payload['output_asset_id']
elif self.training_only:
end_node_id = "130"
@ -216,6 +219,7 @@ class MemedeckWorker:
# video data
"image_id": video_source_image_id,
"user_id": user_id,
"output_asset_id": output_asset_id,
}
if valid[0]:
@ -404,12 +408,16 @@ class MemedeckWorker:
"image_id": task['image_id'],
"user_id": task['user_id'],
"status": "generating",
"output_asset_id": task['output_asset_id'],
"progress": percentage * 0.9 # 90% of the progress is the gen step, 10% is the video encode step
})
if event == "executed":
if data['node'] == task['end_node_id']:
filename = data['output']['images'][0]['filename']
metadata = json.loads(data['output']['metadata'][0])
self.logger.info(f"[memedeck]: video gen completed {metadata}")
current_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(current_dir, "output", filename)
@ -431,7 +439,9 @@ class MemedeckWorker:
"image_id": task['image_id'],
"user_id": task['user_id'],
"status": "completed",
"output_video_url": url
"output_video_url": url,
"output_asset_id": task['output_asset_id'],
"metadata": metadata
})
# video gen task is done
del self.tasks_by_ws_id[sid]
@ -513,14 +523,37 @@ class MemedeckWorker:
if task['workflow'] == 'video_gen':
api_endpoint = '/generation/video/update'
# self.logger.info(f"[memedeck]: sending to api: {api_endpoint}")
# self.logger.info(f"[memedeck]: data: {data}")
try:
    # NOTE: this request is not sending properly for faceswap
    post_func = partial(requests.post, f"{self.api_url}{api_endpoint}", json=data)
    await self.loop.run_in_executor(None, post_func)
except Exception as e:
    if not self.is_dev:
        self.logger.info(f"[memedeck]: error sending to api: {e}")
if self.is_dev:
    # in dev, mirror the update to local API instances on other ports
    try:
        post_func_2 = partial(requests.post, f"http://0.0.0.0:9091/v2{api_endpoint}", json=data)
        await self.loop.run_in_executor(None, post_func_2)
    except Exception as e:
        self.logger.info(f"[memedeck]: error sending to dev api (9091): {e}")
    try:
        post_func_3 = partial(requests.post, f"http://0.0.0.0:9092/v2{api_endpoint}", json=data)
        await self.loop.run_in_executor(None, post_func_3)
    except Exception as e:
        self.logger.info(f"[memedeck]: error sending to dev api (9092): {e}")
# --------------------------------------------------------------------------
# MemedeckAzureStorage

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,247 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"from PIL import Image, ImageDraw\n",
"import imageio\n",
"import numpy as np\n",
"from io import BytesIO\n",
"import cairosvg\n",
"from lxml import etree\n",
"\n",
"# Assuming WATERMARK and WATERMARK_SIZE are defined elsewhere, e.g.,\n",
"WATERMARK = \"\"\"\n",
"<svg width=\"256\" height=\"256\" viewBox=\"0 0 256 256\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\">\n",
" <path d=\"M60.0859 196.8C65.9526 179.067 71.5526 161.667 76.8859 144.6C79.1526 137.4 81.4859 129.867 83.8859 122C86.2859 114.133 88.6859 106.333 91.0859 98.6C93.4859 90.8667 95.6859 83.4 97.6859 76.2C99.8193 69 101.686 62.3333 103.286 56.2C110.619 56.2 117.553 55.8 124.086 55C130.619 54.2 137.686 53.4667 145.286 52.8C144.886 55.7333 144.419 59.0667 143.886 62.8C143.486 66.4 142.953 70.2 142.286 74.2C141.753 78.2 141.153 82.3333 140.486 86.6C139.819 90.8667 139.019 96.3333 138.086 103C137.153 109.667 135.886 118 134.286 128H136.886C140.753 117.867 143.953 109.467 146.486 102.8C149.019 96 151.086 90.4667 152.686 86.2C154.286 81.9333 155.886 77.8 157.486 73.8C159.219 69.6667 160.819 65.8 162.286 62.2C163.886 58.4667 165.353 55.2 166.686 52.4C170.019 52.1333 173.153 51.8 176.086 51.4C179.019 51 181.953 50.6 184.886 50.2C187.819 49.6667 190.753 49.2 193.686 48.8C196.753 48.2667 200.086 47.6667 203.686 47C202.353 54.7333 201.086 62.6667 199.886 70.8C198.686 78.9333 197.619 87.0667 196.686 95.2C195.753 103.2 194.819 111.133 193.886 119C193.086 126.867 192.353 134.333 191.686 141.4C190.086 157.933 188.686 174.067 187.486 189.8L152.686 196C152.686 195.333 152.753 193.533 152.886 190.6C153.153 187.667 153.419 184.067 153.686 179.8C154.086 175.533 154.553 170.8 155.086 165.6C155.753 160.4 156.353 155.2 156.886 150C157.553 144.8 158.219 139.8 158.886 135C159.553 130.067 160.219 125.867 160.886 122.4H159.086C157.219 128 155.153 133.933 152.886 140.2C150.619 146.333 148.286 152.6 145.886 159C143.619 165.4 141.353 171.667 139.086 177.8C136.819 183.933 134.819 189.8 133.086 195.4C128.419 195.533 124.419 195.733 121.086 196C117.753 196.133 113.886 196.333 109.486 196.6L115.886 122.4H112.886C112.619 124.133 111.953 127.067 110.886 131.2C109.819 135.2 108.553 139.867 107.086 145.2C105.753 150.4 104.286 155.867 102.686 161.6C101.086 167.2 99.5526 172.467 98.0859 177.4C96.7526 182.2 95.6193 186.2 94.6859 189.4C93.7526 192.467 93.2193 194.2 93.0859 194.6L60.0859 196.8Z\" fill=\"white\"/>\n",
"</svg>\n",
"\"\"\"\n",
"WATERMARK_SIZE = 40\n",
"\n",
"# def add_watermark_to_image_sequence(pil_images, base_blob_name):\n",
"# \"\"\"\n",
"# Adds a watermark to a sequence of PIL images.\n",
"\n",
"# Args:\n",
"# pil_images: A list of PIL Image objects.\n",
"# base_blob_name: The base name of the blob (used for naming the output).\n",
"\n",
"# Returns:\n",
"# A list of bytes objects representing the watermarked images.\n",
"# \"\"\"\n",
"\n",
"# watermarked_images = []\n",
"# for img in pil_images:\n",
"# img = add_watermark_to_image(img)\n",
" \n",
"# # Save the image to a bytes buffer\n",
"# buffer = BytesIO()\n",
"# img.save(buffer, format=\"JPEG\")\n",
"# watermarked_images.append(buffer.getvalue())\n",
"\n",
"# return watermarked_images\n",
" \n",
" \n",
"def add_watermark_to_image_sequence(pil_images, base_blob_name):\n",
" \"\"\"\n",
" Adds a watermark to a sequence of PIL images and saves as a WEBP animation.\n",
"\n",
" Args:\n",
" pil_images: A list of PIL Image objects.\n",
" base_blob_name: The base name of the blob (used for naming the output).\n",
"\n",
" Returns:\n",
" The filename of the saved WEBP animation.\n",
" \"\"\"\n",
"\n",
" watermarked_images = []\n",
" for img in pil_images:\n",
" img = add_watermark_to_image(img)\n",
" watermarked_images.append(img)\n",
"\n",
" # Save the images as a WEBP animation\n",
" output_filename = f\"{base_blob_name.split('.')[0]}_watermarked.webp\"\n",
" imageio.mimsave(output_filename, watermarked_images, fps=24) # Adjust fps as needed\n",
"\n",
" return output_filename\n",
"\n",
"def add_watermark_to_image(img):\n",
" \"\"\"\n",
" Adds a watermark to a single PIL Image.\n",
"\n",
" Args:\n",
" img: A PIL Image object.\n",
"\n",
" Returns:\n",
" A PIL Image object with the watermark added.\n",
" \"\"\"\n",
"\n",
" # Calculate position (bottom right corner)\n",
" padding = 12\n",
" x = img.width - WATERMARK_SIZE - padding\n",
" y = img.height - WATERMARK_SIZE - padding\n",
"\n",
" background_brightness = analyze_background_brightness(img, x, y, WATERMARK_SIZE)\n",
" print(f\"background_brightness: {background_brightness}\")\n",
"\n",
" # Generate watermark image (replace this with your actual watermark generation)\n",
" watermark = generate_watermark(WATERMARK_SIZE, background_brightness)\n",
"\n",
" # Overlay the watermark\n",
" img.paste(watermark, (x, y), watermark)\n",
"\n",
" return img\n",
"\n",
"\n",
"def analyze_background_brightness(img, x, y, size):\n",
" \"\"\"\n",
" Analyzes the average brightness of a region in the image.\n",
"\n",
" Args:\n",
" img: A PIL Image object.\n",
" x: The x-coordinate of the top-left corner of the region.\n",
" y: The y-coordinate of the top-left corner of the region.\n",
" size: The size of the region (square).\n",
"\n",
" Returns:\n",
" The average brightness of the region as an integer.\n",
" \"\"\"\n",
" region = img.crop((x, y, x + size, y + size))\n",
" pixels = np.array(region)\n",
" total_brightness = np.sum(\n",
" 0.299 * pixels[:, :, 0] + 0.587 * pixels[:, :, 1] + 0.114 * pixels[:, :, 2]\n",
" ) / 1000\n",
" print(f\"total_brightness: {total_brightness}\")\n",
" return max(0, min(255, total_brightness)) \n",
"\n",
"def generate_watermark(size, background_brightness):\n",
" \"\"\"\n",
" Generates a watermark image from an SVG string.\n",
"\n",
" Args:\n",
" size: The size of the watermark (square).\n",
" background_brightness: The background brightness at the watermark position.\n",
"\n",
" Returns:\n",
" A PIL Image object representing the watermark.\n",
" \"\"\"\n",
"\n",
" # Determine watermark color based on background brightness\n",
" watermark_color = (0, 0, 0, 165) if background_brightness > 128 else (255, 255, 255, 165)\n",
"\n",
" # Parse the SVG string\n",
" svg_tree = etree.fromstring(WATERMARK)\n",
"\n",
" # Find the path element and set its fill attribute\n",
" path_element = svg_tree.find(\".//{http://www.w3.org/2000/svg}path\")\n",
" if path_element is not None:\n",
" r, g, b, a = watermark_color\n",
" fill_color = f\"rgba({r},{g},{b},{a/255})\" # Convert to rgba string\n",
" path_element.set(\"fill\", fill_color)\n",
"\n",
" # Convert the modified SVG tree back to a string\n",
" modified_svg = etree.tostring(svg_tree, encoding=\"unicode\")\n",
"\n",
" # Render the modified SVG to a PNG image with a transparent background\n",
" png_data = cairosvg.svg2png(\n",
" bytestring=modified_svg,\n",
" output_width=size,\n",
" output_height=size,\n",
" background_color=\"transparent\"\n",
" )\n",
" watermark_img = Image.open(BytesIO(png_data))\n",
"\n",
" # Convert the watermark to RGBA to handle transparency\n",
" watermark_img = watermark_img.convert(\"RGBA\")\n",
"\n",
" return watermark_img"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'numpy.ndarray' object has no attribute 'width'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[35], line 15\u001b[0m\n\u001b[1;32m 13\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# Add the watermark\u001b[39;00m\n\u001b[0;32m---> 15\u001b[0m watermarked_img \u001b[38;5;241m=\u001b[39m \u001b[43madd_watermark_to_image_sequence\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvideo_72b0b56c_0c40_40ff_89d2_2dd1cb01a163_2fbca50b_3e1b_42e6_9391_c2b3efa091ad\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m end_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTime taken: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mend_time\u001b[38;5;250m \u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;250m \u001b[39mstart_time\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m seconds\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"Cell \u001b[0;32mIn[34], line 54\u001b[0m, in \u001b[0;36madd_watermark_to_image_sequence\u001b[0;34m(pil_images, base_blob_name)\u001b[0m\n\u001b[1;32m 52\u001b[0m watermarked_images \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m img \u001b[38;5;129;01min\u001b[39;00m pil_images:\n\u001b[0;32m---> 54\u001b[0m img \u001b[38;5;241m=\u001b[39m \u001b[43madd_watermark_to_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 55\u001b[0m watermarked_images\u001b[38;5;241m.\u001b[39mappend(img)\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# Save the images as a WEBP animation\u001b[39;00m\n",
"Cell \u001b[0;32mIn[34], line 77\u001b[0m, in \u001b[0;36madd_watermark_to_image\u001b[0;34m(img)\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;66;03m# Calculate position (bottom right corner)\u001b[39;00m\n\u001b[1;32m 76\u001b[0m padding \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m12\u001b[39m\n\u001b[0;32m---> 77\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[43mimg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwidth\u001b[49m \u001b[38;5;241m-\u001b[39m WATERMARK_SIZE \u001b[38;5;241m-\u001b[39m padding\n\u001b[1;32m 78\u001b[0m y \u001b[38;5;241m=\u001b[39m img\u001b[38;5;241m.\u001b[39mheight \u001b[38;5;241m-\u001b[39m WATERMARK_SIZE \u001b[38;5;241m-\u001b[39m padding\n\u001b[1;32m 80\u001b[0m background_brightness \u001b[38;5;241m=\u001b[39m analyze_background_brightness(img, x, y, WATERMARK_SIZE)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'width'"
]
}
],
"source": [
"# Load the webp image using the requests library\n",
"import time\n",
"import requests\n",
"\n",
"\n",
"image_url = \"https://media.memedeck.xyz/memes/user:d38ee417_a500_4cd4_a455_432c3cbb61fe/video_gen/video_72b0b56c_0c40_40ff_89d2_2dd1cb01a163_2fbca50b_3e1b_42e6_9391_c2b3efa091ad.webp\" # Example webp image URL\n",
"response = requests.get(image_url, stream=True)\n",
"response.raise_for_status() # Raise an exception for bad status codes\n",
"\n",
"# Open the image using PIL\n",
"img = Image.open(response.raw)\n",
"\n",
"# img_to_tensor = img_to_tensor(img)\n",
"\n",
"start_time = time.time()\n",
"# Add the watermark\n",
"\n",
"watermarked_img = add_watermark_to_image_sequence(img, 'video_72b0b56c_0c40_40ff_89d2_2dd1cb01a163_2fbca50b_3e1b_42e6_9391_c2b3efa091ad')\n",
"end_time = time.time()\n",
"print(f\"Time taken: {end_time - start_time} seconds\")\n",
"\n",
"# Save the watermarked image (optional)\n",
"# watermarked_img.save(\"watermarked_image.webp\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: lxml in /home/holium/ComfyUI/comfy-venv/lib/python3.12/site-packages (5.3.0)\n"
]
}
],
"source": [
"!pip install lxml"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "comfy-venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -34,3 +34,4 @@ aio_pika
torchao
insightface
onnxruntime-gpu
cairosvg