Merge remote-tracking branch 'origin/master' into feature/custom_workflow_templates

bezo97 2024-12-27 20:44:09 +01:00
commit 1ea319ee07
5 changed files with 66 additions and 6 deletions

View File

@@ -17,7 +17,7 @@ jobs:
path: "ComfyUI"
- uses: actions/setup-python@v4
with:
python-version: '3.8'
python-version: '3.9'
- name: Install requirements
run: |
python -m pip install --upgrade pip

View File

@@ -224,6 +224,16 @@ You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS ve
```pip install torch-directml``` Then you can launch ComfyUI with: ```python main.py --directml```
#### Ascend NPUs
For models compatible with the Ascend Extension for PyTorch (torch_npu), first ensure your environment meets the prerequisites outlined on the [installation](https://ascend.github.io/docs/sources/ascend/quick_install.html) page. Here's a step-by-step guide tailored to your platform and installation method:
1. If necessary, begin by installing the recommended (or newer) Linux kernel version specified on the torch-npu Installation page.
2. Proceed with the installation of Ascend Basekit, which includes the driver, firmware, and CANN, following the instructions provided for your specific platform.
3. Next, install the necessary packages for torch-npu by adhering to the platform-specific instructions on the [Installation](https://ascend.github.io/docs/sources/pytorch/install.html#pytorch) page.
4. Finally, follow the [ComfyUI manual installation](#manual-install-windows-linux) guide for Linux. Once all components are installed, you can run ComfyUI as described earlier; see the quick sanity-check sketch just below.
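As a quick sanity check before launching, the following minimal sketch (assuming `torch` and `torch_npu` are installed per the steps above) confirms that PyTorch can see the NPU; it uses the same `torch.npu` calls that the code in this commit relies on for device detection:

```python
# Minimal sanity check; assumes torch and torch_npu were installed following the steps above.
import torch
import torch_npu  # noqa: F401  # importing this registers the "npu" backend with PyTorch

if torch.npu.is_available():
    device = torch.device("npu", torch.npu.current_device())
    print("Ascend NPU detected:", torch.npu.get_device_name(device))
else:
    print("torch_npu imported, but no NPU device is available.")
```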
# Running
```python main.py```

View File

@@ -86,6 +86,13 @@ try:
except:
pass
try:
import torch_npu # noqa: F401
_ = torch.npu.device_count()
npu_available = torch.npu.is_available()
except:
npu_available = False
if args.cpu:
cpu_state = CPUState.CPU
@@ -97,6 +104,12 @@ def is_intel_xpu():
return True
return False
def is_ascend_npu():
global npu_available
if npu_available:
return True
return False
def get_torch_device():
global directml_enabled
global cpu_state
@@ -110,6 +123,8 @@ def get_torch_device():
else:
if is_intel_xpu():
return torch.device("xpu", torch.xpu.current_device())
elif is_ascend_npu():
return torch.device("npu", torch.npu.current_device())
else:
return torch.device(torch.cuda.current_device())
@@ -130,6 +145,12 @@ def get_total_memory(dev=None, torch_total_too=False):
mem_reserved = stats['reserved_bytes.all.current']
mem_total_torch = mem_reserved
mem_total = torch.xpu.get_device_properties(dev).total_memory
elif is_ascend_npu():
stats = torch.npu.memory_stats(dev)
mem_reserved = stats['reserved_bytes.all.current']
_, mem_total_npu = torch.npu.mem_get_info(dev)
mem_total_torch = mem_reserved
mem_total = mem_total_npu
else:
stats = torch.cuda.memory_stats(dev)
mem_reserved = stats['reserved_bytes.all.current']
@@ -209,7 +230,7 @@ try:
if int(torch_version[0]) >= 2:
if ENABLE_PYTORCH_ATTENTION == False and args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
ENABLE_PYTORCH_ATTENTION = True
if is_intel_xpu():
if is_intel_xpu() or is_ascend_npu():
if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
ENABLE_PYTORCH_ATTENTION = True
except:
@@ -274,6 +295,8 @@ def get_torch_device_name(device):
return "{}".format(device.type)
elif is_intel_xpu():
return "{} {}".format(device, torch.xpu.get_device_name(device))
elif is_ascend_npu():
return "{} {}".format(device, torch.npu.get_device_name(device))
else:
return "CUDA {}: {}".format(device, torch.cuda.get_device_name(device))
@@ -860,6 +883,8 @@ def xformers_enabled():
return False
if is_intel_xpu():
return False
if is_ascend_npu():
return False
if directml_enabled:
return False
return XFORMERS_IS_AVAILABLE
@@ -884,6 +909,8 @@ def pytorch_attention_flash_attention():
return True
if is_intel_xpu():
return True
if is_ascend_npu():
return True
return False
def mac_version():
@@ -923,6 +950,13 @@ def get_free_memory(dev=None, torch_free_too=False):
mem_free_torch = mem_reserved - mem_active
mem_free_xpu = torch.xpu.get_device_properties(dev).total_memory - mem_reserved
mem_free_total = mem_free_xpu + mem_free_torch
elif is_ascend_npu():
stats = torch.npu.memory_stats(dev)
mem_active = stats['active_bytes.all.current']
mem_reserved = stats['reserved_bytes.all.current']
mem_free_npu, _ = torch.npu.mem_get_info(dev)
mem_free_torch = mem_reserved - mem_active
mem_free_total = mem_free_npu + mem_free_torch
else:
stats = torch.cuda.memory_stats(dev)
mem_active = stats['active_bytes.all.current']
@@ -984,6 +1018,9 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
if is_intel_xpu():
return True
if is_ascend_npu():
return True
if torch.version.hip:
return True
@@ -1081,6 +1118,8 @@ def soft_empty_cache(force=False):
torch.mps.empty_cache()
elif is_intel_xpu():
torch.xpu.empty_cache()
elif is_ascend_npu():
torch.npu.empty_cache()
elif torch.cuda.is_available():
if force or is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda
torch.cuda.empty_cache()
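Taken together, these hunks route Ascend devices through the same helpers that already handle CUDA and Intel XPU. A minimal usage sketch, assuming the model management module is importable as `comfy.model_management` (the module path is not shown in this diff):

```python
# Hedged usage sketch of the helpers touched above, on a machine with torch_npu installed.
import comfy.model_management as mm  # assumed import path; not shown in the diff

device = mm.get_torch_device()           # torch.device("npu", ...) when an Ascend NPU is detected
print(mm.get_torch_device_name(device))  # formats the device with torch.npu.get_device_name()

# get_free_memory(dev, torch_free_too=True) returns (total free, free inside torch's reserved pool)
free_total, free_torch = mm.get_free_memory(device, torch_free_too=True)
print("free bytes:", free_total, "reserved-but-inactive torch bytes:", free_torch)

mm.soft_empty_cache()                    # dispatches to torch.npu.empty_cache() on NPU
```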

View File

@@ -572,13 +572,20 @@ class VAE:
elif dims == 2:
samples = self.encode_tiled_(pixel_samples, **args)
elif dims == 3:
if tile_t is not None:
tile_t_latent = max(2, self.downscale_ratio[0](tile_t))
else:
tile_t_latent = 9999
args["tile_t"] = self.upscale_ratio[0](tile_t_latent)
if overlap_t is None:
args["overlap"] = (1, overlap, overlap)
else:
args["overlap"] = (overlap_t, overlap, overlap)
if tile_t is not None:
args["tile_t"] = tile_t
samples = self.encode_tiled_3d(pixel_samples, **args)
args["overlap"] = (self.upscale_ratio[0](max(1, min(tile_t_latent // 2, self.downscale_ratio[0](overlap_t)))), overlap, overlap)
maximum = pixel_samples.shape[2]
maximum = self.upscale_ratio[0](self.downscale_ratio[0](maximum))
samples = self.encode_tiled_3d(pixel_samples[:,:,:maximum], **args)
return samples

View File

@@ -608,6 +608,8 @@ class PixArtAlpha(supported_models_base.BASE):
unet_extra_config = {}
latent_format = latent_formats.SD15
memory_usage_factor = 0.5
vae_key_prefix = ["vae."]
text_encoder_key_prefix = ["text_encoders."]
@@ -640,6 +642,8 @@ class HunyuanDiT(supported_models_base.BASE):
latent_format = latent_formats.SDXL
memory_usage_factor = 1.3
vae_key_prefix = ["vae."]
text_encoder_key_prefix = ["text_encoders."]