Add a --force-fp32 argument to force fp32 for debugging.

This commit is contained in:
comfyanonymous 2023-04-07 00:27:54 -04:00
parent bceccca0e5
commit 64557d6781
2 changed files with 9 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ parser.add_argument("--extra-model-paths-config", type=str, default=None, metava
 parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.")
+parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).")
 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.")

View File

@@ -69,6 +69,11 @@ elif args.novram:
 elif args.highvram:
     vram_state = VRAMState.HIGH_VRAM
+FORCE_FP32 = False
+if args.force_fp32:
+    print("Forcing FP32, if this improves things please report it.")
+    FORCE_FP32 = True
 if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM):
     try:
@@ -273,6 +278,9 @@ def mps_mode():
 def should_use_fp16():
     global xpu_available
+    if FORCE_FP32:
+        return False
     if cpu_mode() or mps_mode() or xpu_available:
         return False #TODO ?