Throw away alpha channel in CLIP vision preprocessor (#7769)

This saves users from having to explicitly discard the alpha channel before passing an image to the CLIP vision preprocessor.
This commit is contained in:
thot experiment 2025-04-23 18:28:36 -07:00 committed by GitHub
parent 11b68ebd22
commit e2eed9eb9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -18,6 +18,7 @@ class Output:
setattr(self, key, item)
def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711], crop=True):
image = image[:, :, :, :3] if image.shape[3] > 3 else image
mean = torch.tensor(mean, device=image.device, dtype=image.dtype)
std = torch.tensor(std, device=image.device, dtype=image.dtype)
image = image.movedim(-1, 1)