From 09f4f838a420ee1d705a5ace4499ff6d96f4083f Mon Sep 17 00:00:00 2001
From: Marc-Andre Ferland
Date: Sun, 25 Dec 2022 19:58:18 -0500
Subject: [PATCH 1/4] Add a check on ATTN_XFORMERS to disable xformers even if installed.

---
 ldm/modules/attention.py              | 13 ++++++-------
 ldm/modules/diffusionmodules/model.py |  3 ++-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py
index 509cd8737..97596745c 100644
--- a/ldm/modules/attention.py
+++ b/ldm/modules/attention.py
@@ -8,18 +8,17 @@
 from ldm.modules.diffusionmodules.util import checkpoint
 
 
+# CrossAttn precision handling
+import os
+_ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32")
 try:
     import xformers
     import xformers.ops
-    XFORMERS_IS_AVAILBLE = True
+    XFORMERS_IS_AVAILBLE = os.environ.get("ATTN_XFORMERS", "enabled") == "enabled"
 except:
     XFORMERS_IS_AVAILBLE = False
 
-# CrossAttn precision handling
-import os
-_ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32")
-
 def exists(val):
     return val is not None
 
 
@@ -177,9 +176,9 @@ def forward(self, x, context=None, mask=None):
                 sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
         else:
             sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
-        
+
         del q, k
-    
+
         if exists(mask):
             mask = rearrange(mask, 'b ... -> b (...)')
             max_neg_value = -torch.finfo(sim.dtype).max
diff --git a/ldm/modules/diffusionmodules/model.py b/ldm/modules/diffusionmodules/model.py
index b089eebbe..1c3add189 100644
--- a/ldm/modules/diffusionmodules/model.py
+++ b/ldm/modules/diffusionmodules/model.py
@@ -9,9 +9,10 @@
 from ldm.modules.attention import MemoryEfficientCrossAttention
 
 try:
+    import os
     import xformers
     import xformers.ops
-    XFORMERS_IS_AVAILBLE = True
+    XFORMERS_IS_AVAILBLE = os.environ.get("ATTN_XFORMERS", "enabled") == "enabled"
 except:
     XFORMERS_IS_AVAILBLE = False
     print("No module 'xformers'. Proceeding without it.")

From 16bb247e6043440d1f1ddb83fbf53c25f1e4e602 Mon Sep 17 00:00:00 2001
From: Marc-Andre Ferland
Date: Sun, 25 Dec 2022 20:02:22 -0500
Subject: [PATCH 2/4] 'type' isn't defined; replaced by 'attn_type'

Fix bug when 'attn_kwargs' is None.
---
 ldm/modules/diffusionmodules/model.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ldm/modules/diffusionmodules/model.py b/ldm/modules/diffusionmodules/model.py
index 1c3add189..b022954d6 100644
--- a/ldm/modules/diffusionmodules/model.py
+++ b/ldm/modules/diffusionmodules/model.py
@@ -289,8 +289,11 @@ def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None):
     elif attn_type == "vanilla-xformers":
         print(f"building MemoryEfficientAttnBlock with {in_channels} in_channels...")
         return MemoryEfficientAttnBlock(in_channels)
-    elif type == "memory-efficient-cross-attn":
-        attn_kwargs["query_dim"] = in_channels
+    elif attn_type == "memory-efficient-cross-attn":
+        if attn_kwargs is None:
+            attn_kwargs = {"query_dim": in_channels}
+        else:
+            attn_kwargs["query_dim"] = in_channels
         return MemoryEfficientCrossAttentionWrapper(**attn_kwargs)
     elif attn_type == "none":
         return nn.Identity(in_channels)

From 589b5f96a50faef3c8caa5a490a4a5ee95c9d863 Mon Sep 17 00:00:00 2001
From: Marc-Andre Ferland
Date: Sun, 25 Dec 2022 20:03:21 -0500
Subject: [PATCH 3/4] Thanks @Dango233 for this improvement.

MemoryEfficientAttentionCutlassOp
---
 ldm/modules/attention.py              | 2 +-
 ldm/modules/diffusionmodules/model.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py
index 97596745c..4ad6ce63c 100644
--- a/ldm/modules/attention.py
+++ b/ldm/modules/attention.py
@@ -210,7 +210,7 @@ def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.
         self.to_v = nn.Linear(context_dim, inner_dim, bias=False)
 
         self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout))
-        self.attention_op: Optional[Any] = None
+        self.attention_op: Optional[Any] = xformers.ops.MemoryEfficientAttentionCutlassOp if hasattr(xformers.ops, "MemoryEfficientAttentionCutlassOp") else None
 
     def forward(self, x, context=None, mask=None):
         q = self.to_q(x)
diff --git a/ldm/modules/diffusionmodules/model.py b/ldm/modules/diffusionmodules/model.py
index b022954d6..06c58bc25 100644
--- a/ldm/modules/diffusionmodules/model.py
+++ b/ldm/modules/diffusionmodules/model.py
@@ -235,7 +235,7 @@ def __init__(self, in_channels):
                                         kernel_size=1,
                                         stride=1,
                                         padding=0)
-        self.attention_op: Optional[Any] = None
+        self.attention_op: Optional[Any] = xformers.ops.MemoryEfficientAttentionCutlassOp if hasattr(xformers.ops, "MemoryEfficientAttentionCutlassOp") else None
 
     def forward(self, x):
         h_ = x

From 6b4e5e7ad78a423dee0d187ba61e1206b76d787d Mon Sep 17 00:00:00 2001
From: Marc-Andre Ferland
Date: Sun, 25 Dec 2022 20:06:37 -0500
Subject: [PATCH 4/4] Thanks @uservar for this improvement.

Use open_clip.create_model to only get the part needed.
---
 ldm/modules/encoders/modules.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py
index 4edd5496b..07dd7116f 100644
--- a/ldm/modules/encoders/modules.py
+++ b/ldm/modules/encoders/modules.py
@@ -144,9 +144,8 @@ def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cuda",
                  freeze=True, layer="last"):
         super().__init__()
         assert layer in self.LAYERS
-        model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version)
-        del model.visual
-        self.model = model
+        self.model = open_clip.create_model(arch, device=torch.device('cpu'), pretrained=version)
+        del self.model.visual
 
         self.device = device
         self.max_length = max_length
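
Note on patch 1: both environment variables are read once, at import time, so they must be set before ldm.modules.attention (or ldm.modules.diffusionmodules.model) is first imported. A minimal usage sketch, assuming the ldm package from this repository is importable; the values shown are only examples:

    import os

    # Any value other than "enabled" turns the xformers code paths off,
    # even when the xformers package is installed.
    os.environ["ATTN_XFORMERS"] = "disabled"
    os.environ["ATTN_PRECISION"] = "fp32"   # existing knob, shown for comparison

    from ldm.modules import attention

    print(attention.XFORMERS_IS_AVAILBLE)   # False with the patch applied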
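
Note on patch 3: the hasattr guard only pins the Cutlass op when the installed xformers build actually exposes MemoryEfficientAttentionCutlassOp; otherwise attention_op stays None and xformers chooses an implementation on its own. A rough, self-contained sketch of the same pattern; the tensor shapes and dtype are illustrative only, and a CUDA device is assumed:

    import torch
    import xformers.ops

    # Pin the Cutlass kernels when available, else let
    # memory_efficient_attention dispatch automatically (op=None).
    attention_op = (
        xformers.ops.MemoryEfficientAttentionCutlassOp
        if hasattr(xformers.ops, "MemoryEfficientAttentionCutlassOp")
        else None
    )

    q = k = v = torch.randn(2, 1024, 64, device="cuda", dtype=torch.float16)
    out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=attention_op)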
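
Note on patch 4: open_clip.create_model_and_transforms builds the full CLIP model plus the image preprocessing transforms, while open_clip.create_model returns only the model, so less work is discarded before the visual tower is deleted. A small sketch of the difference, assuming the open_clip package is installed (fetching the laion2b_s32b_b79k weights is a large one-time download):

    import torch
    import open_clip

    # Before: builds the model and the train/val image transforms, then throws
    # away everything except the text encoder.
    model, _, _ = open_clip.create_model_and_transforms(
        "ViT-H-14", device=torch.device("cpu"), pretrained="laion2b_s32b_b79k")

    # After: builds only the model; the visual tower is still deleted because the
    # embedder needs just the text transformer.
    model = open_clip.create_model(
        "ViT-H-14", device=torch.device("cpu"), pretrained="laion2b_s32b_b79k")
    del model.visual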