# Source: sdas / Euler-Smea-Dyn-Sampler / smea_sampling_test.py
# Uploaded by dikdimon: "Upload Euler-Smea-Dyn-Sampler using SD-Hub"
# Commit 228adcd (verified)
import torch
import numpy as np
import torch.nn.functional as F
from tqdm.auto import trange
from importlib import import_module
# Host detection: work out which application is hosting this module and bind
# its k-diffusion ``sampling`` module. Both names stay ``None`` when neither
# host can be imported.
sampling = None
BACKEND = None

try:
    # The first import is only a presence probe for the WebUI sampler module;
    # its result is not used.
    _ = import_module("modules.sd_samplers_kdiffusion")
    sampling = import_module("k_diffusion.sampling")
except ImportError:
    pass
else:
    BACKEND = "WebUI"

if BACKEND is None:
    try:
        sampling = import_module("comfy.k_diffusion.sampling")
    except ImportError:
        pass
    else:
        BACKEND = "ComfyUI"
class _Rescaler:
    """Context manager that temporarily resizes backend-held tensors to match ``x``.

    On entry, tensors whose spatial size differs from ``x.shape[2:4]`` are
    replaced by interpolated copies; on exit, exactly those replacements are
    undone. Which tensors are considered depends on the module-level
    ``BACKEND``:

    * ``"WebUI"``: the model attributes ``init_latent``, ``mask`` and ``nmask``.
    * ``"ComfyUI"``: the model attributes ``latent_image`` and ``noise``, plus
      the ``"denoise_mask"`` entry of the keyword arguments.

    Args:
        model: Object whose attributes are inspected and temporarily replaced.
        x: Tensor whose dims 2 and 3 define the target spatial size.
        mode: Interpolation mode forwarded to ``F.interpolate``.
        **extra_args: Keyword arguments meant for the model call. They are
            stored as a separate dict in ``self.extra_args``; callers must pass
            ``**rescaler.extra_args`` to the model to see a resized
            ``denoise_mask``.

    Raises:
        ValueError: From ``__enter__`` when ``x.shape[1]`` is not 1, 3 or 4.
    """

    def __init__(self, model, x, mode='nearest-exact', **extra_args):
        self.model = model
        self.x = x
        self.mode = mode
        self.extra_args = extra_args
        self.backend = BACKEND
        # Bookkeeping so __exit__ only undoes what __enter__ actually changed.
        self._swapped_attrs = []
        self._mask_swapped = False
        if self.backend == "WebUI":
            self.init_latent = getattr(model, "init_latent", None)
            self.mask = getattr(model, "mask", None)
            self.nmask = getattr(model, "nmask", None)
        elif self.backend == "ComfyUI":
            self.latent_image = getattr(model, "latent_image", None)
            self.noise = getattr(model, "noise", None)
            self.denoise_mask = self.extra_args.get("denoise_mask", None)

    def _swap(self, name, value):
        """Replace ``model.<name>`` with ``value`` and remember to restore it."""
        setattr(self.model, name, value)
        self._swapped_attrs.append(name)

    def __enter__(self):
        if self.x.shape[1] not in [1, 3, 4]:
            raise ValueError(f"Unsupported number of channels: {self.x.shape[1]}")
        target = self.x.shape[2:4]
        if self.backend == "WebUI":
            if self.init_latent is not None and self.init_latent.shape[2:4] != target:
                self._swap("init_latent", F.interpolate(self.init_latent, size=target, mode=self.mode))
            for name in ("mask", "nmask"):
                original = getattr(self, name)
                # The size check reads dims 1:3, i.e. these are treated as 3-D;
                # a leading dim is added for F.interpolate and removed again.
                if original is not None and original.shape[1:3] != target:
                    resized = F.interpolate(original.unsqueeze(0), size=target, mode=self.mode).squeeze(0)
                    self._swap(name, resized)
        elif self.backend == "ComfyUI":
            for name in ("latent_image", "noise"):
                original = getattr(self, name)
                if original is not None and original.shape[2:4] != target:
                    self._swap(name, F.interpolate(original, size=target, mode=self.mode))
            if self.denoise_mask is not None and self.denoise_mask.shape[2:4] != target:
                self.extra_args["denoise_mask"] = F.interpolate(self.denoise_mask, size=target, mode=self.mode)
                self._mask_swapped = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore only what was replaced, so attributes or keys that never
        # existed are not created as None on the model / extra_args.
        for name in self._swapped_attrs:
            setattr(self.model, name, getattr(self, name))
        self._swapped_attrs.clear()
        if self._mask_swapped:
            self.extra_args["denoise_mask"] = self.denoise_mask
            self._mask_swapped = False
def default_noise_sampler(x):
    """Return a sampler producing standard-normal noise shaped like ``x``.

    The returned callable accepts ``(sigma, sigma_next)`` but ignores both.
    """
    def sample_noise(sigma, sigma_next):
        return torch.randn_like(x)

    return sample_noise
def get_ancestral_step(sigma_from, sigma_to, eta=1.):
    """Split a step from ``sigma_from`` to ``sigma_to`` into down and up parts.

    Returns:
        ``(sigma_down, sigma_up)``. When ``eta`` is falsy the result is
        ``(sigma_to, 0.)``. Otherwise ``sigma_up`` is capped at ``sigma_to``
        and ``sigma_down ** 2 + sigma_up ** 2 == sigma_to ** 2``.
    """
    if eta:
        variance = sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2
        sigma_up = min(sigma_to, eta * variance ** 0.5)
        sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5
        return sigma_down, sigma_up
    return sigma_to, 0.
def compute_gaussian_curvature(x):
    """Estimate a clamped Gaussian curvature of the channel-averaged input.

    The channel mean is treated as a height field; first and second finite
    differences (``torch.gradient``) are clamped to [-100, 100] and combined
    as ``(f_hh * f_ww - f_hw ** 2) / (1 + f_h ** 2 + f_w ** 2) ** 2``.

    Args:
        x: Input tensor of shape [batch, channels, height, width].

    Returns:
        torch.Tensor: Curvature of shape [batch, height, width], clamped to
        [-0.5, 0.5].

    Raises:
        ValueError: If ``x`` is not 4-D or either spatial dim is below 2.
    """
    if x.dim() != 4 or min(x.shape[2], x.shape[3]) < 2:
        raise ValueError(f"Invalid tensor dimensions or size: {x.shape}")
    limit = 1e2
    height_field = x.mean(dim=1)
    d_h, d_w = (g.clamp(-limit, limit) for g in torch.gradient(height_field, dim=(1, 2)))
    d_hh, d_hw = torch.gradient(d_h, dim=(1, 2))
    # Only the pure second derivative of d_w is needed; the mixed term comes from d_h.
    d_ww = torch.gradient(d_w, dim=2)[0]
    d_hh, d_hw, d_ww = (g.clamp(-limit, limit) for g in (d_hh, d_hw, d_ww))
    numerator = d_hh * d_ww - d_hw ** 2
    denominator = (1 + d_h ** 2 + d_w ** 2 + 1e-8) ** 2
    # TODO: Implement convolution-based gradient for better performance
    return (numerator / denominator).clamp(min=-0.5, max=0.5)
def compute_simple_curvature(x):
    """Return a cheap curvature proxy: summed absolute first differences.

    Args:
        x: Input tensor of shape [batch, channels, height, width].

    Returns:
        torch.Tensor: Tensor of shape [batch, height, width] holding
        ``|d/dh| + |d/dw|`` of the channel mean, clamped to [0, 0.5].

    Raises:
        ValueError: If ``x`` is not 4-D or either spatial dim is below 2.
    """
    if x.dim() != 4 or min(x.shape[2], x.shape[3]) < 2:
        raise ValueError(f"Invalid tensor dimensions or size: {x.shape}")
    channel_mean = x.mean(dim=1)
    slope_h, slope_w = torch.gradient(channel_mean, dim=(1, 2))
    # Each gradient is clamped to [-100, 100] before its magnitude is taken.
    magnitude = slope_h.clamp(-1e2, 1e2).abs() + slope_w.clamp(-1e2, 1e2).abs()
    return magnitude.clamp(min=0.0, max=0.5)
def compute_normals(x):
    """Compute unit-length surface normals of the channel-averaged input.

    The channel mean is treated as a height field; the normal at each pixel is
    ``(-d/dh, -d/dw, 1)`` divided by its length (plus 1e-6).

    Args:
        x: Input tensor of shape [batch, channels, height, width].

    Returns:
        torch.Tensor: Normals tensor of shape [batch, 3, height, width].

    Raises:
        ValueError: If ``x`` is not 4-D or either spatial dim is below 2.
    """
    if x.dim() != 4 or min(x.shape[2], x.shape[3]) < 2:
        raise ValueError(f"Invalid tensor dimensions or size: {x.shape}")
    slope_h, slope_w = (g.clamp(-1e2, 1e2) for g in torch.gradient(x.mean(dim=1), dim=(1, 2)))
    unnormalised = torch.stack((-slope_h, -slope_w, torch.ones_like(slope_h)), dim=1)
    length = unnormalised.norm(dim=1, keepdim=True)
    # TODO: Implement convolution-based gradient for better performance
    return unnormalised / (length + 1e-6)
def compute_dynamic_eta(sigma, sigma_max, eta_start=0.0, eta_end=0.5):
    """Linearly interpolate eta by the ratio ``sigma / sigma_max``.

    Yields ``eta_start`` when ``sigma == sigma_max`` and ``eta_end`` when
    ``sigma == 0``.
    """
    progress = sigma / sigma_max
    eta_span = eta_start - eta_end
    return eta_end + eta_span * progress
def apply_geometric_blur(x, curvature, sigma=1.0):
    """Apply a Gaussian blur whose strength is reduced by the mean curvature.

    The effective sigma is ``sigma * (1 - curvature.mean())``. The blur is a
    separable, per-channel convolution with a normalised Gaussian kernel;
    ``torch.nn.functional`` provides no ``gaussian_blur``, so it is built from
    ``F.pad`` and ``F.conv2d``.

    Args:
        x: Input tensor of shape [batch, channels, height, width].
        curvature: Curvature tensor; only its global mean is used.
        sigma: Base sigma for Gaussian blur.

    Returns:
        torch.Tensor: Blurred tensor of same shape as x. ``x`` itself is
        returned when the effective sigma is not positive.

    Raises:
        ValueError: If ``x`` is not 4-D.
    """
    if x.dim() != 4:
        raise ValueError(f"Invalid tensor dimensions: {x.shape}")
    sigma = sigma * (1 - curvature.mean().item())
    if sigma <= 0:
        # A non-positive width means "no blur"; it would also make the kernel undefined.
        return x
    kernel_size = min(int(2 * np.ceil(2 * sigma) + 1), 15)  # Cap kernel size
    if kernel_size % 2 == 0:
        kernel_size += 1
    radius = kernel_size // 2

    offsets = torch.arange(-radius, radius + 1, dtype=torch.float32, device=x.device)
    weights = torch.exp(-0.5 * (offsets / sigma) ** 2)
    weights = (weights / weights.sum()).to(x.dtype)

    channels = x.shape[1]
    # One identical kernel per channel, applied depthwise (groups=channels).
    row_kernel = weights.view(1, 1, 1, kernel_size).repeat(channels, 1, 1, 1)
    col_kernel = weights.view(1, 1, kernel_size, 1).repeat(channels, 1, 1, 1)

    # Reflect padding needs the pad to be smaller than the padded dim; fall
    # back to edge replication for inputs smaller than the kernel radius.
    pad_mode = "reflect" if radius < min(x.shape[2], x.shape[3]) else "replicate"
    padded = F.pad(x, (radius, radius, radius, radius), mode=pad_mode)
    blurred = F.conv2d(padded, row_kernel, groups=channels)
    return F.conv2d(blurred, col_kernel, groups=channels)
def apply_mask(x, mask=None, latent_mask=None):
    """Blend ``mask`` into ``x`` wherever ``latent_mask`` is set.

    Computes ``x * (1 - latent_mask) + mask * latent_mask``. If either
    ``mask`` or ``latent_mask`` is ``None``, ``x`` is returned unchanged.

    Args:
        x: Input tensor of shape [batch, channels, height, width].
        mask: Tensor of the same shape as x supplying the blended-in values.
        latent_mask: Tensor of the same shape as x supplying blend weights.

    Returns:
        torch.Tensor: Masked tensor of same shape as x.

    Raises:
        ValueError: If both masks are given and either shape differs from x.
    """
    if mask is None or latent_mask is None:
        return x
    shapes_agree = mask.shape == x.shape and latent_mask.shape == x.shape
    if not shapes_agree:
        raise ValueError(f"Mismatch in mask shapes: x={x.shape}, mask={mask.shape}, latent_mask={latent_mask.shape}")
    keep_weight = 1 - latent_mask
    return x * keep_weight + mask * latent_mask
@torch.no_grad()
def _in_resized_space_vec(x, model, dt, sigma_hat, interpolation_mode='nearest-exact', **extra_args):
    """Evaluate the model on an enlarged copy of ``x`` and return the scaled step.

    ``x`` is interpolated to ``(height + 2, width + 2)``, the model is called
    there, and the derivative ``(y - denoised) / sigma_hat`` (clamped to
    [-100, 100]) is multiplied by ``dt`` and interpolated back to the size of
    ``x``.

    Args:
        x: Tensor of shape [batch, channels, height, width].
        model: Callable invoked as ``model(y, sigma, **kwargs)``.
        dt: Step size multiplied into the derivative.
        sigma_hat: Noise level passed to the model (broadcast over the batch).
        interpolation_mode: Mode for both resizes and for ``_Rescaler``.
        **extra_args: Extra keyword arguments for the model call.

    Returns:
        torch.Tensor: ``d * dt`` with the same spatial size as ``x``.

    Raises:
        ValueError: If ``x`` is not 4-D or either spatial dim is below 2.
    """
    if x.dim() != 4 or min(x.shape[2], x.shape[3]) < 2:
        raise ValueError(f"Invalid tensor dimensions or size: {x.shape}")
    m, n = x.shape[2], x.shape[3]
    y = F.interpolate(x, size=(m + 2, n + 2), mode=interpolation_mode)
    with _Rescaler(model, y, interpolation_mode, **extra_args) as rescaler:
        # Use the rescaler's own kwargs: they are a separate dict, and only
        # that dict carries a denoise_mask resized to match y.
        denoised = model(y, sigma_hat * y.new_ones([y.shape[0]]), **rescaler.extra_args)
    d = (y - denoised) / sigma_hat
    d = torch.clamp(d, -1e2, 1e2)
    d = F.interpolate(d * dt, size=(m, n), mode=interpolation_mode)
    return d
@torch.no_grad()
def dy_sampling_step(x, model, dt, sigma_hat, interpolation_mode='nearest-exact', **extra_args):
    """Take an Euler step on a half-resolution sub-grid of ``x``.

    The pixels at odd row and odd column indices (the bottom-right pixel of
    every complete 2x2 tile) form a tensor of half the height and width. The
    model is evaluated on that tensor, it is advanced by ``d * dt``, and the
    result is written back to the same positions. All other pixels, including
    a trailing odd row and/or column, are returned unchanged.

    Args:
        x: Tensor of shape [batch, channels, height, width].
        model: Callable invoked as ``model(c, sigma, **kwargs)``.
        dt: Step size.
        sigma_hat: Noise level passed to the model (broadcast over the batch).
        interpolation_mode: Mode forwarded to ``_Rescaler``.
        **extra_args: Extra keyword arguments for the model call.

    Returns:
        torch.Tensor: New tensor with the same shape as ``x``; the input is
        not modified.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4, or if either
            spatial dim is below 2 (no complete 2x2 tile exists).
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    if min(x.shape[2], x.shape[3]) < 2:
        raise ValueError(f"Invalid tensor dimensions or size: {x.shape}")
    # Extent covered by complete 2x2 tiles; a trailing odd row/column lies outside.
    rows, cols = 2 * (x.shape[2] // 2), 2 * (x.shape[3] // 2)
    c = x[:, :, 1:rows:2, 1:cols:2].contiguous()
    with _Rescaler(model, c, interpolation_mode, **extra_args) as rescaler:
        denoised = model(c, sigma_hat * c.new_ones([c.shape[0]]), **rescaler.extra_args)
    d = sampling.to_d(c, sigma_hat, denoised)
    # Writing into a clone leaves the caller's tensor alone and keeps every
    # untouched pixel (including the odd-size corner) at its original value.
    out = x.clone()
    out[:, :, 1:rows:2, 1:cols:2] = c + d * dt
    return out
@torch.no_grad()
def sample_Kohaku_LoNyu_Yog_v1_test(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0.,
                                    s_tmax=float('inf'), s_noise=1., noise_sampler=None, eta=1., interpolation_mode='nearest-exact'):
    """Kohaku_LoNyu_Yog sampling with combined standard and inverted steps.

    For the first half of the schedule (``i <= (len(sigmas) - 1) / 2``) each
    step evaluates the model three times: on ``x``, on ``-x``, and on a trial
    point built from the average of the first two derivatives. It then steps
    with the average of the first and third derivatives and adds ancestral
    noise. The remaining steps are plain Euler steps towards ``sigma_down``
    with no noise added.

    Args:
        model: Callable invoked as ``model(x, sigma, **extra_args)``.
        x: Tensor of shape [batch, channels, height, width]; channels must be
            1, 3 or 4.
        sigmas: Sequence of noise levels; ``len(sigmas) - 1`` steps are taken.
        extra_args: Optional dict of keyword arguments for the model.
        callback: Optional callable invoked once per step with a dict holding
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised``.
        disable: Forwarded to ``trange`` to disable the progress bar.
        s_churn, s_tmin, s_tmax, s_noise: Churn settings; ``gamma`` is nonzero
            only while ``s_tmin <= sigma <= s_tmax``.
        noise_sampler: Optional ``(sigma, sigma_next) -> noise`` callable.
        eta: Ancestral noise amount passed to ``get_ancestral_step``.
        interpolation_mode: Mode forwarded to ``_Rescaler``.

    Returns:
        torch.Tensor: The sampled tensor.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4.
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
    n_steps = len(sigmas) - 1
    for i in trange(n_steps, disable=disable):
        gamma = min(s_churn / n_steps, 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.
        # Drawn on every step, even when unused, so random-number consumption
        # does not depend on whether churn is active.
        eps = torch.randn_like(x) * s_noise
        sigma_hat = sigmas[i] * (gamma + 1)
        if gamma > 0:
            x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5
        denoised = model(x, sigma_hat * s_in, **extra_args)
        d = sampling.to_d(x, sigma_hat, denoised)
        sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
        dt = sigma_down - sigmas[i]
        if i <= n_steps / 2:
            x2 = -x
            with _Rescaler(model, x2, interpolation_mode, **extra_args) as rescaler:
                denoised2 = model(x2, sigma_hat * s_in, **rescaler.extra_args)
            d2 = sampling.to_d(x2, sigma_hat, denoised2)
            x3 = x + ((d + d2) / 2) * dt
            with _Rescaler(model, x3, interpolation_mode, **extra_args) as rescaler:
                denoised3 = model(x3, sigma_hat * s_in, **rescaler.extra_args)
            d3 = sampling.to_d(x3, sigma_hat, denoised3)
            real_d = (d + d3) / 2
            x = x + real_d * dt
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
        else:
            x = x + d * dt
    return x
@torch.no_grad()
def kohaku_lonyu_yog_stochastic_v1_test(model, x, sigmas, extra_args=None, callback=None, disable=None, langevin_strength=0.05,
                                        interpolation_mode='nearest-exact'):
    """Stochastic Kohaku_LoNyu_Yog sampling with curvature-based noise.

    Each step is an Euler step along the clamped derivative
    ``(x - denoised) / sigma`` plus Gaussian noise. The noise is scaled by
    ``min(langevin_strength * mean curvature, 0.4) * sqrt(sigma)`` and
    weighted per pixel by the simple curvature of ``x``.

    Args:
        model: Callable invoked as ``model(x, sigma, **extra_args)``.
        x: Tensor of shape [batch, channels, height, width]; channels must be
            1, 3 or 4.
        sigmas: Tensor of noise levels; ``len(sigmas) - 1`` steps are taken.
        extra_args: Optional dict of keyword arguments for the model.
        callback: Optional callable invoked once per step with a dict holding
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised``.
        disable: Forwarded to ``trange`` to disable the progress bar.
        langevin_strength: Multiplier on the mean curvature for the noise scale.
        interpolation_mode: Accepted for signature parity; not used here.

    Returns:
        torch.Tensor: The sampled tensor.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4.
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        dt = sigmas[i + 1] - sigmas[i]
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        curvature = compute_simple_curvature(x)
        noise_scale = min(langevin_strength * curvature.mean(), 0.4)
        noise = torch.randn_like(x) * noise_scale * torch.sqrt(sigmas[i])
        grad = (x - denoised) / sigmas[i]
        grad = torch.clamp(grad, -1e2, 1e2)
        # curvature is [B, H, W]; add a channel axis so it lines up with the
        # [B, C, H, W] noise for any batch size instead of being broadcast
        # against the channel dim.
        x = x + grad * dt + noise * curvature.unsqueeze(1)
    return x
@torch.no_grad()
def kohaku_lonyu_yog_compatible_v1_test(model, x, sigmas, extra_args=None, callback=None, disable=None, interpolation_mode='nearest-exact'):
    """Kohaku_LoNyu_Yog sampling compatible with masks.

    Plain Euler steps along the clamped derivative ``(x - denoised) / sigma``;
    after each step ``apply_mask`` blends ``extra_args['mask']`` back in using
    ``extra_args['latent_mask']`` (a no-op unless both are present).

    Args:
        model: Callable invoked as ``model(x, sigma, **extra_args)``.
        x: Tensor of shape [batch, channels, height, width]; channels must be
            1, 3 or 4.
        sigmas: Sequence of noise levels; ``len(sigmas) - 1`` steps are taken.
        extra_args: Optional dict of keyword arguments for the model. The
            ``mask`` / ``latent_mask`` entries are read here and are also
            forwarded to the model with the rest of the dict.
        callback: Optional callable invoked once per step with a dict holding
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised``.
        disable: Forwarded to ``trange`` to disable the progress bar.
        interpolation_mode: Accepted for signature parity; not used here.

    Returns:
        torch.Tensor: The sampled tensor.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4, or if the masks do
            not have the same shape as ``x``.
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    extra_args = {} if extra_args is None else extra_args
    mask = extra_args.get('mask', None)
    latent_mask = extra_args.get('latent_mask', None)
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        dt = sigmas[i + 1] - sigmas[i]
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        grad = (x - denoised) / sigmas[i]
        grad = torch.clamp(grad, -1e2, 1e2)
        x = x + grad * dt
        x = apply_mask(x, mask, latent_mask)
    return x
@torch.no_grad()
def sample_Kohaku_LoNyu_Yog_v2_v1_test(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0.,
                                       s_tmax=float('inf'), s_noise=1.0, noise_sampler=None, eta_start=0.9, eta_end=0.6,
                                       use_normals=True, interpolation_mode='nearest-exact'):
    """Kohaku_LoNyu_Yog v2 sampling with geometric corrections.

    Each step is an Euler step along the clamped derivative. With
    ``use_normals`` the derivative is additionally weighted by
    ``1 + 0.5 * |curvature|`` and nudged by 0.05 times a normal-based term.
    While the next sigma is positive, curvature-weighted noise scaled by a
    sigma-dependent ``eta`` is added.

    Args:
        model: Callable invoked as ``model(x, sigma, **extra_args)``.
        x: Tensor of shape [batch, channels, height, width]; channels must be
            1, 3 or 4.
        sigmas: Tensor of noise levels; ``len(sigmas) - 1`` steps are taken.
        extra_args: Optional dict of keyword arguments for the model.
        callback: Optional callable invoked once per step with a dict holding
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised``.
        disable: Forwarded to ``trange`` to disable the progress bar.
        s_churn, s_tmin, s_tmax, s_noise: Churn settings; ``gamma`` is nonzero
            only while ``s_tmin <= sigma <= s_tmax``.
        noise_sampler: Optional ``(sigma, sigma_next) -> noise`` callable.
        eta_start, eta_end: Endpoints passed to ``compute_dynamic_eta``.
        use_normals: Enable the curvature / normal correction of the derivative.
        interpolation_mode: Accepted for signature parity; not used here.

    Returns:
        torch.Tensor: The sampled tensor.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4, or if ``x`` is too
            small for the curvature helpers.
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
    sigma_max = torch.max(sigmas)
    old_denoised = None
    n_steps = len(sigmas) - 1
    for i in trange(n_steps, disable=disable):
        sigma = sigmas[i]
        # NOTE(review): dt is measured from sigma, not sigma_hat, so with
        # churn enabled the step is shorter than the churned level - confirm intent.
        dt = sigmas[i + 1] - sigma
        gamma = min(s_churn / n_steps, 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.
        sigma_hat = sigma * (1 + gamma)
        # [B, H, W] -> [B, 1, H, W] so it broadcasts over channels for any
        # batch size instead of being aligned against the channel dim.
        curvature = compute_gaussian_curvature(x).unsqueeze(1)
        eta = compute_dynamic_eta(sigma, sigma_max, eta_start, eta_end)
        if gamma > 0:
            eps = torch.randn_like(x) * s_noise
            x = x + eps * torch.sqrt(sigma_hat**2 - sigma**2)
        denoised = model(x, sigma_hat * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigma, 'sigma_hat': sigma_hat, 'denoised': denoised})
        grad = (x - denoised) / sigma_hat
        grad = torch.clamp(grad, -1e2, 1e2)
        if use_normals:
            normals = compute_normals(x)
            # Sums the three normal components per pixel and multiplies grad by that sum.
            normal_correction = torch.einsum('bchw,bkhw->bchw', grad, normals)
            normal_correction = torch.clamp(normal_correction, -1e2, 1e2)
            curvature_weight = 1.0 + 0.5 * torch.abs(curvature)
            grad = grad * curvature_weight + 0.05 * normal_correction
        if old_denoised is not None:
            # Only feeds old_denoised for the next iteration; grad was
            # computed above, so this blend does not alter the current step.
            denoised = 0.6 * denoised + 0.4 * old_denoised
        x = x + grad * dt
        if sigmas[i + 1] > 0:
            noise = noise_sampler(sigma, sigmas[i + 1]) * s_noise * eta
            x = x + noise * curvature
        old_denoised = denoised
    return x
@torch.no_grad()
def kohaku_lonyu_yog_geo_compatible_v1_test(model, x, sigmas, extra_args=None, callback=None, disable=None, interpolation_mode='nearest-exact'):
    """Kohaku_LoNyu_Yog sampling with geometric corrections and mask support.

    Each step is an Euler step along the clamped derivative, weighted by
    ``1 + 0.5 * |curvature|`` and nudged by 0.05 times a normal-based term.
    Afterwards ``apply_mask`` blends ``extra_args['mask']`` back in using
    ``extra_args['latent_mask']`` (a no-op unless both are present).

    Args:
        model: Callable invoked as ``model(x, sigma, **extra_args)``.
        x: Tensor of shape [batch, channels, height, width]; channels must be
            1, 3 or 4.
        sigmas: Sequence of noise levels; ``len(sigmas) - 1`` steps are taken.
        extra_args: Optional dict of keyword arguments for the model. The
            ``mask`` / ``latent_mask`` entries are read here and are also
            forwarded to the model with the rest of the dict.
        callback: Optional callable invoked once per step with a dict holding
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised``.
        disable: Forwarded to ``trange`` to disable the progress bar.
        interpolation_mode: Accepted for signature parity; not used here.

    Returns:
        torch.Tensor: The sampled tensor.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4, if ``x`` is too
            small for the curvature helpers, or if the masks do not have the
            same shape as ``x``.
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    extra_args = {} if extra_args is None else extra_args
    mask = extra_args.get('mask', None)
    latent_mask = extra_args.get('latent_mask', None)
    s_in = x.new_ones([x.shape[0]])
    old_denoised = None
    for i in trange(len(sigmas) - 1, disable=disable):
        dt = sigmas[i + 1] - sigmas[i]
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        # [B, H, W] -> [B, 1, H, W] so the weight broadcasts over channels for
        # any batch size instead of being aligned against the channel dim.
        curvature = compute_gaussian_curvature(x).unsqueeze(1)
        normals = compute_normals(x)
        grad = (x - denoised) / sigmas[i]
        grad = torch.clamp(grad, -1e2, 1e2)
        curvature_weight = 1.0 + 0.5 * torch.abs(curvature)
        # Sums the three normal components per pixel and multiplies grad by that sum.
        normal_correction = torch.einsum('bchw,bkhw->bchw', grad, normals)
        normal_correction = torch.clamp(normal_correction, -1e2, 1e2)
        corrected_grad = grad * curvature_weight + 0.05 * normal_correction
        if old_denoised is not None:
            # Only feeds old_denoised for the next iteration; it does not
            # alter the step taken below.
            denoised = 0.6 * denoised + 0.4 * old_denoised
        x = x + corrected_grad * dt
        x = apply_mask(x, mask, latent_mask)
        old_denoised = denoised
    return x
@torch.no_grad()
def kohaku_lonyu_yog_dy_v1_test(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0.05, s_tmin=0.,
                                s_tmax=float('inf'), s_noise=0.5, interpolation_mode='nearest-exact'):
    """Kohaku_LoNyu_Yog sampling with dynamic steps and geometric corrections.

    On odd-indexed steps whose next sigma is positive, the update is delegated
    to ``dy_sampling_step``. Every other step is an Euler step along the
    clamped derivative, weighted by ``1 + 0.5 * |curvature|`` and nudged by
    0.05 times a normal-based term.

    Args:
        model: Callable invoked as ``model(x, sigma, **extra_args)``.
        x: Tensor of shape [batch, channels, height, width]; channels must be
            1, 3 or 4.
        sigmas: Tensor of noise levels; ``len(sigmas) - 1`` steps are taken.
        extra_args: Optional dict of keyword arguments for the model.
        callback: Optional callable invoked once per step with a dict holding
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised``.
        disable: Forwarded to ``trange`` to disable the progress bar.
        s_churn, s_tmin, s_tmax, s_noise: Churn settings; ``gamma`` is nonzero
            only while ``s_tmin <= sigma <= s_tmax``.
        interpolation_mode: Mode forwarded to ``dy_sampling_step``.

    Returns:
        torch.Tensor: The sampled tensor.

    Raises:
        ValueError: If the channel count is not 1, 3 or 4, or if ``x`` is too
            small for the curvature helpers.
    """
    if x.shape[1] not in [1, 3, 4]:
        raise ValueError(f"Unsupported number of channels: {x.shape[1]}")
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    old_denoised = None
    n_steps = len(sigmas) - 1
    for i in trange(n_steps, disable=disable):
        sigma = sigmas[i]
        dt = sigmas[i + 1] - sigma
        gamma = min(s_churn / n_steps, 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.
        sigma_hat = sigma * (1 + gamma)
        if gamma > 0:
            eps = torch.randn_like(x) * s_noise
            x = x + eps * torch.sqrt(sigma_hat**2 - sigma**2)
        denoised = model(x, sigma_hat * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigma, 'sigma_hat': sigma_hat, 'denoised': denoised})
        if sigmas[i + 1] > 0 and i % 2 == 1:
            # NOTE(review): on these steps only the sub-grid handled by
            # dy_sampling_step is advanced and the full-resolution derivative
            # is not applied - confirm this either/or is intended.
            x = dy_sampling_step(x, model, dt, sigma_hat, interpolation_mode, **extra_args)
        else:
            grad = (x - denoised) / sigma_hat
            grad = torch.clamp(grad, -1e2, 1e2)
            # [B, H, W] -> [B, 1, H, W] so the weight broadcasts over channels
            # for any batch size instead of being aligned against the channel dim.
            curvature = compute_gaussian_curvature(x).unsqueeze(1)
            normals = compute_normals(x)
            curvature_weight = 1.0 + 0.5 * torch.abs(curvature)
            # Sums the three normal components per pixel and multiplies grad by that sum.
            normal_correction = torch.einsum('bchw,bkhw->bchw', grad, normals)
            normal_correction = torch.clamp(normal_correction, -1e2, 1e2)
            corrected_grad = grad * curvature_weight + 0.05 * normal_correction
            x = x + corrected_grad * dt
        if old_denoised is not None:
            # Only feeds old_denoised for the next iteration; it does not alter x.
            denoised = 0.6 * denoised + 0.4 * old_denoised
        old_denoised = denoised
    return x