226 lines
8.6 KiB
Python
226 lines
8.6 KiB
Python
import ignite.distributed as idist
|
|
import torch
|
|
import torch.nn as nn
|
|
|
|
|
|
def gaussian_radial_basis_function(x, mu, sigma):
    """Evaluate a radial-basis kernel of every pixel against every centre.

    Args:
        x: image batch, indexed here as a 4-D ``(batch_size, c, h, w)`` tensor.
        mu: kernel centres; ``mu.size(0)`` is taken as ``kernel_size``.
        sigma: kernel bandwidth, used as ``2 * sigma ** 2`` in the denominator.

    Returns:
        Tensor of shape ``(batch_size, kernel_size, c*h*w)`` containing
        ``exp((x - mu) ** 2 / (2 * sigma ** 2))``.
    """
    # NOTE(review): the exponent is positive, whereas a textbook Gaussian RBF
    # uses exp(-d^2 / (2 * sigma^2)). The sign is kept unchanged because the
    # kernel_F helpers in this file use the same form -- confirm it is intended.
    batch_size = x.size(0)
    num_pixel = x.size(1) * x.size(2) * x.size(3)

    # (kernel_size) -> (batch_size, kernel_size, c*h*w)
    mu = mu.reshape(1, mu.size(0), 1).expand(batch_size, -1, num_pixel)

    # (batch_size, c, h, w) -> (batch_size, kernel_size, c*h*w)
    # reshape (rather than view) so non-contiguous inputs, e.g. the result of
    # a permute, are flattened instead of raising a RuntimeError.
    x = x.reshape(batch_size, 1, -1).expand(-1, mu.size(1), -1)

    return torch.exp((x - mu).pow(2) / (2 * sigma ** 2))
|
|
|
|
|
|
class ImporveMyLoss(torch.nn.Module):
    """ERSMI-based loss with the kernel centres precomputed once.

    Computes the same quantity as ``batch_ERSMI`` inside ``MyLoss.forward`` but
    builds the 81 kernel centres and the regulariser matrix in ``__init__``
    instead of on every call.

    Args:
        device: device on which the constant tensors are created. When
            ``None`` (the default) ``idist.device()`` is queried at
            construction time rather than once at import time.
    """

    def __init__(self, device=None):
        super().__init__()
        if device is None:
            # Resolved lazily: a default of ``idist.device()`` in the signature
            # would be evaluated a single time when the class is defined.
            device = idist.device()

        mu = torch.tensor([-1.0, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1.0]).to(device)
        # (1, 81): the 9 centres tiled 9 times -> mu0, mu1, ..., mu8, mu0, ...
        self.x_mu_list = mu.repeat(9).view(-1, 81)
        # (1, 81): every centre repeated 9 times -> mu0, mu0, ..., mu1, mu1, ...
        self.y_mu_list = mu.unsqueeze(0).t().repeat(1, 9).view(-1, 81)
        # Identity used to regularise the 81x81 system before inversion.
        self.R = torch.eye(81).to(device)

    def batch_ERSMI(self, I1, I2):
        """Return the negated batch mean of the ERSMI estimate of ``I1``/``I2``.

        Args:
            I1: 4-D image batch ``(batch, c, h, w)``.
            I2: 4-D image batch; when it has a single channel and ``I1`` does
                not, it is repeated along the channel axis to match ``I1``.

        Returns:
            0-dim tensor: ``-mean(ersmi)`` over the batch.
        """
        batch_size = I1.shape[0]
        img_size = I1.shape[1] * I1.shape[2] * I1.shape[3]
        if I2.shape[1] == 1 and I1.shape[1] != 1:
            # Match I1's channel count instead of assuming exactly 3 channels.
            I2 = I2.repeat(1, I1.shape[1], 1, 1)

        def kernel_F(y, mu_list, sigma):
            # Broadcast (1, 81, 1) centres against (batch, 1, pixels) values;
            # yields the same (batch, 81, pixels) result as materialised repeats.
            centres = mu_list.reshape(1, -1, 1)
            pixels = y.reshape(batch_size, 1, -1)
            return torch.exp((centres - pixels).pow(2) / (2 * sigma ** 2))

        mat_K = kernel_F(I1, self.x_mu_list, 1)
        mat_L = kernel_F(I2, self.y_mu_list, 1)
        mat_k_l = mat_K * mat_L

        H1 = (mat_K @ mat_K.transpose(1, 2)) * (mat_L @ mat_L.transpose(1, 2)) / (img_size ** 2)
        h_hat = mat_k_l @ mat_k_l.transpose(1, 2) / img_size
        small_h_hat = (
            mat_K.sum(2).view(batch_size, -1, 1) * mat_L.sum(2).view(batch_size, -1, 1) / (img_size ** 2)
        )
        # Equal-weight blend of the two 81x81 matrices.
        h_hat = 0.5 * H1 + 0.5 * h_hat
        # Regularised solve: alpha = (H + 0.05 * I)^-1 h
        alpha = (h_hat + 0.05 * self.R).inverse() @ small_h_hat

        ersmi = 2 * alpha.transpose(1, 2) @ small_h_hat - alpha.transpose(1, 2) @ h_hat @ alpha - 1

        # Negated so that minimising the loss maximises the estimate.
        ersmi = -ersmi.squeeze().mean()
        return ersmi

    def forward(self, fakeI, realI):
        """Return ``batch_ERSMI(fakeI, realI)``."""
        return self.batch_ERSMI(fakeI, realI)
|
|
|
|
|
|
class MyLoss(torch.nn.Module):
    """ERSMI-based loss that rebuilds its kernel centres on every call.

    The inputs are moved to CUDA when it is available; on hosts without CUDA
    the computation runs on the device of ``fakeI`` instead of raising.
    """

    def __init__(self):
        super(MyLoss, self).__init__()

    def forward(self, fakeI, realI):
        """Return the negated batch mean of the ERSMI estimate.

        Args:
            fakeI: 4-D image batch ``(batch, c, h, w)``.
            realI: 4-D image batch; a single-channel ``realI`` is repeated
                along the channel axis to match ``fakeI``.

        Returns:
            0-dim tensor: ``-mean(ersmi)`` over the batch.
        """
        if torch.cuda.is_available():
            fakeI = fakeI.cuda()
            realI = realI.cuda()
        # All helper tensors are created on the device the inputs ended up on.
        device = fakeI.device

        def batch_ERSMI(I1, I2):
            batch_size = I1.shape[0]
            img_size = I1.shape[1] * I1.shape[2] * I1.shape[3]
            if I2.shape[1] == 1 and I1.shape[1] != 1:
                # Match I1's channel count instead of assuming exactly 3.
                I2 = I2.repeat(1, I1.shape[1], 1, 1)

            def kernel_F(y, mu_list, sigma):
                # (1, 81, 1) centres broadcast against (batch, 1, pixels).
                centres = mu_list.reshape(1, -1, 1)
                pixels = y.reshape(batch_size, 1, -1)
                return torch.exp((centres - pixels).pow(2) / (2 * sigma ** 2))

            mu = torch.tensor([-1.0, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1.0], device=device)

            # 9 centres tiled 9 times / each centre repeated 9 times.
            x_mu_list = mu.repeat(9).view(-1, 81)
            y_mu_list = mu.unsqueeze(0).t().repeat(1, 9).view(-1, 81)

            mat_K = kernel_F(I1, x_mu_list, 1)
            mat_L = kernel_F(I2, y_mu_list, 1)

            H1 = (mat_K.matmul(mat_K.transpose(1, 2))).mul(mat_L.matmul(mat_L.transpose(1, 2))) / (
                img_size ** 2)
            H2 = (mat_K.mul(mat_L)).matmul((mat_K.mul(mat_L)).transpose(1, 2)) / img_size
            h2 = (mat_K.sum(2).view(batch_size, -1, 1)).mul(mat_L.sum(2).view(batch_size, -1, 1)) / (
                img_size ** 2)
            # Equal-weight blend of the two 81x81 matrices.
            H2 = 0.5 * H1 + 0.5 * H2
            # Regularise with 0.05 * I before inverting.
            tmp = H2 + 0.05 * torch.eye(H2.size(1), device=device)
            alpha = tmp.inverse().matmul(h2)

            ersmi = (2 * (alpha.transpose(1, 2)).matmul(h2) - ((alpha.transpose(1, 2)).matmul(H2)).matmul(
                alpha) - 1).squeeze()

            # Negated so that minimising the loss maximises the estimate.
            return -ersmi.mean()

        return batch_ERSMI(fakeI, realI)
|
|
|
|
|
|
class MGCLoss(nn.Module):
    """
    Minimal Geometry-Distortion Constraint Loss from https://openreview.net/forum?id=R5M7Mxl1xZ

    Args:
        beta: weight of the product-of-Gram-matrices term in ``h_hat``; the
            joint term is weighted by ``1 - beta``.
        lambda_: coefficient of the identity regulariser added before inversion.
        device: device for the constant tensors. When ``None`` (the default)
            ``idist.device()`` is queried at construction time rather than
            once at import time.
    """

    def __init__(self, beta=0.5, lambda_=0.05, device=None):
        super().__init__()
        if device is None:
            # Resolved lazily: a default of ``idist.device()`` in the signature
            # would be evaluated a single time when the class is defined.
            device = idist.device()

        self.beta = beta
        self.lambda_ = lambda_

        # 9 centres in [-1, 1]; the 9x9 grid is flattened to 81 (x, y) pairs.
        # repeat / repeat_interleave give the same values as flattening an
        # "ij"-indexed meshgrid, without relying on meshgrid's default indexing.
        centres = torch.arange(-1, 1.25, 0.25)
        self.mu_x = centres.repeat(9).to(device)
        self.mu_y = centres.repeat_interleave(9).to(device)
        # (1, 81, 81) identity, broadcast over the batch dimension.
        self.R = torch.eye(81).unsqueeze(0).to(device)

    @staticmethod
    def batch_rSMI(img1, img2, mu_x, mu_y, beta, lambda_, R):
        """Return the per-sample rSMI estimate between two image batches.

        Args:
            img1, img2: 4-D image batches of identical size.
            mu_x, mu_y: kernel centres applied to ``img1`` and ``img2``.
            beta: blend weight between the two ``h_hat`` terms.
            lambda_: regularisation coefficient multiplying ``R``.
            R: regulariser matrix added to ``h_hat`` before inversion.

        Returns:
            ``rSMI.squeeze()`` -- one value per batch element (0-dim when the
            batch size is 1).

        Raises:
            AssertionError: if ``img1`` and ``img2`` differ in size.
        """
        assert img1.size() == img2.size()

        # Sizes are identical (asserted above), so read them all from img1.
        num_pixel = img1.size(1) * img1.size(2) * img1.size(3)

        mat_k = gaussian_radial_basis_function(img1, mu_x, sigma=1)
        mat_l = gaussian_radial_basis_function(img2, mu_y, sigma=1)

        mat_k_mul_mat_l = mat_k * mat_l
        joint_term = (1 - beta) * (mat_k_mul_mat_l @ mat_k_mul_mat_l.transpose(1, 2)) / num_pixel
        product_term = beta * ((mat_k @ mat_k.transpose(1, 2)) * (mat_l @ mat_l.transpose(1, 2))) / (num_pixel ** 2)
        h_hat = joint_term + product_term
        small_h_hat = mat_k.sum(2, keepdim=True) * mat_l.sum(2, keepdim=True) / (num_pixel ** 2)

        # Regularised solve: alpha = (H + lambda * R)^-1 h
        alpha = (h_hat + lambda_ * R).inverse() @ small_h_hat
        rSMI = 2 * alpha.transpose(1, 2) @ small_h_hat - alpha.transpose(1, 2) @ h_hat @ alpha - 1
        return rSMI.squeeze()

    def forward(self, fake, real):
        """Return the negated batch mean of ``batch_rSMI(fake, real, ...)``."""
        rSMI = self.batch_rSMI(fake, real, self.mu_x, self.mu_y, self.beta, self.lambda_, self.R)
        return -rSMI.mean()
|
|
|
|
|
|
if __name__ == '__main__':
    # Ad-hoc check: print the head of the input gradients produced by MGCLoss
    # when three image pairs are evaluated one by one (losses averaged) and
    # when the same pairs are evaluated as a single stacked batch.
    mg = MGCLoss(device=torch.device("cpu"))
    my = MyLoss().to("cuda")
    imy = ImporveMyLoss()

    from data.transform import transform_pipeline

    pipeline = transform_pipeline(
        ['Load', 'ToTensor', {'Normalize': {'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5]}}])

    # presumably each pipeline(...) call yields one image tensor -- verify
    # against data.transform.
    fake_images = [
        pipeline("/data/i2i/VoxCeleb2Anime/trainA/id00022-twCPGo2rtCo-00294_1.jpg"),
        pipeline("/data/i2i/VoxCeleb2Anime/trainA/id00022-twCPGo2rtCo-00294_2.jpg"),
        pipeline("/data/i2i/VoxCeleb2Anime/trainA/id00022-twCPGo2rtCo-00294_3.jpg"),
    ]
    real_images = [
        pipeline("/data/i2i/VoxCeleb2Anime/trainA/id01222-2gHw81dNQiA-00005_1.jpg"),
        pipeline("/data/i2i/VoxCeleb2Anime/trainA/id01222-2gHw81dNQiA-00005_2.jpg"),
        pipeline("/data/i2i/VoxCeleb2Anime/trainA/id01222-2gHw81dNQiA-00005_3.jpg"),
    ]

    # Only the "fake" side needs gradients; they are what gets printed below.
    for fake in fake_images:
        fake.requires_grad_(True)

    def _print_grad_heads():
        # First 10 entries of the first row of the first channel, per image.
        for fake in fake_images:
            print(fake.grad[0][0][0:10])

    def _clear_grads():
        for fake in fake_images:
            fake.grad = None

    # Disabled: the same comparison using MyLoss.
    # print("MyLoss")
    # singles = [my(f.unsqueeze(0), r.unsqueeze(0)) for f, r in zip(fake_images, real_images)]
    # mean_loss = (singles[0] + singles[1] + singles[2]) / 3
    # mean_loss.backward()
    # _print_grad_heads()
    # _clear_grads()
    #
    # print("---")
    # batched_loss = my(torch.stack(fake_images), torch.stack(real_images))
    # batched_loss.backward()
    # _print_grad_heads()
    # _clear_grads()

    print("MGCLoss")
    singles = [mg(f.unsqueeze(0), r.unsqueeze(0)) for f, r in zip(fake_images, real_images)]
    mean_loss = (singles[0] + singles[1] + singles[2]) / 3
    mean_loss.backward()
    _print_grad_heads()

    _clear_grads()

    print("---")
    batched_loss = mg(torch.stack(fake_images), torch.stack(real_images))
    batched_loss.backward()
    _print_grad_heads()

    # Disabled: forward-only calls and a pprofile timing run of all three losses.
    # print("\nMGCLoss")
    # for f, r in zip(fake_images, real_images):
    #     mg(f.unsqueeze(0), r.unsqueeze(0))
    #
    # print("---")
    # mg(torch.stack(fake_images), torch.stack(real_images))
    #
    # import pprofile
    #
    # profiler = pprofile.Profile()
    # with profiler:
    #     iter_times = 1000
    #     for _ in range(iter_times):
    #         mg(torch.stack(fake_images), torch.stack(real_images))
    #     for _ in range(iter_times):
    #         my(torch.stack(fake_images), torch.stack(real_images))
    #     for _ in range(iter_times):
    #         imy(torch.stack(fake_images), torch.stack(real_images))
    # profiler.print_stats()
|