add context loss

Ray Wong 2020-09-24 16:38:03 +08:00
parent b01016edb5
commit ca55318253
2 changed files with 93 additions and 6 deletions

loss/I2I/context_loss.py (new file, 44 lines)

@@ -0,0 +1,44 @@
import torch
import torch.nn.functional as F
from torch import nn

from .perceptual_loss import PerceptualVGG


class ContextLoss(nn.Module):
    """Contextual loss on VGG features.

    Positions of the two feature maps are compared pairwise via a
    normalized cosine distance; the loss is the negative log of the
    aggregated contextual similarity.
    """

    def __init__(self, layer_weights, h=0.1, vgg_type='vgg19',
                 norm_image_with_imagenet_param=True, norm_img=True,
                 eps=1e-5):
        super(ContextLoss, self).__init__()
        self.eps = eps
        self.h = h  # bandwidth of the exponential similarity kernel
        self.layer_weights = layer_weights
        self.norm_img = norm_img
        self.vgg = PerceptualVGG(
            layer_name_list=list(layer_weights.keys()), vgg_type=vgg_type,
            norm_image_with_imagenet_param=norm_image_with_imagenet_param)

    def single_forward(self, source_feature, target_feature):
        # Center both feature maps on the target mean and flatten the
        # spatial dimensions: NxCxHxW -> NxCxHW.
        mean_target_feature = target_feature.mean(dim=[2, 3], keepdim=True)
        source_feature = (source_feature - mean_target_feature).view(
            *source_feature.size()[:2], -1)  # NxCxHW
        target_feature = (target_feature - mean_target_feature).view(
            *source_feature.size()[:2], -1)  # NxCxHW
        # Pairwise cosine distance between all source/target positions.
        source_feature = F.normalize(source_feature, p=2, dim=1)
        target_feature = F.normalize(target_feature, p=2, dim=1)
        cosine_distance = (1 - torch.bmm(source_feature.transpose(1, 2),
                                         target_feature)) / 2  # NxHWxHW
        # Distances relative to each row's minimum, mapped to similarities
        # and row-normalized: cx[n, i, j] is the contextual similarity of
        # source position i and target position j.
        rel_distance = cosine_distance / (
            cosine_distance.min(2, keepdim=True)[0] + self.eps)
        w = torch.exp((1 - rel_distance) / self.h)
        cx = w.div(w.sum(dim=2, keepdim=True))
        # Best-matching source position per target position, averaged,
        # then the negative log.
        cx = cx.max(dim=1, keepdim=True)[0].mean(dim=2)
        return -torch.log(cx).mean()

    def forward(self, x, gt):
        if self.norm_img:
            # Map images from [-1, 1] to [0, 1] before the VGG.
            x = (x + 1.) * 0.5
            gt = (gt + 1.) * 0.5
        # extract vgg features
        x_features = self.vgg(x)
        gt_features = self.vgg(gt.detach())
        loss = 0
        for k in x_features.keys():
            loss += self.single_forward(x_features[k],
                                        gt_features[k]) * self.layer_weights[k]
        return loss
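Not part of the commit, but a minimal usage sketch may help orientation. The layer keys and tensor shapes are illustrative (the PerceptualVGG docstring in perceptual_loss.py below suggests indices such as '4' and '10'), and constructing the loss loads the pretrained VGG19 weights via torchvision:

import torch

from loss.I2I.context_loss import ContextLoss

# Keys are string indices into vgg19.features (see the listing in
# perceptual_loss.py below); values weight each layer's term.
criterion = ContextLoss(layer_weights={'4': 1.0, '10': 1.0}, h=0.1)

fake = torch.rand(2, 3, 64, 64) * 2 - 1  # generator output in [-1, 1]
fake.requires_grad_()
real = torch.rand(2, 3, 64, 64) * 2 - 1  # non-aligned reference images

loss = criterion(fake, real)  # weighted sum of per-layer CX losses
loss.backward()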

loss/I2I/perceptual_loss.py (49 additions, 6 deletions)

@@ -4,6 +4,49 @@ import torch.nn.functional as F
import torchvision.models.vgg as vgg
# Sequential(
# (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (1): ReLU(inplace=True)
# (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (3): ReLU(inplace=True)
# (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (6): ReLU(inplace=True)
# (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (8): ReLU(inplace=True)
# (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (11): ReLU(inplace=True)
# (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (13): ReLU(inplace=True)
# (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (15): ReLU(inplace=True)
# (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (17): ReLU(inplace=True)
# (18): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# (19): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (20): ReLU(inplace=True)
# (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (22): ReLU(inplace=True)
# (23): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (24): ReLU(inplace=True)
# (25): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (26): ReLU(inplace=True)
# (27): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (29): ReLU(inplace=True)
# (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (31): ReLU(inplace=True)
# (32): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (33): ReLU(inplace=True)
# (34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
# (35): ReLU(inplace=True)
# (36): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# )
class PerceptualVGG(nn.Module):
"""VGG network used in calculating perceptual loss.
In this implementation, we allow users to choose whether use normalization
@@ -15,15 +58,15 @@ class PerceptualVGG(nn.Module):
            list contains the name of each layer in `vgg.feature`. An example
            of this list is ['4', '10'].
        vgg_type (str): Set the type of vgg network. Default: 'vgg19'.
-        use_input_norm (bool): If True, normalize the input image.
+        norm_image_with_imagenet_param (bool): If True, normalize the input image.
            Importantly, the input feature must be in the range [0, 1].
            Default: True.
    """
-    def __init__(self, layer_name_list, vgg_type='vgg19', use_input_norm=True):
+    def __init__(self, layer_name_list, vgg_type='vgg19', norm_image_with_imagenet_param=True):
        super(PerceptualVGG, self).__init__()
        self.layer_name_list = layer_name_list
-        self.use_input_norm = use_input_norm
+        self.use_input_norm = norm_image_with_imagenet_param

        # get vgg model and load pretrained vgg weight
        # remove _vgg from attributes to avoid `find_unused_parameters` bug
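As an aside (not in the diff): the names in `layer_name_list` are string indices into `vgg19.features`, i.e. positions in the Sequential listing above. A hypothetical illustration using torchvision directly:

import torchvision.models.vgg as vgg

# '4' and '10' from the docstring example correspond to these slices:
features = vgg.vgg19(pretrained=True).features
up_to_pool1 = features[:5]     # through MaxPool2d at index 4
up_to_conv3_1 = features[:11]  # through Conv2d(128, 256) at index 10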
@@ -75,7 +118,7 @@ class PerceptualLoss(nn.Module):
            in calculating losses.
        vgg_type (str): The type of vgg network used as feature extractor.
            Default: 'vgg19'.
-        use_input_norm (bool): If True, normalize the input image in vgg.
+        norm_image_with_imagenet_param (bool): If True, normalize the input image in vgg.
            Default: True.
        perceptual_loss (bool): If `perceptual_loss == True`, the perceptual
            loss will be calculated.
@@ -88,7 +131,7 @@
            Importantly, the input image must be in range [-1, 1].
    """

-    def __init__(self, layer_weights, vgg_type='vgg19', use_input_norm=True, perceptual_loss=True,
+    def __init__(self, layer_weights, vgg_type='vgg19', norm_image_with_imagenet_param=True, perceptual_loss=True,
                 style_loss=False, norm_img=True, criterion='L1'):
        super(PerceptualLoss, self).__init__()
        self.norm_img = norm_img
@@ -97,7 +140,7 @@
        self.style_loss = style_loss
        self.layer_weights = layer_weights
        self.vgg = PerceptualVGG(layer_name_list=list(layer_weights.keys()), vgg_type=vgg_type,
-                                 use_input_norm=use_input_norm)
+                                 norm_image_with_imagenet_param=norm_image_with_imagenet_param)
        self.percep_criterion, self.style_criterion = self.set_criterion(criterion)
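Call sites only need the keyword rename; behaviour is unchanged. A sketch of constructing the updated PerceptualLoss (layer index and weight are example values):

from loss.I2I.perceptual_loss import PerceptualLoss

percep = PerceptualLoss(
    layer_weights={'34': 1.0},            # e.g. the last vgg19 conv layer
    vgg_type='vgg19',
    norm_image_with_imagenet_param=True,  # was: use_input_norm=True
    perceptual_loss=True,
    style_loss=False,
    norm_img=True,                        # expects inputs in [-1, 1]
    criterion='L1')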