diff --git a/.idea/deployment.xml b/.idea/deployment.xml
index 4c3f8f7..fd2b0b6 100644
--- a/.idea/deployment.xml
+++ b/.idea/deployment.xml
@@ -1,6 +1,6 @@
-
+
diff --git a/.idea/sshConfigs.xml b/.idea/sshConfigs.xml
new file mode 100644
index 0000000..e5044da
--- /dev/null
+++ b/.idea/sshConfigs.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/configs/synthesizers/TAFG.yml b/configs/synthesizers/TAFG.yml
index 2624240..0436434 100644
--- a/configs/synthesizers/TAFG.yml
+++ b/configs/synthesizers/TAFG.yml
@@ -28,7 +28,7 @@ model:
     _type: MultiScaleDiscriminator
     num_scale: 2
     discriminator_cfg:
-      _type: pix2pixHD
+      _type: pix2pixHD-PatchDiscriminator
       in_channels: 3
       base_channels: 64
       use_spectral: True
@@ -47,10 +47,10 @@ loss:
       "11": 0.125
       "20": 0.25
       "29": 1
-    criterion: 'NL1'
+    criterion: 'L2'
     style_loss: False
     perceptual_loss: True
-    weight: 5
+    weight: 0.5
   style:
     layer_weights:
       "1": 0.03125
@@ -64,13 +64,13 @@ loss:
     weight: 0
   fm:
     level: 1
-    weight: 1
+    weight: 10
   recon:
     level: 1
     weight: 10
   style_recon:
     level: 1
-    weight: 10
+    weight: 0
 
 optimizers:
   generator:
diff --git a/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml b/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
index cc1692e..428aa29 100644
--- a/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
+++ b/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
@@ -1,24 +1,20 @@
-name: VoxCeleb2Anime
-engine: UGATIT
+name: selfie2anime-vox2
+engine: U-GAT-IT
 
 result_dir: ./result
 max_pairs: 1000000
 
-distributed:
-  model:
-    # broadcast_buffers: False
-
 misc:
   random_seed: 324
 
-checkpoint:
-  epoch_interval: 1 # one checkpoint every 1 epoch
-  n_saved: 2
-
-interval:
-  print_per_iteration: 10 # print once per 10 iteration
+handler:
+  clear_cuda_cache: True
+  set_epoch_for_dist_sampler: True
+  checkpoint:
+    epoch_interval: 1 # checkpoint once per `epoch_interval` epoch
+    n_saved: 2
   tensorboard:
-    scalar: 10
-    image: 500
+    scalar: 100 # log scalar `scalar` times per epoch
+    image: 2 # log image `image` times per epoch
 
 model:
   generator:
@@ -27,7 +23,7 @@ model:
     out_channels: 3
     base_channels: 64
     num_blocks: 4
-    img_size: 128
+    img_size: 256
     light: True
   local_discriminator:
     _type: UGATIT-Discriminator
@@ -74,7 +70,7 @@ data:
       target_lr: 0
       buffer_size: 50
     dataloader:
-      batch_size: 20
+      batch_size: 6
       shuffle: True
      num_workers: 2
       pin_memory: True
@@ -87,9 +83,9 @@ data:
       pipeline:
         - Load
        - Resize:
-            size: [ 135, 135 ]
+            size: [ 286, 286 ]
        - RandomCrop:
-            size: [ 128, 128 ]
+            size: [ 256, 256 ]
        - RandomHorizontalFlip
        - ToTensor
        - Normalize:
@@ -97,7 +93,7 @@ data:
            std: [ 0.5, 0.5, 0.5 ]
  test:
    dataloader:
-      batch_size: 8
+      batch_size: 4
      shuffle: False
      num_workers: 1
      pin_memory: False
@@ -110,7 +106,7 @@ data:
      pipeline:
        - Load
        - Resize:
-            size: [ 128, 128 ]
+            size: [ 256, 256 ]
        - ToTensor
        - Normalize:
            mean: [ 0.5, 0.5, 0.5 ]
            std: [ 0.5, 0.5, 0.5 ]
@@ -122,7 +118,7 @@ data:
      pipeline:
        - Load
        - Resize:
-            size: [ 128, 128 ]
+            size: [ 256, 256 ]
        - ToTensor
        - Normalize:
            mean: [ 0.5, 0.5, 0.5 ]
            std: [ 0.5, 0.5, 0.5 ]
diff --git a/configs/synthesizers/UGATIT.yml b/configs/synthesizers/UGATIT.yml
index c04e250..81c68b5 100644
--- a/configs/synthesizers/UGATIT.yml
+++ b/configs/synthesizers/UGATIT.yml
@@ -120,3 +120,6 @@ data:
        - Resize:
            size: [ 256, 256 ]
        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
diff --git a/engine/TAFG.py b/engine/TAFG.py
index cd02502..fec01de 100644
--- a/engine/TAFG.py
+++ b/engine/TAFG.py
@@ -90,10 +90,10 @@ class TAFGEngineKernel(EngineKernel):
                 loss_fm += self.fm_loss(pred_fake[i][j], pred_real[i][j].detach()) / num_scale_discriminator
loss[f"fm_{phase}"] = self.config.loss.fm.weight * loss_fm loss["recon"] = self.recon_loss(generated["a"], batch["a"]) * self.config.loss.recon.weight - loss["style_recon"] = self.config.loss.style_recon.weight * self.style_recon_loss( - self.generators["main"].module.style_encoders["b"](batch["b"]), - self.generators["main"].module.style_encoders["b"](generated["b"]) - ) + # loss["style_recon"] = self.config.loss.style_recon.weight * self.style_recon_loss( + # self.generators["main"].module.style_encoders["b"](batch["b"]), + # self.generators["main"].module.style_encoders["b"](generated["b"]) + # ) return loss def criterion_discriminators(self, batch, generated) -> dict: diff --git a/engine/U-GAT-IT.py b/engine/U-GAT-IT.py index 1a089ec..4eb0886 100644 --- a/engine/U-GAT-IT.py +++ b/engine/U-GAT-IT.py @@ -148,6 +148,9 @@ class UGATITTestEngineKernel(TestEngineKernel): def run(task, config, _): if task == "train": kernel = UGATITEngineKernel(config) - if task == "test": + run_kernel(task, config, kernel) + elif task == "test": kernel = UGATITTestEngineKernel(config) - run_kernel(task, config, kernel) + run_kernel(task, config, kernel) + else: + raise NotImplemented diff --git a/engine/base/i2i.py b/engine/base/i2i.py index 94e1154..f7177b2 100644 --- a/engine/base/i2i.py +++ b/engine/base/i2i.py @@ -160,7 +160,7 @@ def get_trainer(config, kernel: EngineKernel): for k in output["img"]: image_list = output["img"][k] - tensorboard_handler.writer.add_image(f"train/{k}", make_2d_grid(image_list), + tensorboard_handler.writer.add_image(f"train/{k}", make_2d_grid(image_list, range=(-1, 1)), engine.state.iteration * pairs_per_iteration) test_images[k] = [] for i in range(len(image_list)): @@ -183,7 +183,7 @@ def get_trainer(config, kernel: EngineKernel): for k in test_images: tensorboard_handler.writer.add_image( f"test/{k}", - make_2d_grid([torch.cat(ti) for ti in test_images[k]]), + make_2d_grid([torch.cat(ti) for ti in test_images[k]], range=(-1, 1)), engine.state.iteration * pairs_per_iteration ) return trainer @@ -218,14 +218,12 @@ def get_tester(config, kernel: TestEngineKernel): for i in range(batch_size): image_name = Path(paths[i]).name torchvision.utils.save_image([img[i] for img in img_tensors], engine.state.img_output_dir / image_name, - nrow=len(img_tensors)) + nrow=len(img_tensors), padding=0, normalize=True, range=(-1, 1)) return tester def run_kernel(task, config, kernel): - assert torch.backends.cudnn.enabled - torch.backends.cudnn.benchmark = True logger = logging.getLogger(config.name) with read_write(config): real_batch_size = config.data.train.dataloader.batch_size * idist.get_world_size() diff --git a/loss/I2I/perceptual_loss.py b/loss/I2I/perceptual_loss.py index 4436089..378b699 100644 --- a/loss/I2I/perceptual_loss.py +++ b/loss/I2I/perceptual_loss.py @@ -98,13 +98,13 @@ class PerceptualLoss(nn.Module): self.vgg = PerceptualVGG(layer_name_list=list(layer_weights.keys()), vgg_type=vgg_type, use_input_norm=use_input_norm) - self.criterion = self.set_criterion(criterion) + self.percep_criterion, self.style_criterion = self.set_criterion(criterion) def set_criterion(self, criterion: str): assert criterion in ["NL1", "NL2", "L1", "L2"] norm = F.instance_norm if criterion.startswith("N") else lambda x: x fn = F.l1_loss if criterion.endswith("L1") else F.mse_loss - return lambda x, t: fn(norm(x), norm(t)) + return lambda x, t: fn(norm(x), norm(t)), lambda x, t: fn(x, t) def forward(self, x, gt): """Forward function. 
@@ -126,7 +126,7 @@ class PerceptualLoss(nn.Module):
         if self.perceptual_loss:
             percep_loss = 0
             for k in x_features.keys():
-                percep_loss += self.criterion(x_features[k], gt_features[k]) * self.layer_weights[k]
+                percep_loss += self.percep_criterion(x_features[k], gt_features[k]) * self.layer_weights[k]
         else:
             percep_loss = None
 
@@ -134,7 +134,7 @@ class PerceptualLoss(nn.Module):
         if self.style_loss:
             style_loss = 0
             for k in x_features.keys():
-                style_loss += self.criterion(self._gram_mat(x_features[k]), self._gram_mat(gt_features[k])) * \
+                style_loss += self.style_criterion(self._gram_mat(x_features[k]), self._gram_mat(gt_features[k])) * \
                               self.layer_weights[k]
         else:
             style_loss = None
diff --git a/main.py b/main.py
index 7e1c1d5..a515813 100644
--- a/main.py
+++ b/main.py
@@ -65,6 +65,8 @@ def run(task, config: str, *omega_options, **kwargs):
     backup_config = kwargs.get("backup_config", False)
     setup_output_dir = kwargs.get("setup_output_dir", False)
     setup_random_seed = kwargs.get("setup_random_seed", False)
+    assert torch.backends.cudnn.enabled
+    torch.backends.cudnn.benchmark = True
     with idist.Parallel(backend=backend) as parallel:
         parallel.run(running, conf, task, backup_config=backup_config, setup_output_dir=setup_output_dir,
                      setup_random_seed=setup_random_seed)
diff --git a/model/GAN/UGATIT.py b/model/GAN/UGATIT.py
index 5b857fc..f4d4bb0 100644
--- a/model/GAN/UGATIT.py
+++ b/model/GAN/UGATIT.py
@@ -1,6 +1,6 @@
 import torch
 import torch.nn as nn
-from .residual_generator import ResidualBlock
+from .base import ResidualBlock
 
 from model.registry import MODEL
 
diff --git a/tool/verify_loss.py b/tool/verify_loss.py
new file mode 100644
index 0000000..d3e208e
--- /dev/null
+++ b/tool/verify_loss.py
@@ -0,0 +1,69 @@
+import torch
+from torch.utils.data import DataLoader
+from ignite.utils import convert_tensor
+from omegaconf import OmegaConf
+
+from data.dataset import SingleFolderDataset
+from loss.I2I.perceptual_loss import PerceptualLoss
+
+import ignite.distributed as idist
+
+CONFIG = """
+loss:
+  perceptual:
+    layer_weights:
+      "1": 0.03125
+      "6": 0.0625
+      "11": 0.125
+      "20": 0.25
+      "29": 1
+    criterion: 'NL2'
+    style_loss: False
+    perceptual_loss: True
+match_data:
+  root: "/tmp/generated/"
+  pipeline:
+    - Load
+    - ToTensor
+    - Normalize:
+        mean: [ 0.5, 0.5, 0.5 ]
+        std: [ 0.5, 0.5, 0.5 ]
+not_match_data:
+  root: "/data/i2i/selfie2anime/trainB/"
+  pipeline:
+    - Load
+    - ToTensor
+    - Normalize:
+        mean: [ 0.5, 0.5, 0.5 ]
+        std: [ 0.5, 0.5, 0.5 ]
+"""
+
+config = OmegaConf.create(CONFIG)
+dataset = SingleFolderDataset(**config.match_data)
+data_loader = DataLoader(dataset, 1, False, num_workers=1)
+
+perceptual_loss = PerceptualLoss(**config.loss.perceptual).to("cuda:0")
+
+pls = []
+for batch in data_loader:
+    with torch.no_grad():
+        batch = convert_tensor(batch, "cuda:0")
+        x, t = torch.chunk(batch, 2, -1)
+        pl, _ = perceptual_loss(x, t)
+        print(pl)
+        pls.append(pl)
+
+torch.save(torch.stack(pls).cpu(), "verify_loss.match.pt")
+
+dataset = SingleFolderDataset(**config.not_match_data)
+data_loader = DataLoader(dataset, 4, False, num_workers=1)
+pls = []
+for batch in data_loader:
+    with torch.no_grad():
+        batch = convert_tensor(batch, "cuda:0")
+        for i, j in [(0, 1), (1, 2), (2, 3), (3, 0)]:
+            x, t = batch[i].unsqueeze(dim=0), batch[j].unsqueeze(dim=0)
+            pl, _ = perceptual_loss(x, t)
+            print(pl)
+            pls.append(pl)
+torch.save(torch.stack(pls).cpu(), "verify_loss.not_match.pt")
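Note on the `PerceptualLoss` change: `set_criterion` now returns two callables, a perceptual criterion that may instance-normalize VGG features (the `N` prefix in `NL1`/`NL2`) and a style criterion that compares Gram matrices without normalization. The sketch below only mirrors that split in isolation so it can be run without the repository; the VGG features are stubbed with random tensors and the `gram` helper is illustrative (the repository uses `PerceptualLoss._gram_mat`), so treat it as a standalone check, not the project's module.

```python
import torch
import torch.nn.functional as F


def set_criterion(criterion: str):
    """Return (perceptual criterion, style criterion), mirroring the patched
    PerceptualLoss.set_criterion: only the perceptual term applies the
    optional instance norm; the style term compares its inputs directly."""
    assert criterion in ["NL1", "NL2", "L1", "L2"]
    norm = F.instance_norm if criterion.startswith("N") else (lambda x: x)
    fn = F.l1_loss if criterion.endswith("L1") else F.mse_loss
    return (lambda x, t: fn(norm(x), norm(t))), (lambda x, t: fn(x, t))


def gram(feat):
    # (N, C, H, W) -> (N, C, C) Gram matrix, normalized by C*H*W.
    n, c, h, w = feat.size()
    f = feat.view(n, c, h * w)
    return f.bmm(f.transpose(1, 2)) / (c * h * w)


if __name__ == "__main__":
    percep_criterion, style_criterion = set_criterion("NL2")
    # Stand-ins for one VGG feature map of the generated image and the target.
    x, gt = torch.randn(2, 64, 32, 32), torch.randn(2, 64, 32, 32)
    print("perceptual:", percep_criterion(x, gt).item())        # instance-normalized MSE on features
    print("style:", style_criterion(gram(x), gram(gt)).item())  # plain MSE on Gram matrices
```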