7 changed files with 19 additions and 228 deletions
--- a/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
+++ b/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
@ -1,129 +0,0 @@
 # Run identity and output location for this experiment.
 name: VoxCeleb2Anime
 engine: UGATIT
 result_dir: ./result
 max_pairs: 1000000
 distributed:
  # No value is set for this key; the only option listed is commented out.
  model:
  # broadcast_buffers: False
 misc:
  # engine/UGATIT.py seeds a torch.Generator with this value before it
  # samples the test images it visualises.
  random_seed: 324
 checkpoint:
  epoch_interval: 1 # save one checkpoint every epoch
  n_saved: 2
 interval:
  print_per_iteration: 10 # print once every 10 iterations
  # presumably logging intervals counted in iterations — TODO confirm
  tensorboard:
    scalar: 10
    image: 500
 # Networks: one generator plus a local and a global discriminator.
 model:
  generator:
    _type: UGATIT-Generator
    in_channels: 3
    out_channels: 3
    base_channels: 64
    num_blocks: 4
    # Same edge length as the 128x128 crops/resizes in the data pipelines.
    img_size: 128
    light: true
  # The two discriminators share a type and differ only in num_blocks.
  local_discriminator:
    _type: UGATIT-Discriminator
    in_channels: 3
    base_channels: 64
    num_blocks: 5
  global_discriminator:
    _type: UGATIT-Discriminator
    in_channels: 3
    base_channels: 64
    num_blocks: 7
 # Loss terms and the weight applied to each.
 loss:
  gan:
    loss_type: lsgan
    weight: 1.0
    # Label values used for real and fake samples.
    real_label_val: 1.0
    fake_label_val: 0.0
  cycle:
    # presumably the norm order (1 = L1) — TODO confirm against the engine
    level: 1
    weight: 10.0
  id:
    level: 1
    weight: 10.0
  cam:
    weight: 1000
 # Optimizer settings; generator and discriminator use the same Adam
 # hyper-parameters.
 optimizers:
  generator:
    _type: Adam
    lr: 0.0001
    betas: [0.5, 0.999]
    weight_decay: 0.0001
  discriminator:
    _type: Adam
    # Written as a plain decimal, matching the generator entry: YAML 1.1
    # loaders such as PyYAML resolve `1e-4` (no decimal point) as a string,
    # not a float.
    lr: 0.0001
    betas: [0.5, 0.999]
    weight_decay: 0.0001
 # Datasets, loaders and preprocessing for the train and test splits.
 data:
  train:
    scheduler:
      start_proportion: 0.5
      target_lr: 0
    buffer_size: 50
    dataloader:
      batch_size: 20
      shuffle: true
      num_workers: 2
      pin_memory: true
      drop_last: true
    dataset:
      _type: GenerationUnpairedDataset
      root_a: "/data/i2i/VoxCeleb2Anime/trainA"
      root_b: "/data/i2i/VoxCeleb2Anime/trainB"
      random_pair: true
      # Resize to 135x135, then random-crop to 128x128 and randomly flip.
      pipeline:
        - Load
        - Resize:
            size: [135, 135]
        - RandomCrop:
            size: [128, 128]
        - RandomHorizontalFlip
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
  test:
    dataloader:
      batch_size: 8
      shuffle: false
      num_workers: 1
      pin_memory: false
      drop_last: false
    # Unpaired A/B test images, resized straight to 128x128 (no random ops).
    dataset:
      _type: GenerationUnpairedDataset
      root_a: "/data/i2i/VoxCeleb2Anime/testA"
      root_b: "/data/i2i/VoxCeleb2Anime/testB"
      random_pair: false
      pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
    # Frames read from a single folder, with the same preprocessing as above.
    video_dataset:
      _type: SingleFolderDataset
      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
      with_path: true
      pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
--- a/configs/synthesizers/UGATIT.yml
+++ b/configs/synthesizers/UGATIT.yml
@ -115,15 +115,3 @@ data:
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
    # Frames read from a single folder and resized to 256x256.
    # The pre-change `test` task in engine/UGATIT.py built its dataset from
    # config.data.test.video_dataset.
    video_dataset:
      _type: SingleFolderDataset
      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
      with_path: True
      pipeline:
        - Load
        - Resize:
            size: [ 256, 256 ]
        - ToTensor
        # Same mean/std of 0.5 on each of the three channels.
        - Normalize:
            mean: [ 0.5, 0.5, 0.5 ]
            std: [ 0.5, 0.5, 0.5 ]
--- a/engine/UGATIT.py
+++ b/engine/UGATIT.py
@ -219,12 +219,12 @@ def get_trainer(config, logger):
            with torch.no_grad():
                g = torch.Generator()
                g.manual_seed(config.misc.random_seed)
-                random_start = torch.randperm(len(engine.state.test_dataset)-11, generator=g).tolist()[0]
+                indices = torch.randperm(len(engine.state.test_dataset), generator=g).tolist()[:10]
                test_images = dict(
                    a=[[], [], [], []],
                    b=[[], [], [], []]
                )
-                for i in range(random_start, random_start+10):
+                for i in indices:
                    batch = convert_tensor(engine.state.test_dataset[i], idist.device())
                    real_a, real_b = batch["a"].view(1, *batch["a"].size()), batch["b"].view(1, *batch["a"].size())
@ -278,6 +278,7 @@ def get_tester(config, logger):
        paths = engine.state.output["path"]
        batch_size = img_tensors[0].size(0)
        for i in range(batch_size):
            # image_name = f"{engine.state.iteration * batch_size - batch_size + i + 1}.png"
            image_name = Path(paths[i]).name
            torchvision.utils.save_image([img[i] for img in img_tensors], engine.state.img_output_dir / image_name,
                                         nrow=len(img_tensors))
@ -307,7 +308,7 @@ def run(task, config, logger):
            print(traceback.format_exc())
    elif task == "test":
        assert config.resume_from is not None
-        test_dataset = data.DATASET.build_with(config.data.test.video_dataset)
+        test_dataset = data.DATASET.build_with(config.data.test.dataset)
        logger.info(f"test with dataset:\n{test_dataset}")
        test_data_loader = idist.auto_dataloader(test_dataset, **config.data.test.dataloader)
        tester = get_tester(config, logger)
--- a/model/GAN/residual_generator.py
+++ b/model/GAN/residual_generator.py
@ -1,7 +1,18 @@
 import torch
 import torch.nn as nn
 import functools
 from model.registry import MODEL
-from model.normalization import select_norm_layer
+
 def _select_norm_layer(norm_type):
    """Return a factory for the normalization layer named by ``norm_type``.

    The returned callable is invoked with the channel count and builds the layer:

    * ``"BN"``   -> ``nn.BatchNorm2d`` with ``affine=True, track_running_stats=True``
    * ``"IN"``   -> ``nn.InstanceNorm2d`` with ``affine=False, track_running_stats=False``
    * ``"NONE"`` -> ``nn.Identity`` (the channel count is ignored)

    Raises:
        NotImplementedError: if ``norm_type`` is none of the names above.
    """
    if norm_type == "BN":
        return functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
    elif norm_type == "IN":
        return functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
    elif norm_type == "NONE":
        # Accept and discard the channel count so callers can treat every factory alike.
        return lambda x: nn.Identity()
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception class:
        # calling it raises TypeError and hides the intended message.
        raise NotImplementedError(f'normalization layer {norm_type} is not found')
 class GANImageBuffer(object):
@ -66,7 +77,7 @@ class ResidualBlock(nn.Module):
        if use_bias is None:
            # Only for IN, use bias since it does not have affine parameters.
            use_bias = norm_type == "IN"
-        norm_layer = select_norm_layer(norm_type)
+        norm_layer = _select_norm_layer(norm_type)
        models = [nn.Sequential(
            nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1, padding_mode=padding_mode, bias=use_bias),
            norm_layer(num_channels),
@ -90,7 +101,7 @@ class ResGenerator(nn.Module):
                 norm_type="IN", use_dropout=False):
        super(ResGenerator, self).__init__()
        assert num_blocks >= 0, f'Number of residual blocks must be non-negative, but got {num_blocks}.'
-        norm_layer = select_norm_layer(norm_type)
+        norm_layer = _select_norm_layer(norm_type)
        use_bias = norm_type == "IN"
        self.start_conv = nn.Sequential(
@ -146,7 +157,7 @@ class PatchDiscriminator(nn.Module):
    def __init__(self, in_channels, base_channels=64, num_conv=3, norm_type="IN"):
        super(PatchDiscriminator, self).__init__()
        assert num_conv >= 0, f'Number of conv blocks must be non-negative, but got {num_conv}.'
-        norm_layer = select_norm_layer(norm_type)
+        norm_layer = _select_norm_layer(norm_type)
        use_bias = norm_type == "IN"
        kernel_size = 4
--- a/model/normalization.py
+++ b/model/normalization.py
@ -1,13 +0,0 @@
 import torch.nn as nn
 import functools
 def select_norm_layer(norm_type):
    """Return a factory for the normalization layer named by ``norm_type``.

    The returned callable is invoked with the channel count and builds the layer:

    * ``"BN"``   -> ``nn.BatchNorm2d`` with ``affine=True, track_running_stats=True``
    * ``"IN"``   -> ``nn.InstanceNorm2d`` with ``affine=False, track_running_stats=False``
    * ``"NONE"`` -> ``nn.Identity`` (the channel count is ignored)

    Raises:
        NotImplementedError: if ``norm_type`` is none of the names above.
    """
    if norm_type == "BN":
        return functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
    elif norm_type == "IN":
        return functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
    elif norm_type == "NONE":
        # Accept and discard the channel count so callers can treat every factory alike.
        return lambda x: nn.Identity()
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception class:
        # calling it raises TypeError and hides the intended message.
        raise NotImplementedError(f'normalization layer {norm_type} is not found')
--- a/tool/generate_video.sh
+++ b/tool/generate_video.sh
@ -1,20 +0,0 @@
 #!/usr/bin/env bash
 # Usage: generate_video.sh <checkpoint>
 # Encodes the PNG frames found in /tmp/frames into one MPEG-4 file per id
 # under ./tmp. Frame files are expected to be named "<id>@<n>.png".
 set -o noclobber
 set -o errexit  # Used to exit upon error, avoiding cascading errors
 set -o pipefail # Unveils hidden failures
 set -o nounset  # Exposes unset variables
 pt=${1}
 # Strip spaces and tabs from the checkpoint argument.
 pt=${pt//[[:blank:]]/}
 output_dir=/tmp/frames
 # Disabled frame-generation step, kept as found.
 # NOTE(review): the leading "." and the "/run.sh" path look transposed
 # (presumably "rm -rf ... && ./run.sh ...") — confirm before enabling.
 # .rm -rf $output_dir && /run.sh configs/synthesizers/UGATIT.yml test 0 resume_from=${pt} img_output_dir=${output_dir}
 # Everything before the first "@" in a frame file name is its id.
 ids=$(ls "${output_dir}" | cut -d "@" -f 1 | sort -u)
 # -p: do not abort under errexit when tmp already exists from an earlier run.
 mkdir -p tmp
 # $ids is left unquoted on purpose: it holds one id per line.
 for id in $ids; do
  echo "$id"
  ffmpeg -y -i "${output_dir}/${id}@%d.png" -vcodec mpeg4 "tmp/${id}.mp4"
  # ffmpeg -y -f image2 -i "${output_dir}/${id}@%d.png" tmp/${id}.gif;
 done
--- a/tool/process/vox2_get_I_frame.sh
+++ b/tool/process/vox2_get_I_frame.sh
@ -1,47 +0,0 @@
 #!/usr/bin/env bash
 # Input locations: the metadata CSV and the dev/test video roots.
 # The trailing "/" on the roots matters: iter_videos appends the id directly
 # to the root when it builds the per-id glob.
 META_FILE="/data/VoxCeleb2/vox2_meta.csv"
 VOX2_TEST_PATH="/data/VoxCeleb2/test/mp4/"
 VOX2_DEV_PATH="/data/VoxCeleb2/dev/mp4/"
 # generate_frame CLIP_PATH ROOT SAVE_PREFIX
 # Runs ffmpeg on one clip, selecting frames whose pict_type is I, and writes
 # them as JPEGs named SAVE_PREFIX<clip path relative to ROOT, "/" -> "-">_<n>.jpg.
 # All ffmpeg output is appended to ff.log. The clip path is then appended to
 # done.txt when at least one frame file exists, otherwise to not_done.txt.
 generate_frame() {
  local clip_path=$1
  local prefix=$2
  local save_path=$3
  local fn gnum
  # Quote the prefix so it is removed literally instead of being used as a glob pattern.
  fn="${clip_path##"$prefix"}"
  fn=${fn//\//-}
  ffmpeg -hide_banner -loglevel panic -threads 8 -i "$clip_path" -vf select="'eq(pict_type\,I)'" -vsync 2 -f image2 "$save_path${fn%.mp4}_%d.jpg" &>>ff.log
  # Only the trailing "_*" is left unquoted so paths with spaces stay one word.
  gnum=$(ls "$save_path${fn%.mp4}"_* | wc -l)
  if [ "$gnum" -eq 0 ]; then
    echo "$clip_path" >>"not_done.txt"
    echo "$clip_path ERROR"
  else
    echo "$clip_path" >>"done.txt"
  fi
 }
 # iter_videos ID_LIST ROOT SAVE_PREFIX
 # Prints the number of lines in ID_LIST, then for each id listed walks
 # ROOT<id>/*/ and, for every sub-directory holding at least 4 entries,
 # calls generate_frame on each *.mp4 inside it.
 iter_videos() {
  local idl=$1
  local root=$2
  local save_path=$3
  local pid vp num cp
  wc -l <"$idl"
  # The list is read on fd 10 rather than stdin — presumably so that ffmpeg
  # in the loop body cannot consume it (confirm).
  while read -r -u 10 pid; do
    echo "$pid"
    for vp in "$root$pid"/*/; do
      num=$(ls "$vp" | wc -l)
      if [ "$num" -ge 4 ]; then
        echo "$vp"
        for cp in "$vp"*.mp4; do
          generate_frame "$cp" "$root" "$save_path"
        done
      fi
    done
  done 10<"$idl"
 }
 # Build one id list per split: keep metadata rows that contain "f" and the
 # split name, then extract the "id<digits>" token from each.
 # NOTE(review): "grep f" matches any row containing the letter f —
 # presumably meant to select on a gender column; confirm against the CSV.
 for split in dev test; do
  grep f "$META_FILE" | grep "$split" | grep -Po "id[0-9]+" >"f_${split}.txt"
 done
 # Extract frames for the dev ids first, then for the test ids.
 iter_videos f_dev.txt "$VOX2_DEV_PATH" "temp/dev/"
 iter_videos f_test.txt "$VOX2_TEST_PATH" "temp/test/"