UGATIT pipeline

2020-08-28 08:15:29 +08:00 · 2020-08-28 08:15:29 +08:00 · 42d6253a1d
commit 42d6253a1d
parent 09db0a413f
5 changed files with 211 additions and 4 deletions
--- a/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
+++ b/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
@ -0,0 +1,129 @@
+name: VoxCeleb2Anime
+engine: UGATIT
+result_dir: ./result
+max_pairs: 1000000
+
+distributed:
+  model:  # value is null as long as the option below stays commented out
+  # broadcast_buffers: False
+
+misc:
+  random_seed: 324  # seeds the torch.Generator that picks test images in get_trainer (engine/UGATIT.py)
+
+checkpoint:
+  epoch_interval: 1 # one checkpoint per epoch
+  n_saved: 2
+
+interval:
+  print_per_iteration: 10 # print once every 10 iterations
+  tensorboard:  # presumably logging intervals counted in iterations - TODO confirm
+    scalar: 10
+    image: 500
+
+model:
+  generator:
+    _type: UGATIT-Generator
+    in_channels: 3
+    out_channels: 3
+    base_channels: 64
+    num_blocks: 4
+    img_size: 128  # same 128x128 size the data pipelines in this file crop/resize to
+    light: True
+  local_discriminator:
+    _type: UGATIT-Discriminator
+    in_channels: 3
+    base_channels: 64
+    num_blocks: 5
+  global_discriminator:  # same _type and channels as local_discriminator, only num_blocks differs
+    _type: UGATIT-Discriminator
+    in_channels: 3
+    base_channels: 64
+    num_blocks: 7
+
+loss:
+  gan:
+    loss_type: lsgan
+    weight: 1.0
+    real_label_val: 1.0
+    fake_label_val: 0.0
+  cycle:
+    level: 1
+    weight: 10.0
+  id:
+    level: 1
+    weight: 10.0
+  cam:
+    weight: 1000
+
+# Adam settings for the generator and discriminator optimizers; both use the
+# same learning rate, betas and weight decay.
+optimizers:
+  generator:
+    _type: Adam
+    lr: 0.0001
+    betas: [ 0.5, 0.999 ]
+    weight_decay: 0.0001
+  discriminator:
+    _type: Adam
+    # Plain decimal, like the generator's lr: YAML 1.1 loaders such as PyYAML
+    # resolve a dot-less exponent ("1e-4") to a string instead of a float.
+    lr: 0.0001
+    betas: [ 0.5, 0.999 ]
+    weight_decay: 0.0001
+
+data:
+  train:
+    scheduler:
+      start_proportion: 0.5
+      target_lr: 0
+    buffer_size: 50
+    dataloader:
+      batch_size: 20
+      shuffle: True
+      num_workers: 2
+      pin_memory: True
+      drop_last: True
+    dataset:
+      _type: GenerationUnpairedDataset
+      root_a: "/data/i2i/VoxCeleb2Anime/trainA"
+      root_b: "/data/i2i/VoxCeleb2Anime/trainB"
+      random_pair: True
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 135, 135 ]  # slightly larger than the 128x128 crop taken next
+        - RandomCrop:
+            size: [ 128, 128 ]  # equals model.generator.img_size
+        - RandomHorizontalFlip
+        - ToTensor
+        - Normalize:  # presumably (x - mean) / std per channel - TODO confirm against the transform
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
+  test:
+    dataloader:
+      batch_size: 8
+      shuffle: False
+      num_workers: 1
+      pin_memory: False
+      drop_last: False
+    dataset:
+      _type: GenerationUnpairedDataset
+      root_a: "/data/i2i/VoxCeleb2Anime/testA"
+      root_b: "/data/i2i/VoxCeleb2Anime/testB"
+      random_pair: False
+      pipeline:  # unlike train: direct resize, no RandomCrop / RandomHorizontalFlip
+        - Load
+        - Resize:
+            size: [ 128, 128 ]
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
+    video_dataset:  # built by the "test" task in engine/UGATIT.py (config.data.test.video_dataset)
+      _type: SingleFolderDataset
+      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
+      with_path: True  # presumably provides the "path" the tester uses to name output images - TODO confirm
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 128, 128 ]
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
--- a/configs/synthesizers/UGATIT.yml
+++ b/configs/synthesizers/UGATIT.yml
@ -115,3 +115,15 @@ data:
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
+    video_dataset:  # built by the "test" task in engine/UGATIT.py (config.data.test.video_dataset)
+      _type: SingleFolderDataset
+      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"  # NOTE(review): same path as in UGATIT-VoxCeleb2Anime.yml - confirm it is intended for this config
+      with_path: True
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 256, 256 ]  # 256x256 here, 128x128 in UGATIT-VoxCeleb2Anime.yml
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
--- a/engine/UGATIT.py
+++ b/engine/UGATIT.py
@ -219,12 +219,12 @@ def get_trainer(config, logger):
            with torch.no_grad():
                g = torch.Generator()
                g.manual_seed(config.misc.random_seed)
-                indices = torch.randperm(len(engine.state.test_dataset), generator=g).tolist()[:10]
+                random_start = torch.randperm(len(engine.state.test_dataset)-11, generator=g).tolist()[0]
                test_images = dict(
                    a=[[], [], [], []],
                    b=[[], [], [], []]
                )
-                for i in indices:
+                for i in range(random_start, random_start+10):
                    batch = convert_tensor(engine.state.test_dataset[i], idist.device())

                    real_a, real_b = batch["a"].view(1, *batch["a"].size()), batch["b"].view(1, *batch["a"].size())
@ -278,7 +278,6 @@ def get_tester(config, logger):
        paths = engine.state.output["path"]
        batch_size = img_tensors[0].size(0)
        for i in range(batch_size):
-            # image_name = f"{engine.state.iteration * batch_size - batch_size + i + 1}.png"
            image_name = Path(paths[i]).name
            torchvision.utils.save_image([img[i] for img in img_tensors], engine.state.img_output_dir / image_name,
                                         nrow=len(img_tensors))
@ -308,7 +307,7 @@ def run(task, config, logger):
            print(traceback.format_exc())
    elif task == "test":
        assert config.resume_from is not None
-        test_dataset = data.DATASET.build_with(config.data.test.dataset)
+        test_dataset = data.DATASET.build_with(config.data.test.video_dataset)
        logger.info(f"test with dataset:\n{test_dataset}")
        test_data_loader = idist.auto_dataloader(test_dataset, **config.data.test.dataloader)
        tester = get_tester(config, logger)
--- a/tool/generate_video.sh
+++ b/tool/generate_video.sh
@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+set -o noclobber
+set -o errexit  # Used to exit upon error, avoiding cascading errors
+set -o pipefail # Unveils hidden failures
+set -o nounset  # Exposes unset variables
+
+pt=${1}
+pt=${pt//[[:black:]]/}
+
+output_dir=/tmp/frames
+# .rm -rf $output_dir && /run.sh configs/synthesizers/UGATIT.yml test 0 resume_from=${pt} img_output_dir=${output_dir}
+ids=$(ls ${output_dir} | cut -d "@" -f 1 | uniq)
+
+mkdir tmp
+for id in $ids; do
+  echo $id
+  ffmpeg -y -i "${output_dir}/${id}@%d.png" -vcodec mpeg4 tmp/${id}.mp4
+  # ffmpeg -y -f image2 -i "${output_dir}/${id}@%d.png" tmp/${id}.gif;
+done
--- a/tool/process/vox2_get_I_frame.sh
+++ b/tool/process/vox2_get_I_frame.sh
@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+META_FILE="/data/VoxCeleb2/vox2_meta.csv"  # metadata file filtered with grep to build the id lists
+VOX2_TEST_PATH="/data/VoxCeleb2/test/mp4/"  # trailing "/" is required: iter_videos appends the id directly
+VOX2_DEV_PATH="/data/VoxCeleb2/dev/mp4/"  # trailing "/" is required: iter_videos appends the id directly
+
+generate_frame() {
+  clip_path=$1
+  save_path=$3
+  fn="${clip_path##$2}"
+  fn=${fn//\//-}
+  ffmpeg -hide_banner -loglevel panic -threads 8 -i "$clip_path" -vf select="'eq(pict_type\,I)'" -vsync 2 -f image2 "$save_path${fn%.mp4}_%d.jpg" &>>ff.log
+
+  gnum=$(ls $save_path${fn%.mp4}_* | wc -l)
+  if [ $gnum -eq 0 ]; then
+    echo $clip_path >>"not_done.txt"
+    echo $clip_path ERROR
+  else
+    echo $clip_path >>"done.txt"
+  fi
+}
+
+# Run generate_frame on every clip of every id listed in a file.
+#   $1  file with one id per line
+#   $2  dataset root; clips are looked up as "<$2><id>/<subdir>/*.mp4"
+#   $3  output prefix handed through to generate_frame
+# Only subdirectories holding at least 4 entries are processed.
+iter_videos() {
+  local idl=$1
+  local root=$2
+  local save_path=$3
+  local pid vp cp num
+
+  # Print how many ids will be processed.
+  wc -l <"$idl"
+  # The id list is read on fd 10 so stdin stays untouched for the commands
+  # run inside the loop.
+  while read -r -u 10 pid; do
+    echo "$pid"
+    for vp in "$root$pid"/*/; do
+      # An unmatched glob stays literal; skip it instead of listing a bogus path.
+      [ -d "$vp" ] || continue
+      num=$(ls "$vp" | wc -l)
+      if [ "$num" -ge 4 ]; then
+        echo "$vp"
+        for cp in "$vp"*.mp4; do
+          # Same guard for directories that contain no .mp4 file.
+          [ -e "$cp" ] || continue
+          generate_frame "$cp" "$root" "$save_path"
+        done
+      fi
+    done
+  done 10<"$idl"
+}
+
+cat $META_FILE | grep f | grep dev | grep -Po "id[0-9]+" >f_dev.txt
+cat $META_FILE | grep f | grep test | grep -Po "id[0-9]+" >f_test.txt
+
+iter_videos f_dev.txt $VOX2_DEV_PATH "temp/dev/"
+iter_videos f_test.txt $VOX2_TEST_PATH "temp/test/"