From 42d6253a1dd543f0aa7bf4d6d40f6372ed88605d Mon Sep 17 00:00:00 2001
From: budui
Date: Fri, 28 Aug 2020 08:15:29 +0800
Subject: [PATCH] UGATIT pipeline

---
 .../synthesizers/UGATIT-VoxCeleb2Anime.yml | 129 ++++++++++++++++++
 configs/synthesizers/UGATIT.yml            |  12 ++
 engine/UGATIT.py                           |   7 +-
 tool/generate_video.sh                     |  20 +++
 tool/process/vox2_get_I_frame.sh           |  47 +++++++
 5 files changed, 211 insertions(+), 4 deletions(-)
 create mode 100644 configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
 create mode 100644 tool/generate_video.sh
 create mode 100644 tool/process/vox2_get_I_frame.sh

diff --git a/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml b/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
new file mode 100644
index 0000000..cc1692e
--- /dev/null
+++ b/configs/synthesizers/UGATIT-VoxCeleb2Anime.yml
@@ -0,0 +1,129 @@
+name: VoxCeleb2Anime
+engine: UGATIT
+result_dir: ./result
+max_pairs: 1000000
+
+distributed:
+  model:
+    # broadcast_buffers: False
+
+misc:
+  random_seed: 324
+
+checkpoint:
+  epoch_interval: 1 # one checkpoint every 1 epoch
+  n_saved: 2
+
+interval:
+  print_per_iteration: 10 # print once per 10 iteration
+  tensorboard:
+    scalar: 10
+    image: 500
+
+model:
+  generator:
+    _type: UGATIT-Generator
+    in_channels: 3
+    out_channels: 3
+    base_channels: 64
+    num_blocks: 4
+    img_size: 128
+    light: True
+  local_discriminator:
+    _type: UGATIT-Discriminator
+    in_channels: 3
+    base_channels: 64
+    num_blocks: 5
+  global_discriminator:
+    _type: UGATIT-Discriminator
+    in_channels: 3
+    base_channels: 64
+    num_blocks: 7
+
+loss:
+  gan:
+    loss_type: lsgan
+    weight: 1.0
+    real_label_val: 1.0
+    fake_label_val: 0.0
+  cycle:
+    level: 1
+    weight: 10.0
+  id:
+    level: 1
+    weight: 10.0
+  cam:
+    weight: 1000
+
+optimizers:
+  generator:
+    _type: Adam
+    lr: 0.0001
+    betas: [ 0.5, 0.999 ]
+    weight_decay: 0.0001
+  discriminator:
+    _type: Adam
+    lr: 1e-4
+    betas: [ 0.5, 0.999 ]
+    weight_decay: 0.0001
+
+data:
+  train:
+    scheduler:
+      start_proportion: 0.5
+      target_lr: 0
+    buffer_size: 50
+    dataloader:
+      batch_size: 20
+      shuffle: True
+      num_workers: 2
+      pin_memory: True
+      drop_last: True
+    dataset:
+      _type: GenerationUnpairedDataset
+      root_a: "/data/i2i/VoxCeleb2Anime/trainA"
+      root_b: "/data/i2i/VoxCeleb2Anime/trainB"
+      random_pair: True
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 135, 135 ]
+        - RandomCrop:
+            size: [ 128, 128 ]
+        - RandomHorizontalFlip
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
+  test:
+    dataloader:
+      batch_size: 8
+      shuffle: False
+      num_workers: 1
+      pin_memory: False
+      drop_last: False
+    dataset:
+      _type: GenerationUnpairedDataset
+      root_a: "/data/i2i/VoxCeleb2Anime/testA"
+      root_b: "/data/i2i/VoxCeleb2Anime/testB"
+      random_pair: False
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 128, 128 ]
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
+    video_dataset:
+      _type: SingleFolderDataset
+      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
+      with_path: True
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 128, 128 ]
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
diff --git a/configs/synthesizers/UGATIT.yml b/configs/synthesizers/UGATIT.yml
index 05d5311..96f1981 100644
--- a/configs/synthesizers/UGATIT.yml
+++ b/configs/synthesizers/UGATIT.yml
@@ -115,3 +115,15 @@ data:
         - Normalize:
             mean: [0.5, 0.5, 0.5]
             std: [0.5, 0.5, 0.5]
+    video_dataset:
+      _type: SingleFolderDataset
+      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
+      with_path: True
+      pipeline:
+        - Load
+        - Resize:
+            size: [ 256, 256 ]
+        - ToTensor
+        - Normalize:
+            mean: [ 0.5, 0.5, 0.5 ]
+            std: [ 0.5, 0.5, 0.5 ]
diff --git a/engine/UGATIT.py b/engine/UGATIT.py
index 51956ad..256bdb4 100644
--- a/engine/UGATIT.py
+++ b/engine/UGATIT.py
@@ -219,12 +219,12 @@ def get_trainer(config, logger):
         with torch.no_grad():
             g = torch.Generator()
             g.manual_seed(config.misc.random_seed)
-            indices = torch.randperm(len(engine.state.test_dataset), generator=g).tolist()[:10]
+            random_start = torch.randperm(len(engine.state.test_dataset)-11, generator=g).tolist()[0]
             test_images = dict(
                 a=[[], [], [], []],
                 b=[[], [], [], []]
             )
-            for i in indices:
+            for i in range(random_start, random_start+10):
                 batch = convert_tensor(engine.state.test_dataset[i], idist.device())
                 real_a, real_b = batch["a"].view(1, *batch["a"].size()), batch["b"].view(1, *batch["a"].size())
 
@@ -278,7 +278,6 @@ def get_tester(config, logger):
         paths = engine.state.output["path"]
         batch_size = img_tensors[0].size(0)
         for i in range(batch_size):
-            # image_name = f"{engine.state.iteration * batch_size - batch_size + i + 1}.png"
             image_name = Path(paths[i]).name
             torchvision.utils.save_image([img[i] for img in img_tensors],
                                          engine.state.img_output_dir / image_name, nrow=len(img_tensors))
@@ -308,7 +307,7 @@ def run(task, config, logger):
             print(traceback.format_exc())
     elif task == "test":
         assert config.resume_from is not None
-        test_dataset = data.DATASET.build_with(config.data.test.dataset)
+        test_dataset = data.DATASET.build_with(config.data.test.video_dataset)
         logger.info(f"test with dataset:\n{test_dataset}")
         test_data_loader = idist.auto_dataloader(test_dataset, **config.data.test.dataloader)
         tester = get_tester(config, logger)
diff --git a/tool/generate_video.sh b/tool/generate_video.sh
new file mode 100644
index 0000000..3aa76f6
--- /dev/null
+++ b/tool/generate_video.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+set -o noclobber
+set -o errexit # Used to exit upon error, avoiding cascading errors
+set -o pipefail # Unveils hidden failures
+set -o nounset # Exposes unset variables
+
+pt=${1}
+pt=${pt//[[:blank:]]/}
+
+output_dir=/tmp/frames
+# rm -rf $output_dir && ./run.sh configs/synthesizers/UGATIT.yml test 0 resume_from=${pt} img_output_dir=${output_dir}
+ids=$(ls ${output_dir} | cut -d "@" -f 1 | uniq)
+
+mkdir -p tmp
+for id in $ids; do
+  echo $id
+  ffmpeg -y -i "${output_dir}/${id}@%d.png" -vcodec mpeg4 tmp/${id}.mp4
+  # ffmpeg -y -f image2 -i "${output_dir}/${id}@%d.png" tmp/${id}.gif;
+done
diff --git a/tool/process/vox2_get_I_frame.sh b/tool/process/vox2_get_I_frame.sh
new file mode 100644
index 0000000..3a0b16b
--- /dev/null
+++ b/tool/process/vox2_get_I_frame.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+META_FILE="/data/VoxCeleb2/vox2_meta.csv"
+VOX2_TEST_PATH="/data/VoxCeleb2/test/mp4/"
+VOX2_DEV_PATH="/data/VoxCeleb2/dev/mp4/"
+
+generate_frame() {
+  clip_path=$1
+  save_path=$3
+  fn="${clip_path##$2}"
+  fn=${fn//\//-}
+  ffmpeg -hide_banner -loglevel panic -threads 8 -i "$clip_path" -vf select="'eq(pict_type\,I)'" -vsync 2 -f image2 "$save_path${fn%.mp4}_%d.jpg" &>>ff.log
+
+  gnum=$(ls $save_path${fn%.mp4}_* | wc -l)
+  if [ $gnum -eq 0 ]; then
+    echo $clip_path >>"not_done.txt"
+    echo $clip_path ERROR
+  else
+    echo $clip_path >>"done.txt"
+  fi
+}
+
+iter_videos() {
+  idl=$1
+  root=$2
+  save_path=$3
+
+  cat "$idl" | wc -l
+  while read -u 10 pid; do
+    echo $pid
+    for vp in $root$pid/*/; do
+      num=$(ls $vp | wc -l)
+      if [ $num -ge 4 ]; then
+        echo $vp
+        for cp in $vp*.mp4; do
+          generate_frame "$cp" "$root" "$save_path"
+        done
+      fi
+    done
+  done 10<"$idl"
+}
+
+cat $META_FILE | grep f | grep dev | grep -Po "id[0-9]+" >f_dev.txt
+cat $META_FILE | grep f | grep test | grep -Po "id[0-9]+" >f_test.txt
+
+iter_videos f_dev.txt $VOX2_DEV_PATH "temp/dev/"
+iter_videos f_test.txt $VOX2_TEST_PATH "temp/test/"
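
Usage note (not part of the patch): a minimal sketch of how the pieces added here fit together. The ./run.sh invocation is taken from the commented-out line in tool/generate_video.sh, and the checkpoint path is purely illustrative. The test task saves each translated frame under its source filename, so frames named "<id>@<n>.png" can be grouped by identity and re-encoded by tool/generate_video.sh.

    #!/usr/bin/env bash
    # Hypothetical end-to-end run; paths and the checkpoint name are placeholders.
    ckpt=result/VoxCeleb2Anime/checkpoint.pt

    # 1. Extract I-frames for the VoxCeleb2 identities selected in the script
    #    (frames land under temp/dev/ and temp/test/; progress goes to done.txt / not_done.txt).
    bash tool/process/vox2_get_I_frame.sh

    # 2. Translate the test video frames; the test task now builds its dataset from
    #    config.data.test.video_dataset and keeps each frame's original filename.
    ./run.sh configs/synthesizers/UGATIT.yml test 0 resume_from=${ckpt} img_output_dir=/tmp/frames

    # 3. Group the translated frames by the identity prefix before "@" and
    #    encode one mp4 per identity into ./tmp/.
    bash tool/generate_video.sh ${ckpt}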