---
# Training configuration for the run named `TAFG-vox2` (engine: TAFG).
#
# NOTE(review): the block structure below was restored from a copy whose
# line breaks and indentation had been lost; the nesting follows key order
# and naming. Confirm it against the code that consumes this file.

name: TAFG-vox2
engine: TAFG
result_dir: ./result
max_pairs: 1000000

# Per-run handlers: cache clearing, checkpointing, logging, test sampling.
handler:
  clear_cuda_cache: true
  set_epoch_for_dist_sampler: true
  checkpoint:
    epoch_interval: 1  # checkpoint once every `epoch_interval` epochs
    n_saved: 2
  tensorboard:
    scalar: 100  # log scalars `scalar` times per epoch
    image: 4  # log images `image` times per epoch
  test:
    random: true
    images: 10

misc:
  random_seed: 1004
  # NOTE(review): -1 looks like a "disabled" sentinel; confirm with consumer.
  add_new_loss_epoch: -1

# Network definitions. `_type` names the class to build; the remaining keys
# are presumably passed to it as constructor arguments (verify in builder).
model:
  generator:
    _type: TAFG-Generator
    _bn_to_sync_bn: false
    style_in_channels: 3
    content_in_channels: 24
    use_spectral_norm: false
    style_encoder_type: StyleEncoder
    num_style_conv: 4
    style_dim: 8
    num_adain_blocks: 4
    num_res_blocks: 4
  discriminator:
    _type: MultiScaleDiscriminator
    num_scale: 2
    discriminator_cfg:
      _type: PatchDiscriminator
      in_channels: 3
      base_channels: 64
      use_spectral: true
      need_intermediate_feature: true

# Loss terms, each with its own `weight`.
loss:
  gan:
    loss_type: hinge
    real_label_val: 1.0
    fake_label_val: 0.0
    weight: 1.0
  perceptual:
    # Keys are quoted so they load as strings rather than integers.
    layer_weights:
      "1": 0.03125
      "6": 0.0625
      "11": 0.125
      "20": 0.25
      "29": 1
    criterion: 'L1'
    style_loss: false
    perceptual_loss: true
    # Weight is 0 for this term in this configuration.
    weight: 0
  style:
    layer_weights:
      "3": 1
    criterion: 'L1'
    style_loss: true
    perceptual_loss: false
    weight: 10
  recon:
    level: 1
    weight: 10
  style_recon:
    level: 1
    weight: 1
  content_recon:
    level: 1
    weight: 1
  edge:
    weight: 5
    hed_pretrained_model_path: ./network-bsds500.pytorch
  cycle:
    level: 1
    weight: 10

optimizers:
  generator:
    _type: Adam
    lr: 0.0001
    betas: [ 0, 0.9 ]
    weight_decay: 0.0001
  discriminator:
    _type: Adam
    # Written in decimal form on purpose: exponent notation without a
    # decimal point (4e-4) is loaded as a string by YAML 1.1 parsers such
    # as PyYAML.
    lr: 0.0004
    betas: [ 0, 0.9 ]
    weight_decay: 0.0001

data:
  train:
    scheduler:
      start_proportion: 0.5
      target_lr: 0
    buffer_size: 50
    dataloader:
      batch_size: 8
      shuffle: true
      num_workers: 1
      pin_memory: true
      drop_last: true
    dataset:
      _type: GenerationUnpairedDatasetWithEdge
      root_a: "/data/i2i/VoxCeleb2Anime/trainA"
      root_b: "/data/i2i/VoxCeleb2Anime/trainB"
      edges_path: "/data/i2i/VoxCeleb2Anime/edges"
      landmarks_path: "/data/i2i/VoxCeleb2Anime/landmarks"
      edge_type: "landmark_hed"
      size: [ 128, 128 ]
      random_pair: true
      pipeline:
        - Load
        - Resize:
            size: [ 128, 128 ]
        - ToTensor
        - Normalize:
            mean: [ 0.5, 0.5, 0.5 ]
            std: [ 0.5, 0.5, 0.5 ]
  test:
    # Names one of the sibling dataset entries defined below.
    which: video_dataset
    dataloader:
      batch_size: 1
      shuffle: false
      num_workers: 1
      pin_memory: false
      drop_last: false
    dataset:
      _type: GenerationUnpairedDatasetWithEdge
      root_a: "/data/i2i/VoxCeleb2Anime/testA"
      root_b: "/data/i2i/VoxCeleb2Anime/testB"
      edges_path: "/data/i2i/VoxCeleb2Anime/edges"
      landmarks_path: "/data/i2i/VoxCeleb2Anime/landmarks"
      edge_type: "landmark_hed"
      random_pair: false
      size: [ 128, 128 ]
      pipeline:
        - Load
        - Resize:
            size: [ 128, 128 ]
        - ToTensor
        - Normalize:
            mean: [ 0.5, 0.5, 0.5 ]
            std: [ 0.5, 0.5, 0.5 ]
    video_dataset:
      _type: SingleFolderDataset
      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
      with_path: true
      pipeline:
        - Load
        - Resize:
            size: [ 128, 128 ]
        - ToTensor
        - Normalize:
            mean: [ 0.5, 0.5, 0.5 ]
            std: [ 0.5, 0.5, 0.5 ]