---
# Training configuration for the `talking_anime` engine.
#
# NOTE(review): the block nesting below was reconstructed from a copy whose
# line breaks and indentation had been collapsed; confirm the hierarchy
# against the schema expected by the consuming engine.

name: talking_anime
engine: talking_anime
result_dir: ./result
max_pairs: 1000000

handler:
  clear_cuda_cache: true
  set_epoch_for_dist_sampler: true
  checkpoint:
    epoch_interval: 1  # checkpoint once per `epoch_interval` epoch
    n_saved: 2
  tensorboard:
    scalar: 100  # log scalar `scalar` times per epoch
    image: 100  # log image `image` times per epoch
  test:
    random: true
    images: 10

misc:
  random_seed: 1004

# Loss terms; every entry carries its own `weight`.
loss:
  gan:
    loss_type: hinge
    real_label_val: 1.0
    fake_label_val: 0.0
    weight: 1.0
  fm:
    level: 1
    weight: 1
  style:
    # Keys are quoted so they stay strings rather than integers.
    layer_weights:
      "3": 1
    criterion: 'L1'
    style_loss: true
    perceptual_loss: false
    weight: 10
  perceptual:
    layer_weights:
      "1": 0.03125
      "6": 0.0625
      "11": 0.125
      "20": 0.25
      "29": 1
    criterion: 'L1'
    style_loss: false
    perceptual_loss: true
    weight: 0
  context:
    layer_weights:
      # "13": 1
      "22": 1
    weight: 5
  recon:
    level: 1
    weight: 10
  edge:
    weight: 5
    hed_pretrained_model_path: ./network-bsds500.pytorch

model:
  face_generator:
    _type: TAFG-SingleGenerator
    _bn_to_sync_bn: false
    style_in_channels: 3
    content_in_channels: 1
    use_spectral_norm: true
    style_encoder_type: VGG19StyleEncoder
    num_style_conv: 4
    style_dim: 512
    num_adain_blocks: 8
    num_res_blocks: 8
  anime_generator:
    _type: TAFG-ResGenerator
    _bn_to_sync_bn: false
    in_channels: 6
    use_spectral_norm: true
    num_res_blocks: 8
  discriminator:
    _type: MultiScaleDiscriminator
    num_scale: 2
    discriminator_cfg:
      _type: PatchDiscriminator
      in_channels: 3
      base_channels: 64
      use_spectral: true
      need_intermediate_feature: true

optimizers:
  generator:
    _type: Adam
    lr: 0.0001
    betas: [0, 0.9]
    weight_decay: 0.0001
  discriminator:
    _type: Adam
    # Written in decimal form: YAML 1.1 resolvers (e.g. PyYAML) load an
    # exponent literal without a decimal point, such as `4e-4`, as a string.
    lr: 0.0004
    betas: [0, 0.9]
    weight_decay: 0.0001

data:
  train:
    scheduler:
      start_proportion: 0.5
      target_lr: 0
    dataloader:
      batch_size: 8
      shuffle: true
      num_workers: 1
      pin_memory: true
      drop_last: true
    dataset:
      _type: PoseFacesWithSingleAnime
      root_face: "/data/i2i/VoxCeleb2Anime/trainA"
      root_anime: "/data/i2i/VoxCeleb2Anime/trainB"
      landmark_path: "/data/i2i/VoxCeleb2Anime/landmarks"
      num_face: 2
      img_size: [128, 128]
      with_order: false
      face_pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
      # Resize to 144x144, then random-crop to 128x128 and randomly flip.
      anime_pipeline:
        - Load
        - Resize:
            size: [144, 144]
        - RandomCrop:
            size: [128, 128]
        - RandomHorizontalFlip
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
  test:
    which: dataset
    dataloader:
      batch_size: 1
      shuffle: false
      num_workers: 1
      pin_memory: false
      drop_last: false
    dataset:
      _type: PoseFacesWithSingleAnime
      root_face: "/data/i2i/VoxCeleb2Anime/testA"
      root_anime: "/data/i2i/VoxCeleb2Anime/testB"
      landmark_path: "/data/i2i/VoxCeleb2Anime/landmarks"
      num_face: 2
      img_size: [128, 128]
      with_order: false
      face_pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
      # No random augmentation at test time: plain resize to 128x128.
      anime_pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]