# File-viewer metadata: 171 lines, 3.6 KiB, YAML
# Top-level run settings.
name: talking_anime
engine: talking_anime
# Relative output path.
result_dir: ./result
# NOTE(review): presumably an upper bound on sampled pairs; confirm with the consumer.
max_pairs: 1000000

# Handler settings: cache clearing, sampler epoch, checkpointing, TensorBoard
# logging and test-time sampling.
handler:
  clear_cuda_cache: True
  set_epoch_for_dist_sampler: True
  checkpoint:
    # Checkpoint once per `epoch_interval` epoch.
    epoch_interval: 1
    n_saved: 2
  tensorboard:
    # Log scalars `scalar` times per epoch.
    scalar: 100
    # Log images `image` times per epoch.
    image: 100
  test:
    random: True
    images: 10

# Miscellaneous settings.
misc:
  random_seed: 1004

# Loss terms. Each sub-mapping carries its own `weight`.
loss:
  gan:
    loss_type: hinge
    real_label_val: 1.0
    fake_label_val: 0.0
    weight: 1.0
  fm:
    level: 1
    weight: 1
  style:
    # Keys are quoted so they stay strings rather than integers.
    layer_weights:
      "3": 1
    criterion: 'L1'
    style_loss: True
    perceptual_loss: False
    weight: 10
  perceptual:
    # Keys are quoted so they stay strings rather than integers.
    layer_weights:
      "1": 0.03125
      "6": 0.0625
      "11": 0.125
      "20": 0.25
      "29": 1
    criterion: 'L1'
    style_loss: False
    perceptual_loss: True
    # NOTE(review): a weight of 0 presumably disables this term; confirm.
    weight: 0
  context:
    layer_weights:
      # "13": 1
      "22": 1
    weight: 5
  recon:
    level: 1
    weight: 10
  edge:
    weight: 5
    hed_pretrained_model_path: ./network-bsds500.pytorch

# Network definitions. `_type` names the class to build; the remaining keys
# in each mapping are that entry's settings.
model:
  face_generator:
    _type: TAFG-SingleGenerator
    _bn_to_sync_bn: False
    style_in_channels: 3
    content_in_channels: 1
    use_spectral_norm: True
    style_encoder_type: VGG19StyleEncoder
    num_style_conv: 4
    style_dim: 512
    num_adain_blocks: 8
    num_res_blocks: 8
  anime_generator:
    _type: TAFG-ResGenerator
    _bn_to_sync_bn: False
    in_channels: 6
    use_spectral_norm: True
    num_res_blocks: 8

  # NOTE(review): nesting of `discriminator` under `model` was reconstructed
  # from a flattened listing; confirm against the consuming code.
  discriminator:
    _type: MultiScaleDiscriminator
    num_scale: 2
    discriminator_cfg:
      _type: PatchDiscriminator
      in_channels: 3
      base_channels: 64
      use_spectral: True
      need_intermediate_feature: True

# Optimizer settings, one mapping per network group.
optimizers:
  generator:
    _type: Adam
    lr: 0.0001
    betas: [0, 0.9]
    weight_decay: 0.0001
  discriminator:
    _type: Adam
    # Written in decimal form rather than `4e-4`: an exponent literal without
    # a decimal point is loaded as a string by YAML 1.1 resolvers, whereas
    # this spelling is a float everywhere.
    lr: 0.0004
    betas: [0, 0.9]
    weight_decay: 0.0001

# Dataset and dataloader settings for the train and test splits.
data:
  train:
    scheduler:
      start_proportion: 0.5
      target_lr: 0
    dataloader:
      batch_size: 8
      shuffle: True
      num_workers: 1
      pin_memory: True
      drop_last: True
    dataset:
      _type: PoseFacesWithSingleAnime
      root_face: '/data/i2i/VoxCeleb2Anime/trainA'
      root_anime: '/data/i2i/VoxCeleb2Anime/trainB'
      landmark_path: '/data/i2i/VoxCeleb2Anime/landmarks'
      num_face: 2
      img_size: [128, 128]
      with_order: False
      face_pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
      # Anime images are resized to 144x144, then randomly cropped to 128x128
      # and randomly flipped.
      anime_pipeline:
        - Load
        - Resize:
            size: [144, 144]
        - RandomCrop:
            size: [128, 128]
        - RandomHorizontalFlip
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
  test:
    which: dataset
    dataloader:
      batch_size: 1
      shuffle: False
      num_workers: 1
      pin_memory: False
      drop_last: False
    dataset:
      _type: PoseFacesWithSingleAnime
      root_face: '/data/i2i/VoxCeleb2Anime/testA'
      root_anime: '/data/i2i/VoxCeleb2Anime/testB'
      landmark_path: '/data/i2i/VoxCeleb2Anime/landmarks'
      num_face: 2
      img_size: [128, 128]
      with_order: False
      face_pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
      # No random crop or flip at test time: a plain resize to 128x128.
      anime_pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]