---
# TAFG experiment configuration (unpaired image-to-image translation with edge
# guidance, VoxCeleb2 -> Anime). The source file had lost its indentation; the
# nesting below is reconstructed from key semantics.
# NOTE(review): placement of `misc.checkpoint` / `misc.interval` and of
# `data.test.video_dataset` is inferred — confirm against the config loader.

name: TAFG
engine: TAFG
result_dir: ./result
max_pairs: 1000000

misc:
  random_seed: 324
  checkpoint:
    epoch_interval: 1  # one checkpoint every 1 epoch
    n_saved: 2
  interval:
    print_per_iteration: 10  # print once per 10 iterations
    tensorboard:
      scalar: 100  # log scalars every 100 iterations
      image: 2     # log images every 2 iterations

model:
  generator:
    _type: TAHG-Generator
    _bn_to_sync_bn: false
    style_in_channels: 3
    content_in_channels: 1
    num_blocks: 4
  discriminator:
    _type: MultiScaleDiscriminator
    num_scale: 2
    discriminator_cfg:
      _type: base-PatchDiscriminator
      in_channels: 3
      base_channels: 64
      use_spectral: true
      need_intermediate_feature: true

loss:
  gan:
    loss_type: hinge
    real_label_val: 1.0
    fake_label_val: 0.0
    weight: 1.0
  # VGG-style perceptual loss; keys are feature-layer indices, values are
  # per-layer weights.
  perceptual:
    layer_weights:
      "1": 0.03125
      "6": 0.0625
      "11": 0.125
      "20": 0.25
      "29": 1
    criterion: 'L1'
    style_loss: false
    perceptual_loss: true
    weight: 5
  # Gram-matrix style loss over the same layers; weight 0 disables it.
  style:
    layer_weights:
      "1": 0.03125
      "6": 0.0625
      "11": 0.125
      "20": 0.25
      "29": 1
    criterion: 'L2'
    style_loss: true
    perceptual_loss: false
    weight: 0
  # Discriminator feature-matching loss.
  fm:
    level: 1
    weight: 10
  # Reconstruction loss.
  recon:
    level: 1
    weight: 5

optimizers:
  generator:
    _type: Adam
    lr: 0.0001
    betas: [0, 0.9]
    weight_decay: 0.0001
  discriminator:
    _type: Adam
    # Was `4e-4`: YAML 1.1 loaders (e.g. PyYAML) read that as a *string*,
    # not a float; written in decimal form to guarantee a numeric value.
    lr: 0.0004
    betas: [0, 0.9]
    weight_decay: 0.0001

data:
  train:
    scheduler:
      start_proportion: 0.5  # proportion of training after which LR decays
      target_lr: 0
    buffer_size: 50
    dataloader:
      batch_size: 256
      shuffle: true
      num_workers: 2
      pin_memory: true
      drop_last: true
    dataset:
      _type: GenerationUnpairedDatasetWithEdge
      root_a: "/data/i2i/VoxCeleb2Anime/trainA"
      root_b: "/data/i2i/VoxCeleb2Anime/trainB"
      edges_path: "/data/i2i/VoxCeleb2Anime/edges"
      landmarks_path: "/data/i2i/VoxCeleb2Anime/landmarks"
      edge_type: "landmark_canny"
      size: [128, 128]
      random_pair: true
      pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
  test:
    dataloader:
      batch_size: 8
      shuffle: false
      num_workers: 1
      pin_memory: false
      drop_last: false
    dataset:
      _type: GenerationUnpairedDatasetWithEdge
      root_a: "/data/i2i/VoxCeleb2Anime/testA"
      root_b: "/data/i2i/VoxCeleb2Anime/testB"
      edges_path: "/data/i2i/VoxCeleb2Anime/edges"
      edge_type: "hed"
      random_pair: false
      size: [128, 128]
      pipeline:
        - Load
        - Resize:
            size: [128, 128]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]
    # Frame folder used for video-to-video evaluation; note the larger
    # 256x256 resize compared to the 128x128 image datasets.
    video_dataset:
      _type: SingleFolderDataset
      root: "/data/i2i/VoxCeleb2Anime/test_video_frames/"
      with_path: true
      pipeline:
        - Load
        - Resize:
            size: [256, 256]
        - ToTensor
        - Normalize:
            mean: [0.5, 0.5, 0.5]
            std: [0.5, 0.5, 0.5]