# config.yaml
defaults:
- hydra/job_logging: colorlog
- hydra/hydra_logging: colorlog
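# the defaults list above pulls in the hydra-colorlog plugin so job and Hydra logs are colorized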
seed: 123
is_load_encoder: false # whether to load a pretrained encoder instead of training it
alice_modes: ["avg", "worst"] # evaluate on average and worst case
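# ??? is Hydra's mandatory-value marker: these entries must be overridden at launch,
# e.g. on the command line with `name=... current_mode=... paths.base_dir=...`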
name: ???
current_mode: ???
paths:
base_dir: ???
data: ${paths.base_dir}/data
pretrained: ${paths.base_dir}/pretrained/${name}/${current_mode}/beta${loss.beta}/seed${seed}/
logs: ${paths.base_dir}/logs/
eval: ${paths.base_dir}/results/${name}.csv
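  # interpolation example with illustrative values: base_dir=/tmp/dib, name=cifar10,
  # current_mode=train, loss.beta=10, seed=123 resolve `pretrained` to
  # /tmp/dib/pretrained/cifar10/train/beta10/seed123/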
data:
x_shape: ???
n_classes: ???
n_train: ???
kwargs:
data_dir: ${paths.data}
seed: ${seed}
batch_size: 256
encoder:
x_shape: ${data.x_shape}
n_classes: ${data.n_classes}
z_dim: 1024
  is_stochastic: true
  n_test_samples: 12 # number of z samples used at test time when the encoder is stochastic
  is_contrain_norm: true # constrain the norm of the representation
  # by default we use a very large encoder because, in theory, the encoder should not be
  # constrained; the regularization should instead come from the encoder's loss (DIB)
n_hid_layers: 3
dim_hid: 2048
V: # functional family V of the classifier
n_hid_layers: 1
dim_hid: 128
loss:
beta: 10
z_dim: ${encoder.z_dim}
inp_min: "Z,Y"
  n_heads: 5 # log_|Y|(n_train) = log10(50000) ≈ 4.70 is roughly how many heads you should need if V=U (worked out below)
n_train: ${data.n_train}
n_classes: ${data.n_classes}
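  # worked example for n_heads above: with n_classes = 10 and n_train = 50000,
  # log_10(50000) = 4 + log_10(5) ≈ 4.70, and rounding up gives 5 heads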
optimizer:
  lr: 5e-5 # use a small lr because we don't want implicit regularization
  # factor by which to increase the learning rate for the V-minimality heads (to make sure
  # they can keep up with the encoder); it needs to be increased when #param_encoder / #param_clf
  # is large, because an encoder with many parameters can "change" more even at a smaller lr
  lr_factor_Vmin: 50
scheduling_factor: 100 # by how much to reduce lr during training
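  # illustrative arithmetic (assuming the factor simply multiplies the base lr):
  # the V-minimality heads would train at 5e-5 * 50 = 2.5e-3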
logger:
loggers: ["tensorboard","csv","wandb"]
tensorboard:
save_dir: ${paths.logs}/tensorboard/${name}/${current_mode}
name: beta${loss.beta}_seed${seed}
csv:
save_dir: ${paths.logs}/csv/${name}/${current_mode}
name: beta${loss.beta}_seed${seed}
wandb:
name: seed${seed}
project: dib
group: ${name}/${current_mode}/beta${loss.beta}
offline: false # Run offline (data can be streamed later to wandb servers).
trainer:
#default_root_dir: ${paths.results}
max_epochs: 200
terminate_on_nan: true
progress_bar_refresh_rate: 0 # increase to show progress bar
# ENGINEERING / SPEED
gpus: 1
num_nodes: 1
precision: 16 # use 16 bit for speed
# DEBUGGING
  fast_dev_run: false # set true for a quick test run (a single batch, not a full epoch)
  track_grad_norm: -1 # use 2 to track the L2 norm of the gradients
  overfit_batches: 0.0 # use 0.01 to check that you can overfit 1% of the training data => training works
  weights_summary: top # use full to print the entire model
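# example launch with the required overrides (the script name `main.py` and the dataset
# values are assumptions for illustration, not part of this config):
# python main.py paths.base_dir=/tmp/dib name=cifar10 current_mode=train \
#   data.x_shape='[3,32,32]' data.n_classes=10 data.n_train=50000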