# default.conf

# Default configurations that are used as a fallback
#
# Do NOT delete or modify this file unless you know what you are doing


[Default]

# Directory layout used by the program.
#
# The derived paths below refer to work_path with the %(work_path)s
# syntax, so changing work_path relocates all of them together.
# NOTE(review): this looks like Python configparser interpolation, which
# resolves names within the same section -- confirm against the loader.

# Path to the parent directory containing the program output
work_path = _workspace

# Path to the directory containing the dataset
dataset_path = _dataset

# Path to the directory containing extracted feature vectors
extraction_path = %(work_path)s/features

# Path to the directory containing saved training models
model_path = %(work_path)s/models

# Path to the directory containing log files
log_path = %(work_path)s/logs

# Path to the directory containing predictions
prediction_path = %(work_path)s/predictions

# Path to the directory containing results
# (kept separate from prediction_path so the two outputs do not mix)
result_path = %(work_path)s/results


[Extraction]

# Settings for the extraction of feature vectors.

# Whether to recompute feature vectors if they already exist
# NOTE(review): with False, existing feature vectors are presumably
# reused as they are -- confirm against the extraction code.
recompute = False


[Extraction.Logmel]

# Parameters of the 'Logmel' feature extractor (presumably log-scaled
# Mel spectrograms -- confirm against the extraction code).
#
# With the values below, hop_length is half of n_fft.

# Target sample rate (presumably in Hz)
sample_rate = 32000

# Length of the FFT window
n_fft = 1024

# Number of audio samples between frames
hop_length = 512

# Number of Mel bands
n_mels = 64


[Training]

# Settings for training the neural network.

# String identifying a particular training instance
training_id = default

# Neural network architecture
#
# Choices: vgg, densenet
model = vgg

# Mask for selecting a subset of the training set
#
# Format: key1=value1,key2=value2,...
# Example: mask = manually_verified=0,noisy_small!=1
#
# As the example shows, a condition may use '!=' as well as '='.
# The keys should correspond to the column names of the metadata file.
# Currently, only integer values are accepted.
# NOTE(review): an empty value (the default) presumably means that no
# mask is applied -- confirm against the training code.
mask =

# Random seed used prior to training
seed = 1000

# Size of a block after partitioning the feature vectors
#
# If a feature vector has size TxF, it is partitioned into blocks of size
# BxF, where B is the block size. The BxF blocks are then used as inputs
# to train the neural network.
block_size = 128

# Number of examples in a mini-batch
batch_size = 128

# Number of epochs to train the network
n_epochs = 40

# Initial learning rate
lr = 0.0005

# Factor for learning rate decay
lr_decay = 0.90

# Frequency of learning rate decay in epochs
# NOTE(review): presumably the number of epochs between two successive
# decays (a period, despite the key name) -- confirm against the
# training code.
lr_decay_rate = 2

# Whether to use data augmentation
augment = False

# Whether to overwrite any previously-saved models
# Setting this to False means that training can be resumed
overwrite = False


[Training.Relabel]

# Settings for relabeling out-of-distribution training instances.
# NOTE(review): the remaining keys presumably only take effect when
# relabel is True -- confirm against the training code.

# Whether relabeling should be enabled
relabel = False

# Threshold used to select out-of-distribution instances
relabel_threshold = 0.55

# Weight used to relabel out-of-distribution instances
relabel_weight = 0.5

# Path to CSV file containing pseudo-labels
# NOTE(review): this is a relative path; which directory it is resolved
# against is not specified here -- confirm against the loader.
pseudolabel_path = metadata/training_pseudo.csv

# Path to CSV file containing confidence estimates
# Leave blank to use pseudo-labels for confidence estimation
confidence_path =


[Prediction]

# Settings for generating predictions with the trained models.

# Specification of which models (epochs) to select for prediction
#
# Either a list of epoch numbers (e.g. '1,2,3') or a string with format
# 'metric:n' specifying which metric to use to select the top n epochs.
#
# Valid metrics: val_loss, val_acc, val_mAP
#
# The default below selects the top 3 epochs according to val_mAP.
epochs = val_mAP:3

# Whether to use ODIN when generating predictions
odin = False

# Whether to remove the model files that were not used for prediction
# NOTE(review): enabling this presumably deletes saved models from disk;
# confirm before setting it to True.
clean = False