-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_data.sh
executable file
·40 lines (33 loc) · 1.17 KB
/
prepare_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# Copyright 2022 Hongji Wang (jijijiang77@gmail.com)
# 2022 Chengdong Liang (liangchengdong@mail.nwpu.edu.cn)
# Load the project environment; stop immediately if it cannot be sourced.
if ! source ./path.sh; then
  exit 1
fi

# Option defaults. They are assigned before tools/parse_options.sh is
# sourced below.
# NOTE(review): presumably parse_options.sh overrides these from
# --<name> <value> command-line flags - confirm against that script.
# With stop_stage left at -1, the stage guards that follow do not pass.
data_type=shard  # shard/raw
data="data"
stop_stage=-1
stage=-1

if ! source tools/parse_options.sh; then
  exit 1
fi
# Stage 1: delegate dataset preparation to the recipe-local script,
# asking it to run its own stages 1 through 4 with output under ${data}.
# Runs only when stage <= 1 <= stop_stage.
if [[ "${stage}" -le 1 ]] && [[ "${stop_stage}" -ge 1 ]]; then
  echo "Prepare datasets ..."
  # Abort on failure so later stages never run on missing or partial data.
  ./local/prepare_data.sh --stage 1 --stop_stage 4 --data "${data}" || exit 1
fi
# Stage 2: build the per-dataset list files in the format selected by
# ${data_type}, then convert the musan and rirs wav.scp files to LMDB.
# Runs only when stage <= 2 <= stop_stage. Any tool failure aborts the
# script with status 1 instead of continuing with incomplete outputs.
if [[ "${stage}" -le 2 ]] && [[ "${stop_stage}" -ge 2 ]]; then
  echo "Convert train and test data to ${data_type}..."
  for dset in vox2_dev vox1; do
    # Only the exact value "shard" selects the shard path; every other
    # value of data_type falls through to the raw list.
    if [[ "${data_type}" == "shard" ]]; then
      python tools/make_shard_list.py --num_utts_per_shard 1000 \
        --num_threads 16 \
        --prefix shards \
        --shuffle \
        "${data}/${dset}/wav.scp" "${data}/${dset}/utt2spk" \
        "${data}/${dset}/shards" "${data}/${dset}/shard.list" || exit 1
    else
      python tools/make_raw_list.py "${data}/${dset}/wav.scp" \
        "${data}/${dset}/utt2spk" "${data}/${dset}/raw.list" || exit 1
    fi
  done
  # Convert all musan data to LMDB
  python tools/make_lmdb.py "${data}/musan/wav.scp" "${data}/musan/lmdb" \
    || exit 1
  # Convert all rirs data to LMDB
  python tools/make_lmdb.py "${data}/rirs/wav.scp" "${data}/rirs/lmdb" \
    || exit 1
fi