-
Notifications
You must be signed in to change notification settings - Fork 3
/
bos-demo.sh
executable file
·36 lines (31 loc) · 1.41 KB
/
bos-demo.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env bash
echo "CAUTION: This will download Google word2vec vectors (3.4 GB)."
read -p "Press enter to continue"
DATADIR=./datasets
RESULTSDIR=./results/bos/demo
mkdir -p "${RESULTSDIR}"
mkdir -p "${DATADIR}"
if [ ! -f "${DATADIR}/word2vec/GoogleNews-vectors-negative300.bin" ]
then
URL="https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"
mkdir "${DATADIR}/word2vec"
wget -O "${DATADIR}/word2vec/GoogleNews-vectors-negative300.bin.gz" ${URL}
echo "Unzipping..."
gunzip "${DATADIR}/word2vec/GoogleNews-vectors-negative300.bin.gz"
fi
if [ ! -f "${DATADIR}/word2vec/processed.txt" ]
then
python data-process.py --input "${DATADIR}/word2vec/GoogleNews-vectors-negative300.bin" --output "${DATADIR}/word2vec/processed.txt"
fi
if [ ! -f "${DATADIR}/rw/rw.txt" ]
then
wget -c 'https://nlp.stanford.edu/~lmthang/morphoNLM/rw.zip' -P "${DATADIR}"
unzip "${DATADIR}/rw.zip" -d "${DATADIR}"
fi
if [ ! -f "${DATADIR}/rw/queries.txt" ]
then
cut -f 1,2 "${DATADIR}/rw/rw.txt" | awk '{print tolower($0)}' | tr '\t' '\n' > "${DATADIR}/rw/queries.txt"
fi
python bos-train.py --target "${DATADIR}/word2vec/processed.txt" --save "${RESULTSDIR}" --no-timestamp --epochs 20
python bos-pred.py --queries "${DATADIR}/rw/queries.txt" --save "${RESULTSDIR}/rw_vectors.txt" --model "${RESULTSDIR}/model.bos"
python ./fastText/eval.py --data "${DATADIR}/rw/rw.txt" --model "${RESULTSDIR}/rw_vectors.txt"