forked from uwdata/termite-stm
/
demo-20newsgroups.sh
executable file
·74 lines (64 loc) · 1.96 KB
/
demo-20newsgroups.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
# Locations used by this demo. DEMO_APP is the application name handed to
# bin/ImportMallet.py (and the folder name under apps/); everything the demo
# downloads or generates is kept below DEMO_PATH.
DEMO_APP="20newsgroups"
DEMO_PATH="demo-20newsgroups"
DOWNLOAD_PATH="${DEMO_PATH}/download"
CORPUS_PATH="${DEMO_PATH}/corpus"
MODEL_PATH="${DEMO_PATH}/model"
#######################################
# Create a folder unless it already exists, announcing the creation.
# Arguments:
#   $1 - path of the folder to create (may contain whitespace)
#   $2 - prefix printed in front of the progress message (indentation)
# Outputs:
#   "<prefix>Creating folder: <path>" on stdout when the folder is created;
#   nothing when it already exists.
# Returns:
#   0 if the folder already exists or was created, otherwise mkdir's status.
#######################################
function __create_folder__ {
  # Locals keep the helper from leaking FOLDER/TAB into the caller's scope.
  local folder=$1
  local tab=${2-}

  if [[ ! -d "$folder" ]]; then
    echo "${tab}Creating folder: $folder"
    # -p also creates missing parents; -- protects paths starting with '-'.
    mkdir -p -- "$folder"
  fi
}
#######################################
# Download the 20newsgroups archive and unpack it into the corpus folder.
# Steps that were completed by an earlier run are skipped, and a failed
# download or extraction leaves nothing behind that would make a later run
# believe the step had succeeded.
# Globals:
#   DEMO_PATH, DOWNLOAD_PATH, CORPUS_PATH (read)
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, failure messages on stderr.
# Returns:
#   0 on success, 1 if the download or the extraction failed.
#######################################
function __fetch_data__ {
  local archive_name="20news-18828"
  local tarball="$DOWNLOAD_PATH/$archive_name.tar.gz"

  echo "# Setting up the 20newsgroups dataset..."
  __create_folder__ "$DEMO_PATH" " "
  if [[ ! -e "$DEMO_PATH/README" ]]; then
    echo "After a model is imported into a Termite server, you can technically delete all content in this folder without affecting the server. However you may wish to retain your model for other analysis purposes." > "$DEMO_PATH/README"
  fi

  # Key the skip on the tarball itself rather than on its folder: the folder
  # is created before the transfer starts, so it also exists after a failure.
  if [[ ! -s "$tarball" ]]; then
    __create_folder__ "$DOWNLOAD_PATH" " "
    echo " Downloading the 20newsgroups dataset..."
    # --fail turns HTTP errors into a non-zero exit instead of saving the
    # error page; the ".part" name is only renamed once the transfer is done.
    # NOTE(review): --insecure disables TLS certificate checks on any HTTPS
    # redirect; kept to preserve the existing behaviour.
    if ! curl --fail --insecure --location --output "$tarball.part" \
        "http://qwone.com/~jason/20Newsgroups/20news-18828.tar.gz"; then
      rm -f -- "$tarball.part"
      echo " Failed to download the 20newsgroups dataset." >&2
      return 1
    fi
    mv -- "$tarball.part" "$tarball" || return 1
    echo " Setting up 20newsgroups information page..."
    echo "<html><head><meta http-equiv='refresh' content='0;url=http://qwone.com/~jason/20Newsgroups/'></head></html>" > "$DOWNLOAD_PATH/index.html"
  else
    echo " Already downloaded: $DOWNLOAD_PATH"
  fi

  if [[ ! -d "$CORPUS_PATH" ]]; then
    __create_folder__ "$CORPUS_PATH" " "
    echo " Uncompressing the 20newsgroups dataset..."
    # Extract straight into the corpus folder, dropping the archive's
    # top-level directory, so nothing is staged in the current directory and
    # dot-files are not lost to a "mv dir/*" glob.
    if ! tar -zxf "$tarball" -C "$CORPUS_PATH" --strip-components=1 "$archive_name"; then
      # Remove the folder created above so the next run retries this step.
      rm -rf -- "${CORPUS_PATH:?}"
      echo " Failed to uncompress the 20newsgroups dataset." >&2
      return 1
    fi
  else
    echo " Already available: $CORPUS_PATH"
  fi
  echo
}
#######################################
# Announce and run bin/train_mallet_from_folder.sh for the demo corpus.
# Globals:
#   CORPUS_PATH, MODEL_PATH (read)
# Arguments:
#   None
# Outputs:
#   A heading and the command line being run, then whatever the training
#   script prints.
# Returns:
#   The exit status of bin/train_mallet_from_folder.sh.
#######################################
function __train_model__ {
  echo "# Training an LDA model..."
  echo
  echo "bin/train_mallet_from_folder.sh $CORPUS_PATH $MODEL_PATH"
  echo
  # Quoted so each path reaches the script as exactly one argument.
  bin/train_mallet_from_folder.sh "$CORPUS_PATH" "$MODEL_PATH"
}
#######################################
# Announce and run bin/ImportMallet.py for the trained model, then make sure
# the app's data/stm folder exists.
# Globals:
#   MODEL_PATH, DEMO_APP (read)
# Arguments:
#   None
# Outputs:
#   A heading and the command line being run, then whatever the import
#   script prints.
# Returns:
#   The import script's status if it fails, otherwise the status of mkdir.
#######################################
function __import_model__ {
  echo "# Importing an LDA model..."
  echo
  echo "bin/ImportMallet.py $MODEL_PATH $DEMO_APP"
  echo
  # Do not build an app folder skeleton for an import that did not succeed.
  bin/ImportMallet.py "$MODEL_PATH" "$DEMO_APP" || return
  # -p keeps a re-run from failing with "File exists".
  mkdir -p -- "apps/$DEMO_APP/data/stm"
}
#######################################
# Entry point: run every stage of the demo in order - environment setup,
# dataset download, model training, model import - and finally start the
# server. A failing stage does not stop the following ones.
# Returns:
#   The exit status of bin/start_server.sh.
#######################################
function __main__ {
  bin/setup.sh
  __fetch_data__
  __train_model__
  __import_model__
  bin/start_server.sh
}

__main__