-
Notifications
You must be signed in to change notification settings - Fork 1.8k
/
TrainNews.java
132 lines (114 loc) · 5.98 KB
/
TrainNews.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/* *****************************************************************************
*
*
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.examples.advanced.modelling.textclassification.customcorpusword2vec;
import org.deeplearning4j.examples.utils.DownloaderUtility;
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.api.InvocationType;
import org.deeplearning4j.optimize.listeners.EvaluativeListener;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.RmsProp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.io.File;
/**
 * Trains a recurrent neural network (a single LSTM layer followed by a softmax
 * {@link RnnOutputLayer}) to predict the category of a news headline.
 * It uses the word vectors generated by PrepareWordVector.java, so please make sure to run that first.
 *
 * Below are the training results obtained with the news data shipped with this example.
 * ==========================Scores========================================
 * Accuracy: 0.9343
 * Precision: 0.9249
 * Recall: 0.9327
 * F1 Score: 0.9288
 * ========================================================================
 * <p>
 * <b>KIT Solutions Pvt. Ltd. (www.kitsol.com)</b>
 */
public class TrainNews {

    /** Absolute path of the "LabelledNews" directory; assigned at the start of {@link #main(String[])}. */
    public static String DATA_PATH = "";

    /** Word2Vec model loaded from "NewsWordVector.txt"; assigned in {@link #main(String[])}. */
    public static WordVectors wordVectors;

    /**
     * Downloads the news data set, trains the network on it, prints the evaluation
     * statistics for the test split and saves the trained model as "NewsModel.net"
     * in the downloaded data directory.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if downloading, loading the word vectors, training or saving fails
     */
    public static void main(String[] args) throws Exception {
        String dataLocalPath = DownloaderUtility.NEWSDATA.Download();
        DATA_PATH = new File(dataLocalPath, "LabelledNews").getAbsolutePath();

        int batchSize = 50;                  // Number of examples in each minibatch
        int nEpochs = 10;                    // Number of epochs (full passes of training data) to train on
        int truncateHeadlinesToLength = 300; // Truncate headlines with length (# words) greater than this
        int lstmLayerSize = 200;             // LSTM output size; must equal the output layer's input size

        wordVectors = WordVectorSerializer.readWord2VecModel(new File(dataLocalPath, "NewsWordVector.txt"));

        // A single tokenizer factory is shared by the training and the test iterator.
        TokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
        tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());

        // DataSetIterators for training and testing respectively
        NewsIterator iTrain = buildIterator(tokenizerFactory, batchSize, truncateHeadlinesToLength, true);
        NewsIterator iTest = buildIterator(tokenizerFactory, batchSize, truncateHeadlinesToLength, false);

        // The network input size is the dimensionality of a word vector (100 in our case).
        int inputNeurons = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).length;
        // One output neuron per label reported by the training iterator.
        int outputs = iTrain.getLabels().size();

        MultiLayerNetwork net = new MultiLayerNetwork(buildConfiguration(inputNeurons, lstmLayerSize, outputs));
        net.init();

        System.out.println("Starting training...");
        // Report the score every iteration and evaluate on the test iterator at the end of every epoch.
        net.setListeners(new ScoreIterationListener(1), new EvaluativeListener(iTest, 1, InvocationType.EPOCH_END));
        net.fit(iTrain, nEpochs);

        System.out.println("Evaluating...");
        Evaluation eval = net.evaluate(iTest);
        System.out.println(eval.stats());

        // Second argument 'true': save the updater state together with the model.
        net.save(new File(dataLocalPath, "NewsModel.net"), true);
        System.out.println("----- Example complete -----");
    }

    /** Builds a NewsIterator over DATA_PATH using the shared word vectors; {@code train} is passed to the builder's train flag. */
    private static NewsIterator buildIterator(TokenizerFactory tokenizerFactory, int batchSize,
                                              int truncateLength, boolean train) {
        return new NewsIterator.Builder()
            .dataDirectory(DATA_PATH)
            .wordVectors(wordVectors)
            .batchSize(batchSize)
            .truncateLength(truncateLength)
            .tokenizerFactory(tokenizerFactory)
            .train(train)
            .build();
    }

    /** Builds the network configuration: one LSTM layer feeding a softmax RNN output layer. */
    private static MultiLayerConfiguration buildConfiguration(int inputNeurons, int lstmLayerSize, int outputs) {
        return new NeuralNetConfiguration.Builder()
            .updater(new RmsProp(0.0018))
            .l2(1e-5)
            .weightInit(WeightInit.XAVIER)
            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
            .gradientNormalizationThreshold(1.0)
            .list()
            .layer(new LSTM.Builder()
                .nIn(inputNeurons)
                .nOut(lstmLayerSize)
                .activation(Activation.TANH)
                .build())
            .layer(new RnnOutputLayer.Builder()
                .activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT)
                .nIn(lstmLayerSize)
                .nOut(outputs)
                .build())
            .build();
    }
}