-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_minitraining.py
134 lines (109 loc) · 6.39 KB
/
test_minitraining.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import unittest
import math
import main
class TestSentimentAnalysisBaselineMiniTrain(unittest.TestCase):
    """Tests for the baseline sentiment classifier in ``main`` on the mini training set.

    The expected class probabilities are written out by hand as
    ``P(C) * product(P(word|C))`` with ``P(word|C) = (count + 1) / (N_C + 12)``:
    add-one smoothing where 12 is the vocabulary size used in the expected
    values and ``N_C`` is 8 for class '1' and 11 for class '0'. Both classes
    have prior ``2/4``.
    """

    def setUp(self):
        # Sets the training file paths.
        # Feel free to edit to reflect where they are on your machine.
        self.trainingFilePath = "training_files/minitrain.txt"
        self.devFilePath = "training_files/minidev.txt"

    # ------------------------------------------------------------------
    # Helpers (leading underscore keeps unittest from collecting them)
    # ------------------------------------------------------------------

    def _trained_classifier(self):
        """Return a ``main.SentimentAnalysis`` trained on the mini training file."""
        classifier = main.SentimentAnalysis()
        classifier.train(main.generate_tuples_from_file(self.trainingFilePath))
        return classifier

    def _assert_scores(self, sentence, expected):
        """Score ``sentence`` and check it against hand-computed probabilities.

        ``expected`` maps each class label ('0' and '1') to its expected
        probability. ``score`` is treated as returning natural-log values
        (these tests have always applied ``math.exp`` to it), so the check is
        done in log space. Comparing the raw probabilities with ``places=5``
        allows an absolute error of 5e-6, which is larger than some of the
        expected values themselves and would let a wrong score pass.
        """
        score = self._trained_classifier().score(('id', sentence))
        for label in ('0', '1'):
            self.assertAlmostEqual(
                math.log(expected[label]),
                score[label],
                places=5,
                msg="log-probability mismatch for class %r" % label,
            )

    @staticmethod
    def _label_fixture():
        """Return fresh ``(gold, classified)`` label lists for the metric tests."""
        gold = [str(b) for b in [1, 1, 1, 0, 0]]
        classified = [str(b) for b in [1, 0, 0, 0, 1]]
        return gold, classified

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------

    def test_GenerateTuplesFromTrainingFile(self):
        # Tests the tuple generation from the sentences.
        examples = main.generate_tuples_from_file(self.trainingFilePath)
        expectedExamples = [
            ('ID-2', 'The hotel was not liked by me', '0'),
            ('ID-3', 'I loved the hotel', '1'),
            ('ID-1', 'The hotel was great', '1'),
            ('ID-4', 'I hated the hotel', '0'),
        ]
        # Sorted on both sides so the order of lines in the file does not matter.
        self.assertListEqual(sorted(expectedExamples), sorted(examples))

    # ------------------------------------------------------------------
    # Scoring
    # ------------------------------------------------------------------

    def test_ScorePositiveExample(self):
        # Tests the probability distribution of each class for a positive example.
        # P(C|text) ~ P(I|C)*P(loved|C)*P(the|C)*P(hotel|C)*P(C), where C is '0' or '1'.
        pos = ((1+1)/(8+12))*((1+1)/(8+12))*((1+1)/(8+12))*((2+1)/(8+12))*(2/4)
        neg = ((1+1)/(11+12))*((0+1)/(11+12))*((1+1)/(11+12))*((2+1)/(11+12))*(2/4)
        self._assert_scores("I loved the hotel", {'1': pos, '0': neg})

    def test_ScorePositiveExampleRepeats(self):
        # Same as the positive example, but "loved the hotel" occurs twice, so
        # each repeated word contributes its factor once per occurrence:
        # P(C|text) ~ P(I|C) * [P(loved|C)*P(the|C)*P(hotel|C)]^2 * P(C)
        pos = (
            ((1+1)/(8+12))
            * ((1+1)/(8+12))*((1+1)/(8+12))*((2+1)/(8+12))
            * ((1+1)/(8+12))*((1+1)/(8+12))*((2+1)/(8+12))
            * (2/4)
        )
        neg = (
            ((1+1)/(11+12))
            * ((0+1)/(11+12))*((1+1)/(11+12))*((2+1)/(11+12))
            * ((0+1)/(11+12))*((1+1)/(11+12))*((2+1)/(11+12))
            * (2/4)
        )
        self._assert_scores("I loved the hotel loved the hotel", {'1': pos, '0': neg})

    def test_ScorePositiveExampleWithUnkowns(self):
        # "a" and "lot" are not among the training sentences; the expected
        # values below contain no factor for them, i.e. unknown words are
        # expected to be skipped:
        # P(C|text) ~ P(I|C)*P(loved|C)*P(the|C)*P(hotel|C)*P(C)
        pos = ((1+1)/(8+12))*((1+1)/(8+12))*((1+1)/(8+12))*((2+1)/(8+12))*(2/4)
        neg = ((1+1)/(11+12))*((0+1)/(11+12))*((1+1)/(11+12))*((2+1)/(11+12))*(2/4)
        self._assert_scores("I loved the hotel a lot", {'1': pos, '0': neg})

    def test_ScoreForNegativeExample(self):
        # Tests the probability distribution of each class for a negative example.
        # P(C|text) ~ P(I|C)*P(hated|C)*P(the|C)*P(hotel|C)*P(C), where C is '0' or '1'.
        pos = ((1+1)/(8+12))*((0+1)/(8+12))*((1+1)/(8+12))*((2+1)/(8+12))*(2/4)
        neg = ((1+1)/(11+12))*((1+1)/(11+12))*((1+1)/(11+12))*((2+1)/(11+12))*(2/4)
        self._assert_scores("I hated the hotel", {'1': pos, '0': neg})

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------

    def test_ClassifyForPositiveExample(self):
        # Tests the label classified for the positive test sentence.
        label = self._trained_classifier().classify(('id', "I loved the hotel a lot"))
        self.assertEqual('1', label)

    def test_ClassifyForNegativeExample(self):
        # Tests the label classified for the negative test sentence.
        label = self._trained_classifier().classify(('id', "I hated the hotel"))
        self.assertEqual('0', label)

    # ------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------
    # The metric results are floats, so they are compared with
    # assertAlmostEqual: a mathematically equivalent formula may differ from
    # the expression used here in the last bit.

    def test_precision(self):
        gold, classified = self._label_fixture()
        # Two items are classified '1'; one of them is '1' in gold.
        self.assertAlmostEqual((1 / 2), main.precision(gold, classified))

    def test_recall(self):
        gold, classified = self._label_fixture()
        # Three items are '1' in gold; one of them is classified '1'.
        self.assertAlmostEqual((1 / 3), main.recall(gold, classified))

    def test_f1(self):
        gold, classified = self._label_fixture()
        p = 1 / 2
        r = 1 / 3
        self.assertAlmostEqual((2 * p * r) / (p + r), main.f1(gold, classified))
if __name__ == "__main__":
    # Executed only when this file is run as a script: print the usage hint,
    # then hand control to unittest's command-line test runner.
    print("Usage: python test_minitraining.py")
    unittest.main()