/
naive_bayes.py
57 lines (36 loc) · 1.33 KB
/
naive_bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import nltk
import random
from nltk.corpus import twitter_samples
# Lazily-populated cache of the (tweets, word_features) pair produced by
# word_delegation(); filled on first use by find_features().
DELEGATION_RESULTS = None
def word_delegation():
    """Load the NLTK twitter_samples corpus and build the feature vocabulary.

    Returns:
        tuple: ``(tweets, word_features)`` where ``tweets`` is a list of
        ``(token_list, label)`` pairs labelled ``'pos'``/``'neg'``, and
        ``word_features`` is the 1000 most frequent lowercased tokens
        across both corpora.
    """
    positive = twitter_samples.tokenized('positive_tweets.json')
    negative = twitter_samples.tokenized('negative_tweets.json')
    tweets, all_words = [], []
    # Label each tweet and collect every lowercased token for counting.
    for corpus, label in ((positive, 'pos'), (negative, 'neg')):
        for tweet in corpus:
            tweets.append((tweet, label))
            all_words.extend(word.lower() for word in tweet)
    # BUG FIX: FreqDist is a collections.Counter subclass, so .keys() yields
    # words in *insertion* order, not frequency order — slicing it took an
    # arbitrary 1000 words. most_common() gives the actual top-1000 features.
    word_features = [word for word, _ in nltk.FreqDist(all_words).most_common(1000)]
    return tweets, word_features
def find_features(tweet):
    """Map a tokenized tweet to a boolean feature dict over the vocabulary.

    The corpus and vocabulary are computed once by word_delegation() and
    memoized in the module-level DELEGATION_RESULTS cache.
    """
    global DELEGATION_RESULTS
    if DELEGATION_RESULTS is None:
        DELEGATION_RESULTS = word_delegation()
    _, word_features = DELEGATION_RESULTS
    present = set(tweet)
    features = {}
    for feature_word in word_features:
        features[feature_word] = feature_word in present
    return features
def bayes(count):
    """Train a Naive Bayes sentiment classifier on `count` shuffled examples.

    Args:
        count: number of feature sets to keep as the training slice after
            shuffling.

    Returns:
        A trained ``nltk.NaiveBayesClassifier``.
    """
    global DELEGATION_RESULTS
    # CONSISTENCY FIX: the original called word_delegation() afresh here,
    # re-tokenizing the entire corpus even though find_features() maintains
    # the module-level cache. Populate/reuse that shared cache instead.
    if DELEGATION_RESULTS is None:
        DELEGATION_RESULTS = word_delegation()
    tweets, _ = DELEGATION_RESULTS
    feature_sets = [(find_features(tweet), category) for tweet, category in tweets]
    # NOTE(review): shuffle is unseeded, so the training slice differs per
    # run — presumably intentional for this demo script.
    random.shuffle(feature_sets)
    training_set = feature_sets[:count]
    return nltk.NaiveBayesClassifier.train(training_set)
def sentiment(text):
    """Classify the sentiment of *text*, returning ``'pos'`` or ``'neg'``.

    Args:
        text: either a raw string (split on whitespace into tokens) or an
            already-tokenized list of words.

    Returns:
        str: the label the trained classifier assigns.
    """
    # BUG FIX: find_features() does set(tweet), so passing a raw string
    # produced single-*character* features that never match the word
    # vocabulary. Tokenize strings first; pass token lists through as-is.
    tokens = text.split() if isinstance(text, str) else text
    # PERF FIX: the original retrained the classifier on every call.
    # Train once and memoize on the function object (interface unchanged).
    if getattr(sentiment, '_classifier', None) is None:
        sentiment._classifier = bayes(count=1900)
    features = find_features(tokens)
    return sentiment._classifier.classify(features)