Poisson naive Bayes classifier (PoissonNB) #3708
Changes from all commits
```diff
@@ -29,7 +29,7 @@
 from .utils.multiclass import _check_partial_fit_first_call
 from .externals import six

-__all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB']
+__all__ = ['BernoulliNB', 'GaussianNB', 'MultinomialNB', 'PoissonNB']


 class BaseNB(six.with_metaclass(ABCMeta, BaseEstimator, ClassifierMixin)):
```
```diff
@@ -320,18 +320,113 @@ def _partial_fit(self, X, y, classes=None, _refit=False):

     def _joint_log_likelihood(self, X):
         X = check_array(X)
-        joint_log_likelihood = []
-        for i in range(np.size(self.classes_)):
+        joint_log_likelihood = np.zeros((np.shape(X)[0],
+                                         np.size(self.classes_)))
+        for i in range(len(self.classes_)):
             jointi = np.log(self.class_prior_[i])
             n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :]))
             n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /
                                  (self.sigma_[i, :]), 1)
-            joint_log_likelihood.append(jointi + n_ij)
-
-        joint_log_likelihood = np.array(joint_log_likelihood).T
+            joint_log_likelihood[:, i] = jointi + n_ij
         return joint_log_likelihood
```
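The hunk above swaps the append-rows-then-transpose pattern for a preallocated array filled one column per class. A minimal standalone check (made-up values, not from the patch) that the two patterns agree:

```python
import numpy as np

per_class = [np.array([0.1, 0.2, 0.3]),   # pretend per-class scores for
             np.array([0.4, 0.5, 0.6])]   # 3 samples and 2 classes

old = np.array(per_class).T               # old: append rows, then transpose
new = np.zeros((3, 2))                    # new: preallocate (n_samples, n_classes)
for i, col in enumerate(per_class):
    new[:, i] = col                       # fill one column per class

assert np.array_equal(old, new)
```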
```python
class PoissonNB(BaseNB):
    """
    Poisson Naive Bayes (PoissonNB)

    Attributes
    ----------
    class_prior_ : array, shape (n_classes,)
        Probability of each class.

    class_count_ : array, shape (n_classes,)
        Number of training samples observed in each class.

    lambda_ : array, shape (n_classes, n_features)
        Mean of each feature per class.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[5, 2, 6, 1, 8, 1], [0, 0, 1, 3, 3, 1]]).T
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> from sklearn.naive_bayes import PoissonNB
    >>> clf = PoissonNB()
    >>> clf.fit(X, y)
    PoissonNB()
    >>> print(clf.predict(X))
    [1 1 1 2 2 2]
```
> **Review comment:** Can you add here a reference paper which includes the derivation?
```python
    References
    ----------
    T. D. Sanger (1996). "Probability Density Estimation for the
    Interpretation of Neural Population Codes." J. Neurophysiol.
    76(4):2790-2793 (Eq. 8).
    http://jn.physiology.org/content/jn/76/4/2790.full.pdf
    """
```
```python
    def fit(self, X, y):
        """Fit Poisson Naive Bayes according to X, y

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features. X is expected to
            hold non-negative integer counts, although in practice
            non-integer values may also work; negative values raise a
            ValueError.

        y : array-like, shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y)
```
> **Review comment:** you should check that X entries are all positive and raise a meaningful error message if not
```python
        PoissonNB._check_non_negative(X)

        n_samples, n_features = X.shape

        self.classes_ = unique_y = np.unique(y)
        n_classes = unique_y.shape[0]

        epsilon = 1e-9

        self.lambda_ = np.zeros((n_classes, n_features))
        self.class_count_ = np.zeros(n_classes)
        self.class_prior_ = np.zeros(n_classes)

        for i, y_i in enumerate(unique_y):
            Xi = X[y == y_i, :]
            self.lambda_[i, :] = np.mean(Xi, axis=0) + epsilon
            # Record per-class sample counts; the class_count_ attribute
            # is documented in the class docstring above.
            self.class_count_[i] = Xi.shape[0]
            self.class_prior_[i] = float(Xi.shape[0]) / n_samples

        return self
```
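A side note on `epsilon`: a feature that is identically zero within a class would otherwise yield `lambda_ == 0`, and `log(0)` is `-inf`, which would poison the joint log-likelihood later. A quick standalone illustration (values made up):

```python
import numpy as np

Xi = np.array([[0, 2],
               [0, 4]])        # feature 0 never fires in this class
lam = Xi.mean(axis=0)          # array([0., 3.])
# np.log(lam) would give -inf for the first entry; with smoothing:
print(np.log(lam + 1e-9))      # finite: roughly [-20.72, 1.10]
```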
```python
    def _joint_log_likelihood(self, X):
        X = check_array(X)
        PoissonNB._check_non_negative(X)

        joint_log_likelihood = np.zeros((np.shape(X)[0],
                                         np.size(self.classes_)))
```
> **Review comment:** `np.size` -> `len`
```python
        for i in range(len(self.classes_)):
            jointi = np.log(self.class_prior_[i])
            n_ij = np.sum(X * np.log(self.lambda_[i, :]), axis=1)
            n_ij -= np.sum(self.lambda_[i, :])
            joint_log_likelihood[:, i] = jointi + n_ij

        return joint_log_likelihood
```
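A standalone numeric check (made-up rates, not from the PR) that omitting the `log(x!)` term of the Poisson log-pmf cannot change the predicted class, since it shifts every class's score by the same constant:

```python
import numpy as np
from scipy.special import gammaln

x = np.array([5.0, 2.0])            # one sample's feature counts
lambdas = np.array([[4.0, 1.0],     # made-up per-class rates,
                    [1.0, 3.0]])    # shape (n_classes, n_features)

# Full Poisson log-pmf per class; gammaln(x + 1) == log(x!).
full = (x * np.log(lambdas) - lambdas - gammaln(x + 1)).sum(axis=1)
# The term dropped by _joint_log_likelihood is class-independent.
dropped = (x * np.log(lambdas) - lambdas).sum(axis=1)

assert np.argmax(full) == np.argmax(dropped)
```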
```python
    @staticmethod
    def _check_non_negative(X):
        if np.any(X < 0.):
            raise ValueError("Input X must be non-negative")
```

> **Review comment:** this will not work with a sparse matrix (I think)
>
> **Reply:** Much of the […] I'm happy to do either, or hold off on sparse matrix support for the time being. Let me know which of those three you prefer.
>
> **Review comment:** it should not be much work to support sparse data, so if you have the time
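On the sparse-matrix thread: one possible shape for a sparse-aware check, sketched here under the assumption that scipy.sparse inputs only need their explicitly stored entries inspected (implicit zeros are already non-negative). This is our sketch, not code from the PR:

```python
import numpy as np
import scipy.sparse as sp

def check_non_negative(X):
    """Raise ValueError if any entry of X is negative (dense or sparse)."""
    if sp.issparse(X):
        # Only explicitly stored entries of a sparse matrix can be negative.
        if X.data.size > 0 and X.data.min() < 0:
            raise ValueError("Input X must be non-negative")
    elif np.any(X < 0.):
        raise ValueError("Input X must be non-negative")
```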
```python
class BaseDiscreteNB(BaseNB):
    """Abstract base class for naive Bayes on discrete/categorical data
```