Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

How to create custom component for sentiment analyzer in Rasa 3.X. #171

Open
shreyashgupta68 opened this issue Nov 23, 2022 · 0 comments
Open

Comments

@shreyashgupta68
Copy link

Hi everyone,

First of all, thanks for this amazing project. I am working on a sentiment analyzer and am having some trouble understanding how custom components work. I want to train a naive Bayes classifier on my NLU training examples so that it can perform sentiment analysis. Does anyone know how to do this? Below is the custom sentiment analyzer component that I wrote for Rasa **3.1.0**:

import logging
from typing import Any, Text, Dict, List

from joblib import dump, load
from nltk.classify import NaiveBayesClassifier

from rasa.engine.recipes.default_recipe import DefaultV1Recipe
from rasa.engine.graph import ExecutionContext, GraphComponent
from rasa.engine.storage.resource import Resource
from rasa.engine.storage.storage import ModelStorage
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import (TEXT)
# Module-level logger. The dunder in ``__name__`` was lost when the snippet was
# pasted (markdown rendered it as bold), leaving an undefined ``name``.
logger = logging.getLogger(__name__)

@DefaultV1Recipe.register(
    # The component writes to the "entities" attribute of each message, so it
    # is registered as an entity extractor rather than as a tokenizer.
    DefaultV1Recipe.ComponentType.ENTITY_EXTRACTOR, is_trainable=True
)
class DemoSentiment(GraphComponent):
    """Naive Bayes sentiment analyzer packaged as a Rasa 3.x graph component.

    Training fits an NLTK ``NaiveBayesClassifier`` on the text of the NLU
    intent examples, using labels read from a plain-text file (one label per
    line, in the same order as the examples). At inference time the predicted
    label is appended to each message's entities as an entity named
    ``"sentiment"``.
    """

    name = "sentiment"
    provides = ["entities"]
    requires = ["tokens"]
    defaults = {}
    language_list = ["en"]

    @staticmethod
    def get_default_config() -> Dict[Text, Any]:
        """Return the default configuration of the component."""
        return {"labels_file": "labels.txt"}

    def __init__(
        self,
        config: Dict[Text, Any],
        name: Text,
        model_storage: ModelStorage,
        resource: Resource,
    ) -> None:
        """Create an untrained component.

        Args:
            config: Component configuration; ``labels_file`` (optional) is the
                path of the file holding the sentiment labels.
            name: Name of the graph node; used as the persisted file name.
            model_storage: Storage used to persist the trained classifier.
            resource: Resource under which the classifier is stored.
        """
        self.name = name
        self._config = config or {}
        # Set by `train` or `load`; `None` means "not trained", in which case
        # `process` leaves the messages untouched.
        self.clf = None

        # We need to use these later when saving the trained component.
        self._model_storage = model_storage
        self._resource = resource

    @classmethod
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> GraphComponent:
        """Create a new, untrained component (called before training)."""
        logger.debug("Creating %s with model storage %s", cls.__name__, model_storage)
        return cls(config, execution_context.node_name, model_storage, resource)

    @staticmethod
    def _tokenize(text: Text) -> List[Text]:
        """Split `text` into lower-cased, whitespace-separated words."""
        return text.lower().split()

    def preprocessing(self, tokens: List[Text]) -> Dict[Text, bool]:
        """Create the bag-of-words feature dict NLTK expects from `tokens`."""
        return {word: True for word in tokens}

    def train(self, training_data: TrainingData) -> Resource:
        """Train the classifier on the intent examples and persist it.

        Args:
            training_data: NLU training data; only intent examples that have
                a non-empty text are used.

        Returns:
            The resource under which the trained classifier was persisted.

        Raises:
            ValueError: If the number of labels does not match the number of
                training examples. Pairing them anyway would silently attach
                labels to the wrong examples.
        """
        texts = [e.get(TEXT) for e in training_data.intent_examples if e.get(TEXT)]

        # One label (e.g. positive / negative / neutral) per line, in the same
        # order as the training examples. Blank lines are ignored.
        labels_file = self._config.get("labels_file", "labels.txt")
        with open(labels_file, "r", encoding="utf-8") as f:
            labels = [line.strip() for line in f if line.strip()]

        if len(labels) != len(texts):
            raise ValueError(
                f"Found {len(labels)} labels in '{labels_file}' but "
                f"{len(texts)} training examples; they must match one-to-one."
            )

        if not texts:
            logger.warning("No training examples found; sentiment model not trained.")
            return self._resource

        # Tokenize into words first: iterating a string directly would yield
        # single characters instead of words.
        labeled_data = [
            (self.preprocessing(self._tokenize(text)), label)
            for text, label in zip(texts, labels)
        ]
        self.clf = NaiveBayesClassifier.train(labeled_data)
        self.persist()
        return self._resource

    def process_training_data(self, training_data: TrainingData) -> TrainingData:
        """Return the training data unchanged.

        The data must be returned (not `None`) so that components running
        after this one still receive it.
        """
        return training_data

    def convert_to_rasa(self, value: Text, confidence: float) -> Dict[Text, Any]:
        """Convert model output into the Rasa NLU compatible output format."""
        return {
            "value": value,
            "confidence": confidence,
            "entity": "sentiment",
            "extractor": "sentiment_extractor",
        }

    def process(self, messages: List[Message]) -> List[Message]:
        """Add a sentiment entity to every message that has text.

        Called during inference. Messages are returned unchanged when the
        component has not been trained.
        """
        if self.clf is None:
            # Component is either not trained or didn't receive enough
            # training data.
            return messages

        for message in messages:
            text = message.get(TEXT)
            if not text:
                continue

            features = self.preprocessing(self._tokenize(text))
            distribution = self.clf.prob_classify(features)
            sentiment = distribution.max()
            entity = self.convert_to_rasa(sentiment, distribution.prob(sentiment))

            # Append instead of overwrite so that entities found by other
            # extractors are preserved.
            entities = list(message.get("entities") or [])
            entities.append(entity)
            message.set("entities", entities, add_to_output=True)
        return messages

    def persist(self) -> None:
        """Persist the trained classifier into the model storage."""
        with self._model_storage.write_to(self._resource) as model_dir:
            # `dump` is the module-level joblib function.
            dump(self.clf, model_dir / f"{self.name}.joblib")

    @classmethod
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> GraphComponent:
        """Load the trained component from disk.

        Falls back to an untrained component (whose `process` is a no-op) if
        the persisted classifier cannot be found.
        """
        component = cls(config, execution_context.node_name, model_storage, resource)
        try:
            with model_storage.read_from(resource) as model_dir:
                # Inside this method `load` resolves to the module-level joblib
                # function, not to this classmethod.
                # NOTE: joblib uses pickle under the hood; only load models
                # from trusted sources.
                component.clf = load(model_dir / f"{resource.name}.joblib")
        except (ValueError, FileNotFoundError):
            # NOTE(review): assumes the model storage raises ValueError for an
            # unknown resource, as in the Rasa documentation example - confirm.
            logger.warning(
                "Could not load the sentiment classifier for resource '%s'; "
                "the component will not annotate messages.",
                resource.name,
            )
        return component

    @classmethod
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Validate that the component is configured properly (no checks yet)."""
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant