Merge pull request #174 from deepmipt/dev
Release v0.1.10
dilyararimovna committed Jun 27, 2022
2 parents ab2dcbd + 110c566 commit 525783a
Showing 19 changed files with 252 additions and 123 deletions.
3 changes: 2 additions & 1 deletion annotators/NER/requirements.txt
@@ -8,4 +8,5 @@ gunicorn==19.9.0
 requests==2.22.0
 sentry-sdk==0.12.3
 jinja2<=3.0.3
-Werkzeug<=2.0.3
+Werkzeug<=2.0.3
+protobuf<4
1 change: 1 addition & 0 deletions annotators/SentSeg/requirements.txt
@@ -9,3 +9,4 @@ requests==2.22.0
 sentry-sdk==0.12.3
 jinja2<=3.0.3
 Werkzeug<=2.0.3
+protobuf<4
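Both pins most likely guard against the protobuf 4.x line (first published as 4.21.0 in May 2022), which refuses to load Python modules generated by older protoc releases; capping at protobuf<4 keeps the annotators' existing generated dependencies importable.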
17 changes: 16 additions & 1 deletion assistant_dists/dream/docker-compose.override.yml
@@ -101,6 +101,20 @@ services:
   convers-evaluation-selector:
     env_file: [.env]
     build:
+      args:
+        TAG_BASED_SELECTION: 1
+        CALL_BY_NAME_PROBABILITY: 0.5
+        PROMPT_PROBA: 0.3
+        ACKNOWLEDGEMENT_PROBA: 0.3
+        PRIORITIZE_WITH_REQUIRED_ACT: 1
+        PRIORITIZE_NO_DIALOG_BREAKDOWN: 0
+        PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 1
+        IGNORE_DISLIKED_SKILLS: 0
+        GREETING_FIRST: 1
+        RESTRICTION_FOR_SENSITIVE_CASE: 1
+        PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 1
+        ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 1
+        PRIORITIZE_SCRIPTED_SKILLS: 1
       context: .
       dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile
     command: flask run -h 0.0.0.0 -p 8009
@@ -1151,7 +1165,8 @@ services:
       args:
         SERVICE_PORT: 8125
         SERVICE_NAME: dialogpt
-        PRETRAINED_MODEL_NAME_OR_PATH: microsoft/DialoGPT-small
+        PRETRAINED_MODEL_NAME_OR_PATH: microsoft/DialoGPT-medium
+        N_HYPOTHESES_TO_GENERATE: 5
       context: ./services/dialogpt/
     command: flask run -h 0.0.0.0 -p 8125
     environment:
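Beyond the new selector args, the dream distribution switches its generative service from DialoGPT-small to the larger DialoGPT-medium checkpoint and asks it for five hypotheses per turn. The services/dialogpt code itself is not part of this diff, so the snippet below is only a sketch of how N-hypothesis generation with this checkpoint typically looks via the HuggingFace transformers API; everything beyond the model name and the hypothesis count is an assumption.

from transformers import AutoModelForCausalLM, AutoTokenizer

N_HYPOTHESES_TO_GENERATE = 5  # mirrors the new build arg

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

# Encode one user turn; DialoGPT uses the EOS token as a turn separator.
inputs = tokenizer("Hello, how are you?" + tokenizer.eos_token, return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,                                 # sampling makes the hypotheses distinct
    num_return_sequences=N_HYPOTHESES_TO_GENERATE,  # one generated sequence per hypothesis
    max_new_tokens=40,
    pad_token_id=tokenizer.eos_token_id,
)
# Strip the prompt tokens and decode each hypothesis separately.
prompt_len = inputs["input_ids"].shape[-1]
hypotheses = [tokenizer.decode(seq[prompt_len:], skip_special_tokens=True) for seq in outputs]
print(hypotheses)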
17 changes: 16 additions & 1 deletion assistant_dists/dream_mini/docker-compose.override.yml
@@ -60,6 +60,20 @@ services:
   convers-evaluation-selector:
     env_file: [.env]
     build:
+      args:
+        TAG_BASED_SELECTION: 1
+        CALL_BY_NAME_PROBABILITY: 0.5
+        PROMPT_PROBA: 0.3
+        ACKNOWLEDGEMENT_PROBA: 0.3
+        PRIORITIZE_WITH_REQUIRED_ACT: 1
+        PRIORITIZE_NO_DIALOG_BREAKDOWN: 0
+        PRIORITIZE_WITH_SAME_TOPIC_ENTITY: 1
+        IGNORE_DISLIKED_SKILLS: 0
+        GREETING_FIRST: 1
+        RESTRICTION_FOR_SENSITIVE_CASE: 1
+        PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS: 0
+        ADD_ACKNOWLEDGMENTS_IF_POSSIBLE: 1
+        PRIORITIZE_SCRIPTED_SKILLS: 0
       context: .
       dockerfile: ./response_selectors/convers_evaluation_based_selector/Dockerfile
     command: flask run -h 0.0.0.0 -p 8009
@@ -136,7 +150,8 @@ services:
       args:
        SERVICE_PORT: 8125
        SERVICE_NAME: dialogpt
-        PRETRAINED_MODEL_NAME_OR_PATH: microsoft/DialoGPT-small
+        PRETRAINED_MODEL_NAME_OR_PATH: microsoft/DialoGPT-medium
+        N_HYPOTHESES_TO_GENERATE: 5
       context: ./services/dialogpt/
     command: flask run -h 0.0.0.0 -p 8125
     environment:
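dream_mini receives the same selector build args as the full dream distribution, except that PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and PRIORITIZE_SCRIPTED_SKILLS are flipped to 0, presumably because the mini distribution ships far fewer scripted skills for the selector to favor. The DialoGPT-medium upgrade and the N_HYPOTHESES_TO_GENERATE setting are applied identically.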
1 change: 1 addition & 0 deletions common/sensitive.py
@@ -1,6 +1,7 @@
 from common.utils import get_topics, get_intents


+psycho_help_spec = "you can call the National Suicide Prevention Lifeline"
 sensitive_topics = {"Politics", "Religion", "Sex_Profanity"}
 sensitive_dialogact_topics = {"Politics", "Inappropriate_Content"}
 sensitive_all_intents = {
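The new constant pairs with the server.py changes below: psycho_help_spec disappears from the response selector's imports, along with the program_y_dangerous scoring rule that matched it.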
17 changes: 17 additions & 0 deletions common/utils.py
@@ -1206,3 +1206,20 @@ def is_special_factoid_question(annotated_utterance):
         r"|here's a fact about [a-zA-Z0-9\- \,]+\.)",
         re.IGNORECASE,
     )
+
+
+def get_conv_eval_annotations(annotated_utterance):
+    default_conv_eval = {
+        "isResponseOnTopic": 0.0,
+        "isResponseInteresting": 0.0,
+        "responseEngagesUser": 0.0,
+        "isResponseComprehensible": 0.0,
+        "isResponseErroneous": 0.0,
+    }
+
+    return annotated_utterance.get("annotations", {}).get("convers_evaluator_annotator", default_conv_eval)
+
+
+def get_dialog_breakdown_annotations(annotated_utterance):
+    breakdown = annotated_utterance.get("annotations", {}).get("dialog_breakdown", {}).get("breakdown", 0.0) > 0.5
+    return breakdown
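Both helpers centralize annotation lookups that the response selector previously inlined (see the server.py diff below). A minimal usage sketch; the utterance and all score values here are invented for illustration:

from common.utils import get_conv_eval_annotations, get_dialog_breakdown_annotations

# Hypothetical annotated utterance, shaped the way these helpers expect.
annotated_utterance = {
    "text": "Let's talk about movies!",
    "annotations": {
        "convers_evaluator_annotator": {
            "isResponseOnTopic": 0.8,
            "isResponseInteresting": 0.6,
            "responseEngagesUser": 0.7,
            "isResponseComprehensible": 0.9,
            "isResponseErroneous": 0.1,
        },
        "dialog_breakdown": {"breakdown": 0.2},
    },
}

conv_eval = get_conv_eval_annotations(annotated_utterance)          # the five scores above
broke_down = get_dialog_breakdown_annotations(annotated_utterance)  # False: 0.2 is not > 0.5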
27 changes: 27 additions & 0 deletions response_selectors/convers_evaluation_based_selector/Dockerfile
@@ -2,6 +2,33 @@ FROM python:3.7.4

 WORKDIR /src

+ARG TAG_BASED_SELECTION
+ENV TAG_BASED_SELECTION ${TAG_BASED_SELECTION}
+ARG PRIORITIZE_WITH_SAME_TOPIC_ENTITY
+ENV PRIORITIZE_WITH_SAME_TOPIC_ENTITY ${PRIORITIZE_WITH_SAME_TOPIC_ENTITY}
+ARG PRIORITIZE_NO_DIALOG_BREAKDOWN
+ENV PRIORITIZE_NO_DIALOG_BREAKDOWN ${PRIORITIZE_NO_DIALOG_BREAKDOWN}
+ARG PRIORITIZE_WITH_REQUIRED_ACT
+ENV PRIORITIZE_WITH_REQUIRED_ACT ${PRIORITIZE_WITH_REQUIRED_ACT}
+ARG IGNORE_DISLIKED_SKILLS
+ENV IGNORE_DISLIKED_SKILLS ${IGNORE_DISLIKED_SKILLS}
+ARG GREETING_FIRST
+ENV GREETING_FIRST ${GREETING_FIRST}
+ARG RESTRICTION_FOR_SENSITIVE_CASE
+ENV RESTRICTION_FOR_SENSITIVE_CASE ${RESTRICTION_FOR_SENSITIVE_CASE}
+ARG PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS
+ENV PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS ${PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS}
+ARG ADD_ACKNOWLEDGMENTS_IF_POSSIBLE
+ENV ADD_ACKNOWLEDGMENTS_IF_POSSIBLE ${ADD_ACKNOWLEDGMENTS_IF_POSSIBLE}
+ARG PROMPT_PROBA
+ENV PROMPT_PROBA ${PROMPT_PROBA}
+ARG ACKNOWLEDGEMENT_PROBA
+ENV ACKNOWLEDGEMENT_PROBA ${ACKNOWLEDGEMENT_PROBA}
+ARG CALL_BY_NAME_PROBABILITY
+ENV CALL_BY_NAME_PROBABILITY ${CALL_BY_NAME_PROBABILITY}
+ARG PRIORITIZE_SCRIPTED_SKILLS
+ENV PRIORITIZE_SCRIPTED_SKILLS ${PRIORITIZE_SCRIPTED_SKILLS}
+
 COPY ./response_selectors/convers_evaluation_based_selector/requirements.txt requirements.txt
 RUN pip install -r requirements.txt
 RUN python -c "import nltk; nltk.download('punkt')"
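These lines complete the plumbing for the selector flags: docker-compose supplies each value as a build arg, the Dockerfile re-exports every ARG as an ENV variable, and server.py reads the result with getenv, as the next file shows. One subtlety of the pattern is that getenv returns strings, so a flag set to "0" is still truthy when used directly in a boolean context. A minimal sketch of a stricter helper (illustrative only, not code from this repository):

from os import getenv

def env_flag(name: str, default: bool = False) -> bool:
    # Interpret "1"/"true"-style environment flags as booleans;
    # plain truthiness would treat the string "0" as enabled.
    value = getenv(name)
    if value is None:
        return default
    return value.strip().lower() in {"1", "true", "yes"}

TAG_BASED_SELECTION = env_flag("TAG_BASED_SELECTION")
PROMPT_PROBA = float(getenv("PROMPT_PROBA", "0.3"))  # numeric values still need an explicit cast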
32 changes: 13 additions & 19 deletions response_selectors/convers_evaluation_based_selector/server.py
@@ -15,7 +15,13 @@

 from common.greeting import greeting_spec
 from common.universal_templates import if_chat_about_particular_topic, if_choose_topic
-from common.utils import get_intent_name, low_priority_intents, substitute_nonwords, is_toxic_or_badlisted_utterance
+from common.utils import (
+    get_intent_name,
+    low_priority_intents,
+    substitute_nonwords,
+    is_toxic_or_badlisted_utterance,
+    get_conv_eval_annotations,
+)
 from tag_based_selection import tag_based_response_selection
 from utils import (
     add_question_to_statement,
@@ -27,10 +33,8 @@
     CONFIDENCE_STRENGTH,
     how_are_you_spec,
     what_i_can_do_spec,
-    psycho_help_spec,
     misheard_with_spec1,
     misheard_with_spec2,
-    alexa_abilities_spec,
 )


@@ -41,9 +45,8 @@

 app = Flask(__name__)

-CALL_BY_NAME_PROBABILITY = 0.5  # if name is already known
-SHOW_DIALOG_ID = False
-TAG_BASED_SELECTION = True
+CALL_BY_NAME_PROBABILITY = float(getenv("CALL_BY_NAME_PROBABILITY", 0.5))  # if name is already known
+TAG_BASED_SELECTION = getenv("TAG_BASED_SELECTION", False)
 MOST_DUMMY_RESPONSES = [
     "I really do not know what to answer.",
     "Sorry, probably, I didn't get what you mean.",
@@ -81,7 +84,6 @@ def respond():
             assert len(dialog["bot_utterances"]) > 0

             curr_confidences += [skill_data["confidence"]]
-            annotation = skill_data.get("annotations", {})
             if skill_data["text"] and skill_data["confidence"]:
                 if not skill_data.get("annotations"):
                     logger.warning(f"Valid skill data without annotations: {skill_data}")
@@ -101,14 +103,7 @@ def respond():
                 )
                 logger.info(msg)

-            default_conv_eval = {
-                "isResponseOnTopic": 0.0,
-                "isResponseInteresting": 0.0,
-                "responseEngagesUser": 0.0,
-                "isResponseComprehensible": 0.0,
-                "isResponseErroneous": 0.0,
-            }
-            curr_scores += [annotation.get("convers_evaluator_annotator", default_conv_eval)]
+            curr_scores += [get_conv_eval_annotations(skill_data)]

     curr_is_toxics = np.array(curr_is_toxics)
     curr_scores = np.array(curr_scores)
@@ -126,8 +121,10 @@ def respond():
         logger.exception(e)
         sentry_sdk.capture_exception(e)
         if dialog["human_utterances"][-1].get("hypotheses", []):
+            logger.info("Response Selector Error: randomly choosing final response among hypotheses.")
             best_cand = random.choice(dialog["human_utterances"][-1]["hypotheses"])
         else:
+            logger.info("Response Selector Error: randomly choosing response among dummy responses.")
             best_cand = {
                 "text": random.choice(MOST_DUMMY_RESPONSES),
                 "confidence": 0.1,
@@ -249,8 +246,6 @@ def rule_score_based_selection(dialog, candidates, scores, confidences, is_toxics, bot_utterances):
             and len(dialog["utterances"]) < 16
         ):
             curr_score = very_big_score
-        elif skill_names[i] == "program_y_dangerous" and psycho_help_spec in candidates[i]["text"]:
-            curr_score = very_big_score
         elif skill_names[i] == "dff_friendship_skill" and greeting_spec in candidates[i]["text"]:
             if len(dialog["utterances"]) < 2:
                 curr_score = very_big_score
@@ -272,8 +267,6 @@ def rule_score_based_selection(dialog, candidates, scores, confidences, is_toxics, bot_utterances):
                 curr_score = very_big_score
         elif is_intent_candidate:
             curr_score = very_big_score
-        elif skill_names[i] == "program_y" and alexa_abilities_spec in candidates[i]["text"]:
-            curr_score = very_big_score
         elif skill_names[i] in ["dummy_skill", "convert_reddit", "alice", "eliza", "tdidf_retrieval", "program_y"]:
             if "question" in candidates[i].get("type", "") or "?" in candidates[i]["text"]:
                 penalty_start_utt = 1
@@ -366,6 +359,7 @@ def select_response(candidates, scores, confidences, is_toxics, dialog, all_prev_active_skills):
             dialog, candidates, scores, confidences, bot_utterances, all_prev_active_skills
         )
     else:
+        logger.info("Confidence & ConvEvaluationAnnotator Scores based selection")
         best_candidate, best_id, curr_single_scores = rule_score_based_selection(
             dialog, candidates, scores, confidences, is_toxics, bot_utterances
         )
