Skip to content

Commit

Permalink
Merge pull request #557 from deeppavlov/dev
Browse files Browse the repository at this point in the history
Release v1.10.1
  • Loading branch information
dilyararimovna committed Aug 16, 2023
2 parents b0f1df3 + e015241 commit 9d1e48d
Show file tree
Hide file tree
Showing 20 changed files with 414 additions and 12 deletions.
5 changes: 5 additions & 0 deletions components.tsv
Expand Up @@ -181,3 +181,8 @@
8177 dff-journalist-helper-ru-prompted-skill
8178 transformers-lm-rugpt35
8179 dff-robot-prompted-skill
8180
8181
8182
8183 external-integration-skill
8184 external-fake-server
24 changes: 24 additions & 0 deletions components/knoIA98f3bijjao9d9pqkne.yml
@@ -0,0 +1,24 @@
name: external_integration_skill
display_name: External Integration Skill
component_type: Script-based w/o NNs
model_type: Dictionary/Pattern-based
is_customizable: false
author: publisher@deeppavlov.ai
description: Generic skill to provide responses from external skills and services.
ram_usage: 128M
gpu_usage: null
group: skills
connector:
protocol: http
timeout: 2.0
url: http://external-integration-skill:8183/respond
dialog_formatter: state_formatters.dp_formatters:external_integration_skill_formatter
response_formatter: state_formatters.dp_formatters:skill_with_attributes_formatter_service
previous_services:
- skill_selectors
required_previous_services: null
state_manager_method: add_hypothesis
tags: null
endpoint: respond
service: skills/external_integration_skill/service_configs/external-integration-skill
date_created: '2023-06-28T09:45:32'
15 changes: 15 additions & 0 deletions services/external_fake_server/Dockerfile
@@ -0,0 +1,15 @@
FROM python:3.9

# Port the fake server listens on; injected at build time and kept in the
# runtime environment so server.py / compose can reference it.
ARG SERVICE_PORT
ENV SERVICE_PORT ${SERVICE_PORT}

RUN mkdir /src

COPY ./services/external_fake_server/requirements.txt /src/requirements.txt
RUN pip install -r /src/requirements.txt

COPY ./services/external_fake_server /src/
COPY ./common/ /src/common/
WORKDIR /src

# Bind explicitly: gunicorn's default is 127.0.0.1:8000, which is unreachable
# through the container port mapping (the service is published on SERVICE_PORT,
# 8184). Matches the sibling external_integration_skill Dockerfile.
CMD gunicorn --workers=2 server:app -b 0.0.0.0:${SERVICE_PORT}
8 changes: 8 additions & 0 deletions services/external_fake_server/requirements.txt
@@ -0,0 +1,8 @@
flask==1.1.1
itsdangerous==2.0.1
gunicorn==19.9.0
requests==2.22.0
sentry-sdk==0.12.3
click==7.1.2
jinja2<=3.0.3
Werkzeug<=2.0.3
36 changes: 36 additions & 0 deletions services/external_fake_server/server.py
@@ -0,0 +1,36 @@
# Module-level setup for the fake external server: error reporting,
# logging, and the Flask application object used by the route decorators.
import logging
import time
from os import getenv

import sentry_sdk

from flask import Flask, request, jsonify


# SENTRY_DSN is read from the environment; sentry_sdk.init(None) is a no-op,
# so running without Sentry configured is safe.
sentry_sdk.init(getenv("SENTRY_DSN"))

logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)


@app.route("/ping", methods=["POST"])
def ping():
    """Liveness probe: always answer with the literal string "pong"."""
    reply = "pong"
    return reply


@app.route("/return_response", methods=["POST"])
def return_response():
    """Fake external-service endpoint returning a canned hypothesis.

    Reads `payload` (message text) and `dialog_id` from the JSON body.
    When both are truthy, replies with a fixed success hypothesis at
    confidence 0.9; otherwise with an empty response at confidence 0.0.
    """
    start_ts = time.time()
    body = request.json
    incoming_text = body.get("payload", None)
    dialog = body.get("dialog_id", None)
    logger.info(f"fake-external-server got message: {incoming_text}, dialog_id: {dialog}")
    results = (
        {"response": "Success!", "confidence": 0.9}
        if incoming_text and dialog
        else {"response": "", "confidence": 0.0}
    )
    logger.info(f"fake-external-server `return_response` results: {results}")
    elapsed = time.time() - start_ts
    logger.info(f"fake-external-server `return_response` exec time: {elapsed:.3f}s")
    return jsonify(results)
@@ -0,0 +1,2 @@
SERVICE_PORT: 8184
SERVICE_NAME: external_fake_server
@@ -0,0 +1,24 @@
name: external-fake-server
endpoints:
- return_response
compose:
env_file:
- .env
build:
args:
SERVICE_PORT: 8184
SERVICE_NAME: external_fake_server
context: .
      dockerfile: ./services/external_fake_server/Dockerfile
    command: gunicorn --workers=2 server:app -b 0.0.0.0:8184
deploy:
resources:
limits:
memory: 100M
reservations:
memory: 100M
volumes:
      - ./services/external_fake_server:/src
- ./common:/src/common
ports:
- 8184:8184
24 changes: 24 additions & 0 deletions services/external_fake_server/test.py
@@ -0,0 +1,24 @@
import requests


def main():
    """Smoke-test the fake external server's `/return_response` endpoint.

    Posts one well-formed and one degenerate request and checks the
    server's canned responses. Raises AssertionError on mismatch.
    """
    # The fake server is published on port 8184 (see its service configs);
    # 8169 was a copy-paste leftover from another service.
    url = "http://0.0.0.0:8184/return_response"

    request_datas = [
        {"dialog_id": "jknvawoioqb783HGGIUUGI", "payload": "How are you doing?"},
        {"dialog_id": None, "payload": ""},
    ]
    gold_results = [
        {"response": "Success!", "confidence": 0.9},
        {"response": "", "confidence": 0.0},
    ]
    for request_data, gold in zip(request_datas, gold_results):
        result = requests.post(url, json=request_data).json()
        # Pass a real message string: `assert cond, print(...)` raises
        # AssertionError(None) and only works via print's side effect.
        assert result == gold, f"Got result: {result}, something is wrong."
    print("Success!")


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions services/external_fake_server/test.sh
@@ -0,0 +1,3 @@
#!/bin/bash

# Run the HTTP smoke test against the running external_fake_server container.
python test.py
7 changes: 5 additions & 2 deletions services/transformers_lm/server.py
Expand Up @@ -52,13 +52,13 @@

def add_replacement_tokens(text, replacement):
for pair in replacement:
text = text.replace(pair[0], f"{pair[1]} ")
text = re.sub(pair[0], f"{pair[1]} ", text)
return text


def remove_replacement_tokens(text, replacement):
for pair in replacement:
text = text.replace(pair[1], pair[0])
text = re.sub(pair[1], pair[0], text)

text = text.replace("\n ", "\n")
return text
Expand Down Expand Up @@ -133,6 +133,9 @@ def generate_responses(context, model, tokenizer, prompt, generation_params, con
# preprocess dialog context to correctly remove it from output
dialog_context = re.sub(r" +", " ", dialog_context)
dialog_context = dialog_context.replace("\n ", "\n")
output = re.sub(r" +", " ", output)
output = output.replace("\n ", "\n")

result_cut = output.replace(dialog_context + " ", "")
result_cut = cut_predictions_by_additional_eos(result_cut)
result_cut = remove_replacement_tokens(result_cut, replacement)
Expand Down
11 changes: 1 addition & 10 deletions services/transformers_lm/test.py
Expand Up @@ -2,14 +2,6 @@
import requests


DEFAULT_CONFIG = {
"max_new_tokens": 60,
"min_new_tokens": 8,
"top_p": 0.9,
"temperature": 0.9,
"do_sample": True,
"num_return_sequences": 2,
}
SERVICE_PORT = int(os.getenv("SERVICE_PORT"))


Expand All @@ -24,15 +16,14 @@ def test_respond():
["Привет, Маркус! Я в порядке. Как дела?", "Я отлично. Какие у тебя планы на сегодня?"],
]
prompts = [
"Respond like a friendly chatbot.",
"Respond like a friendly chatbot. \n Dialog:\n",
"Отвечай как дружелюбный бот.",
]
result = requests.post(
url,
json={
"dialog_contexts": contexts,
"prompts": prompts,
"configs": [DEFAULT_CONFIG] * len(contexts),
},
).json()
print(result)
Expand Down
26 changes: 26 additions & 0 deletions skills/external_integration_skill/Dockerfile
@@ -0,0 +1,26 @@
FROM python:3.9

WORKDIR /src

COPY ./skills/external_integration_skill/requirements.txt /src/requirements.txt
RUN pip install -r /src/requirements.txt

# Build-time configuration forwarded into the runtime environment so the
# skill's server can read it via getenv. Values are supplied per-deployment
# in docker-compose build args (see README).
ARG SERVICE_NAME
ENV SERVICE_NAME ${SERVICE_NAME}
ARG SERVICE_PORT
ENV SERVICE_PORT ${SERVICE_PORT}
# URL of the external service this skill proxies to.
ARG EXTERNAL_SKILL_URL
ENV EXTERNAL_SKILL_URL ${EXTERNAL_SKILL_URL}
# Extra request fields the external service expects (e.g. dialog_id).
ARG ARGUMENTS_TO_SEND
ENV ARGUMENTS_TO_SEND ${ARGUMENTS_TO_SEND}
# Key of the external response JSON holding the reply text.
ARG RESPONSE_KEY
ENV RESPONSE_KEY ${RESPONSE_KEY}
# Key under which the message text is sent to the external service.
ARG PAYLOAD_ARGUMENT_NAME
ENV PAYLOAD_ARGUMENT_NAME ${PAYLOAD_ARGUMENT_NAME}
ARG EXTERNAL_TIMEOUT
ENV EXTERNAL_TIMEOUT ${EXTERNAL_TIMEOUT}

COPY skills/external_integration_skill /src
COPY common /src/common

CMD gunicorn --workers=1 server:app -b 0.0.0.0:${SERVICE_PORT} --timeout=300
104 changes: 104 additions & 0 deletions skills/external_integration_skill/README.md
@@ -0,0 +1,104 @@
# Light-weighted skill for external service integration

This skill can be used to integrate external services and skills into DeepPavlov Dream pipeline.

## Testing the skill

You may test the skill using the external_fake_server component, which imitates the work of an external service.
To do so, add the following files to the distribution you want to use for testing:

__docker-compose.override.yml (add to WAIT_HOSTS)__
```
external-integration-skill:8183, external-fake-server:8184
```

__docker-compose.override.yml__
```
external-integration-skill:
env_file: [ .env ]
build:
args:
SERVICE_NAME: external_integration_skill
EXTERNAL_SKILL_URL: http://external-fake-server:8184/return_response
ARGUMENTS_TO_SEND: dialog_id
PAYLOAD_ARGUMENT_NAME: payload
RESPONSE_KEY: response
EXTERNAL_TIMEOUT: 10
context: .
dockerfile: ./skills/external_integration_skill/Dockerfile
command: gunicorn --workers=1 server:app -b 0.0.0.0:8183 --reload
deploy:
resources:
limits:
memory: 128M
reservations:
memory: 128M
external-fake-server:
env_file: [ .env ]
build:
args:
SERVICE_PORT: 8184
SERVICE_NAME: external_fake_server
context: .
dockerfile: ./services/external_fake_server/Dockerfile
command: flask run -h 0.0.0.0 -p 8184
environment:
- FLASK_APP=server
deploy:
resources:
limits:
memory: 100M
reservations:
memory: 100M
```

__dev.yml__
```
external-integration-skill:
volumes:
- "./skills/external_integration_skill:/src"
- "./common:/src/common"
ports:
- 8183:8183
external-fake-server:
volumes:
- "./services/external_fake_server:/src"
- "./common:/src/common"
ports:
- 8184:8184
```

__pipeline_conf.json (add to skills)__
```
"external_integration_skill": {
"connector": {
"protocol": "http",
"timeout": 2,
"url": "http://external-integration-skill:8183/respond"
},
"dialog_formatter": "state_formatters.dp_formatters:external_integration_skill_formatter",
"response_formatter": "state_formatters.dp_formatters:skill_with_attributes_formatter_service",
"previous_services": [
"skill_selectors"
],
"state_manager_method": "add_hypothesis",
"is_enabled": true,
"source": {
"component": "components/knoIA98f3bijjao9d9pqkne.yml",
"service": "skills/external_integration_skill/service_configs/external-integration-skill"
}
}
```

To leave only your skill in the pipeline you can either get rid of the others in docker-compose.yml and dev.yml or do the following:

__skill_selectors/rule_based_selector/connector.py__
```
asyncio.create_task(callback(task_id=payload["task_id"], response=list(set(skills_for_uttr)))) -> asyncio.create_task(callback(task_id=payload["task_id"], response=['external_integration_skill']))
```

## Integrating real external services

Do the same, but leave out the external-fake-server component. Also, pay attention to ```EXTERNAL_SKILL_URL```, ```PAYLOAD_ARGUMENT_NAME```, ```RESPONSE_KEY```, ```ARGUMENTS_TO_SEND```. ```EXTERNAL_SKILL_URL``` is the link to the external service. ```PAYLOAD_ARGUMENT_NAME```, ```RESPONSE_KEY``` and ```ARGUMENTS_TO_SEND``` all depend on the input and output format of the external service. ```PAYLOAD_ARGUMENT_NAME``` is the key of the input json in which the external skill expects to receive the text of the message to reply to ("payload" by default); ```RESPONSE_KEY``` is the key in which the output json of the external skill contains the text of the reply we want to get (None by default); ```ARGUMENTS_TO_SEND``` are the arguments that the external server needs to receive along with the message text, e.g. dialog_id or user_id.
9 changes: 9 additions & 0 deletions skills/external_integration_skill/requirements.txt
@@ -0,0 +1,9 @@
flask==1.1.1
itsdangerous==2.0.1
gunicorn==19.9.0
requests==2.22.0
sentry-sdk[flask]==0.14.1
healthcheck==1.3.3
jinja2<=3.0.3
Werkzeug<=2.0.3
openai==0.27.6

0 comments on commit 9d1e48d

Please sign in to comment.