Merge pull request #155 from deepmipt/dev
Release v0.1.7
dilyararimovna committed May 6, 2022
2 parents ed42f0c + 8476288 commit 30f290c
Showing 14 changed files with 62 additions and 35 deletions.
4 changes: 2 additions & 2 deletions .env
@@ -22,8 +22,8 @@ COMET_SERVICE_URL=http://comet-atomic:8053/comet
 CONCEPTNET_SERVICE_URL=http://comet-conceptnet:8065/comet
 MASKED_LM_SERVICE_URL=http://masked-lm:8088/respond
 SENTIMENT_CLASSIFICATION_SERVICE_URL=http://sentiment-classification:8024/model
-WIKIDATA_URL=http://wiki-parser:8077/model
-ENTITY_LINKING_URL=http://entity-linking:8075/model
+DP_WIKIDATA_URL=http://wiki-parser:8077/model
+DP_ENTITY_LINKING_URL=http://entity-linking:8075/model
 KNOWLEDGE_GROUNDING_SERVICE_URL=http://knowledge-grounding:8083/respond
 WIKIDATA_DIALOGUE_SERVICE_URL=http://wikidata-dial-service:8092/model
 NEWS_API_ANNOTATOR_URL=http://news-api-annotator:8112/respond
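The two renamed variables are read by services through os.getenv, as the common/custom_requests.py hunk further below shows. A minimal sketch of how a consumer would pick up the new DP_-prefixed names; the fallback to the old names is only an illustration for callers that have not migrated yet, not part of this commit:

    import os

    # New DP_-prefixed names introduced in this release; the fallback is illustrative only.
    WIKIDATA_URL = os.getenv("DP_WIKIDATA_URL") or os.getenv("WIKIDATA_URL")
    ENTITY_LINKING_URL = os.getenv("DP_ENTITY_LINKING_URL") or os.getenv("ENTITY_LINKING_URL")
    assert WIKIDATA_URL and ENTITY_LINKING_URL, "wiki-parser / entity-linking URLs must be set"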
11 changes: 10 additions & 1 deletion annotators/NER/server.py
@@ -43,7 +43,8 @@ def extract_good_entities(preds, sentences):
         good_entities_for_sent = []

         for ent in entities_for_sent:
-            ent_text = ent["text"].lower()
+            ent_text = ent["text"]
+            ent_text = " ".join([ent_word[0].capitalize() + ent_word[1:] for ent_word in ent_text.split()])
             # remove everything except of letters, digitals, spaces and -
             ent_text = EVERYTHING_EXCEPT_LETTERS_DIGITALS_AND_SPACE.sub(" ", ent_text)
             ent_text = DOUBLE_SPACES.sub(" ", ent_text).strip()
@@ -56,6 +57,7 @@ def extract_good_entities(preds, sentences):
             is_long_enough = len(ent_text) > 2
             is_not_banned = not re.match(BANNED_ENTITIES, ent_text)
             if is_not_stopword and is_not_banned and is_long_enough:
+                ent["text"] = ent_text
                 good_entities_for_sent.append(deepcopy(ent))

         good_preds.append(good_entities_for_sent)
@@ -64,7 +66,14 @@ def extract_good_entities(preds, sentences):

 def get_predictions_for_list_sentences(sentences):
     sents = [word_tokenize(sent.lower()) for sent in sentences]
+    sents_upper = [word_tokenize(sent) for sent in sentences]
     preds = ner.predict(sents)
+    for i in range(len(preds)):
+        sent_upper = sents_upper[i]
+        for j in range(len(preds[i])):
+            ent_upper = " ".join(sent_upper[preds[i][j]["start_pos"] : preds[i][j]["end_pos"]])
+            if ent_upper.lower() == preds[i][j]["text"]:
+                preds[i][j]["text"] = ent_upper
     # each sample is a list of sentences of current utterance
     # so, preds is a list of length = number of sents in utterances
     # each element of preds is a list of entities.
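The change above stops lowercasing extracted entities: each word is capitalized, and get_predictions_for_list_sentences restores the original casing whenever the surface form in the raw sentence matches the lowercased prediction. A standalone sketch of the same idea, with hypothetical helper and argument names:

    def restore_casing(pred_text, original_tokens, start_pos, end_pos):
        """Return the entity as it appears in the original (non-lowercased) sentence
        when it matches the lowercased prediction; otherwise capitalize each word."""
        ent_upper = " ".join(original_tokens[start_pos:end_pos])
        if ent_upper.lower() == pred_text:
            return ent_upper
        return " ".join(word[0].capitalize() + word[1:] for word in pred_text.split())

    # e.g. restore_casing("new york", ["I", "love", "New", "York"], 2, 4) -> "New York"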
17 changes: 14 additions & 3 deletions annotators/entity_detection/server.py
@@ -49,6 +49,7 @@ def get_result(request, what_to_annotate):
     logger.info(f"annotating: {what_to_annotate}, input (the last utterances): {last_utts}")

     utts_list = []
+    utts_list_init = []
     utts_nums = []
     last_utt_starts = []
     for n, hist_utt in enumerate(last_utts):
@@ -71,6 +72,7 @@
                 utts_list.append(concat_utt.lower())
             else:
                 utts_list.append(concat_utt)
+            utts_list_init.append(concat_utt)
             utts_nums.append(n)

     utt_entities_batch = [{} for _ in last_utts]
@@ -91,14 +93,23 @@
     ) = entity_detection(utts_list)
     logger.info(f"entity_substr_batch {entity_substr_batch} finegrained_tags_batch {finegrained_tags_batch}")

-    for entity_substr_list, tags_list, finegrained_tags_list, entity_offsets_list, last_utt_start, num in zip(
-        entity_substr_batch, tags_batch, finegrained_tags_batch, entity_offsets_batch, last_utt_starts, utts_nums
+    for entity_substr_list, tags_list, finegrained_tags_list, entity_offsets_list, last_utt_start, uttr, num in zip(
+        entity_substr_batch,
+        tags_batch,
+        finegrained_tags_batch,
+        entity_offsets_batch,
+        last_utt_starts,
+        utts_list_init,
+        utts_nums,
     ):
         utt_entities = {}
         for entity, tag, finegrained_tag, (start_offset, end_offset) in zip(
             entity_substr_list, tags_list, finegrained_tags_list, entity_offsets_list
         ):
-            if entity not in stopwords and len(entity) > 2 and start_offset >= last_utt_start:
+            entity_init = uttr[start_offset:end_offset]
+            if entity_init.lower() == entity:
+                entity = entity_init
+            if entity.lower() not in stopwords and len(entity) > 2 and start_offset >= last_utt_start:
                 entity = EVERYTHING_EXCEPT_LETTERS_DIGITALS_AND_SPACE.sub(" ", entity)
                 entity = DOUBLE_SPACES.sub(" ", entity).strip()
                 if finegrained_tag[0][0] > 0.5:
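The new utts_list_init keeps the non-lowercased text alongside the lowercased input to the detector, so the character offsets returned for each entity can be mapped back onto the original string. A minimal sketch of that offset-based recovery (names are illustrative):

    def recover_original_span(original_utt, detected_entity, start_offset, end_offset):
        """Map a lowercased detection back to the original casing via its offsets."""
        candidate = original_utt[start_offset:end_offset]
        # Only trust the slice if it really is the same entity, just differently cased.
        return candidate if candidate.lower() == detected_entity else detected_entity

    # recover_original_span("I moved to San Francisco", "san francisco", 11, 24) -> "San Francisco"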
1 change: 1 addition & 0 deletions annotators/entity_linking/server.py
@@ -87,6 +87,7 @@ def respond():
     entity_substr_batch = inp.get("entity_substr", [[""]])
     template_batch = inp.get("template", [""])
     context_batch = inp.get("context", [[""]])
+    logger.info(f"entity linking, input {entity_substr_batch}")
     long_context_batch = []
     short_context_batch = []
     for entity_substr_list, context_list in zip(entity_substr_batch, context_batch):
8 changes: 5 additions & 3 deletions annotators/kbqa/kbqa_cq.json
@@ -37,14 +37,14 @@
       {
         "class_name": "api_requester",
         "id": "linker_entities",
-        "url": "http://entity-linking:8075/model",
+        "url": "{ENTITY_LINKING_URL}",
         "out": ["entity_ids"],
         "param_names": ["entity_substr", "template_found"]
       },
       {
         "class_name": "api_requester",
         "id": "wiki_p",
-        "url": "http://wiki-parser:8077/model",
+        "url": "{WIKIDATA_URL}",
         "out": ["wiki_parser_output"],
         "param_names": ["parser_info", "query"]
       },
@@ -132,7 +132,9 @@
       "MODELS_PATH": "{ROOT_PATH}/models",
       "BERT_PATH": "{DOWNLOADS_PATH}/bert_models_kbqa/cased_L-12_H-768_A-12",
       "NER_PATH": "{MODELS_PATH}/ner_lcquad_ent_and_type",
-      "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
+      "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
+      "ENTITY_LINKING_URL": "http://entity-linking:8075/model",
+      "WIKIDATA_URL": "http://wiki-parser:8077/model"
     },
     "requirements": [
       "{DEEPPAVLOV_PATH}/requirements/tf.txt",
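Moving the service URLs into the config's variables section lets the {ENTITY_LINKING_URL} and {WIKIDATA_URL} placeholders used in the pipe entries be resolved from a single place instead of being hard-coded per component. A rough sketch of how such placeholder substitution works; this illustrates the pattern only and is not DeepPavlov's actual resolver:

    import json

    def resolve_placeholders(node, variables):
        """Recursively substitute {VAR} placeholders in a JSON-like config."""
        if isinstance(node, dict):
            return {k: resolve_placeholders(v, variables) for k, v in node.items()}
        if isinstance(node, list):
            return [resolve_placeholders(v, variables) for v in node]
        if isinstance(node, str):
            for name, value in variables.items():
                node = node.replace("{" + name + "}", str(value))
            return node
        return node

    config = json.loads('{"url": "{ENTITY_LINKING_URL}"}')
    print(resolve_placeholders(config, {"ENTITY_LINKING_URL": "http://entity-linking:8075/model"}))
    # {'url': 'http://entity-linking:8075/model'}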
8 changes: 5 additions & 3 deletions annotators/kbqa/kbqa_cq_mt_bert.json
@@ -117,14 +117,14 @@
       {
         "class_name": "api_requester",
         "id": "linker_entities",
-        "url": "http://entity-linking:8075/model",
+        "url": "{ENTITY_LINKING_URL}",
         "out": ["entity_ids"],
         "param_names": ["entity_substr", "template_found"]
       },
       {
         "class_name": "api_requester",
         "id": "wiki_p",
-        "url": "http://wiki-parser:8077/model",
+        "url": "{WIKIDATA_URL}",
         "out": ["wiki_parser_output"],
         "param_names": ["parser_info", "query"]
       },
@@ -228,7 +228,9 @@
       "MODELS_PATH": "{ROOT_PATH}/models",
       "BERT_PATH": "{DOWNLOADS_PATH}/bert_models_kbqa/cased_L-12_H-768_A-12",
       "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
-      "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa"
+      "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa",
+      "ENTITY_LINKING_URL": "http://entity-linking:8075/model",
+      "WIKIDATA_URL": "http://wiki-parser:8077/model"
     },
     "requirements": [
       "{DEEPPAVLOV_PATH}/requirements/tf.txt",
5 changes: 3 additions & 2 deletions annotators/kbqa/kbqa_cq_mt_bert_lite.json
@@ -12,7 +12,7 @@
       {
         "class_name": "api_requester",
         "id": "linker_entities",
-        "url": "http://entity-linking:8075/model",
+        "url": "{ENTITY_LINKING_URL}",
         "out": ["entity_ids"],
         "param_names": ["entity_substr", "template_found"]
       },
@@ -90,7 +90,8 @@
       "MODELS_PATH": "{ROOT_PATH}/models",
       "BERT_PATH": "{DOWNLOADS_PATH}/bert_models_kbqa/cased_L-12_H-768_A-12",
       "CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
-      "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa"
+      "MT_BERT_PATH": "{MODELS_PATH}/mt_bert_kbqa",
+      "ENTITY_LINKING_URL": "http://entity-linking:8075/model"
     },
     "requirements": [
       "{DEEPPAVLOV_PATH}/requirements/tf.txt",
2 changes: 2 additions & 0 deletions annotators/wiki_parser/wiki_parser.py
@@ -620,6 +620,8 @@ def find_top_triplets(entity, entity_substr, pos=None, token_conf=None, conf=None
         triplets["token_conf"] = token_conf
     if conf is not None:
         triplets["conf"] = conf
+    if entity_substr.lower() in entity_label.lower():
+        entity_substr = entity_label
     triplets_info[entity_substr] = triplets
     return triplets_info

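With the two added lines, find_top_triplets keys its result by the canonical Wikidata label whenever the requested substring is contained in it, so downstream consumers see properly cased names. A tiny illustration with made-up values (the real entity_label comes from Wikidata inside the function):

    entity_substr, entity_label = "leonardo dicaprio", "Leonardo DiCaprio"
    if entity_substr.lower() in entity_label.lower():
        entity_substr = entity_label
    print(entity_substr)  # "Leonardo DiCaprio"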
4 changes: 2 additions & 2 deletions common/custom_requests.py
@@ -8,8 +8,8 @@
 sentry_sdk.init(getenv("SENTRY_DSN"))
 logger = logging.getLogger(__name__)

-WIKIDATA_URL = getenv("WIKIDATA_URL")
-ENTITY_LINKING_URL = getenv("ENTITY_LINKING_URL")
+WIKIDATA_URL = getenv("DP_WIKIDATA_URL")
+ENTITY_LINKING_URL = getenv("DP_ENTITY_LINKING_URL")
 assert WIKIDATA_URL and ENTITY_LINKING_URL

4 changes: 2 additions & 2 deletions skills/dff_bot_persona_skill/dialogflows/flows/shared.py
@@ -9,8 +9,8 @@
 sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"))


-ENTITY_LINKING_URL = os.getenv("ENTITY_LINKING_URL")
-WIKIDATA_URL = os.getenv("WIKIDATA_URL")
+ENTITY_LINKING_URL = os.getenv("DP_ENTITY_LINKING_URL")
+WIKIDATA_URL = os.getenv("DP_WIKIDATA_URL")
 assert ENTITY_LINKING_URL, ENTITY_LINKING_URL
 assert WIKIDATA_URL, WIKIDATA_URL
4 changes: 2 additions & 2 deletions skills/dff_gossip_skill/dialogflows/flows/utils.py
@@ -18,8 +18,8 @@

 sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"))

-ENTITY_LINKING_URL = os.getenv("ENTITY_LINKING_URL")
-WIKIDATA_URL = os.getenv("WIKIDATA_URL")
+ENTITY_LINKING_URL = os.getenv("DP_ENTITY_LINKING_URL")
+WIKIDATA_URL = os.getenv("DP_WIKIDATA_URL")
 assert ENTITY_LINKING_URL, ENTITY_LINKING_URL
 assert WIKIDATA_URL, WIKIDATA_URL
10 changes: 5 additions & 5 deletions skills/dff_sport_skill/dialogflows/flows/sport.py
@@ -245,7 +245,7 @@ def entity_in_last_uttr_from_sport_area(vars):

 def get_dict_entity(entity_substr, entity_ids, type):
     try:
-        WIKIDATA_URL = "http://wiki-parser:8077/model"
+        WIKIDATA_URL = os.getenv("DP_WIKIDATA_URL")
         dict_result = requests.post(
             WIKIDATA_URL,
             json={
@@ -352,8 +352,8 @@ def user_ask_about_sport_request(ngrams, vars):
 def lets_chat_about_sport_response(vars):
     # USR_ASK_ABOUT_SPORT
     responses = [
-        f"I have no physical embodiment. Sport is interesting and useful. Tell me what sport do you enjoy?",
-        f"I live on a cloud, so i can't do sport , but I'm really curious about what sport are you fond of?",
+        "I have no physical embodiment. Sport is interesting and useful. Tell me what sport do you enjoy?",
+        "I live on a cloud, so i can't do sport , but I'm really curious about what sport are you fond of?",
     ]
     try:
         state_utils.set_confidence(vars, confidence=SUPER_CONFIDENCE)
@@ -398,7 +398,7 @@ def user_ask_about_athletes_response(vars):
     try:
         state_utils.set_confidence(vars, confidence=SUPER_CONFIDENCE)
         state_utils.set_can_continue(vars, continue_flag=MUST_CONTINUE)
-        return f"I know all the athletes on this planet. Which athlete do you like the most?"
+        return "I know all the athletes on this planet. Which athlete do you like the most?"
     except Exception as exc:
         logger.exception(exc)
         sentry_sdk.capture_exception(exc)
@@ -922,7 +922,7 @@ def last_chance_response(vars):

 def error_response(vars):
     state_utils.set_confidence(vars, ZERO_CONFIDENCE)
-    return f""
+    return ""


 ##################################################################################################################
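get_dict_entity now reads the wiki parser address from DP_WIKIDATA_URL instead of a hard-coded URL. A minimal sketch of such an environment-driven request; the payload keys parser_info and query mirror the wiki_p param_names in the kbqa configs above, but the exact values and the timeout are assumptions for illustration:

    import os
    import requests

    WIKIDATA_URL = os.getenv("DP_WIKIDATA_URL", "http://wiki-parser:8077/model")

    def query_wiki_parser(parser_info, query, timeout=1.0):
        """Post a request to the wiki parser service and return its JSON answer."""
        response = requests.post(
            WIKIDATA_URL,
            json={"parser_info": parser_info, "query": query},
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()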
3 changes: 0 additions & 3 deletions skills/factoid_qa/server.py
@@ -363,9 +363,6 @@ def respond():
             logger.info("Question is not classified as factoid.")
             response = ""
             confidence = 0.0
-
-        if confidence == 1.0:
-            confidence = 0.99
         responses.append(response)
         confidences.append(confidence)
         attributes.append(attr)
16 changes: 9 additions & 7 deletions state_formatters/dp_formatters.py
@@ -535,21 +535,23 @@ def el_formatter_dialog(dialog: Dict):
     # Used by: entity_linking annotator
     num_last_utterances = 2
     ner_output = get_entities(dialog["human_utterances"][-1], only_named=True, with_labels=True)
-    nounphrases = dialog["human_utterances"][-1]["annotations"].get("cobot_entities", {}).get("entities", [])
-    entity_substr = []
+    nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)
+    entity_substr_list = []
     if ner_output:
         for entity in ner_output:
             if entity and isinstance(entity, dict) and "text" in entity and entity["text"].lower() != "alexa":
-                entity_substr.append(entity["text"].lower())
-
+                entity_substr_list.append(entity["text"])
+    entity_substr_lower_list = {entity_substr.lower() for entity_substr in entity_substr_list}
     dialog = utils.get_last_n_turns(dialog, bot_last_turns=1)
     dialog = utils.replace_with_annotated_utterances(dialog, mode="punct_sent")
     context = [[uttr["text"] for uttr in dialog["utterances"][-num_last_utterances:]]]
     if nounphrases:
-        entity_substr += [nounphrase.lower() for nounphrase in nounphrases]
-        entity_substr = list(set(entity_substr))
+        entity_substr_list += [
+            nounphrase for nounphrase in nounphrases if nounphrase.lower() not in entity_substr_lower_list
+        ]
+        entity_substr_list = list(set(entity_substr_list))

-    return [{"entity_substr": [entity_substr], "template": [""], "context": context}]
+    return [{"entity_substr": [entity_substr_list], "template": [""], "context": context}]


 def kbqa_formatter_dialog(dialog: Dict):
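The reworked el_formatter_dialog keeps the original casing of NER entities and only adds noun phrases whose lowercased form is not already covered by a named entity. The dedup idea in isolation, with hypothetical inputs:

    ner_entities = ["New York", "Brad Pitt"]
    nounphrases = ["new york", "favorite movie"]

    seen_lower = {ent.lower() for ent in ner_entities}
    entity_substr_list = ner_entities + [np for np in nounphrases if np.lower() not in seen_lower]
    print(entity_substr_list)  # ['New York', 'Brad Pitt', 'favorite movie']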
