Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Commit

Permalink
Wizard of Wikipedia, knowledge source page title. (#3845)
Browse files Browse the repository at this point in the history
  • Loading branch information
mojtaba-komeili committed Jul 23, 2021
1 parent f800ac1 commit fb4d8aa
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 0 deletions.
108 changes: 108 additions & 0 deletions parlai/tasks/wizard_of_wikipedia/agents.py
Expand Up @@ -24,6 +24,7 @@
import copy
from parlai.core.teachers import FixedDialogTeacher, MultiTaskTeacher
from parlai.utils.io import PathManager
from parlai.utils import logging
from parlai.utils.misc import warn_once
from .build import build

Expand Down Expand Up @@ -757,6 +758,113 @@ def get(self, episode_idx, entry_idx=0):
return a


class WikiPageTitleTeacher(WizardDialogKnowledgeTeacher):
    """
    Generates the title of Wikipedia page used as source of knowledge.

    The context provided by this teacher (`text`) is the conversation history, with
    chosen topic removed. The label is the title of the Wikipedia page of the passage
    that wizard selected for crafting the next utterance; in other words, the source
    of knowledge for this utterance.

    All examples are built once, up front, by `_preprocess_data` (or taken from the
    `shared` dict when the teacher is a shared copy). Every example is emitted as its
    own single-turn episode (`episode_done` is always True).
    """

    def __init__(self, opt, shared=None):
        """
        Set up the teacher and build (or receive) the title examples.

        :param opt: option dict; must contain `conversation_history_length` and
            `skip_no_title`. It is deep-copied, so the caller's dict is not modified.
        :param shared: dict produced by `share()`; when given, the preprocessed
            examples are read from `shared['titles_data']` instead of being rebuilt.
        """
        # Work on a private copy so forcing `label_type` does not leak to the caller.
        self.opt = copy.deepcopy(opt)
        self.opt['label_type'] = 'response'
        super().__init__(self.opt, shared=shared)
        self.id = 'WikiPageTitleTeacher'
        self._conv_history_len = self.opt['conversation_history_length']
        # Only positive values or the sentinel -1 are accepted; anything else
        # (including 0) falls back to -1, i.e. keep the whole history.
        if not (self._conv_history_len > 0 or self._conv_history_len == -1):
            logging.warning(
                f'"{self._conv_history_len}" is an invalid value for --conversation-history-length flag.'
                ' Changing it to default of -1 (include the entire message history).'
            )
            self._conv_history_len = -1
        self._skip_no_title = self.opt['skip_no_title']
        if not shared:
            self._preprocess_data()
        else:
            self.titles_data = shared['titles_data']

    @classmethod
    def add_cmdline_args(cls, parser, partial_opt=None):
        """
        Register this teacher's command line flags on top of the parent's.

        :param parser: argument parser to extend.
        :param partial_opt: forwarded unchanged to the parent implementation.
        :return: the same `parser` that was passed in.
        """
        super().add_cmdline_args(parser, partial_opt=partial_opt)
        agent = parser.add_argument_group('Wikipedia Page Title Arguments')
        agent.add_argument(
            '--conversation-history-length',
            type=int,
            default=-1,
            # The "include everything" value is -1; 0 is rejected in __init__.
            help='Number of previous utterances to keep in context, -1 (default) includes all',
        )
        agent.add_argument(
            '--skip-no-title',
            type='bool',
            default=True,
            help=(
                'Whether to skip the example if no passage was selected. If `false` '
                f'uses `{TOKEN_NOCHOSEN}` instead of title if no knowledge source was selected.'
            ),
        )
        return parser

    def share(self):
        """
        Return the parent's shared dict extended with the preprocessed examples.
        """
        shared = super().share()
        shared['titles_data'] = self.titles_data
        return shared

    def _generate_messages(self, hist, action):
        """
        Build one title-prediction example.

        :param hist: list of utterance strings seen so far, oldest first.
        :param action: the parent teacher's message for this turn; only its
            `title` field is read here.
        :return: a single-turn `Message` whose text is the (possibly truncated)
            history joined by newlines and whose only label is the page title.
        """
        # Keep only the trailing N utterances when a positive limit is set;
        # otherwise (-1) use the full history.
        include_hist = (
            hist[-self._conv_history_len :] if self._conv_history_len > 0 else hist
        )
        context = '\n'.join(include_hist)
        return Message(
            {
                'id': "Wikipedia Title Teacher",
                'text': context,
                'labels': [action["title"]],
                'episode_done': True,
            }
        )

    def _should_include(self, act):
        """
        Return False only when skipping is enabled and the label is TOKEN_NOCHOSEN.
        """
        return not (self._skip_no_title and act['labels'][0] == TOKEN_NOCHOSEN)

    def _preprocess_data(self):
        """
        Walk every episode of the parent teacher and build the title examples.

        The result is stored in `self.titles_data` as a flat list with one entry
        per kept turn.
        """
        data = []
        for episode_idx in range(super().num_episodes()):
            dialog_history = []
            ex_idx = 0
            while True:
                a = super().get(episode_idx, ex_idx)
                text_parts = a['text'].split('\n')
                if ex_idx == 0:
                    # throwing away chosen_topic
                    text_parts = text_parts[1:]
                # After dropping the topic, the first turn may carry no utterance
                # at all; in that case nothing is added to the history.
                if text_parts:
                    dialog_history.append(text_parts[0])
                title_act = self._generate_messages(dialog_history, a)
                if self._should_include(title_act):
                    data.append(title_act)
                if a['episode_done']:
                    break
                ex_idx += 1
                # The parent's label for this turn becomes part of the context
                # for the following turns.
                dialog_history.append(a['labels'][0])

        logging.info(
            f'{len(data)} title generation examples generated '
            f'from {super().num_examples()} original examples'
        )
        self.titles_data = data

    def num_episodes(self):
        """
        Return the number of preprocessed examples (one episode per example).
        """
        return len(self.titles_data)

    def num_examples(self):
        """
        Return the number of examples; identical to `num_episodes()`.
        """
        return self.num_episodes()

    def get(self, episode_idx, entry_idx=0):
        """
        Return the preprocessed example at `episode_idx`.

        `entry_idx` is accepted for interface compatibility and is not used,
        since every episode holds exactly one example.
        """
        return self.titles_data[episode_idx]


####################################################
# #
# Doc Reader Teachers #
Expand Down
4 changes: 4 additions & 0 deletions parlai/tasks/wizard_of_wikipedia/test.py
Expand Up @@ -39,5 +39,9 @@ class TestGeneratorTeacher(AutoTeacherTest):
task = "wizard_of_wikipedia:generator"


class TestWikiPageTitleTeacher(AutoTeacherTest):
    # Task identifier consumed by the AutoTeacherTest base class; it points at the
    # `wiki_page_title` teacher of the `wizard_of_wikipedia` task.
    # NOTE(review): presumably the base class compares the teacher's output with
    # stored fixture files -- confirm against AutoTeacherTest.
    task = "wizard_of_wikipedia:wiki_page_title"


class TestDocreaderTeacher(AutoTeacherTest):
task = "wizard_of_wikipedia:docreader"
@@ -0,0 +1,46 @@
acts:
- - episode_done: true
eval_labels:
- Royal Blue (train)
id: WikiPageTitleTeacher
text: 'Blue is my favorite primary color.
Blue is always nice. I like royal blue.'
- - episode_done: true
eval_labels:
- Blue Skies (1946 film)
id: WikiPageTitleTeacher
text: 'Blue is my favorite primary color.
Blue is always nice. I like royal blue.
I once road on The Royal Blue train from New York to D.C
Oh that sounds really nice. I bet there was a lot of scenery and blue skies.'
- - episode_done: true
eval_labels:
- Cinematography
id: WikiPageTitleTeacher
text: Hi buddy, What you think about cinematography
- - episode_done: true
eval_labels:
- Cinematography
id: WikiPageTitleTeacher
text: "Hi buddy, What you think about cinematography\nCinematography,is a type\
\ of motion picture , captured electronically by means of an image \nYes buddy,\
\ Images captured with an electronic image-sensor, produces an electrical charge.The\
\ word \"cinematography\" is based on the Greek words meaning movement, motion."
- - episode_done: true
eval_labels:
- Photography
id: WikiPageTitleTeacher
text: "Hi buddy, What you think about cinematography\nCinematography,is a type\
\ of motion picture , captured electronically by means of an image \nYes buddy,\
\ Images captured with an electronic image-sensor, produces an electrical charge.The\
\ word \"cinematography\" is based on the Greek words meaning movement, motion.\n\
It works by lens used to repeatedly focus the light reflected from objects into\
\ real images on the light-sensitive surface .\n Muybridge sequence of a horse\
\ galloping In the 1830s, moving images were produced on revolving drums and\
\ disks"
num_episodes: 3181
num_examples: 3181
@@ -0,0 +1,48 @@
acts:
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Science fiction film
text: 'I think science fiction is an amazing genre for anything. Future science,
technology, time travel, FTL travel, they''re all such interesting concepts.
I''m a huge fan of science fiction myself! '
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Time travel in fiction
text: "I think science fiction is an amazing genre for anything. Future science,\
\ technology, time travel, FTL travel, they're all such interesting concepts.\n\
I'm a huge fan of science fiction myself! \nAwesome! I really love how sci-fi\
\ storytellers focus on political/social/philosophical issues that would still\
\ be around even in the future. Makes them relatable.\nI agree. One of my favorite\
\ forms of science fiction is anything related to time travel! I find it fascinating."
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Science fiction
text: "I think science fiction is an amazing genre for anything. Future science,\
\ technology, time travel, FTL travel, they're all such interesting concepts.\n\
I'm a huge fan of science fiction myself! \nAwesome! I really love how sci-fi\
\ storytellers focus on political/social/philosophical issues that would still\
\ be around even in the future. Makes them relatable.\nI agree. One of my favorite\
\ forms of science fiction is anything related to time travel! I find it fascinating.\n\
It's not quite sci-fi, but my favorite version of time travel is in Harry Potter\
\ and the Prisoner of Azkaban. Breaks zero logical rules.\nAnd that's difficult\
\ to do when dealing with time travel. I actually haven't seen the latest Harry\
\ Potter movies. Guess it's time to check them out!"
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Internet access
text: 'Can you imagine the world without internet access? '
- - episode_done: true
id: WikiPageTitleTeacher
labels:
- Internet access
text: "Can you imagine the world without internet access? \nNo I could not! I\
\ couldn't imagine living when internet access was rare and very few people\
\ had it!\nOh me either! It seems like such a long time ago. I wonder when Internet\
\ was first created?"
num_episodes: 60797
num_examples: 60797
@@ -0,0 +1,61 @@
acts:
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: I like Gardening, even when I've only been doing it for a short time.
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: 'I like Gardening, even when I''ve only been doing it for a short time.
I live on a farm, we garden all year long, it is very relaxing.
That sounds great. I''ve always thought that I would love living in a farm,
but I;ve always lived in the city. What do you mostly plant?'
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: 'I like Gardening, even when I''ve only been doing it for a short time.
I live on a farm, we garden all year long, it is very relaxing.
That sounds great. I''ve always thought that I would love living in a farm,
but I;ve always lived in the city. What do you mostly plant?
I have planted several fruits trees, tomatoes, jalepenos, bell peppers, onions,
Garlic, and potatoes mostly.
Great, I love the idea of growing my own vegetables and fruits! Do you have
animals in the farm?'
- - episode_done: true
eval_labels:
- Gardening
id: WikiPageTitleTeacher
text: 'I like Gardening, even when I''ve only been doing it for a short time.
I live on a farm, we garden all year long, it is very relaxing.
That sounds great. I''ve always thought that I would love living in a farm,
but I;ve always lived in the city. What do you mostly plant?
I have planted several fruits trees, tomatoes, jalepenos, bell peppers, onions,
Garlic, and potatoes mostly.
Great, I love the idea of growing my own vegetables and fruits! Do you have
animals in the farm?
yes i do. Cows, chickens, Micro pigs, Guinneas, We also do forest growing also.
we plants large pine trees.
Wow, it sounds amazing, the Micro-pigs are so cute! are they trainable to be
well behaved?'
- - episode_done: true
eval_labels:
- Bob Ross
id: WikiPageTitleTeacher
text: I would like to know more about bob ross
num_episodes: 3236
num_examples: 3236

0 comments on commit fb4d8aa

Please sign in to comment.