-
Notifications
You must be signed in to change notification settings - Fork 1
/
val_addition_single.jsonl
86 lines (86 loc) · 105 KB
/
val_addition_single.jsonl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{"video_name": "Juezhannanjing", "ep_num": "Ep_1", "timestamp": "10:31", "link": "https://www.bilibili.com/bangumi/play/ss22911/?from=search&seid=4482171785455891211", "keywords": ["government", "military", "politics", "president", "war", "respect"], "img_fn": "MC-VCR_sample/0.jpg", "objects": ["person", "person", "person"], "metadata_fn": "MC-VCR_sample/0.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["[person3]", "is", "talking", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-0", "question_number": 0, "annot_id": "val-0", "answer_choices": [[[2], "is", "talking", "."], ["Filming", "a", "school", "event", "."], ["Working", "on", "a", "computer", "."], [[2], "is", "curious", "about", "what", [2], "is", "doing", "."]], "rationale_choices": [[[2], "is", "talking", "."], ["Filming", "a", "school", "event", "."], ["Working", "on", "a", "computer", "."], [[2], "is", "curious", "about", "what", [2], "is", "doing", "."]]}
{"video_name": "Juezhannanjing", "ep_num": "Ep_1", "timestamp": "22:42", "link": "https://www.bilibili.com/bangumi/play/ss22911/?from=search&seid=4482171785455891211", "keywords": ["campaign", "teacher", "student", "suggestion", "revolt"], "img_fn": "MC-VCR_sample/2.jpg", "objects": ["person", "person", "book", "book", "book", "dining table", "book", "handbag"], "metadata_fn": "MC-VCR_sample/2.json", "question_orig": ["What", "is", "[person1]", "wearing", "?"], "question": ["What", "is", [0], "wearing", "?"], "answer_orig": ["[person1]", "wears", "a", "vest", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-1", "question_number": 0, "annot_id": "val-1", "answer_choices": [[[0], "wears", "a", "vest", "."], [[0], "is", "wearing", "a", "glove", "."], [[0], "'", "s", "wearing", "a", "seatbelt", "."], [[0], "is", "a", "chain", "for", "his", "pocket", "watch", "."]], "rationale_choices": [[[0], "wears", "a", "vest", "."], [[0], "is", "wearing", "a", "glove", "."], [[0], "'", "s", "wearing", "a", "seatbelt", "."], [[0], "is", "a", "chain", "for", "his", "pocket", "watch", "."]]}
{"video_name": "A Little Reunion", "ep_num": "Ep_21", "timestamp": "22:46", "link": "https://www.youtube.com/watch?v=WPMFmCGaAeU&t=2101s", "keywords": ["home", "student", "uniform", "parents", "breakfast", "go to school"], "img_fn": "MC-VCR_sample/4.jpg", "objects": ["person", "person", "person", "chair", "bowl", "chair", "potted plant", "potted plant", "dining table", "bowl", "vase", "potted plant", "frisbee", "bowl"], "metadata_fn": "MC-VCR_sample/4.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "sitting", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-2", "question_number": 0, "annot_id": "val-2", "answer_choices": [[[1], "is", "handing", "her", "drink", "to", [1], "."], [[1], "is", "showing", "off", "what", "he", "has", "in", "his", "hand", "to", [1], "."], [[1], "is", "listening", "to", "someone", "talk", "and", "taking", "notes", "."], [[1], "is", "sitting", "."]], "rationale_choices": [[[1], "is", "handing", "her", "drink", "to", [1], "."], [[1], "is", "showing", "off", "what", "he", "has", "in", "his", "hand", "to", [1], "."], [[1], "is", "listening", "to", "someone", "talk", "and", "taking", "notes", "."], [[1], "is", "sitting", "."]]}
{"video_name": "A Little Reunion", "ep_num": "Ep_21", "timestamp": "32:57", "link": "https://www.youtube.com/watch?v=WPMFmCGaAeU&t=2101s", "keywords": ["exam", "score", "school", "high school", "uniform", "shy", "praise"], "img_fn": "MC-VCR_sample/5.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "book", "person", "book", "book", "book", "person"], "metadata_fn": "MC-VCR_sample/5.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "clapping", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-3", "question_number": 0, "annot_id": "val-3", "answer_choices": [[[0], "is", "operating", "a", "space", "ship", "'", "s", "computer", "."], ["Opening", "a", "car", "door", "for", "a", "lady", "."], ["Pulling", "up", "the", "bridge", "to", "the", "boat", "."], [[0], "is", "clapping", "."]], "rationale_choices": [[[0], "is", "operating", "a", "space", "ship", "'", "s", "computer", "."], ["Opening", "a", "car", "door", "for", "a", "lady", "."], ["Pulling", "up", "the", "bridge", "to", "the", "boat", "."], [[0], "is", "clapping", "."]]}
{"video_name": "Wodeboshilaogong", "ep_num": "Ep_1", "timestamp": "14:59", "link": "https://www.bilibili.com/bangumi/play/ss28093", "keywords": ["job", "seek", "competitor", "resume", "rush"], "img_fn": "MC-VCR_sample/12.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/12.json", "question_orig": ["How", "is", "[person1]", "feeling", "this", "moment", "?"], "question": ["How", "is", [0], "feeling", "this", "moment", "?"], "answer_orig": ["He", "feels", "anxious", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-4", "question_number": 0, "annot_id": "val-4", "answer_choices": [["She", "is", "satisfied", "with", "the", "way", "she", "looks", "."], ["Just", "a", "little", "bit", ",", "but", "not", "much", "."], ["She", "is", "oblivious", "to", "what", "'", "s", "happening", "in", "the", "real", "world", "."], ["He", "feels", "anxious", "."]], "rationale_choices": [["She", "is", "satisfied", "with", "the", "way", "she", "looks", "."], ["Just", "a", "little", "bit", ",", "but", "not", "much", "."], ["She", "is", "oblivious", "to", "what", "'", "s", "happening", "in", "the", "real", "world", "."], ["He", "feels", "anxious", "."]]}
{"video_name": "Wodeboshilaogong", "ep_num": "Ep_1", "timestamp": "26:40", "link": "https://www.bilibili.com/bangumi/play/ss28093", "keywords": ["beijing", "stadium", "photo", "camera", "couple"], "img_fn": "MC-VCR_sample/13.jpg", "objects": ["person", "person", "person", "handbag", "person", "bench"], "metadata_fn": "MC-VCR_sample/13.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "talking", "to", "others", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-5", "question_number": 0, "annot_id": "val-5", "answer_choices": [[[0], "is", "guarding", [0], "with", "a", "firearm", "."], [[0], "is", "helping", [0], "get", "ready", "."], [[0], "is", "throwing", "a", "bag", "to", [0], "."], [[0], "is", "talking", "to", "others", "."]], "rationale_choices": [[[0], "is", "guarding", [0], "with", "a", "firearm", "."], [[0], "is", "helping", [0], "get", "ready", "."], [[0], "is", "throwing", "a", "bag", "to", [0], "."], [[0], "is", "talking", "to", "others", "."]]}
{"video_name": "Ordinary World", "ep_num": "Ep_2", "timestamp": "5:14", "link": "https://www.bilibili.com/bangumi/play/ep137582", "keywords": ["drought", "dry", "house", "children", "grandpa", "cave"], "img_fn": "MC-VCR_sample/16.jpg", "objects": ["person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/16.json", "question_orig": ["What", "'s", "[person1]", "doing", "?"], "question": ["What", "'s", [0], "doing", "?"], "answer_orig": ["[person1]", "bends", "down", "and", "is", "talking", "to", "others", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-6", "question_number": 0, "annot_id": "val-6", "answer_choices": [[[0], "bends", "down", "and", "is", "talking", "to", "others", "."], ["Teasing", "someone", "off", "screen", "."], [[0], "looks", "like", "he", "is", "yelling", "at", "someone", "."], [[0], "is", "playing", "the", "piano", "as", "practice", "to", "get", "better", "."]], "rationale_choices": [[[0], "bends", "down", "and", "is", "talking", "to", "others", "."], ["Teasing", "someone", "off", "screen", "."], [[0], "looks", "like", "he", "is", "yelling", "at", "someone", "."], [[0], "is", "playing", "the", "piano", "as", "practice", "to", "get", "better", "."]]}
{"video_name": "Ordinary World", "ep_num": "Ep_11", "timestamp": "41:15", "link": "https://www.bilibili.com/bangumi/play/ep137591", "keywords": ["funeral", "mourn", "dead", "family"], "img_fn": "MC-VCR_sample/17.jpg", "objects": ["person", "person"], "metadata_fn": "MC-VCR_sample/17.json", "question_orig": ["What", "'s", "[person1]", "doing", "?"], "question": ["What", "'s", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "holding", "a", "baby", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-7", "question_number": 0, "annot_id": "val-7", "answer_choices": [[[0], "is", "hiding", "from", [0], "horse", ",", "and", [1], "."], [[0], "is", "keeping", "an", "eye", "on", "the", "handcar", ",", "making", "sure", "she", "has", "enough", "time", "to", "cross", "the", "railroad", "."], [[0], "is", "holding", "a", "baby", "."], [[0], "is", "comforting", "someone", "who", "is", "scared", "."]], "rationale_choices": [[[0], "is", "hiding", "from", [0], "horse", ",", "and", [1], "."], [[0], "is", "keeping", "an", "eye", "on", "the", "handcar", ",", "making", "sure", "she", "has", "enough", "time", "to", "cross", "the", "railroad", "."], [[0], "is", "holding", "a", "baby", "."], [[0], "is", "comforting", "someone", "who", "is", "scared", "."]]}
{"video_name": "Empresses in the Palace", "ep_num": "Ep_5", "timestamp": "19:12", "link": "https://www.youtube.com/watch?v=f47kYHyHzSg&list=PLU39TFGUKRrMwdhF9GeledFzM6C_8zhcP&index=5", "keywords": ["dance", "palace", "party", "servant", "royalty", "happy"], "img_fn": "MC-VCR_sample/22.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/22.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["[person3]", "is", "dancing", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-8", "question_number": 0, "annot_id": "val-8", "answer_choices": [[[2], "'", "s", "kissing", "a", "woman", "in", "bed", "."], [[2], "is", "hugging", [2], "because", "she", "is", "scared", "."], [[2], "is", "dancing", "."], [[2], "is", "trying", "to", "give", "the", "crowd", "a", "drunken", "toast", "."]], "rationale_choices": [[[2], "'", "s", "kissing", "a", "woman", "in", "bed", "."], [[2], "is", "hugging", [2], "because", "she", "is", "scared", "."], [[2], "is", "dancing", "."], [[2], "is", "trying", "to", "give", "the", "crowd", "a", "drunken", "toast", "."]]}
{"video_name": "Juezhannanjing", "ep_num": "Ep_6", "timestamp": "28:31", "link": "https://www.bilibili.com/bangumi/play/ep175229", "keywords": ["military", "strategy", "war", "soldier", "general", "meeting"], "img_fn": "MC-VCR_sample/24.jpg", "objects": ["person", "person", "person", "person", "person", "person", "chair", "chair", "cup", "suitcase"], "metadata_fn": "MC-VCR_sample/24.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["[person3]", "is", "presenting", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-9", "question_number": 0, "annot_id": "val-9", "answer_choices": [[[2], "is", "presenting", "."], [[2], "is", "staring", "at", "some", "cans", "on", "a", "shelf", "."], [[2], "'", "s", "showing", [2], "what", "kind", "of", "ice", "cream", "he", "wants", "."], [[2], "is", "reaching", "for", "the", "door", "to", "shut", "it", "as", "he", "walks", "out", "of", "the", "building", "."]], "rationale_choices": [[[2], "is", "presenting", "."], [[2], "is", "staring", "at", "some", "cans", "on", "a", "shelf", "."], [[2], "'", "s", "showing", [2], "what", "kind", "of", "ice", "cream", "he", "wants", "."], [[2], "is", "reaching", "for", "the", "door", "to", "shut", "it", "as", "he", "walks", "out", "of", "the", "building", "."]]}
{"video_name": "Juezhannanjing", "ep_num": "Ep_12", "timestamp": "28:51", "link": "https://www.bilibili.com/bangumi/play/ep175235", "keywords": ["temple", "buddist", "religion", "lovers", "general", "bless", "luck", "pray"], "img_fn": "MC-VCR_sample/25.jpg", "objects": ["person", "person", "tie"], "metadata_fn": "MC-VCR_sample/25.json", "question_orig": ["What", "'s", "[person1]", "'s", "job", "?"], "question": ["What", "'s", [0], "'s", "job", "?"], "answer_orig": ["[person1]", "is", "a", "soldier", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-10", "question_number": 0, "annot_id": "val-10", "answer_choices": [[[0], "is", "a", "soldier", "."], [[0], "works", "in", "a", "factory", "or", "a", "warehouse", "."], [[0], "is", "a", "firefighter", "."], [[0], "is", "a", "schoolteacher", "there", "to", "supervise", "the", "children", "."]], "rationale_choices": [[[0], "is", "a", "soldier", "."], [[0], "works", "in", "a", "factory", "or", "a", "warehouse", "."], [[0], "is", "a", "firefighter", "."], [[0], "is", "a", "schoolteacher", "there", "to", "supervise", "the", "children", "."]]}
{"video_name": "Like a Flowing River", "ep_num": "Ep_1", "timestamp": "33:07", "link": "https://www.youtube.com/watch?v=cW75NoR6XjQ&list=PLQqbdnAgoRmZBp0GklnKNIvr2dn1GyYcS", "keywords": ["criticize", "apologize", "office", "officer", "slogan"], "img_fn": "MC-VCR_sample/27.jpg", "objects": ["person", "person", "person", "person", "cup"], "metadata_fn": "MC-VCR_sample/27.json", "question_orig": ["How", "is", "[person2]", "feeling", "?"], "question": ["How", "is", [1], "feeling", "?"], "answer_orig": ["[person2]", "feels", "ashamed", "and", "sorry", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-11", "question_number": 0, "annot_id": "val-11", "answer_choices": [[[1], "is", "happy", "."], ["Just", "a", "little", "bit", ",", "but", "not", "much", "."], [[1], "feels", "ashamed", "and", "sorry", "."], ["Distrust", "and", "some", "curiosity", "."]], "rationale_choices": [[[1], "is", "happy", "."], ["Just", "a", "little", "bit", ",", "but", "not", "much", "."], [[1], "feels", "ashamed", "and", "sorry", "."], ["Distrust", "and", "some", "curiosity", "."]]}
{"video_name": "Like a Flowing River", "ep_num": "Ep_8", "timestamp": "33:32", "link": "https://www.youtube.com/watch?v=VOIcGch6bVI&list=PLQqbdnAgoRmZBp0GklnKNIvr2dn1GyYcS&index=8", "keywords": ["wedding", "countryside", "couple", "villager", "party"], "img_fn": "MC-VCR_sample/29.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "umbrella", "person", "chair", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/29.json", "question_orig": ["How", "does", "[person6]", "feel", "?"], "question": ["How", "does", [5], "feel", "?"], "answer_orig": ["[person6]", "is", "very", "happy", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-12", "question_number": 0, "annot_id": "val-12", "answer_choices": [[[5], "is", "very", "happy", "."], [[5], "is", "feeling", "very", "unhappy", "."], ["Nervous", "about", "what", [5], "is", "giving", [5], "."], [[5], "is", "in", "shock", "from", "something", "she", "just", "saw", "."]], "rationale_choices": [[[5], "is", "very", "happy", "."], [[5], "is", "feeling", "very", "unhappy", "."], ["Nervous", "about", "what", [5], "is", "giving", [5], "."], [[5], "is", "in", "shock", "from", "something", "she", "just", "saw", "."]]}
{"video_name": "Like a Flowing River", "ep_num": "Ep_22", "timestamp": "4:28", "link": "https://www.youtube.com/watch?v=FPF6xOfSXwc&list=PLQqbdnAgoRmZBp0GklnKNIvr2dn1GyYcS&index=22", "keywords": ["funeral", "mourn", "angry", "cry", "sadness"], "img_fn": "MC-VCR_sample/30.jpg", "objects": ["person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/30.json", "question_orig": ["What", "is", "[person1]", "'s", "feeling", "?"], "question": ["What", "is", [0], "'s", "feeling", "?"], "answer_orig": ["[person1]", "is", "very", "sad", "and", "angry", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-13", "question_number": 0, "annot_id": "val-13", "answer_choices": [[[0], "is", "feeling", "ecstasy", "."], [[0], "is", "deep", "in", "thought", "about", "what", "her", "next", "move", "is", "."], ["Nothing", ",", [0], "is", "watching", "in", "shock", "."], [[0], "is", "very", "sad", "and", "angry", "."]], "rationale_choices": [[[0], "is", "feeling", "ecstasy", "."], [[0], "is", "deep", "in", "thought", "about", "what", "her", "next", "move", "is", "."], ["Nothing", ",", [0], "is", "watching", "in", "shock", "."], [[0], "is", "very", "sad", "and", "angry", "."]]}
{"video_name": "So Young", "ep_num": "NaN", "timestamp": "51:13", "link": "https://www.youtube.com/watch?v=y5AbXeijWQ8", "keywords": ["dormitory", "boyfriend", "visit", "college", "university"], "img_fn": "MC-VCR_sample/36.jpg", "objects": ["person", "person", "bed", "person", "person", "person", "person", "bowl", "person", "bottle", "bottle"], "metadata_fn": "MC-VCR_sample/36.json", "question_orig": ["What", "is", "[person4]", "'s", "job", "?"], "question": ["What", "is", [4], "'s", "job", "?"], "answer_orig": ["[person4]", "is", "a", "student", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-14", "question_number": 0, "annot_id": "val-14", "answer_choices": [[[4], "is", "a", "waiter", "."], [[4], "is", "a", "musician", "."], [[4], "works", "on", "a", "boat", "."], [[4], "is", "a", "student", "."]], "rationale_choices": [[[4], "is", "a", "waiter", "."], [[4], "is", "a", "musician", "."], [[4], "works", "on", "a", "boat", "."], [[4], "is", "a", "student", "."]]}
{"video_name": "A Little Reunion", "ep_num": "Ep_2", "timestamp": "37:27", "link": "https://www.youtube.com/watch?v=nnxSTdpBAV4&list=PLkvG4EWPDB0kkjJ-RBHMFdCLDFzn5UNlJ&index=2", "keywords": ["school", "parents", "support", "cheer", "sports"], "img_fn": "MC-VCR_sample/43.jpg", "objects": ["person", "person", "person", "person", "person", "tie", "person", "person", "person", "tie", "person", "person", "person", "person", "tie", "person", "person", "person", "person", "person", "person", "person", "tie", "person", "laptop", "umbrella", "tie", "tie", "tie", "person", "person"], "metadata_fn": "MC-VCR_sample/43.json", "question_orig": ["How", "is", "[person4]", "feeling", "?"], "question": ["How", "is", [3], "feeling", "?"], "answer_orig": ["[person4]", "feels", "happy", "and", "even", "stands", "up", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-15", "question_number": 0, "annot_id": "val-15", "answer_choices": [[[3], "is", "amused", "by", "the", "others", "dancing", "."], [[3], "is", "feeling", "suspicious", "right", "now", "."], [[3], "feels", "happy", "and", "even", "stands", "up", "."], [[3], "is", "feeling", "ecstasy", "."]], "rationale_choices": [[[3], "is", "amused", "by", "the", "others", "dancing", "."], [[3], "is", "feeling", "suspicious", "right", "now", "."], [[3], "feels", "happy", "and", "even", "stands", "up", "."], [[3], "is", "feeling", "ecstasy", "."]]}
{"video_name": "I will find you a better home", "ep_num": "Ep_52", "timestamp": "41:51", "link": "https://www.youtube.com/watch?v=7zAr0BBo9m8&list=PLDuzvnRIfxZoCApBs2bldB_kCvEk9c2AQ&index=52", "keywords": ["salesman", "house", "slogan", "encourage", "staff"], "img_fn": "MC-VCR_sample/45.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/45.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "dancing", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-16", "question_number": 0, "annot_id": "val-16", "answer_choices": [[[1], "is", "keeping", "men", "from", "entering", "a", "lifeboat", "ahead", "of", "the", "women", "and", "children", "."], [[1], "is", "dancing", "."], [[1], "is", "giving", "directions", "based", "on", "the", "information", "in", "front", "of", "her", "."], [[1], "is", "a", "server", "and", "he", "just", "brought", "drinks", "cup", "over", "to", [1], "."]], "rationale_choices": [[[1], "is", "keeping", "men", "from", "entering", "a", "lifeboat", "ahead", "of", "the", "women", "and", "children", "."], [[1], "is", "dancing", "."], [[1], "is", "giving", "directions", "based", "on", "the", "information", "in", "front", "of", "her", "."], [[1], "is", "a", "server", "and", "he", "just", "brought", "drinks", "cup", "over", "to", [1], "."]]}
{"video_name": "Mr. Sunshine", "ep_num": "Ep_1", "timestamp": "1:01:12", "link": "https://www.hanjuwang.net/hanju/2018/1300.html#", "keywords": ["colonizer", "policeman", "family", "brave", "korea", "station"], "img_fn": "MC-VCR_sample/kor_3.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "tie"], "metadata_fn": "MC-VCR_sample/kor_3.json", "question_orig": ["What", "are", "[person4]", "and", "[person6]", "doing", "?"], "question": ["What", "are", [3, 5], "doing", "?"], "answer_orig": ["They", "are", "trying", "to", "shoot", "someone", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-17", "question_number": 0, "annot_id": "val-17", "answer_choices": [["They", "are", "trying", "to", "shoot", "someone", "."], ["They", "are", "working", "as", "security", "for", "the", "building", "."], ["It", "looks", "like", "they", "leaving", "the", "store", "."], ["They", "are", "getting", "ready", "to", "dance", "in", "a", "dancing", "competition", "."]], "rationale_choices": [["They", "are", "trying", "to", "shoot", "someone", "."], ["They", "are", "working", "as", "security", "for", "the", "building", "."], ["It", "looks", "like", "they", "leaving", "the", "store", "."], ["They", "are", "getting", "ready", "to", "dance", "in", "a", "dancing", "competition", "."]]}
{"video_name": "Inside Men", "ep_num": "NaN", "timestamp": "04:48", "link": "https://www.bilibili.com/bangumi/play/ss25329/", "keywords": ["apologize", "bow"], "img_fn": "MC-VCR_sample/kor_10.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "tie", "person", "person", "tie", "tie", "person", "tie"], "metadata_fn": "MC-VCR_sample/kor_10.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "apologizing", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-18", "question_number": 0, "annot_id": "val-18", "answer_choices": [["Turning", [0], "'", "s", "back", "to", [0], ",", "and", [14], "who", "are", "making", "fun", "of", "him", "."], [[0], "is", "apologizing", "."], ["Running", "away", "from", "something", "."], [[0], "is", "calling", [0], "'", "s", "name", ",", "in", "order", "to", "wake", "her", "up", "."]], "rationale_choices": [["Turning", [0], "'", "s", "back", "to", [0], ",", "and", [14], "who", "are", "making", "fun", "of", "him", "."], [[0], "is", "apologizing", "."], ["Running", "away", "from", "something", "."], [[0], "is", "calling", [0], "'", "s", "name", ",", "in", "order", "to", "wake", "her", "up", "."]]}
{"video_name": "Mom Has an Affair", "ep_num": "Ep_7", "timestamp": "23:30", "link": "https://www.hanjuwang.net/hanju/2020/2280.html#", "keywords": ["school", "pick up", "rainy", "isolated", "students"], "img_fn": "MC-VCR_sample/kor_12.jpg", "objects": ["person", "person", "umbrella", "umbrella", "person", "person", "umbrella", "person"], "metadata_fn": "MC-VCR_sample/kor_12.json", "question_orig": ["What", "'s", "[person1]", "'s", "identity", "?"], "question": ["What", "'s", [0], "'s", "identity", "?"], "answer_orig": ["[person1]", "is", "a", "student", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-19", "question_number": 0, "annot_id": "val-19", "answer_choices": [[[0], "is", "a", "student", "."], [[0], "is", "a", "lawyer", "."], [[0], "works", "at", "a", "news", "station", "."], [[0], "is", "proud", "of", "his", "heritage", "."]], "rationale_choices": [[[0], "is", "a", "student", "."], [[0], "is", "a", "lawyer", "."], [[0], "works", "at", "a", "news", "station", "."], [[0], "is", "proud", "of", "his", "heritage", "."]]}
{"video_name": "High Kick!", "ep_num": "Ep_25", "timestamp": "20:26", "link": "https://www.hanjuwang.net/hanju/2006/728.html#", "keywords": ["restaurant", "cuisine", "chinese", "waitress", "pay", "refuse"], "img_fn": "MC-VCR_sample/kor_18.jpg", "objects": ["person", "person", "person", "dining table", "pizza", "person", "bowl", "cup", "cup", "pizza", "bowl", "bowl", "bowl", "person", "backpack", "chair", "person", "bowl", "bowl", "person", "potted plant", "cell phone", "bowl"], "metadata_fn": "MC-VCR_sample/kor_18.json", "question_orig": ["Who", "is", "[person4]", "?"], "question": ["Who", "is", [5], "?"], "answer_orig": ["[person4]", "is", "a", "waitress", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-20", "question_number": 0, "annot_id": "val-20", "answer_choices": [[[5], "is", "a", "catholic", "priest", "."], [[5], "works", "in", "the", "office", "."], [[5], "is", "a", "waitress", "."], [[5], "is", "the", "hotel", "concierge", "."]], "rationale_choices": [[[5], "is", "a", "catholic", "priest", "."], [[5], "works", "in", "the", "office", "."], [[5], "is", "a", "waitress", "."], [[5], "is", "the", "hotel", "concierge", "."]]}
{"video_name": "Reply 1988", "ep_num": "Ep_1", "timestamp": "43:45", "link": "https://www.hanjuwang.net/hanju/2015/188.html#", "keywords": ["aunt", "gossip", "trim", "vegetable", "neighbor"], "img_fn": "MC-VCR_sample/kor_19.jpg", "objects": ["person", "person", "person", "cake", "cup", "person", "cake", "person", "dining table"], "metadata_fn": "MC-VCR_sample/kor_19.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "looking", "at", "something", "interesting", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-21", "question_number": 0, "annot_id": "val-21", "answer_choices": [[[1], "is", "breaking", "into", "a", "cabinet", "."], [[1], "is", "spinning", [1], "around", "in", "a", "circle", "as", "they", "dance", "together", "."], [[1], "is", "looking", "at", "something", "interesting", "."], [[1], "is", "waiting", "for", [1], "to", "bring", "him", "his", "meal", "."]], "rationale_choices": [[[1], "is", "breaking", "into", "a", "cabinet", "."], [[1], "is", "spinning", [1], "around", "in", "a", "circle", "as", "they", "dance", "together", "."], [[1], "is", "looking", "at", "something", "interesting", "."], [[1], "is", "waiting", "for", [1], "to", "bring", "him", "his", "meal", "."]]}
{"video_name": "Midnight Diner", "ep_num": "NaN", "timestamp": "11:18", "link": "https://www.bilibili.com/bangumi/play/ss12075/", "keywords": ["cook", "customer", "food", "taste"], "img_fn": "MC-VCR_sample/jpn_1.jpg", "objects": ["person", "person", "person", "bottle", "cup", "bottle", "chair"], "metadata_fn": "MC-VCR_sample/jpn_1.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["[person3]", "is", "eating", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-22", "question_number": 0, "annot_id": "val-22", "answer_choices": [[[2], "is", "running", "to", "boxing", "practice", "."], [[2], "'", "s", "dancing", "with", [2], "."], [[2], "is", "eating", "."], [[2], "is", "trying", "to", "get", "a", "drink", "from", "the", "bartender", "."]], "rationale_choices": [[[2], "is", "running", "to", "boxing", "practice", "."], [[2], "'", "s", "dancing", "with", [2], "."], [[2], "is", "eating", "."], [[2], "is", "trying", "to", "get", "a", "drink", "from", "the", "bartender", "."]]}
{"video_name": "Kyoto Otomeguri", "ep_num": "Ep_2", "timestamp": "2:50", "link": "https://www.bilibili.com/video/BV13s411e7VX?p=3", "keywords": ["pray", "festival", "ritual", "custom", "walk in water", "luck"], "img_fn": "MC-VCR_sample/jpn_10.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "tv", "cup", "book", "dining table", "cup", "book", "book", "bowl", "bottle", "book", "dining table", "dining table"], "metadata_fn": "MC-VCR_sample/jpn_10.json", "question_orig": ["What", "is", "[person5]", "doing", "?"], "question": ["What", "is", [4], "doing", "?"], "answer_orig": ["[person5]", "is", "drinking", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-23", "question_number": 0, "annot_id": "val-23", "answer_choices": [[[4], "is", "completing", "an", "obstacle", "course", "."], ["Giving", "a", "gift", "to", [4], "."], [[4], "is", "trying", "to", "steal", "a", "valuable", "item", "from", [4], "."], [[4], "is", "drinking", "."]], "rationale_choices": [[[4], "is", "completing", "an", "obstacle", "course", "."], ["Giving", "a", "gift", "to", [4], "."], [[4], "is", "trying", "to", "steal", "a", "valuable", "item", "from", [4], "."], [[4], "is", "drinking", "."]]}
{"video_name": "Rental Nan mo Shinai Hito", "ep_num": "Ep_2", "timestamp": "0:46", "link": "https://www.zxzj.me/video/2770-1-2.html", "keywords": ["barbecue", "roast", "meat", "full", "restaurant"], "img_fn": "MC-VCR_sample/jpn_14.jpg", "objects": ["person", "person", "person", "cup", "cup", "dining table", "cup", "dining table", "cup", "bowl"], "metadata_fn": "MC-VCR_sample/jpn_14.json", "question_orig": ["How", "is", "[person2]", "feeling", "?"], "question": ["How", "is", [1], "feeling", "?"], "answer_orig": ["[person2]", "eats", "too", "much", "and", "looks", "full", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-24", "question_number": 0, "annot_id": "val-24", "answer_choices": [[[1], "eats", "too", "much", "and", "looks", "full", "."], [[1], "is", "feeling", "suspicious", "right", "now", "."], [[1], "is", "apprehensive", "to", "follow", [1], "to", "train", "she", "is", "headed", "to", "."], [[1], "is", "in", "awe", "of", "the", "architecture", "."]], "rationale_choices": [[[1], "eats", "too", "much", "and", "looks", "full", "."], [[1], "is", "feeling", "suspicious", "right", "now", "."], [[1], "is", "apprehensive", "to", "follow", [1], "to", "train", "she", "is", "headed", "to", "."], [[1], "is", "in", "awe", "of", "the", "architecture", "."]]}
{"video_name": "Midnight Diner5", "ep_num": "Ep_2", "timestamp": "14:18", "link": "https://www.zxzj.me/video/2501-1-2.html", "keywords": ["fortune-telling", "shy", "hands"], "img_fn": "MC-VCR_sample/jpn_21.jpg", "objects": ["person", "person"], "metadata_fn": "MC-VCR_sample/jpn_21.json", "question_orig": ["How", "is", "[person1]", "feeling", "?"], "question": ["How", "is", [0], "feeling", "?"], "answer_orig": ["[person1]", "is", "shy", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "east-asia", "img_id": "val-25", "question_number": 0, "annot_id": "val-25", "answer_choices": [[[0], "is", "shy", "."], [[0], "looks", "very", "eager", "."], [[0], "is", "overwhelmed", "with", "emotion", "and", "uncertainty", "."], ["Sitting", "next", "to", [0], "is", "making", "him", "uncomfortable", "."]], "rationale_choices": [[[0], "is", "shy", "."], [[0], "looks", "very", "eager", "."], [[0], "is", "overwhelmed", "with", "emotion", "and", "uncertainty", "."], ["Sitting", "next", "to", [0], "is", "making", "him", "uncomfortable", "."]]}
{"video_name": "Indiana Jones and the Last Crusade", "ep_num": "8/10", "timestamp": "0:52", "link": "https://www.youtube.com/watch?v=U6tzqlxOr2U", "keywords": ["enemy", "fight", "peace", "nazi"], "img_fn": "MC-VCR_sample/west_1.jpg", "objects": ["person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/west_1.json", "question_orig": ["What", "'s", "[person4]", "doing", "?"], "question": ["What", "'s", [3], "doing", "?"], "answer_orig": ["[person4]", "is", "fighting", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "west", "img_id": "val-26", "question_number": 0, "annot_id": "val-26", "answer_choices": [[[3], "is", "fighting", "."], ["Showing", [3], "what", "'", "s", "inside", "his", "basket", "."], [[3], "is", "running", "to", "talk", "to", [3], "."], [[3], "is", "taking", [3], "out", "of", "the", "dance", "hall", "."]], "rationale_choices": [[[3], "is", "fighting", "."], ["Showing", [3], "what", "'", "s", "inside", "his", "basket", "."], [[3], "is", "running", "to", "talk", "to", [3], "."], [[3], "is", "taking", [3], "out", "of", "the", "dance", "hall", "."]]}
{"video_name": "Pretty Baby", "ep_num": "6/8", "timestamp": "1:15", "link": "https://www.youtube.com/watch?v=QXB9qatHDSU", "keywords": ["wedding", "witness", "bride", "groom", "family", "flower"], "img_fn": "MC-VCR_sample/west_9.jpg", "objects": ["person", "person", "person", "person", "person", "person", "tie", "person", "person", "chair", "person", "person"], "metadata_fn": "MC-VCR_sample/west_9.json", "question_orig": ["What", "is", "[person3]", "wearing", "?"], "question": ["What", "is", [2], "wearing", "?"], "answer_orig": ["[person3]", "is", "wearing", "suit", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "west", "img_id": "val-27", "question_number": 0, "annot_id": "val-27", "answer_choices": [[[2], "is", "wearing", "a", "monster", "costume", "."], [[2], "has", "a", "bonnet", "on", "her", "head", "."], [[2], "is", "wearing", "suit", "."], [[2], "is", "wearing", "the", "coat", "to", "hide", "what", "he", "is", "wearing", "underneath", "."]], "rationale_choices": [[[2], "is", "wearing", "a", "monster", "costume", "."], [[2], "has", "a", "bonnet", "on", "her", "head", "."], [[2], "is", "wearing", "suit", "."], [[2], "is", "wearing", "the", "coat", "to", "hide", "what", "he", "is", "wearing", "underneath", "."]]}
{"video_name": "Pretty Baby", "ep_num": "6/8", "timestamp": "1:38", "link": "https://www.youtube.com/watch?v=QXB9qatHDSU", "keywords": ["priest", "witness", "wedding", "bride", "groom", "family", "church", "vow"], "img_fn": "MC-VCR_sample/west_10.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "tie", "person", "person", "car", "person", "person", "person", "motorcycle", "bench", "person", "person"], "metadata_fn": "MC-VCR_sample/west_10.json", "question_orig": ["What", "'s", "[person2]", "doing", "?"], "question": ["What", "'s", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "listening", "to", "others", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-28", "question_number": 0, "annot_id": "val-28", "answer_choices": [[[1], "is", "mixing", "up", "dough", "in", "a", "large", "mixer", "."], [[1], "is", "trying", "to", "give", "a", "gift", "to", [1], "."], ["Helping", "serve", "the", "kids", [1], "'", "s", "food", "."], [[1], "is", "listening", "to", "others", "."]], "rationale_choices": [[[1], "is", "mixing", "up", "dough", "in", "a", "large", "mixer", "."], [[1], "is", "trying", "to", "give", "a", "gift", "to", [1], "."], ["Helping", "serve", "the", "kids", [1], "'", "s", "food", "."], [[1], "is", "listening", "to", "others", "."]]}
{"video_name": "Employee of the Month", "ep_num": "1/12", "timestamp": "0:46", "link": "https://www.youtube.com/watch?v=AVEnTcDIo0A", "keywords": ["shop assistant", "supermarket"], "img_fn": "MC-VCR_sample/west_16.jpg", "objects": ["person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/west_16.json", "question_orig": ["What", "is", "[person2]", "'s", "job", "?"], "question": ["What", "is", [1], "'s", "job", "?"], "answer_orig": ["He", "is", "a", "shop", "assistant", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "west", "img_id": "val-29", "question_number": 0, "annot_id": "val-29", "answer_choices": [[[1], "is", "working", "for", "the", "local", "media", "and", "documenting", "a", "dancing", "contest", "."], ["To", "monitor", "what", [1], "is", "doing", "."], ["He", "is", "a", "shop", "assistant", "."], ["He", "is", "working", "as", "a", "security", "guard", "."]], "rationale_choices": [[[1], "is", "working", "for", "the", "local", "media", "and", "documenting", "a", "dancing", "contest", "."], ["To", "monitor", "what", [1], "is", "doing", "."], ["He", "is", "a", "shop", "assistant", "."], ["He", "is", "working", "as", "a", "security", "guard", "."]]}
{"video_name": "The Long Rider", "ep_num": "4/11", "timestamp": "0:59", "link": "https://www.youtube.com/watch?v=zSCukxfXdAQ", "keywords": ["smoke", "gun", "cowboy", "horse"], "img_fn": "MC-VCR_sample/west_21.jpg", "objects": ["person", "person", "cup", "handbag"], "metadata_fn": "MC-VCR_sample/west_21.json", "question_orig": ["What", "'s", "[person1]", "doing", "?"], "question": ["What", "'s", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "smoking", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-30", "question_number": 0, "annot_id": "val-30", "answer_choices": [[[0], "is", "operating", "a", "space", "ship", "'", "s", "computer", "."], ["Driving", "a", "jeep", "."], ["Deciding", "what", "to", "invent", "next", "."], [[0], "is", "smoking", "."]], "rationale_choices": [[[0], "is", "operating", "a", "space", "ship", "'", "s", "computer", "."], ["Driving", "a", "jeep", "."], ["Deciding", "what", "to", "invent", "next", "."], [[0], "is", "smoking", "."]]}
{"video_name": "The North Country", "ep_num": "3/10", "timestamp": "0:32", "link": "https://www.youtube.com/watch?v=fGiaJDWSWKE&list=PLZbXA4lyCtqp0AgeNNKd4azRopRzVfm_Z&index=3", "keywords": ["ice hockey", "audience", "player", "stadium", "game"], "img_fn": "MC-VCR_sample/west_23.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "tie", "person", "toothbrush", "person", "person", "refrigerator"], "metadata_fn": "MC-VCR_sample/west_23.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "looking", "for", "someone", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "west", "img_id": "val-31", "question_number": 0, "annot_id": "val-31", "answer_choices": [[[1], "is", "helping", [1], "off", "the", "train", "."], [[1], "is", "looking", "for", "someone", "."], [[1], "is", "handcuffing", [1], "to", "a", "pipe", "so", "he", "can", "'", "t", "go", "anywhere", "."], [[1], "is", "explaining", "the", "history", "of", "the", "building", "to", [1], "."]], "rationale_choices": [[[1], "is", "helping", [1], "off", "the", "train", "."], [[1], "is", "looking", "for", "someone", "."], [[1], "is", "handcuffing", [1], "to", "a", "pipe", "so", "he", "can", "'", "t", "go", "anywhere", "."], [[1], "is", "explaining", "the", "history", "of", "the", "building", "to", [1], "."]]}
{"video_name": "Inglourious Basterds Official Trailer", "ep_num": "NaN", "timestamp": "0:33", "link": "https://www.youtube.com/watch?v=KnrRy6kSFF0", "keywords": ["general", "troop", "soldier", "instruction", "shout", "response"], "img_fn": "MC-VCR_sample/west_25.jpg", "objects": ["person", "person"], "metadata_fn": "MC-VCR_sample/west_25.json", "question_orig": ["What", "'s", "[person2]", "'s", "identity", "?"], "question": ["What", "'s", [1], "'s", "identity", "?"], "answer_orig": ["[person2]", "is", "a", "soldier", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "west", "img_id": "val-32", "question_number": 0, "annot_id": "val-32", "answer_choices": [[[1], "is", "a", "soldier", "."], [[1], "is", "a", "professor", "."], [[1], "is", "a", "bartender", "."], [[1], "is", "a", "police", "officer", "."]], "rationale_choices": [[[1], "is", "a", "soldier", "."], [[1], "is", "a", "professor", "."], [[1], "is", "a", "bartender", "."], [[1], "is", "a", "police", "officer", "."]]}
{"video_name": "World's Greatest Dad", "ep_num": "4/10", "timestamp": "0:44", "link": "https://www.youtube.com/watch?v=dEfBtVYzWks&list=PLZbXA4lyCtqoFNG1BCXck8XkSFZO-_bwv&index=4", "keywords": ["school", "locker", "beautiful", "uniform", "student"], "img_fn": "MC-VCR_sample/west_34.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "handbag", "backpack", "person"], "metadata_fn": "MC-VCR_sample/west_34.json", "question_orig": ["Where", "are", "[person1]", "and", "[person2]", "?"], "question": ["Where", "are", [0, 1], "?"], "answer_orig": ["They", "are", "in", "a", "school", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-33", "question_number": 0, "annot_id": "val-33", "answer_choices": [["They", "are", "at", "the", "zoo", "."], ["They", "are", "at", "a", "party", "in", "a", "ballroom", "."], [[0, 1], "are", "at", "a", "store", "."], ["They", "are", "in", "a", "school", "."]], "rationale_choices": [["They", "are", "at", "the", "zoo", "."], ["They", "are", "at", "a", "party", "in", "a", "ballroom", "."], [[0, 1], "are", "at", "a", "store", "."], ["They", "are", "in", "a", "school", "."]]}
{"video_name": "The Assassination Bureau", "ep_num": "4/8", "timestamp": "0:01", "link": "https://www.youtube.com/watch?v=WRtOCCfKEvQ&list=PLF7qpGcOR58fNMYZq1YssKq8ujQssSg1Z&index=4", "keywords": ["noble", "coachman", "carriage", "passenger"], "img_fn": "MC-VCR_sample/west_37.jpg", "objects": ["person", "person", "person", "person", "person", "bicycle", "chair"], "metadata_fn": "MC-VCR_sample/west_37.json", "question_orig": ["What", "'s", "[person1]", "doing", "?"], "question": ["What", "'s", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "driving", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "west", "img_id": "val-34", "question_number": 0, "annot_id": "val-34", "answer_choices": [[[0], "is", "hiding", "from", [0], "horse", ",", "and", [5], "."], ["Leading", [0], "somewhere", "else", "."], [[0], "is", "driving", "."], [[0], "is", "saying", "something", "rude", "to", "the", "girls", "."]], "rationale_choices": [[[0], "is", "hiding", "from", [0], "horse", ",", "and", [5], "."], ["Leading", [0], "somewhere", "else", "."], [[0], "is", "driving", "."], [[0], "is", "saying", "something", "rude", "to", "the", "girls", "."]]}
{"video_name": "The Assassination Bureau", "ep_num": "7/8", "timestamp": "0:10", "link": "https://www.youtube.com/watch?v=ETqX2DZqAN8&list=PLF7qpGcOR58fNMYZq1YssKq8ujQssSg1Z&index=7", "keywords": ["party", "religion", "christan"], "img_fn": "MC-VCR_sample/west_38.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/west_38.json", "question_orig": ["Why", "is", "[person4]", "wearing", "like", "this", "?"], "question": ["Why", "is", [3], "wearing", "like", "this", "?"], "answer_orig": ["[person4]", "is", "at", "a", "party", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-35", "question_number": 0, "annot_id": "val-35", "answer_choices": [[[3], "is", "upscale", "."], ["Everyone", "in", "the", "group", "picked", "what", [3], "wanted", "to", "wear", "to", "the", "event", "."], [[3], "was", "taking", "a", "cruise", "."], [[3], "is", "at", "a", "party", "."]], "rationale_choices": [[[3], "is", "upscale", "."], ["Everyone", "in", "the", "group", "picked", "what", [3], "wanted", "to", "wear", "to", "the", "event", "."], [[3], "was", "taking", "a", "cruise", "."], [[3], "is", "at", "a", "party", "."]]}
{"video_name": "Cheaper by the Dozen", "ep_num": "3/5", "timestamp": "0:26", "link": "https://www.youtube.com/watch?v=1ZVwo_elP7Y", "keywords": ["kitchen", "house", "breakfast", "pajamas"], "img_fn": "MC-VCR_sample/west_45.jpg", "objects": ["person", "person", "person", "bottle", "bottle", "cup", "cup", "bottle", "dining table", "bottle", "bowl"], "metadata_fn": "MC-VCR_sample/west_45.json", "question_orig": ["What", "is", "[person2]", "holding", "?"], "question": ["What", "is", [1], "holding", "?"], "answer_orig": ["Many", "plates", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-36", "question_number": 0, "annot_id": "val-36", "answer_choices": [["The", "wall", "decorations", "."], ["The", "bridal", "bouquet", "."], ["A", "white", "bandana", "."], ["Many", "plates", "."]], "rationale_choices": [["The", "wall", "decorations", "."], ["The", "bridal", "bouquet", "."], ["A", "white", "bandana", "."], ["Many", "plates", "."]]}
{"video_name": "Big", "ep_num": "4/5", "timestamp": "0:04", "link": "https://www.youtube.com/watch?v=11Kv8mnxdCM&list=RDQMwDv5h4GHpy0&index=2", "keywords": ["buffet", "party", "gorge", "eat"], "img_fn": "MC-VCR_sample/west_51.jpg", "objects": ["person", "person", "bowl", "person", "person", "vase", "person", "tie", "person", "potted plant", "person", "bowl", "person", "person", "dining table"], "metadata_fn": "MC-VCR_sample/west_51.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "eating", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "west", "img_id": "val-37", "question_number": 0, "annot_id": "val-37", "answer_choices": [[[1], "is", "eating", "."], [[1], "is", "rolling", "her", "eyes", "because", [1], "told", "a", "terrible", "joke", "."], [[1], "is", "photographing", [1], "as", "they", "dance", "."], [[1], "is", "trying", "to", "get", "a", "wagon", "."]], "rationale_choices": [[[1], "is", "eating", "."], [[1], "is", "rolling", "her", "eyes", "because", [1], "told", "a", "terrible", "joke", "."], [[1], "is", "photographing", [1], "as", "they", "dance", "."], [[1], "is", "trying", "to", "get", "a", "wagon", "."]]}
{"video_name": "Fat Man and Little Boy", "ep_num": "3/9", "timestamp": "0:31", "link": "https://www.youtube.com/watch?v=YuuMHvaxXFY", "keywords": ["kitchen", "general", "oven"], "img_fn": "MC-VCR_sample/west_64.jpg", "objects": ["person", "person", "bottle", "oven", "cup", "remote"], "metadata_fn": "MC-VCR_sample/west_64.json", "question_orig": ["What", "is", "[person2]", "'s", "job", "?"], "question": ["What", "is", [1], "'s", "job", "?"], "answer_orig": ["[person2]", "is", "a", "general", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "west", "img_id": "val-38", "question_number": 0, "annot_id": "val-38", "answer_choices": [[[1], "is", "here", "to", "give", "a", "sperm", "sample", "."], [[1], "looks", "like", "she", "could", "be", "a", "maid", "."], [[1], "is", "a", "general", "."], [[1], "is", "teaching", "an", "exercise", "class", "."]], "rationale_choices": [[[1], "is", "here", "to", "give", "a", "sperm", "sample", "."], [[1], "looks", "like", "she", "could", "be", "a", "maid", "."], [[1], "is", "a", "general", "."], [[1], "is", "teaching", "an", "exercise", "class", "."]]}
{"video_name": "Glory", "ep_num": "2/6", "timestamp": "0:31", "link": "https://www.youtube.com/watch?v=7SNohNGz_f0", "keywords": ["presentation", "europe", "child", "parents", "host"], "img_fn": "MC-VCR_sample/west_65.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "tie", "person", "person"], "metadata_fn": "MC-VCR_sample/west_65.json", "question_orig": ["What", "is", "[person7]", "doing", "?"], "question": ["What", "is", [6], "doing", "?"], "answer_orig": ["[person7]", "is", "doing", "presentation", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "west", "img_id": "val-39", "question_number": 0, "annot_id": "val-39", "answer_choices": [[[6], "is", "doing", "presentation", "."], [[6], "is", "eating", "breakfast", "."], [[6], "'", "s", "talking", "to", [6], "."], [[6], "is", "serving", "drinks", "to", "the", "guests", "."]], "rationale_choices": [[[6], "is", "doing", "presentation", "."], [[6], "is", "eating", "breakfast", "."], [[6], "'", "s", "talking", "to", [6], "."], [[6], "is", "serving", "drinks", "to", "the", "guests", "."]]}
{"video_name": "The Bay", "ep_num": "1/10", "timestamp": "1:02", "link": "https://www.youtube.com/watch?v=aELUig9qu3w&list=PLZbXA4lyCtqqCnP321ot859fetmpnAwhD", "keywords": ["crab", "contest", "uniform", "speed", "seaside", "wolf"], "img_fn": "MC-VCR_sample/west_75.jpg", "objects": ["person", "person", "person", "person", "cup", "person", "person", "bottle", "person", "person"], "metadata_fn": "MC-VCR_sample/west_75.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "eating", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-40", "question_number": 0, "annot_id": "val-40", "answer_choices": [[[1], "is", "trying", "to", "get", "a", "wagon", "."], [[1], "is", "rolling", "her", "eyes", "because", [1], "told", "a", "terrible", "joke", "."], [[1], "is", "photographing", [1], "as", "they", "dance", "."], [[1], "is", "eating", "."]], "rationale_choices": [[[1], "is", "trying", "to", "get", "a", "wagon", "."], [[1], "is", "rolling", "her", "eyes", "because", [1], "told", "a", "terrible", "joke", "."], [[1], "is", "photographing", [1], "as", "they", "dance", "."], [[1], "is", "eating", "."]]}
{"video_name": "Daddy's Home", "ep_num": "3/10", "timestamp": "0:06", "link": "https://www.youtube.com/watch?v=taf0MZ5VgDc&list=PLYhzLH9izsDcGZU81UseW2wJLYxcVa33j&index=3", "keywords": ["backyard", "yard", "skateboard", "party", "leisure", "children"], "img_fn": "MC-VCR_sample/west_79.jpg", "objects": ["person", "person", "person", "person", "skateboard", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "bench", "umbrella"], "metadata_fn": "MC-VCR_sample/west_79.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["[person3]", "is", "skating", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-41", "question_number": 0, "annot_id": "val-41", "answer_choices": [[[2], "is", "trying", "to", "introduce", "himself", "to", [2], "."], ["Talking", "with", [2], "."], [[2], "is", "playing", "the", "violin", "."], [[2], "is", "skating", "."]], "rationale_choices": [[[2], "is", "trying", "to", "introduce", "himself", "to", [2], "."], ["Talking", "with", [2], "."], [[2], "is", "playing", "the", "violin", "."], [[2], "is", "skating", "."]]}
{"video_name": "What to Expect When You're Expecting", "ep_num": "1/10", "timestamp": "0:04", "link": "https://www.youtube.com/watch?v=M5IO69jDb2M", "keywords": ["tv show", "judge", "dance", "performance", "dream"], "img_fn": "MC-VCR_sample/west_84.jpg", "objects": ["person", "person", "person", "person", "person", "tie", "train"], "metadata_fn": "MC-VCR_sample/west_84.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "dancing", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "west", "img_id": "val-42", "question_number": 0, "annot_id": "val-42", "answer_choices": [[[0], "is", "dancing", "."], [[0], "is", "acting", "in", "a", "play", "."], [[0], "are", "playing", "the", "violin", "."], [[0], "is", "flirting", "."]], "rationale_choices": [[[0], "is", "dancing", "."], [[0], "is", "acting", "in", "a", "play", "."], [[0], "are", "playing", "the", "violin", "."], [[0], "is", "flirting", "."]]}
{"video_name": "Meatballs", "ep_num": "4/9", "timestamp": "1:10", "link": "https://www.youtube.com/watch?v=DONkgw00QSE&list=PLZbXA4lyCtqphrI-uH9O3dksYdg-xz2yv&index=4", "keywords": ["student", "school bus", "school", "run away"], "img_fn": "MC-VCR_sample/west_88.jpg", "objects": ["person", "person", "person", "chair", "person", "person"], "metadata_fn": "MC-VCR_sample/west_88.json", "question_orig": ["What", "'s", "[person1]", "doing", "?"], "question": ["What", "'s", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "escaping", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "west", "img_id": "val-43", "question_number": 0, "annot_id": "val-43", "answer_choices": [[[0], "is", "waving", "someone", "goodbye", "."], [[0], "is", "escaping", "."], [[0], "is", "telling", [0], "that", "their", "mother", "is", "very", "sick", "."], [[0], "is", "waiting", "for", [0], "to", "get", "off", "of", "the", "train", "."]], "rationale_choices": [[[0], "is", "waving", "someone", "goodbye", "."], [[0], "is", "escaping", "."], [[0], "is", "telling", [0], "that", "their", "mother", "is", "very", "sick", "."], [[0], "is", "waiting", "for", [0], "to", "get", "off", "of", "the", "train", "."]]}
{"video_name": "Bounty", "ep_num": "5/9", "timestamp": "1:04", "link": "https://www.youtube.com/watch?v=t0oqEjxOUww", "keywords": ["colonizer", "labor", "punish", "sick", "ship", "sea", "sailor"], "img_fn": "MC-VCR_sample/west_95.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "tie", "person", "person", "person", "chair", "chair", "dining table"], "metadata_fn": "MC-VCR_sample/west_95.json", "question_orig": ["What", "'s", "the", "feeling", "of", "[person1]", "?"], "question": ["What", "'s", "the", "feeling", "of", [0], "?"], "answer_orig": ["[person1]", "is", "angry", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "west", "img_id": "val-44", "question_number": 0, "annot_id": "val-44", "answer_choices": [[[0], "is", "in", "the", "moment", "of", "playing", "the", "guitar", ".", "being", "in", "the", "moment", "is", "like", "bliss", ".", [0], "is", "very", "happy", "to", "play", "the", "guitar", "."], [[0], "is", "nervous", "about", "meeting", [0], "."], [[0], "is", "angry", "."], [[0], "is", "feeling", "dizzy", "."]], "rationale_choices": [[[0], "is", "in", "the", "moment", "of", "playing", "the", "guitar", ".", "being", "in", "the", "moment", "is", "like", "bliss", ".", [0], "is", "very", "happy", "to", "play", "the", "guitar", "."], [[0], "is", "nervous", "about", "meeting", [0], "."], [[0], "is", "angry", "."], [[0], "is", "feeling", "dizzy", "."]]}
{"video_name": "Dreamgirls", "ep_num": "2/9", "timestamp": "1:28", "link": "https://www.youtube.com/watch?v=O3CGLSyIwNo", "keywords": ["wedding", "friends", "couple", "bride", "groom", "church", "happy"], "img_fn": "MC-VCR_sample/west_97.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "tie", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/west_97.json", "question_orig": ["What", "'s", "[person6]", "doing", "?"], "question": ["What", "'s", [5], "doing", "?"], "answer_orig": ["[person6]", "is", "laughing", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "west", "img_id": "val-45", "question_number": 0, "annot_id": "val-45", "answer_choices": [[[5], "is", "laughing", "."], ["Listening", "to", [5], "talk", "."], [[5], "is", "trying", "to", "ask", "a", "question", "to", [5], "."], [[5], "is", "talking", "to", [5], "."]], "rationale_choices": [[[5], "is", "laughing", "."], ["Listening", "to", [5], "talk", "."], [[5], "is", "trying", "to", "ask", "a", "question", "to", [5], "."], [[5], "is", "talking", "to", [5], "."]]}
{"video_name": "Shallow Hal", "ep_num": "4/5", "timestamp": "1:44", "link": "https://www.youtube.com/watch?v=2R1lEWNNsV0", "keywords": ["barbecue", "roast", "party", "swimming pool", "sunshine"], "img_fn": "MC-VCR_sample/west_98.jpg", "objects": ["person", "person", "person", "person", "umbrella", "umbrella"], "metadata_fn": "MC-VCR_sample/west_98.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "staring", "at", "something", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "west", "img_id": "val-46", "question_number": 0, "annot_id": "val-46", "answer_choices": [[[1], "is", "cutting", "the", "food", "on", "bowl", "with", "a", "knife", "."], [[1], "is", "bringing", "in", "flowers", "for", [1], "."], [[1], "is", "staring", "at", "something", "."], [[1], "is", "running", "away", "from", "the", "thing", [1], "are", "reacting", "to", "."]], "rationale_choices": [[[1], "is", "cutting", "the", "food", "on", "bowl", "with", "a", "knife", "."], [[1], "is", "bringing", "in", "flowers", "for", [1], "."], [[1], "is", "staring", "at", "something", "."], [[1], "is", "running", "away", "from", "the", "thing", [1], "are", "reacting", "to", "."]]}
{"video_name": "Kings", "ep_num": "7/10", "timestamp": "0:10", "link": "https://www.youtube.com/watch?v=042B8vTl0ig", "keywords": ["rob", "fight", "conflict", "graffiti"], "img_fn": "MC-VCR_sample/west_100.jpg", "objects": ["person", "person", "person", "person", "person", "bottle", "person"], "metadata_fn": "MC-VCR_sample/west_100.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "trying", "to", "stop", "something", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "west", "img_id": "val-47", "question_number": 0, "annot_id": "val-47", "answer_choices": [[[1], "is", "removing", "her", "shirt", "."], [[1], "is", "closing", "the", "front", "door", "."], [[1], "is", "handing", "a", "sheet", "of", "paper", "to", [1], "."], [[1], "is", "trying", "to", "stop", "something", "."]], "rationale_choices": [[[1], "is", "removing", "her", "shirt", "."], [[1], "is", "closing", "the", "front", "door", "."], [[1], "is", "handing", "a", "sheet", "of", "paper", "to", [1], "."], [[1], "is", "trying", "to", "stop", "something", "."]]}
{"video_name": "Chunni", "ep_num": "NaN", "timestamp": "10:00", "link": "https://www.youtube.com/watch?v=Dc3WlsmFef0", "keywords": ["muslim", "infant", "mother", "grandparents", "parturition"], "img_fn": "MC-VCR_sample/sa_8.jpg", "objects": ["person", "person", "bed", "teddy bear"], "metadata_fn": "MC-VCR_sample/sa_8.json", "question_orig": ["What", "is", "[person2]", "holding", "?"], "question": ["What", "is", [1], "holding", "?"], "answer_orig": ["[person2]", "holds", "a", "baby", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-48", "question_number": 0, "annot_id": "val-48", "answer_choices": [[[1], "holds", "a", "baby", "."], ["A", "chest", "on", "the", "floor", "."], [[1], "."], [[1], "is", "mixing", "up", "dough", "in", "a", "large", "mixer", "."]], "rationale_choices": [[[1], "holds", "a", "baby", "."], ["A", "chest", "on", "the", "floor", "."], [[1], "."], [[1], "is", "mixing", "up", "dough", "in", "a", "large", "mixer", "."]]}
{"video_name": "Bajrangi Bhaijaan", "ep_num": "NaN", "timestamp": "37:08", "link": "https://www.iqiyi.com/v_19rr7q6q0k.html", "keywords": ["market", "businessman", "transportation", "customer", "help", "lover", "riding"], "img_fn": "MC-VCR_sample/sa_37.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "bicycle", "person", "handbag", "person", "person", "kite", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_37.json", "question_orig": ["Why", "is", "a", "towel", "put", "on", "[person1]", "'s", "neck", "?"], "question": ["Why", "is", "a", "towel", "put", "on", [0], "'s", "neck", "?"], "answer_orig": ["It", "is", "used", "to", "wipe", "sweat", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-49", "question_number": 0, "annot_id": "val-49", "answer_choices": [[[0], "asked", [0], "to", "not", "to", "see", "yet", ".", [1], "wanted", "to", "surprise", [0], "."], ["It", "is", "used", "to", "wipe", "sweat", "."], [[0], "was", "previously", "in", "bed", "."], ["She", "is", "a", "prostitute", "."]], "rationale_choices": [[[0], "asked", [0], "to", "not", "to", "see", "yet", ".", [1], "wanted", "to", "surprise", [0], "."], ["It", "is", "used", "to", "wipe", "sweat", "."], [[0], "was", "previously", "in", "bed", "."], ["She", "is", "a", "prostitute", "."]]}
{"video_name": "Bajrangi Bhaijaan", "ep_num": "NaN", "timestamp": "49:39", "link": "https://www.iqiyi.com/v_19rr7q6q0k.html", "keywords": ["mosque", "religion", "difference", "belief", "pray"], "img_fn": "MC-VCR_sample/sa_38.jpg", "objects": ["person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_38.json", "question_orig": ["What", "is", "[person3]", "'s", "feeling", "?"], "question": ["What", "is", [2], "'s", "feeling", "?"], "answer_orig": ["He", "is", "nervous", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-50", "question_number": 0, "annot_id": "val-50", "answer_choices": [[[2], "has", "a", "need", "to", "have", "a", "conversation", "with", [4], "."], [[2], "feels", "pretty", "happy", "."], ["He", "is", "nervous", "."], ["She", "is", "moved", "by", "what", "she", "is", "hearing", "or", "seeing", "."]], "rationale_choices": [[[2], "has", "a", "need", "to", "have", "a", "conversation", "with", [4], "."], [[2], "feels", "pretty", "happy", "."], ["He", "is", "nervous", "."], ["She", "is", "moved", "by", "what", "she", "is", "hearing", "or", "seeing", "."]]}
{"video_name": "Hindi Medium", "ep_num": "NaN", "timestamp": "1:20:46", "link": "https://www.iqiyi.com/v_19rrdlqq1s.html", "keywords": ["water", "poverty", "angry", "wait"], "img_fn": "MC-VCR_sample/sa_42.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "chair", "person", "frisbee", "bowl"], "metadata_fn": "MC-VCR_sample/sa_42.json", "question_orig": ["How", "does", "[person1]", "feel", "?"], "question": ["How", "does", [0], "feel", "?"], "answer_orig": ["[person1]", "is", "angry", "and", "keeps", "shouting", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-51", "question_number": 0, "annot_id": "val-51", "answer_choices": [[[0], "is", "angry", "and", "keeps", "shouting", "."], [[0], "is", "happy", "that", "he", "and", [0], "have", "made", "a", "deal", "."], [[0], "feels", "thirsty", "."], [[0], "is", "surprised", "."]], "rationale_choices": [[[0], "is", "angry", "and", "keeps", "shouting", "."], [[0], "is", "happy", "that", "he", "and", [0], "have", "made", "a", "deal", "."], [[0], "feels", "thirsty", "."], [[0], "is", "surprised", "."]]}
{"video_name": "Hindi Medium", "ep_num": "NaN", "timestamp": "1:18:53", "link": "https://www.iqiyi.com/v_19rrdlqq1s.html", "keywords": ["bus station", "crowd", "passenger", "commute"], "img_fn": "MC-VCR_sample/sa_43.jpg", "objects": ["person", "person", "bus", "person", "backpack", "person"], "metadata_fn": "MC-VCR_sample/sa_43.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "running", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-52", "question_number": 0, "annot_id": "val-52", "answer_choices": [["Calling", "the", "bartender", "over", "to", "refill", [1], "'", "s", "drink", "."], [[1], "is", "running", "."], [[1], "is", "answering", "the", "phone", "."], [[1], "is", "reading", "the", "information", "on", "laptop", "."]], "rationale_choices": [["Calling", "the", "bartender", "over", "to", "refill", [1], "'", "s", "drink", "."], [[1], "is", "running", "."], [[1], "is", "answering", "the", "phone", "."], [[1], "is", "reading", "the", "information", "on", "laptop", "."]]}
{"video_name": "Lekar Hum Deewana Dil", "ep_num": "NaN", "timestamp": "2:03:43", "link": "https://www.iqiyi.com/v_19ry16nid0.html", "keywords": ["wedding", "religion", "ritual", "groom", "parents", "happy"], "img_fn": "MC-VCR_sample/sa_48.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "apple", "cake", "person", "cake"], "metadata_fn": "MC-VCR_sample/sa_48.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["She", "is", "taking", "photos", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-53", "question_number": 0, "annot_id": "val-53", "answer_choices": [["She", "is", "taking", "an", "order", "."], [[2], "is", "approaching", [0], "as", "if", "she", "wants", "to", "dance", "with", "him", "."], ["Trying", "to", "catch", [2], "'", "s", "attention", "."], ["She", "is", "taking", "photos", "."]], "rationale_choices": [["She", "is", "taking", "an", "order", "."], [[2], "is", "approaching", [0], "as", "if", "she", "wants", "to", "dance", "with", "him", "."], ["Trying", "to", "catch", [2], "'", "s", "attention", "."], ["She", "is", "taking", "photos", "."]]}
{"video_name": "Masaan", "ep_num": "NaN", "timestamp": "23:31", "link": "https://www.iqiyi.com/v_19rrantvik.html", "keywords": ["funeral", "death", "burn", "family", "sad"], "img_fn": "MC-VCR_sample/sa_50.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "baseball bat", "baseball bat"], "metadata_fn": "MC-VCR_sample/sa_50.json", "question_orig": ["What", "is", "[person2]", "'s", "job", "?"], "question": ["What", "is", [1], "'s", "job", "?"], "answer_orig": ["He", "is", "a", "worker", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-54", "question_number": 0, "annot_id": "val-54", "answer_choices": [["He", "is", "a", "lawyer", "in", "a", "hearing", "."], [[1], "is", [0], "'", "s", "officer", "in", "the", "military", "."], ["She", "is", "a", "reporter", "."], ["He", "is", "a", "worker", "."]], "rationale_choices": [["He", "is", "a", "lawyer", "in", "a", "hearing", "."], [[1], "is", [0], "'", "s", "officer", "in", "the", "military", "."], ["She", "is", "a", "reporter", "."], ["He", "is", "a", "worker", "."]]}
{"video_name": "Masaan", "ep_num": "NaN", "timestamp": "1:33:51", "link": "https://www.iqiyi.com/v_19rrantvik.html", "keywords": ["farewell", "police", "family", "station", "train"], "img_fn": "MC-VCR_sample/sa_52.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_52.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "hugging", "someone", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-55", "question_number": 0, "annot_id": "val-55", "answer_choices": [[[0], "is", "leaving", ",", "as", [0], "started", "to", "yell", "at", "him", "."], [[0], "is", "checking", "tv", "for", "any", "intruders", "."], [[0], "is", "pouring", "the", "coffee", "for", "herself", "."], [[0], "is", "hugging", "someone", "."]], "rationale_choices": [[[0], "is", "leaving", ",", "as", [0], "started", "to", "yell", "at", "him", "."], [[0], "is", "checking", "tv", "for", "any", "intruders", "."], [[0], "is", "pouring", "the", "coffee", "for", "herself", "."], [[0], "is", "hugging", "someone", "."]]}
{"video_name": "Gandhi, My Father", "ep_num": "NaN", "timestamp": "1:10:56", "link": "https://www.iqiyi.com/v_19rr7qe11w.html", "keywords": ["funeral", "death", "sad", "family", "mourn"], "img_fn": "MC-VCR_sample/sa_60.jpg", "objects": ["person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_60.json", "question_orig": ["What", "is", "[person1]", "'s", "feeling", "?"], "question": ["What", "is", [0], "'s", "feeling", "?"], "answer_orig": ["[person1]", "is", "desperate", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-56", "question_number": 0, "annot_id": "val-56", "answer_choices": [[[0], "is", "thankful", "for", "what", "he", "has", "in", "life", "."], [[0], "is", "desperate", "."], ["Nothing", ",", [0], "is", "watching", "in", "shock", "."], [[0], "is", "feeling", "ecstasy", "."]], "rationale_choices": [[[0], "is", "thankful", "for", "what", "he", "has", "in", "life", "."], [[0], "is", "desperate", "."], ["Nothing", ",", [0], "is", "watching", "in", "shock", "."], [[0], "is", "feeling", "ecstasy", "."]]}
{"video_name": "Gandhi, My Father", "ep_num": "NaN", "timestamp": "1:37:28", "link": "https://www.iqiyi.com/v_19rr7qe11w.html", "keywords": ["servant", "meal", "wash", "guest", "invite"], "img_fn": "MC-VCR_sample/sa_65.jpg", "objects": ["person", "person", "person", "spoon", "cup", "potted plant", "cup", "bowl", "bowl", "potted plant", "bowl", "bowl", "chair", "dining table", "bowl"], "metadata_fn": "MC-VCR_sample/sa_65.json", "question_orig": ["Where", "is", "[person2]", "sitting", "?"], "question": ["Where", "is", [1], "sitting", "?"], "answer_orig": ["[person2]", "is", "sitting", "on", "the", "floor", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-57", "question_number": 0, "annot_id": "val-57", "answer_choices": [[[1], "is", "at", "a", "bus", "station", ",", "and", "there", "is", "nothing", "else", "available", "."], [[1], "is", "on", "the", "other", "side", "of", "the", "door", "."], [[1], "is", "in", "the", "lobby", "room", "of", "a", "hospital", "."], [[1], "is", "sitting", "on", "the", "floor", "."]], "rationale_choices": [[[1], "is", "at", "a", "bus", "station", ",", "and", "there", "is", "nothing", "else", "available", "."], [[1], "is", "on", "the", "other", "side", "of", "the", "door", "."], [[1], "is", "in", "the", "lobby", "room", "of", "a", "hospital", "."], [[1], "is", "sitting", "on", "the", "floor", "."]]}
{"video_name": "Gandhi, My Father", "ep_num": "NaN", "timestamp": "1:14:13", "link": "https://www.iqiyi.com/v_19rr7qe11w.html", "keywords": ["promote", "sales", "grocery", "businessman", "product", "seasoning"], "img_fn": "MC-VCR_sample/sa_66.jpg", "objects": ["person", "person", "person", "person", "cup", "bottle", "cup"], "metadata_fn": "MC-VCR_sample/sa_66.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "selling", "his", "products", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-58", "question_number": 0, "annot_id": "val-58", "answer_choices": [[[0], "is", "working", "at", "the", "wall", "of", "monitors", "."], [[0], "is", "angrily", "gesturing", "with", "a", "gun", "in", "his", "hand", "."], [[0], "is", "selling", "his", "products", "."], [[0], "is", "telling", [0], "what", "to", "do", "."]], "rationale_choices": [[[0], "is", "working", "at", "the", "wall", "of", "monitors", "."], [[0], "is", "angrily", "gesturing", "with", "a", "gun", "in", "his", "hand", "."], [[0], "is", "selling", "his", "products", "."], [[0], "is", "telling", [0], "what", "to", "do", "."]]}
{"video_name": "Bajrangi Bhaijaan", "ep_num": "NaN", "timestamp": "54:30", "link": "https://www.iqiyi.com/v_19rr7q6q0k.html", "keywords": ["sports", "game", "cricket", "celebrate", "audience", "tv"], "img_fn": "MC-VCR_sample/sa_68.jpg", "objects": ["person", "person", "person", "tv", "person", "potted plant", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_68.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "dancing", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-59", "question_number": 0, "annot_id": "val-59", "answer_choices": [[[0], "is", "acting", "in", "a", "play", "."], [[0], "is", "dancing", "."], [[0], "are", "playing", "the", "violin", "."], [[0], "is", "flirting", "."]], "rationale_choices": [[[0], "is", "acting", "in", "a", "play", "."], [[0], "is", "dancing", "."], [[0], "are", "playing", "the", "violin", "."], [[0], "is", "flirting", "."]]}
{"video_name": "Pad Man", "ep_num": "NaN", "timestamp": "14:31", "link": "https://www.iqiyi.com/v_19rr2zunpw.html", "keywords": ["clinic", "injury", "colleague", "help"], "img_fn": "MC-VCR_sample/sa_70.jpg", "objects": ["person", "person", "person", "person", "person", "handbag", "bicycle", "bench", "motorcycle"], "metadata_fn": "MC-VCR_sample/sa_70.json", "question_orig": ["What", "is", "[person3]", "look", "so", "weak", "?"], "question": ["What", "is", [2], "look", "so", "weak", "?"], "answer_orig": ["[person3]", "might", "get", "injured", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-60", "question_number": 0, "annot_id": "val-60", "answer_choices": [[[2], "is", "afraid", "of", "germs", "."], [[2], "might", "get", "injured", "."], [[2], "was", "just", "in", "a", "fight", "with", [2], "."], ["There", "are", "probably", "a", "group", "of", "guys", "trying", "to", "impress", [2], "and", "it", "is", "not", "working", "."]], "rationale_choices": [[[2], "is", "afraid", "of", "germs", "."], [[2], "might", "get", "injured", "."], [[2], "was", "just", "in", "a", "fight", "with", [2], "."], ["There", "are", "probably", "a", "group", "of", "guys", "trying", "to", "impress", [2], "and", "it", "is", "not", "working", "."]]}
{"video_name": "The Great Indian Kitchen", "ep_num": "NaN", "timestamp": "0:01", "link": "https://www.youtube.com/watch?v=k_E6ctiFn6I", "keywords": ["lovers", "meal", "house", "shy", "honor", "prize"], "img_fn": "MC-VCR_sample/sa_75.jpg", "objects": ["cup", "person", "cup", "person", "chair", "cup", "book", "book", "book", "potted plant", "cup", "book", "book", "book", "book", "book", "book", "dining table", "chair", "chair"], "metadata_fn": "MC-VCR_sample/sa_75.json", "question_orig": ["What", "will", "[person1]", "do", "?"], "question": ["What", "will", [1], "do", "?"], "answer_orig": ["[person1]", "will", "drink", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-61", "question_number": 0, "annot_id": "val-61", "answer_choices": [[[1], "will", "overpower", [1], "."], [[1], "will", "drop", "handbag", "because", "he", "was", "in", "a", "hurry", "."], [[1], "will", "have", "them", "open", "a", "book", "."], [[1], "will", "drink", "."]], "rationale_choices": [[[1], "will", "overpower", [1], "."], [[1], "will", "drop", "handbag", "because", "he", "was", "in", "a", "hurry", "."], [[1], "will", "have", "them", "open", "a", "book", "."], [[1], "will", "drink", "."]]}
{"video_name": "Gangubai Kathiawadi", "ep_num": "NaN", "timestamp": "0:17", "link": "https://www.youtube.com/watch?v=xK2_6yX7aF8", "keywords": ["celebration", "saree", "dance", "festival"], "img_fn": "MC-VCR_sample/sa_79.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_79.json", "question_orig": ["Why", "is", "[person2]", "dacing", "?"], "question": ["Why", "is", [1], "dacing", "?"], "answer_orig": ["She", "is", "happy", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-62", "question_number": 0, "annot_id": "val-62", "answer_choices": [["She", "is", "happy", "."], [[1], "is", "concerned", "about", "the", "water", "coming", "overboard", "."], ["He", "is", "getting", "a", "better", "view", "of", "the", "large", "monitor", "in", "front", "of", "him", "."], ["The", "sword", "in", "front", "of", [1], "'", "s", "eyes", "keeps", "him", "remembering", "."]], "rationale_choices": [["She", "is", "happy", "."], [[1], "is", "concerned", "about", "the", "water", "coming", "overboard", "."], ["He", "is", "getting", "a", "better", "view", "of", "the", "large", "monitor", "in", "front", "of", "him", "."], ["The", "sword", "in", "front", "of", [1], "'", "s", "eyes", "keeps", "him", "remembering", "."]]}
{"video_name": "Hyderabad Candid Wedding Video of \" Sharmila + Srikanth \" by Maru Rickz", "ep_num": "NaN", "timestamp": "3:01", "link": "https://www.youtube.com/watch?v=anMhnalbQ4w&t=181s", "keywords": ["wedding", "family", "bride", "groom", "flower"], "img_fn": "MC-VCR_sample/sa_84.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "cup", "person", "person", "person", "person", "person", "cell phone", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/sa_84.json", "question_orig": ["What", "is", "[person5]", "doing", "?"], "question": ["What", "is", [4], "doing", "?"], "answer_orig": ["[person5]", "is", "taking", "photos", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "south-asia", "img_id": "val-63", "question_number": 0, "annot_id": "val-63", "answer_choices": [[[4], "'", "s", "cutting", "open", "a", "sealed", "container", "."], [[4], "is", "chugging", "the", "glass", "of", "wine", "."], [[4], "is", "taking", "photos", "."], [[4], "is", "waiting", "on", "and", "taking", "the", "order", "of", [4], "."]], "rationale_choices": [[[4], "'", "s", "cutting", "open", "a", "sealed", "container", "."], [[4], "is", "chugging", "the", "glass", "of", "wine", "."], [[4], "is", "taking", "photos", "."], [[4], "is", "waiting", "on", "and", "taking", "the", "order", "of", [4], "."]]}
{"video_name": "TAITA", "ep_num": "NaN", "timestamp": "1:44", "link": "https://www.youtube.com/watch?v=GEtGvWWBvkU", "keywords": ["store", "quarrel", "angry"], "img_fn": "MC-VCR_sample/af_0.jpg", "objects": ["person", "person", "bench"], "metadata_fn": "MC-VCR_sample/af_0.json", "question_orig": ["Why", "is", "[person1]", "spreading", "hands", "?"], "question": ["Why", "is", [0], "spreading", "hands", "?"], "answer_orig": ["[person1]", "is", "angry", "and", "arguing", "with", "others", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "africa", "img_id": "val-64", "question_number": 0, "annot_id": "val-64", "answer_choices": [[[0], "recognizes", [0], "."], [[0], "is", "looking", "at", "everyone", "dance", "."], [[0], "has", "just", "shown", "cellphone", "to", [0], "."], [[0], "is", "angry", "and", "arguing", "with", "others", "."]], "rationale_choices": [[[0], "recognizes", [0], "."], [[0], "is", "looking", "at", "everyone", "dance", "."], [[0], "has", "just", "shown", "cellphone", "to", [0], "."], [[0], "is", "angry", "and", "arguing", "with", "others", "."]]}
{"video_name": "VEVE", "ep_num": "NaN", "timestamp": "40:05", "link": "https://www.youtube.com/watch?v=nfEvfL1ujSc", "keywords": ["religion"], "img_fn": "MC-VCR_sample/af_4.jpg", "objects": ["person", "person"], "metadata_fn": "MC-VCR_sample/af_4.json", "question_orig": ["What", "is", "the", "feeling", "of", "[person1]", "?"], "question": ["What", "is", "the", "feeling", "of", [0], "?"], "answer_orig": ["[person1]", "feels", "anxious", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "africa", "img_id": "val-65", "question_number": 0, "annot_id": "val-65", "answer_choices": [[[0], "feels", "anxious", "."], [[0], "is", "excited", "to", "have", "sex", "."], [[0], "seems", "to", "have", "heartburn", "of", "sorts", "."], [[0], "doesn", "'", "t", "feel", "the", "same", "way", "about", [0], "as", "he", "feels", "about", "her", "."]], "rationale_choices": [[[0], "feels", "anxious", "."], [[0], "is", "excited", "to", "have", "sex", "."], [[0], "seems", "to", "have", "heartburn", "of", "sorts", "."], [[0], "doesn", "'", "t", "feel", "the", "same", "way", "about", [0], "as", "he", "feels", "about", "her", "."]]}
{"video_name": "Hotel Rwanda", "ep_num": "NaN", "timestamp": "4:19", "link": "https://www.youtube.com/watch?v=V8sfQwZH-BU&list=PLZ_oTq0uicYXUG3Ji5sdcxR9Iy4DHqr3V", "keywords": ["factory", "weapons", "doubt", "confidence"], "img_fn": "MC-VCR_sample/af_5.jpg", "objects": ["person", "person", "tie", "bed"], "metadata_fn": "MC-VCR_sample/af_5.json", "question_orig": ["What", "might", "be", "[person2]", "'s", "job", "?"], "question": ["What", "might", "be", [1], "'s", "job", "?"], "answer_orig": ["[person2]", "is", "hotel", "manager", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-66", "question_number": 0, "annot_id": "val-66", "answer_choices": [[[1], "is", "a", "news", "reporter", "."], [[1], "might", "be", [1], "'", "s", "wife", "and", "is", "with", "him", "for", "a", "ride", "."], [[1], "is", "hotel", "manager", "."], [[1], "is", "a", "prostitute", "."]], "rationale_choices": [[[1], "is", "a", "news", "reporter", "."], [[1], "might", "be", [1], "'", "s", "wife", "and", "is", "with", "him", "for", "a", "ride", "."], [[1], "is", "hotel", "manager", "."], [[1], "is", "a", "prostitute", "."]]}
{"video_name": "Two Weddings, Somali Style", "ep_num": "NaN", "timestamp": "0:29", "link": "https://www.youtube.com/watch?v=mbNctEUO294", "keywords": ["wedding", "tradition wedding", "bride", "guests", "photos", "religion"], "img_fn": "MC-VCR_sample/af_6.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "cell phone", "person", "cell phone", "cell phone", "handbag", "person", "person"], "metadata_fn": "MC-VCR_sample/af_6.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [2], "doing", "?"], "answer_orig": ["[person3]", "is", "watching", "[cell2]", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "africa", "img_id": "val-67", "question_number": 0, "annot_id": "val-67", "answer_choices": [[[2], "is", "having", "an", "argument", "with", [11], "and", "is", "getting", "animated", "."], ["Helping", [2], "with", [11], "'", "s", "life", "jacket", "."], [[2], "is", "listening", "to", "someone", "talk", "and", "taking", "notes", "."], [[2], "is", "watching", [11], "."]], "rationale_choices": [[[2], "is", "having", "an", "argument", "with", [11], "and", "is", "getting", "animated", "."], ["Helping", [2], "with", [11], "'", "s", "life", "jacket", "."], [[2], "is", "listening", "to", "someone", "talk", "and", "taking", "notes", "."], [[2], "is", "watching", [11], "."]]}
{"video_name": "Two Weddings, Somali Style", "ep_num": "NaN", "timestamp": "1:42", "link": "https://www.youtube.com/watch?v=mbNctEUO294", "keywords": ["adult", "work", "camel", "herder"], "img_fn": "MC-VCR_sample/af_7.jpg", "objects": ["person", "person", "person", "dog", "person"], "metadata_fn": "MC-VCR_sample/af_7.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "dragging", "something", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-68", "question_number": 0, "annot_id": "val-68", "answer_choices": [[[0], "is", "filling", "his", "thermos", "with", "coffee", "."], [[0], "'", "s", "writing", "something", "on", "a", "notepad", "."], [[0], "is", "dragging", "something", "."], [[0], "is", "passing", "the", "pen", "to", [0], "."]], "rationale_choices": [[[0], "is", "filling", "his", "thermos", "with", "coffee", "."], [[0], "'", "s", "writing", "something", "on", "a", "notepad", "."], [[0], "is", "dragging", "something", "."], [[0], "is", "passing", "the", "pen", "to", [0], "."]]}
{"video_name": "Two Weddings, Somali Style", "ep_num": "NaN", "timestamp": "3:00", "link": "https://www.youtube.com/watch?v=mbNctEUO294", "keywords": ["adult", "football", "tired", "uniform"], "img_fn": "MC-VCR_sample/af_8.jpg", "objects": ["person", "person", "car", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_8.json", "question_orig": ["What", "is", "[person1]", "wearing", "?"], "question": ["What", "is", [0], "wearing", "?"], "answer_orig": ["Sports", "uniform", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "africa", "img_id": "val-69", "question_number": 0, "annot_id": "val-69", "answer_choices": [["He", "is", "wearing", "handcuffs", "."], ["Sports", "uniform", "."], ["He", "is", "wearing", [0], "pac", "shirt", "."], [[0], "is", "in", "a", "military", "outfit", "."]], "rationale_choices": [["He", "is", "wearing", "handcuffs", "."], ["Sports", "uniform", "."], ["He", "is", "wearing", [0], "pac", "shirt", "."], [[0], "is", "in", "a", "military", "outfit", "."]]}
{"video_name": "Two Weddings, Somali Style", "ep_num": "NaN", "timestamp": "4:56", "link": "https://www.youtube.com/watch?v=mbNctEUO294", "keywords": ["sing", "drum", "clapping", "enthralled"], "img_fn": "MC-VCR_sample/af_9.jpg", "objects": ["person", "person", "person", "person", "person", "chair", "person", "person"], "metadata_fn": "MC-VCR_sample/af_9.json", "question_orig": ["What", "is", "[person4]", "doing", "?"], "question": ["What", "is", [3], "doing", "?"], "answer_orig": ["[person4]", "is", "singing", "and", "clapping", "his", "hands", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "africa", "img_id": "val-70", "question_number": 0, "annot_id": "val-70", "answer_choices": [[[3], "is", "telling", [3], "a", "story", "that", "happened", "when", "she", "was", "younger", "."], [[3], "is", "singing", "and", "clapping", "his", "hands", "."], [[3], "is", "painting", "a", "sculpture", "."], [[3], "is", "holding", "a", "machine", "gun", "because", "he", "is", "a", "wanted", "fugitive", "."]], "rationale_choices": [[[3], "is", "telling", [3], "a", "story", "that", "happened", "when", "she", "was", "younger", "."], [[3], "is", "singing", "and", "clapping", "his", "hands", "."], [[3], "is", "painting", "a", "sculpture", "."], [[3], "is", "holding", "a", "machine", "gun", "because", "he", "is", "a", "wanted", "fugitive", "."]]}
{"video_name": "Two Weddings, Somali Style", "ep_num": "NaN", "timestamp": "7:16", "link": "https://www.youtube.com/watch?v=mbNctEUO294", "keywords": ["tea", "chat", "couple", "fun"], "img_fn": "MC-VCR_sample/af_11.jpg", "objects": ["person", "person", "cup", "vase", "person", "potted plant", "person", "couch", "cup"], "metadata_fn": "MC-VCR_sample/af_11.json", "question_orig": ["What", "is", "[person4]", "doing", "?"], "question": ["What", "is", [6], "doing", "?"], "answer_orig": ["[person4]", "is", "drinking", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "africa", "img_id": "val-71", "question_number": 0, "annot_id": "val-71", "answer_choices": [[[6], "is", "drinking", "."], ["Ballroom", "dancing", ",", "such", "as", "the", "waltz", "."], [[6], "is", "interrupting", [6], "'", "s", "act", "."], [[6], "is", "giving", "them", "details", "on", "their", "night", "plans", "."]], "rationale_choices": [[[6], "is", "drinking", "."], ["Ballroom", "dancing", ",", "such", "as", "the", "waltz", "."], [[6], "is", "interrupting", [6], "'", "s", "act", "."], [[6], "is", "giving", "them", "details", "on", "their", "night", "plans", "."]]}
{"video_name": "Hotel Rwanda", "ep_num": "NaN", "timestamp": "0:04", "link": "https://www.youtube.com/watch?v=m2Gb2zWTbPw&list=PLZ_oTq0uicYXUG3Ji5sdcxR9Iy4DHqr3V&index=2", "keywords": ["procession", "support", "crowd", "slogan", "politics"], "img_fn": "MC-VCR_sample/af_18.jpg", "objects": ["person", "person", "person", "person", "person", "person", "backpack", "person", "person", "person", "suitcase", "bowl", "person"], "metadata_fn": "MC-VCR_sample/af_18.json", "question_orig": ["What", "is", "[person3]", "holding", "?"], "question": ["What", "is", [2], "holding", "?"], "answer_orig": ["A", "gun", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "africa", "img_id": "val-72", "question_number": 0, "annot_id": "val-72", "answer_choices": [["It", "is", "a", "lottery", "ticket", "and", "he", "is", "checking", "it", "as", "the", "numbers", "are", "drawn", "on", "tv", "."], ["A", "gun", "."], ["He", "is", "holding", "binoculars", "."], ["He", "is", "reaching", "for", "a", "pistol", "."]], "rationale_choices": [["It", "is", "a", "lottery", "ticket", "and", "he", "is", "checking", "it", "as", "the", "numbers", "are", "drawn", "on", "tv", "."], ["A", "gun", "."], ["He", "is", "holding", "binoculars", "."], ["He", "is", "reaching", "for", "a", "pistol", "."]]}
{"video_name": "Two Weddings, Somali Style", "ep_num": "NaN", "timestamp": "24:51", "link": "https://www.youtube.com/watch?v=mbNctEUO294", "keywords": ["construction", "children", "wood", "build house", "carry"], "img_fn": "MC-VCR_sample/af_20.jpg", "objects": ["person", "person", "horse", "person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_20.json", "question_orig": ["What", "is", "[person1]", "'s", "feeling", "?"], "question": ["What", "is", [0], "'s", "feeling", "?"], "answer_orig": ["[person1]", "is", "very", "happy", "."], "answer_label": 3, "rationale_label": 3, "cultural_or_not": 0, "region": "africa", "img_id": "val-73", "question_number": 0, "annot_id": "val-73", "answer_choices": [[[0], "is", "confused", "because", [0], "is", "high", "on", "marijuana", "."], [[0], "'", "s", "looking", "very", "angry", "at", "the", "moment", "."], [[0], "feels", "nostalgic", "while", "she", "is", "holding", "the", "mirror", "."], [[0], "is", "very", "happy", "."]], "rationale_choices": [[[0], "is", "confused", "because", [0], "is", "high", "on", "marijuana", "."], [[0], "'", "s", "looking", "very", "angry", "at", "the", "moment", "."], [[0], "feels", "nostalgic", "while", "she", "is", "holding", "the", "mirror", "."], [[0], "is", "very", "happy", "."]]}
{"video_name": "Most Dangerous Ways To School | KENYA", "ep_num": "NaN", "timestamp": "28:01", "link": "https://www.youtube.com/watch?v=Rm9ow1RDTAo", "keywords": ["student", "school", "national flag", "national anthem", "teacher"], "img_fn": "MC-VCR_sample/af_21.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_21.json", "question_orig": ["What", "is", "[person5]", "doing", "?"], "question": ["What", "is", [4], "doing", "?"], "answer_orig": ["[person5]", "is", "looking", "up", "to", "sky", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "africa", "img_id": "val-74", "question_number": 0, "annot_id": "val-74", "answer_choices": [[[4], "is", "looking", "up", "to", "sky", "."], [[4], "is", "envious", "of", [4], "having", "an", "opportunity", "to", "meet", "with", [1], "."], [[4], "is", "going", "to", "the", "bathroom", "."], [[4], "is", "watching", "the", "situation", "between", [4], "."]], "rationale_choices": [[[4], "is", "looking", "up", "to", "sky", "."], [[4], "is", "envious", "of", [4], "having", "an", "opportunity", "to", "meet", "with", [1], "."], [[4], "is", "going", "to", "the", "bathroom", "."], [[4], "is", "watching", "the", "situation", "between", [4], "."]]}
{"video_name": "Lamb", "ep_num": "NaN", "timestamp": "12:32", "link": "youtube.com/watch?v=d1k8VymyPd8", "keywords": ["death", "sadness", "rural area", "family"], "img_fn": "MC-VCR_sample/af_23.jpg", "objects": ["person", "person", "person", "chair"], "metadata_fn": "MC-VCR_sample/af_23.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "crying", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "africa", "img_id": "val-75", "question_number": 0, "annot_id": "val-75", "answer_choices": [[[1], "is", "looking", "at", "pottedplant", "next", "to", "her", "."], [[1], "is", "crying", "."], [[1], "is", "relaxing", "and", "taking", "instructions", "from", [1], "."], [[1], "is", "hosting", "a", "party", "."]], "rationale_choices": [[[1], "is", "looking", "at", "pottedplant", "next", "to", "her", "."], [[1], "is", "crying", "."], [[1], "is", "relaxing", "and", "taking", "instructions", "from", [1], "."], [[1], "is", "hosting", "a", "party", "."]]}
{"video_name": "Lamb", "ep_num": "NaN", "timestamp": "59:11", "link": "youtube.com/watch?v=d1k8VymyPd8", "keywords": ["festival", "elders", "customs", "children", "religion", "joke"], "img_fn": "MC-VCR_sample/af_25.jpg", "objects": ["person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_25.json", "question_orig": ["Why", "is", "[person2]", "excited", "and", "with", "his", "eyes", "wide", "open", "?"], "question": ["Why", "is", [1], "excited", "and", "with", "his", "eyes", "wide", "open", "?"], "answer_orig": ["[person2]", "is", "telling", "a", "story", "vividly", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-76", "question_number": 0, "annot_id": "val-76", "answer_choices": [[[1], "doesn", "'", "t", "understand", "what", "he", "is", "holding", "."], [[1], "is", "smiling", "at", "his", "date", "that", "just", "arrived", "."], [[1], "is", "telling", "a", "story", "vividly", "."], [[1], "is", "surprised", "by", "the", "fighting", "prowess", "of", [1], ",", "and", "does", "not", "believe", "they", "need", "his", "help", "."]], "rationale_choices": [[[1], "doesn", "'", "t", "understand", "what", "he", "is", "holding", "."], [[1], "is", "smiling", "at", "his", "date", "that", "just", "arrived", "."], [[1], "is", "telling", "a", "story", "vividly", "."], [[1], "is", "surprised", "by", "the", "fighting", "prowess", "of", [1], ",", "and", "does", "not", "believe", "they", "need", "his", "help", "."]]}
{"video_name": "Lamb", "ep_num": "NaN", "timestamp": "30:18", "link": "youtube.com/watch?v=d1k8VymyPd8", "keywords": ["transportation", "bus", "ticket", "sell"], "img_fn": "MC-VCR_sample/af_26.jpg", "objects": ["person", "person", "bus", "person", "sheep", "person", "person", "person", "bus", "person", "person"], "metadata_fn": "MC-VCR_sample/af_26.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [3], "doing", "?"], "answer_orig": ["[person3]", "is", "writing", "something", "on", "his", "notebook", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-77", "question_number": 0, "annot_id": "val-77", "answer_choices": [[[3], "is", "looking", "at", [3], "because", [3], "is", "curious", "if", [4], "is", "flirting", "with", [1], "."], ["Watching", [3], "interact", "."], [[3], "is", "writing", "something", "on", "his", "notebook", "."], [[3], "is", "impressed", "by", "what", "the", "others", "are", "clapping", "at", "."]], "rationale_choices": [[[3], "is", "looking", "at", [3], "because", [3], "is", "curious", "if", [4], "is", "flirting", "with", [1], "."], ["Watching", [3], "interact", "."], [[3], "is", "writing", "something", "on", "his", "notebook", "."], [[3], "is", "impressed", "by", "what", "the", "others", "are", "clapping", "at", "."]]}
{"video_name": "Lamb", "ep_num": "NaN", "timestamp": "NaN", "link": "youtube.com/watch?v=d1k8VymyPd8", "keywords": ["customs", "family", "combing the hair", "hair", "carry"], "img_fn": "MC-VCR_sample/af_29.jpg", "objects": ["person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_29.json", "question_orig": ["Why", "is", "[person1]", "putting", "things", "on", "shoulder", "?"], "question": ["Why", "is", [0], "putting", "things", "on", "shoulder", "?"], "answer_orig": ["It", "is", "easy", "for", "[person1]", "to", "carry", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "africa", "img_id": "val-78", "question_number": 0, "annot_id": "val-78", "answer_choices": [["It", "is", "easy", "for", [0], "to", "carry", "."], [[0], "wanted", "to", "be", "able", "to", "put", "the", "book", "into", "it", "once", "she", "stole", "it", "."], [[0], "is", "packing", "for", "a", "vacation", "."], [[0], "is", "trying", "to", "fix", "the", "trunk", "of", "his", "car", "."]], "rationale_choices": [["It", "is", "easy", "for", [0], "to", "carry", "."], [[0], "wanted", "to", "be", "able", "to", "put", "the", "book", "into", "it", "once", "she", "stole", "it", "."], [[0], "is", "packing", "for", "a", "vacation", "."], [[0], "is", "trying", "to", "fix", "the", "trunk", "of", "his", "car", "."]]}
{"video_name": "Lamb", "ep_num": "NaN", "timestamp": "1:28:57", "link": "youtube.com/watch?v=d1k8VymyPd8", "keywords": ["festival", "dancing", "women", "clothes", "custom", "party"], "img_fn": "MC-VCR_sample/af_31.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_31.json", "question_orig": ["What", "is", "[person2]", "doing", "?"], "question": ["What", "is", [1], "doing", "?"], "answer_orig": ["[person2]", "is", "dancing", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-79", "question_number": 0, "annot_id": "val-79", "answer_choices": [[[1], "is", "giving", "directions", "based", "on", "the", "information", "in", "front", "of", "her", "."], [[1], "is", "keeping", "men", "from", "entering", "a", "lifeboat", "ahead", "of", "the", "women", "and", "children", "."], [[1], "is", "dancing", "."], [[1], "is", "a", "server", "and", "he", "just", "brought", "drinks", "cup", "over", "to", [1], "."]], "rationale_choices": [[[1], "is", "giving", "directions", "based", "on", "the", "information", "in", "front", "of", "her", "."], [[1], "is", "keeping", "men", "from", "entering", "a", "lifeboat", "ahead", "of", "the", "women", "and", "children", "."], [[1], "is", "dancing", "."], [[1], "is", "a", "server", "and", "he", "just", "brought", "drinks", "cup", "over", "to", [1], "."]]}
{"video_name": "A Journey for Coffee Beans", "ep_num": "NaN", "timestamp": "7:29", "link": "http://m.iqiyi.com/v_13vq3ek9vu8.html?key=b398b8ccbaeacca840073a7ee9b7e7e6&msrc=3_31_56&aid=3965607138959900&tvid=3965607138959900&cid=21&identifier=weixinv1&ftype=27&subtype=1&vip_pc=0&vip_tpc=0&isrd=1&p1=2_22_222&social_platform=link", "keywords": ["picking the coffee beans", "factory", "women", "happiness", "worker", "work", "pick", "quality"], "img_fn": "MC-VCR_sample/af_32.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "banana"], "metadata_fn": "MC-VCR_sample/af_32.json", "question_orig": ["What", "is", "[person4]", "'s", "job", "here", "?"], "question": ["What", "is", [3], "'s", "job", "here", "?"], "answer_orig": ["She", "is", "a", "worker", "."], "answer_label": 1, "rationale_label": 1, "cultural_or_not": 0, "region": "africa", "img_id": "val-80", "question_number": 0, "annot_id": "val-80", "answer_choices": [[[3], "is", "security", "personnel", "."], ["She", "is", "a", "worker", "."], ["She", "is", "a", "nanny", "for", [3], "."], ["He", "works", "in", "a", "toy", "store", "."]], "rationale_choices": [[[3], "is", "security", "personnel", "."], ["She", "is", "a", "worker", "."], ["She", "is", "a", "nanny", "for", [3], "."], ["He", "works", "in", "a", "toy", "store", "."]]}
{"video_name": "A Journey for Coffee Beans", "ep_num": "NaN", "timestamp": "11:34", "link": "http://m.iqiyi.com/v_13vq3ek9vu8.html?key=b398b8ccbaeacca840073a7ee9b7e7e6&msrc=3_31_56&aid=3965607138959900&tvid=3965607138959900&cid=21&identifier=weixinv1&ftype=27&subtype=1&vip_pc=0&vip_tpc=0&isrd=1&p1=2_22_222&social_platform=link", "keywords": ["boil", "politeness", "manners", "customs", "guest", "coffee", "coffee bean", "taste"], "img_fn": "MC-VCR_sample/af_33.jpg", "objects": ["person", "tie", "person", "person", "person", "cell phone", "person", "chair", "cup", "person", "bowl", "person"], "metadata_fn": "MC-VCR_sample/af_33.json", "question_orig": ["What", "is", "[person3]", "doing", "?"], "question": ["What", "is", [3], "doing", "?"], "answer_orig": ["[person3]", "is", "chatting", "with", "others", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-81", "question_number": 0, "annot_id": "val-81", "answer_choices": [[[3], "is", "smoking", "because", "she", "is", "stressed", "."], [[3], "is", "holding", "a", "car", "door", "open", "for", [3], "."], [[3], "is", "chatting", "with", "others", "."], [[3], "is", "painting", "the", "house", "."]], "rationale_choices": [[[3], "is", "smoking", "because", "she", "is", "stressed", "."], [[3], "is", "holding", "a", "car", "door", "open", "for", [3], "."], [[3], "is", "chatting", "with", "others", "."], [[3], "is", "painting", "the", "house", "."]]}
{"video_name": "A Journey for Coffee Beans", "ep_num": "NaN", "timestamp": "21:08", "link": "http://m.iqiyi.com/v_13vq3ek9vu8.html?key=b398b8ccbaeacca840073a7ee9b7e7e6&msrc=3_31_56&aid=3965607138959900&tvid=3965607138959900&cid=21&identifier=weixinv1&ftype=27&subtype=1&vip_pc=0&vip_tpc=0&isrd=1&p1=2_22_222&social_platform=link", "keywords": ["fermentation tanks", "men", "rural area", "vegetation", "working"], "img_fn": "MC-VCR_sample/af_34.jpg", "objects": ["person", "person", "person", "person", "person", "person", "bench", "person"], "metadata_fn": "MC-VCR_sample/af_34.json", "question_orig": ["What", "is", "[person1]", "doing", "?"], "question": ["What", "is", [0], "doing", "?"], "answer_orig": ["[person1]", "is", "a", "worker", ",", "working", "in", "a", "construction", "site", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-82", "question_number": 0, "annot_id": "val-82", "answer_choices": [[[0], "is", "cleaning", "the", "kitchen", "."], [[0], "is", "writing", "her", "thoughts", "from", "the", "shower", "."], [[0], "is", "a", "worker", ",", "working", "in", "a", "construction", "site", "."], [[0], "is", "trying", "to", "sell", "a", "vase", "to", [0], "."]], "rationale_choices": [[[0], "is", "cleaning", "the", "kitchen", "."], [[0], "is", "writing", "her", "thoughts", "from", "the", "shower", "."], [[0], "is", "a", "worker", ",", "working", "in", "a", "construction", "site", "."], [[0], "is", "trying", "to", "sell", "a", "vase", "to", [0], "."]]}
{"video_name": "A Journey for Coffee Beans", "ep_num": "NaN", "timestamp": "25:27", "link": "http://m.iqiyi.com/v_13vq3ek9vu8.html?key=b398b8ccbaeacca840073a7ee9b7e7e6&msrc=3_31_56&aid=3965607138959900&tvid=3965607138959900&cid=21&identifier=weixinv1&ftype=27&subtype=1&vip_pc=0&vip_tpc=0&isrd=1&p1=2_22_222&social_platform=link", "keywords": ["dry", "natural processed", "coffee beans"], "img_fn": "MC-VCR_sample/af_36.jpg", "objects": ["person", "person", "person", "person", "potted plant", "dining table", "knife", "potted plant"], "metadata_fn": "MC-VCR_sample/af_36.json", "question_orig": ["Why", "does", "[person2]", "hold", "a", "coffee", "bean", "in", "her", "hand", "?"], "question": ["Why", "does", [1], "hold", "a", "coffee", "bean", "in", "her", "hand", "?"], "answer_orig": ["[person2]", "is", "checking", "its", "quality", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "africa", "img_id": "val-83", "question_number": 0, "annot_id": "val-83", "answer_choices": [[[1], "is", "checking", "its", "quality", "."], [[1], "is", "a", "waitress", "."], [[1], "plans", "to", "buy", "something", "."], [[1], "does", "not", "like", "her", "food", "."]], "rationale_choices": [[[1], "is", "checking", "its", "quality", "."], [[1], "is", "a", "waitress", "."], [[1], "plans", "to", "buy", "something", "."], [[1], "does", "not", "like", "her", "food", "."]]}
{"video_name": "The Wedding Party (Trailer)", "ep_num": "NaN", "timestamp": "1:08", "link": "https://www.youtube.com/watch?app=desktop&v=zbnXd-zCD6I", "keywords": ["wedding", "parents", "celebration", "traditional clothes", "party", "family", "friend"], "img_fn": "MC-VCR_sample/af_38.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "chair", "person", "person", "person", "person", "person", "person", "person"], "metadata_fn": "MC-VCR_sample/af_38.json", "question_orig": ["How", "does", "[person4]", "feel", "?"], "question": ["How", "does", [3], "feel", "?"], "answer_orig": ["Very", "happy", "and", "excited", "."], "answer_label": 2, "rationale_label": 2, "cultural_or_not": 0, "region": "africa", "img_id": "val-84", "question_number": 0, "annot_id": "val-84", "answer_choices": [[[3], "is", "feeling", "confused", "."], ["He", "feels", "tired", "."], ["Very", "happy", "and", "excited", "."], ["He", "feels", "threatened", "by", "the", "person", "who", "has", "approached", "them", "."]], "rationale_choices": [[[3], "is", "feeling", "confused", "."], ["He", "feels", "tired", "."], ["Very", "happy", "and", "excited", "."], ["He", "feels", "threatened", "by", "the", "person", "who", "has", "approached", "them", "."]]}
{"video_name": "The Wedding Party (Trailer)", "ep_num": "NaN", "timestamp": "1:11", "link": "https://www.youtube.com/watch?app=desktop&v=zbnXd-zCD6I", "keywords": ["wedding", "parent", "celebration", "traditional clothes", "party", "family", "friend"], "img_fn": "MC-VCR_sample/af_39.jpg", "objects": ["person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "person", "wineglass"], "metadata_fn": "MC-VCR_sample/af_39.json", "question_orig": ["Why", "is", "[person8]", "so", "happy", "?"], "question": ["Why", "is", [7], "so", "happy", "?"], "answer_orig": ["[person8]", "is", "celebrating", "and", "enjoying", "party", "."], "answer_label": 0, "rationale_label": 0, "cultural_or_not": 0, "region": "africa", "img_id": "val-85", "question_number": 0, "annot_id": "val-85", "answer_choices": [[[7], "is", "celebrating", "and", "enjoying", "party", "."], [[7], "is", "acting", "in", "a", "play", "."], [[7], "is", "tap", "dancing", "on", "stage", "."], [[7], "'", "s", "still", "in", "love", "with", "the", "groom", "and", "jut", "saw", "him", "get", "married", "."]], "rationale_choices": [[[7], "is", "celebrating", "and", "enjoying", "party", "."], [[7], "is", "acting", "in", "a", "play", "."], [[7], "is", "tap", "dancing", "on", "stage", "."], [[7], "'", "s", "still", "in", "love", "with", "the", "groom", "and", "jut", "saw", "him", "get", "married", "."]]}