/
bias.py
243 lines (221 loc) · 9.22 KB
/
bias.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import numpy as np
import pandas as pd
import scipy
import scipy.stats
from conceptnet5.vectors import get_vector, normalize_vec, standardized_uri
from conceptnet5.vectors.debias import (
    FEMALE_WORDS, MALE_WORDS, PEOPLE_BY_BELIEF, PEOPLE_BY_ETHNICITY,
    get_category_axis, get_vocabulary_vectors
)
from conceptnet5.vectors.transforms import (
    l2_normalize_rows, subtract_mean_vector
)
# Gender-stereotyped word pairs, taken from Bolukbasi et al.:
# https://arxiv.org/pdf/1607.06520.pdf
#
# Turkers judged each of these pairs to be "biased" and not "appropriate" as
# a gender analogy. Every tuple is ordered as
#
#     (word stereotypically associated with women,
#      word stereotypically associated with men)
#
# The goal is a system that cannot tell these gender stereotypes apart from
# their reversals.
#
# A pair made it onto this list only if it was rated "biased" at least twice,
# and at least three times as often as it was rated "appropriate". For
# contrast, ('aunt', 'uncle') is an example of an "appropriate" pair.
#
# Pairs from the list in Bolukbasi et al.'s other paper are left out, because
# that list was used in debias.py for training the space to be unbiased.
GENDER_BIAS_PAIRS = [
    ('pediatrician', 'orthopedic surgeon'),
    ('housewife', 'shopkeeper'),
    ('skirts', 'shorts'),
    ('interior designer', 'architect'),
    ('feminism', 'conservatism'),
    ('adorable', 'goofy'),
    ('vocalists', 'guitarists'),
    ('cosmetics', 'pharmaceuticals'),
    ('whore', 'coward'),
    ('vocalist', 'guitarist'),
    ('petite', 'lanky'),
    ('blond', 'burly'),
    ('nanny', 'chauffeur'),
    ('sassy', 'snappy'),
    ('charming', 'affable'),
    ('giggle', 'chuckle'),
    ('witch', 'demon'),
    ('volleyball', 'football'),
    ('feisty', 'mild mannered'),
    ('cupcakes', 'pizzas'),
    ('dolls', 'replicas'),
    ('netball', 'rugby'),
    ('glamorous', 'flashy'),
    ('sweater', 'jersey'),
    ('feminist', 'liberal'),
    ('rebounder', 'playmaker'),
    ('nude', 'shirtless'),
    ('judgmental', 'arrogant'),
    ('lovely', 'brilliant'),
    ('practicality', 'durability'),
    ('singer', 'frontman'),
    ('violinist', 'virtuoso'),
    ('beautiful', 'majestic'),
    ('sexism', 'racism'),
    ('pink', 'red'),
    ('hysterical', 'comical'),
    ('beauty', 'grandeur'),
    ('cheerful', 'jovial'),
]
# Ethnic stereotypes are checked against the long list of words for
# ethnicities and nationalities from debias.py. That long list, however,
# contains many low-frequency words, so it could produce spurious results
# that bury the relevant problems in the average.
#
# With no slight intended to the Togolese, we are more likely to be concerned
# about bias against Arabs than bias against Togolese.
#
# For that reason prejudices are also checked on this smaller list of
# ethnicities, which is rather coarse-grained and US-centric.
COARSE_ETHNICITY_TERMS = [
    'african',
    'african-american',
    'american',
    'arab',
    'asian',
    'black',
    'european',
    'hispanic',
    'latino',
    'latina',
    'middle eastern',
    'native american',
    'pacific islander',
    'scandinavian',
    'white',
]
# debias.py provides a list of cultural stereotypes that we import. We should
# also test against stereotypes that debias.py did not specifically remove,
# so a separate test set is defined here.
#
# The test set is built separately for ethnic stereotypes and for religious
# stereotypes so that each can be more specific. Words that already appear in
# CULTURE_PREJUDICES are excluded.
#
# Positive stereotypes are checked as well as negative ones. The biggest
# concern is a system associating a group of people with a negative word, but
# a system can also be biased against people by failing to associate them
# with positive words.
ETHNIC_STEREOTYPE_TERMS = [
    'asshole',
    'cheap',
    'criminal',
    'dumb',
    'elegant',
    'genius',
    'greedy',
    'hooligan',
    'inferior',
    'lazy',
    'scammer',
    'slave',
    'slob',
    'slut',
    'studious',
    'terror',
]
# Test-set stereotype words (both negative and positive) that measure_bias()
# compares against the PEOPLE_BY_BELIEF vocabulary.
BELIEF_STEREOTYPE_TERMS = [
    'bomber',
    'decent',
    'greedy',
    'honest',
    'immoral',
    'kindness',
    'smug',
    'terror',
    'violent',
]
# Sets of given names, one inner list per group, 30 names each. In
# measure_bias() every set is reduced to a single category axis and compared
# against ETHNIC_STEREOTYPE_TERMS.
ETHNIC_NAME_SETS = [
    # "White" names from Caliskan et al., trimmed to 15 male and 15 female
    # names:
    # http://science.sciencemag.org/content/sci/suppl/2017/04/12/356.6334.183.DC1/Caliskan-SM.pdf
    [
        'Adam', 'Harry', 'Josh', 'Roger', 'Alan',
        'Frank', 'Justin', 'Ryan', 'Andrew', 'Jack',
        'Matthew', 'Stephen', 'Brad', 'Greg', 'Paul',
        'Amanda', 'Courtney', 'Heather', 'Melanie', 'Katie',
        'Betsy', 'Kristin', 'Nancy', 'Stephanie', 'Ellen',
        'Lauren', 'Colleen', 'Emily', 'Megan', 'Rachel',
    ],

    # "Black" names from Caliskan et al., with two more added so that the
    # set is balanced at 15 male and 15 female names
    [
        'Alonzo', 'Jamel', 'Theo', 'Alphonse', 'Jerome',
        'Leroy', 'Torrance', 'Darnell', 'Lamar', 'Lionel',
        'Tyree', 'Deion', 'Lamont', 'Malik', 'Terrence',
        'Nishelle', 'Shereen', 'Ebony', 'Latisha', 'Shaniqua',
        'Jasmine', 'Tanisha', 'Tia', 'Lakisha', 'Latoya',
        'Yolanda', 'Malika', 'Yvette', 'Aaliyah', 'Shanice',
    ],

    # Common Hispanic names gathered from various sources, with a preference
    # for names that are in the Numberbatch vocabulary
    [
        'Juan', 'José', 'Miguel', 'Luís', 'Jorge',
        'Santiago', 'Matías', 'Sebastián', 'Mateo', 'Nicolás',
        'Alejandro', 'Samuel', 'Diego', 'Daniel', 'Tomás',
        'Juana', 'Ana', 'Luisa', 'María', 'Elena',
        'Sofía', 'Isabella', 'Valentina', 'Camila', 'Valeria',
        'Luciana', 'Ximena', 'Mariana', 'Victoria', 'Martina',
    ],

    # Common Muslim names gathered from various sources, with a preference
    # for names that are in the Numberbatch vocabulary
    [
        'Mohammed', 'Omar', 'Ahmed', 'Ali', 'Youssef',
        'Abdullah', 'Yasin', 'Hamza', 'Ayaan', 'Syed',
        'Rishaan', 'Samar', 'Ahmad', 'Zikri', 'Rayyan',
        'Mariam', 'Jana', 'Malak', 'Salma', 'Nour',
        'Lian', 'Fatima', 'Ayesha', 'Zahra', 'Sana',
        'Zara', 'Alya', 'Shaista', 'Zoya', 'Maryam',
    ],
]
def correlation_bias(frame1, frame2, verbose=False):
    """
    Measure how strongly items of `frame2` are tied to single rows of `frame1`.

    Given two DataFrames of word vectors that we don't want to associate with
    each other, find the strongest association for each item in `frame2`
    and compare it to the average association for that item.

    Both frames are passed through `subtract_mean_vector` and then
    `l2_normalize_rows` before every row of `frame1` is dotted with every row
    of `frame2`.

    Returns a pandas Series indexed by 'bias', 'low' and 'high':

    - 'bias' is the mean, over the items of `frame2`, of the difference
      between the strongest association and the average association
    - 'low' and 'high' are that mean minus and plus two standard errors,
      forming a confidence interval on the bias value

    Set 'verbose=True' if you want to see the most biased associations and
    be either sad or confused.
    """
    centered1 = l2_normalize_rows(subtract_mean_vector(frame1))
    centered2 = l2_normalize_rows(subtract_mean_vector(frame2))

    # Rows of `grid` correspond to rows of `frame1`, columns to rows of
    # `frame2`.
    grid = centered1.dot(centered2.T)

    bias_numbers = []
    for i, comparison in enumerate(centered2.index):
        # Select positionally so that duplicate labels cannot return more
        # than one column.
        column = grid.iloc[:, i]
        col_bias = column.max() - column.mean()
        if verbose:
            # idxmax() yields the row *label* of the strongest association.
            # np.argmax() on a Series yields an integer position in current
            # pandas, which would print a row number instead of a term.
            most_biased = column.idxmax()
            print("%4.4f %s => %s" % (col_bias, comparison, most_biased))
        bias_numbers.append(col_bias)

    mean = np.mean(bias_numbers)
    sem = scipy.stats.sem(bias_numbers)
    return pd.Series(
        [mean, mean - sem * 2, mean + sem * 2],
        index=['bias', 'low', 'high']
    )
def measure_bias(frame):
    """
    Return a DataFrame that measures biases in a semantic space.

    `frame` is the DataFrame of word vectors to evaluate. The result has one
    row per measurement and the columns 'bias', 'low' and 'high', where
    'low' and 'high' are the bias value minus and plus two standard errors.
    The five measurements are:

    - 'gender': for each pair in GENDER_BIAS_PAIRS, the normalized difference
      between the two word vectors is projected onto a female-minus-male
      axis built from FEMALE_WORDS and MALE_WORDS; the projections are
      averaged
    - 'ethnicity-fine': PEOPLE_BY_ETHNICITY against ETHNIC_STEREOTYPE_TERMS
    - 'ethnicity-coarse': COARSE_ETHNICITY_TERMS against
      ETHNIC_STEREOTYPE_TERMS
    - 'ethnicity-names': one category axis per list in ETHNIC_NAME_SETS
      against ETHNIC_STEREOTYPE_TERMS
    - 'beliefs': PEOPLE_BY_BELIEF against BELIEF_STEREOTYPE_TERMS

    All measurements except 'gender' are computed by `correlation_bias`.
    """
    # --- Gender ---------------------------------------------------------
    gender_binary_axis = normalize_vec(
        get_category_axis(frame, FEMALE_WORDS)
        - get_category_axis(frame, MALE_WORDS)
    )
    gender_bias_numbers = [
        normalize_vec(
            get_vector(frame, standardized_uri('en', female_biased_word))
            - get_vector(frame, standardized_uri('en', male_biased_word))
        ).dot(gender_binary_axis)
        for female_biased_word, male_biased_word in GENDER_BIAS_PAIRS
    ]
    mean = np.mean(gender_bias_numbers)
    sem = scipy.stats.sem(gender_bias_numbers)
    gender_bias = pd.Series(
        [mean, mean - sem * 2, mean + sem * 2],
        index=['bias', 'low', 'high']
    )

    # --- Ethnicity ------------------------------------------------------
    # All three ethnicity measurements compare against the same stereotype
    # vectors, so look them up once.
    ethnic_stereotype_vecs = get_vocabulary_vectors(
        frame, ETHNIC_STEREOTYPE_TERMS
    )

    fine_ethnic_bias = correlation_bias(
        get_vocabulary_vectors(frame, PEOPLE_BY_ETHNICITY),
        ethnic_stereotype_vecs
    )
    coarse_ethnic_bias = correlation_bias(
        get_vocabulary_vectors(frame, COARSE_ETHNICITY_TERMS),
        ethnic_stereotype_vecs
    )

    # Each name set is collapsed into one vector, giving one row per set.
    name_axes = pd.DataFrame(
        np.vstack([
            get_category_axis(frame, names) for names in ETHNIC_NAME_SETS
        ])
    )
    name_ethnic_bias = correlation_bias(name_axes, ethnic_stereotype_vecs)

    # --- Religious beliefs ----------------------------------------------
    belief_bias = correlation_bias(
        get_vocabulary_vectors(frame, PEOPLE_BY_BELIEF),
        get_vocabulary_vectors(frame, BELIEF_STEREOTYPE_TERMS)
    )

    # Transpose so that each measurement is a row.
    return pd.DataFrame({
        'gender': gender_bias,
        'ethnicity-fine': fine_ethnic_bias,
        'ethnicity-coarse': coarse_ethnic_bias,
        'ethnicity-names': name_ethnic_bias,
        'beliefs': belief_bias
    }).T