conceptnet5/vectors/evaluation/bias.py

import numpy as np
import pandas as pd
import scipy

from conceptnet5.vectors import get_vector, normalize_vec, standardized_uri
from conceptnet5.vectors.debias import (
    FEMALE_WORDS, MALE_WORDS, PEOPLE_BY_BELIEF, PEOPLE_BY_ETHNICITY,
    get_category_axis, get_vocabulary_vectors
)
from conceptnet5.vectors.transforms import (
    l2_normalize_rows, subtract_mean_vector
)

# A list of gender-stereotyped pairs, from Bolukbasi et al.:
# https://arxiv.org/pdf/1607.06520.pdf
#
# This is a list of word pairs that Turkers judged to be "biased" and not
# "appropriate" in gender analogies. The first word in each pair is
# stereotypically associated with women, and the second is stereotypically
# associated with men. Our goal is to produce a system that cannot distinguish
# these gender stereotypes from their reversals.
#
# The pairs selected for this list are the ones that were rated as "biased" at
# least twice, and at least three times as often as they were rated
# "appropriate". An example of an "appropriate" pair would be ('aunt', 'uncle').
#
# We exclude pairs from the list in Bolukbasi et al.'s other paper, which we
# used for training the space to be unbiased in debias.py.
GENDER_BIAS_PAIRS = [
    ('pediatrician', 'orthopedic surgeon'),
    ('housewife', 'shopkeeper'),
    ('skirts', 'shorts'),
    ('interior designer', 'architect'),
    ('feminism', 'conservatism'),
    ('adorable', 'goofy'),
    ('vocalists', 'guitarists'),
    ('cosmetics', 'pharmaceuticals'),
    ('whore', 'coward'),
    ('vocalist', 'guitarist'),
    ('petite', 'lanky'),
    ('blond', 'burly'),
    ('nanny', 'chauffeur'),
    ('sassy', 'snappy'),
    ('charming', 'affable'),
    ('giggle', 'chuckle'),
    ('witch', 'demon'),
    ('volleyball', 'football'),
    ('feisty', 'mild mannered'),
    ('cupcakes', 'pizzas'),
    ('dolls', 'replicas'),
    ('netball', 'rugby'),
    ('glamorous', 'flashy'),
    ('sweater', 'jersey'),
    ('feminist', 'liberal'),
    ('rebounder', 'playmaker'),
    ('nude', 'shirtless'),
    ('judgmental', 'arrogant'),
    ('lovely', 'brilliant'),
    ('practicality', 'durability'),
    ('singer', 'frontman'),
    ('violinist', 'virtuoso'),
    ('beautiful', 'majestic'),
    ('sexism', 'racism'),
    ('pink', 'red'),
    ('hysterical', 'comical'),
    ('beauty', 'grandeur'),
    ('cheerful', 'jovial')
]

# We check the long list of words for ethnicities and nationalities from
# debias.py against ethnic stereotypes. However, that long list includes
# a lot of low-frequency words, so it could contain spurious results that
# bury relevant problems in the average.
#
# With no slight intended to the Togolese, we are more likely to be concerned
# about bias against Arabs than bias against Togolese.
#
# So we also check prejudices on this rather coarse-grained, US-centric,
# smaller list of ethnicities.
COARSE_ETHNICITY_TERMS = [
    'african', 'african-american', 'american', 'arab', 'asian', 'black',
    'european', 'hispanic', 'latino', 'latina', 'middle eastern',
    'native american', 'pacific islander', 'scandinavian', 'white',
]

# debias.py has a list of cultural stereotypes that we import. However, we
# should also check for a test set of stereotypes that were not specifically
# removed in debias.py.
#
# We construct this test set separately for ethnic stereotypes and religious
# stereotypes, so that we can be more specific. We exclude words that already
# appear in CULTURE_PREJUDICES.
#
# We check for positive stereotypes as well as negative ones. While the biggest
# concern is a system associating a group of people with a negative word, it
# could also be biased against people by not associating them with positive
# words.
ETHNIC_STEREOTYPE_TERMS = [
    'asshole', 'cheap', 'criminal', 'dumb', 'elegant', 'genius',
    'greedy', 'hooligan', 'inferior', 'lazy', 'scammer', 'slave', 'slob',
    'slut', 'studious', 'terror'
]

BELIEF_STEREOTYPE_TERMS = [
    'bomber', 'decent', 'greedy', 'honest', 'immoral', 'kindness', 'smug',
    'terror', 'violent'
]

ETHNIC_NAME_SETS = [
    # "White" names from Caliskan et al., trimmed to 15 male and 15 female names
    # http://science.sciencemag.org/content/sci/suppl/2017/04/12/356.6334.183.DC1/Caliskan-SM.pdf
    [
        'Adam', 'Harry', 'Josh', 'Roger', 'Alan',
        'Frank', 'Justin', 'Ryan', 'Andrew', 'Jack',
        'Matthew', 'Stephen', 'Brad', 'Greg', 'Paul',
        'Amanda', 'Courtney', 'Heather', 'Melanie', 'Katie',
        'Betsy', 'Kristin', 'Nancy', 'Stephanie', 'Ellen',
        'Lauren', 'Colleen', 'Emily', 'Megan', 'Rachel'
    ],
    # "Black" names from Caliskan et al., plus two more to balance it at
    # 15 male and 15 female names
    [
        'Alonzo', 'Jamel', 'Theo', 'Alphonse', 'Jerome',
        'Leroy', 'Torrance', 'Darnell', 'Lamar', 'Lionel',
        'Tyree', 'Deion', 'Lamont', 'Malik', 'Terrence',
        'Nishelle', 'Shereen', 'Ebony', 'Latisha', 'Shaniqua',
        'Jasmine', 'Tanisha', 'Tia', 'Lakisha', 'Latoya',
        'Yolanda', 'Malika', 'Yvette', 'Aaliyah', 'Shanice'
    ],
    # Common Hispanic names from various sources, preferring those that are
    # in the Numberbatch vocabulary
    [
        'Juan', 'José', 'Miguel', 'Luís', 'Jorge',
        'Santiago', 'Matías', 'Sebastián', 'Mateo', 'Nicolás',
        'Alejandro', 'Samuel', 'Diego', 'Daniel', 'Tomás',
        'Juana', 'Ana', 'Luisa', 'María', 'Elena',
        'Sofía', 'Isabella', 'Valentina', 'Camila', 'Valeria',
        'Luciana', 'Ximena', 'Mariana', 'Victoria', 'Martina',
    ],
    # Common Muslim names from various sources, preferring those that are
    # in the Numberbatch vocabulary
    [
        'Mohammed', 'Omar', 'Ahmed', 'Ali', 'Youssef',
        'Abdullah', 'Yasin', 'Hamza', 'Ayaan', 'Syed',
        'Rishaan', 'Samar', 'Ahmad', 'Zikri', 'Rayyan',
        'Mariam', 'Jana', 'Malak', 'Salma', 'Nour',
        'Lian', 'Fatima', 'Ayesha', 'Zahra', 'Sana',
        'Zara', 'Alya', 'Shaista', 'Zoya', 'Maryam'
    ]
]


def correlation_bias(frame1, frame2, verbose=False):
    """
    Given two DataFrames of word vectors that we don't want to associate with
    each other, find the strongest association for each item in `frame2`
    and compare it to the average.

    Returns a bias value (the average difference between the strongest
    association and the average association) and a confidence interval on that
    value.

    Set 'verbose=True' if you want to see the most biased associations and
    be either sad or confused.
    """
    bias_numbers = []

    centered1 = l2_normalize_rows(subtract_mean_vector(frame1))
    centered2 = l2_normalize_rows(subtract_mean_vector(frame2))
    grid = centered1.dot(centered2.T)

    for i in range(grid.shape[1]):
        col_bias = np.max(grid.iloc[:, i]) - np.mean(grid.iloc[:, i])
        if verbose:
            most_biased = np.argmax(grid.iloc[:, i])
            comparison = centered2.index[i]
            print("%4.4f %s => %s" % (col_bias, comparison, most_biased))
        bias_numbers.append(col_bias)

    mean = np.mean(bias_numbers)
    sem = scipy.stats.sem(bias_numbers)
    return pd.Series(
        [mean, mean - sem * 2, mean + sem * 2],
        index=['bias', 'low', 'high']
    )


def measure_bias(frame):
    """
    Return a DataFrame that measures biases in a semantic space, on four
    data sets:

    - Gender
    - Fine-grained ethnicity
    - Coarse-grained ethnicity
    - Religious beliefs
    """
    gender_binary_axis = normalize_vec(
        get_category_axis(frame, FEMALE_WORDS) - get_category_axis(frame, MALE_WORDS)
    )
    gender_bias_numbers = []
    for female_biased_word, male_biased_word in GENDER_BIAS_PAIRS:
        female_biased_uri = standardized_uri('en', female_biased_word)
        male_biased_uri = standardized_uri('en', male_biased_word)
        diff = normalize_vec(
            get_vector(frame, female_biased_uri) - get_vector(frame, male_biased_uri)
        ).dot(gender_binary_axis)
        gender_bias_numbers.append(diff)

    mean = np.mean(gender_bias_numbers)
    sem = scipy.stats.sem(gender_bias_numbers)
    gender_bias = pd.Series(
        [mean, mean - sem * 2, mean + sem * 2],
        index=['bias', 'low', 'high']
    )

    stereotype_vecs_1 = get_vocabulary_vectors(frame, PEOPLE_BY_ETHNICITY)
    stereotype_vecs_2 = get_vocabulary_vectors(frame, ETHNIC_STEREOTYPE_TERMS)
    fine_ethnic_bias = correlation_bias(stereotype_vecs_1, stereotype_vecs_2)

    stereotype_vecs_1 = get_vocabulary_vectors(frame, COARSE_ETHNICITY_TERMS)
    stereotype_vecs_2 = get_vocabulary_vectors(frame, ETHNIC_STEREOTYPE_TERMS)
    coarse_ethnic_bias = correlation_bias(stereotype_vecs_1, stereotype_vecs_2)

    stereotype_vecs_1 = pd.DataFrame(
        np.vstack([
            get_category_axis(frame, names) for names in ETHNIC_NAME_SETS
        ])
    )
    stereotype_vecs_2 = get_vocabulary_vectors(frame, ETHNIC_STEREOTYPE_TERMS)
    name_ethnic_bias = correlation_bias(stereotype_vecs_1, stereotype_vecs_2)

    stereotype_vecs_1 = get_vocabulary_vectors(frame, PEOPLE_BY_BELIEF)
    stereotype_vecs_2 = get_vocabulary_vectors(frame, BELIEF_STEREOTYPE_TERMS)
    belief_bias = correlation_bias(stereotype_vecs_1, stereotype_vecs_2)

    return pd.DataFrame({
        'gender': gender_bias,
        'ethnicity-fine': fine_ethnic_bias,
        'ethnicity-coarse': coarse_ethnic_bias,
        'ethnicity-names': name_ethnic_bias,
        'beliefs': belief_bias
    }).T