# Code for
# Physics solutions for machine learning privacy leaks
# arXiv:2202.12319
#
# Authors: Alejandro Pozas-Kerstjens and Senaida Hernandez-Santana
#
# Requires: matplotlib for plotting
# numpy for array operations
# pandas for dataset operations
# seaborn for plot visuals
# torch for ML
# tqdm for progress bar
# Last modified: Feb, 2023
###############################################################################
# This file generates Figure 1 in the paper, which illustrates the
# vulnerability of neural networks that store information about input
# variables that are irrelevant for the target task.
###############################################################################
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch.nn as nn
import torch
from matplotlib import rcParams
from tqdm import tqdm
# Global plot styling: white seaborn theme with enlarged fonts, and
# LaTeX-rendered text (amsmath is required for the \text{} commands used in
# the axis labels below).
sns.set(style='white', font_scale=1.8)
rcParams.update({'font.size': 18})
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{amsmath}')
plt.rc('font', family='serif')
# Fix both RNG seeds so dataset generation and model initialization are
# reproducible across runs.
np.random.seed(31415)
torch.manual_seed(31415)
class ToyNNModel(nn.Module):
    '''Minimal one-layer binary classifier.

    Computes y = sigmoid(W_1 x_1 + W_2 x_2 + b): a single affine map from
    the two input features to one logit, squashed into (0, 1).
    '''
    def __init__(self):
        super().__init__()
        # One affine layer (2 inputs -> 1 logit) plus a sigmoid output.
        # Attribute names are part of the script's interface: the training
        # loop reads `linear1.weight` and `linear1.bias` directly.
        self.linear1 = nn.Linear(2, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        # Affine transform first, then squash to a probability-like score.
        return self.activation(self.linear1(x))
def create_dataset(typ, size=100):
    '''Generate a toy dataset of two-dimensional points.

    The first ("relevant") feature is drawn from a standard normal
    distribution and fully determines the binary label (1 if positive,
    0 otherwise). The second ("irrelevant") feature is constant — +1 for
    ``typ='plus'`` and -1 for ``typ='minus'`` — and thus carries no
    information about the label.

    Parameters
    ----------
    typ : str
        Either ``'plus'`` or ``'minus'``; selects the sign of the constant
        irrelevant feature.
    size : int, optional
        Number of samples to generate (default 100).

    Returns
    -------
    tuple of torch.Tensor
        ``(dataset, label)`` with shapes ``(size, 2)`` and ``(size, 1)``.

    Raises
    ------
    ValueError
        If ``typ`` is neither ``'plus'`` nor ``'minus'``.
    '''
    if typ not in ('plus', 'minus'):
        # Fail fast: the original code left `irrelevant` unbound for an
        # unknown `typ`, producing a confusing NameError further down.
        raise ValueError(f"typ must be 'plus' or 'minus', got {typ!r}")
    relevant = np.random.randn(size, 1)
    label = (relevant > 0).astype(float)
    sign = 1.0 if typ == 'plus' else -1.0
    irrelevant = np.full((size, 1), sign)
    dataset = np.concatenate((relevant, irrelevant), axis=1)
    return torch.Tensor(dataset), torch.Tensor(label)
# Train and collect parameters of interest.
# For each dataset variant ('plus'/'minus' irrelevant feature) train 100
# independent models and record how the weight acting on the irrelevant
# input and the bias evolve over the course of training.
criterion = nn.BCELoss()
all_weights_plus = []    # irrelevant-weight trajectories, x_irr = +1 runs
all_biases_plus = []     # bias trajectories, x_irr = +1 runs
all_weights_minus = []   # same two quantities for x_irr = -1 runs
all_biases_minus = []
for _ in tqdm(range(100), desc='Training models'):
    for typ in ['plus', 'minus']:
        model = ToyNNModel()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        input_tensor, label_tensor = create_dataset(typ, 100)
        # Record the parameters at initialization. Index [-1] selects the
        # weight multiplying the irrelevant (second) input feature.
        weights = [model.linear1.weight[0][-1].clone().item()]
        biases = [model.linear1.bias[0].clone().item()]
        num_epochs = 200
        for _ in range(num_epochs):
            # One full-batch gradient step on the binary cross-entropy loss.
            output = model(input_tensor)
            loss = criterion(output, label_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Append the post-step parameter values to the trajectories.
            weights.append(model.linear1.weight[0][-1].item())
            biases.append(model.linear1.bias[0].item())
        # Evaluate training accuracy with a 0.5 decision threshold.
        with torch.no_grad():
            accuracy = ((model(input_tensor) > 0.5) == label_tensor).float() \
                                                                    .mean() \
                                                                    .item()
        # Keep only runs that actually solved the task, so the figure shows
        # parameters of successfully trained models.
        if accuracy > 0.95:
            if typ == 'plus':
                all_weights_plus.append(weights)
                all_biases_plus.append(biases)
            else:
                all_weights_minus.append(weights)
                all_biases_minus.append(biases)
# Stack the kept trajectories into arrays of shape
# (n_successful_models, num_epochs + 1).
all_weights_plus = np.array(all_weights_plus)
all_weights_minus = np.array(all_weights_minus)
all_biases_plus = np.array(all_biases_plus)
all_biases_minus = np.array(all_biases_minus)
# Plot before training: column 0 of each trajectory holds the parameter
# values at initialization, one scatter point per trained model.
plt.scatter(all_weights_plus[:, 0], all_biases_plus[:, 0],
            label=r'$x_\text{irr}=+1$', marker='*', s=50)
plt.scatter(all_weights_minus[:, 0], all_biases_minus[:, 0],
            label=r'$x_\text{irr}=-1$')
plt.title('Before training')
plt.xlabel(r'Irrelevant weight, $W_\text{irr}$')
plt.ylabel(r'Bias, $b$')
plt.legend(handlelength=0.5)
plt.savefig('simple_before.pdf', bbox_inches='tight')
# Plot after training: column -1 of each trajectory holds the parameter
# values after the final epoch.
plt.clf()  # clear the previous figure; styling set at the top still applies
plt.scatter(all_weights_plus[:, -1], all_biases_plus[:, -1],
            label=r'$x_\text{irr}=+1$', marker='*', s=50)
plt.scatter(all_weights_minus[:, -1], all_biases_minus[:, -1],
            label=r'$x_\text{irr}=-1$')
plt.title('After training')
plt.xlabel(r'Irrelevant weight, $W_\text{irr}$')
plt.ylabel(r'Bias, $b$')
plt.legend(loc=(0.375, 0.02), handlelength=0.5)
plt.savefig('simple_after.pdf', bbox_inches='tight')