# Code for
# Physics solutions for machine learning privacy leaks
# arXiv:2202.12319
#
# Authors: Alejandro Pozas-Kerstjens and Senaida Hernandez-Santana
#
# Requires: matplotlib for plotting
# numpy for array operations
# pandas for dataset operations
# seaborn for plot visuals
# torch for ML
# tqdm for progress bar
# Last modified: Feb, 2023
###############################################################################
# This file generates Figure 1 in the paper, which illustrates the
# vulnerability of neural networks that store information about input
# variables that are irrelevant for the target task.
###############################################################################
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch.nn as nn
import torch
from matplotlib import rcParams
from tqdm import tqdm
# Global plot styling: white seaborn theme with enlarged fonts, and
# LaTeX-rendered text (amsmath is required for the \text{} commands used in
# the axis labels below).
sns.set(style='white', font_scale=1.8)
rcParams.update({'font.size': 18})
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{amsmath}')
plt.rc('font', family='serif')
# Fix both RNG seeds so dataset generation and model initialization are
# reproducible across runs.
np.random.seed(31415)
torch.manual_seed(31415)
class ToyNNModel(nn.Module):
    '''Minimal one-layer binary classifier.

    Computes y = sigmoid(W_1 x_1 + W_2 x_2 + b): a single affine map from
    the two input features to one logit, squashed into (0, 1).
    '''
    def __init__(self):
        super().__init__()
        # One affine layer (2 inputs -> 1 logit) plus a sigmoid output.
        # Attribute names are part of the script's interface: the training
        # loop reads `linear1.weight` and `linear1.bias` directly.
        self.linear1 = nn.Linear(2, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        # Affine transform first, then squash to a probability-like score.
        return self.activation(self.linear1(x))
def create_dataset(typ, size=100):
    '''Generate a toy dataset of two-dimensional points.

    The first ("relevant") feature is drawn from a standard normal
    distribution and fully determines the binary label (1 if positive,
    0 otherwise). The second ("irrelevant") feature is constant — +1 for
    ``typ='plus'`` and -1 for ``typ='minus'`` — and thus carries no
    information about the label.

    Parameters
    ----------
    typ : str
        Either ``'plus'`` or ``'minus'``; selects the sign of the constant
        irrelevant feature.
    size : int, optional
        Number of samples to generate (default 100).

    Returns
    -------
    tuple of torch.Tensor
        ``(dataset, label)`` with shapes ``(size, 2)`` and ``(size, 1)``.

    Raises
    ------
    ValueError
        If ``typ`` is neither ``'plus'`` nor ``'minus'``.
    '''
    if typ not in ('plus', 'minus'):
        # Fail fast: the original code left `irrelevant` unbound for an
        # unknown `typ`, producing a confusing NameError further down.
        raise ValueError(f"typ must be 'plus' or 'minus', got {typ!r}")
    relevant = np.random.randn(size, 1)
    label = (relevant > 0).astype(float)
    sign = 1.0 if typ == 'plus' else -1.0
    irrelevant = np.full((size, 1), sign)
    dataset = np.concatenate((relevant, irrelevant), axis=1)
    return torch.Tensor(dataset), torch.Tensor(label)
# Train and collect parameters of interest.
# For each dataset variant ('plus'/'minus' irrelevant feature) train 100
# independent models and record how the weight acting on the irrelevant
# input and the bias evolve over the course of training.
criterion = nn.BCELoss()
all_weights_plus = []    # irrelevant-weight trajectories, x_irr = +1 runs
all_biases_plus = []     # bias trajectories, x_irr = +1 runs
all_weights_minus = []   # same two quantities for x_irr = -1 runs
all_biases_minus = []
for _ in tqdm(range(100), desc='Training models'):
    for typ in ['plus', 'minus']:
        model = ToyNNModel()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        input_tensor, label_tensor = create_dataset(typ, 100)
        # Record the parameters at initialization. Index [-1] selects the
        # weight multiplying the irrelevant (second) input feature.
        weights = [model.linear1.weight[0][-1].clone().item()]
        biases = [model.linear1.bias[0].clone().item()]
        num_epochs = 200
        for _ in range(num_epochs):
            # One full-batch gradient step on the binary cross-entropy loss.
            output = model(input_tensor)
            loss = criterion(output, label_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Append the post-step parameter values to the trajectories.
            weights.append(model.linear1.weight[0][-1].item())
            biases.append(model.linear1.bias[0].item())
        # Evaluate training accuracy with a 0.5 decision threshold.
        with torch.no_grad():
            accuracy = ((model(input_tensor) > 0.5) == label_tensor).float() \
                                                                    .mean() \
                                                                    .item()
        # Keep only runs that actually solved the task, so the figure shows
        # parameters of successfully trained models.
        if accuracy > 0.95:
            if typ == 'plus':
                all_weights_plus.append(weights)
                all_biases_plus.append(biases)
            else:
                all_weights_minus.append(weights)
                all_biases_minus.append(biases)
# Stack the kept trajectories into arrays of shape
# (n_successful_models, num_epochs + 1).
all_weights_plus = np.array(all_weights_plus)
all_weights_minus = np.array(all_weights_minus)
all_biases_plus = np.array(all_biases_plus)
all_biases_minus = np.array(all_biases_minus)
# Plot before training: column 0 of each trajectory holds the parameter
# values at initialization, one scatter point per trained model.
plt.scatter(all_weights_plus[:, 0], all_biases_plus[:, 0],
            label=r'$x_\text{irr}=+1$', marker='*', s=50)
plt.scatter(all_weights_minus[:, 0], all_biases_minus[:, 0],
            label=r'$x_\text{irr}=-1$')
plt.title('Before training')
plt.xlabel(r'Irrelevant weight, $W_\text{irr}$')
plt.ylabel(r'Bias, $b$')
plt.legend(handlelength=0.5)
plt.savefig('simple_before.pdf', bbox_inches='tight')
# Plot after training: column -1 of each trajectory holds the parameter
# values after the final epoch.
plt.clf()  # clear the previous figure; styling set at the top still applies
plt.scatter(all_weights_plus[:, -1], all_biases_plus[:, -1],
            label=r'$x_\text{irr}=+1$', marker='*', s=50)
plt.scatter(all_weights_minus[:, -1], all_biases_minus[:, -1],
            label=r'$x_\text{irr}=-1$')
plt.title('After training')
plt.xlabel(r'Irrelevant weight, $W_\text{irr}$')
plt.ylabel(r'Bias, $b$')
plt.legend(loc=(0.375, 0.02), handlelength=0.5)
plt.savefig('simple_after.pdf', bbox_inches='tight')