import torch

from ..utils import *
from ..attack import Attack


class PGN(Attack):
"""
PGN (Penalizing Gradient Norm)
'Boosting Adversarial Transferability by Achieving Flat Local Maxima (NeurIPS 2023)' (https://arxiv.org/abs/2306.05225)
Arguments:
model_name (str): the name of surrogate model for attack.
epsilon (float): the perturbation budget.
alpha (float): the step size.
beta (float): the relative value for the neighborhood.
num_neighbor (int): the number of samples for estimating the gradient variance.
gamma (float): the balanced coefficient.
epoch (int): the number of iterations.
decay (float): the decay factor for momentum calculation.
targeted (bool): targeted/untargeted attack.
random_start (bool): whether using random initialization for delta.
norm (str): the norm of perturbation, l2/linfty.
loss (str): the loss function.
device (torch.device): the device for data. If it is None, the device would be same as model.
Official arguments:
epsilon=16/255, alpha=epsilon/epoch=1.6/255, beta=3.0, gamma=0.5, num_neighbor=20, epoch=10, decay=1.
Example script:
python main.py --input_dir ./path/to/data --output_dir adv_data/pgn/resnet18 --attack pgn --model=resnet18
python main.py --input_dir ./path/to/data --output_dir adv_data/pgn/resnet18 --eval
"""
    def __init__(self, model_name, epsilon=16/255, alpha=1.6/255, beta=3.0, gamma=0.5, num_neighbor=20, epoch=10, decay=1., targeted=False,
                 random_start=False, norm='linfty', loss='crossentropy', device=None, attack='PGN', **kwargs):
        super().__init__(attack, model_name, epsilon, targeted, random_start, norm, loss, device)
        self.alpha = epsilon / epoch  # follows the official setting alpha = epsilon/epoch, overriding the alpha argument
        self.zeta = beta * epsilon    # radius of the sampling neighborhood
        self.gamma = gamma
        self.epoch = epoch
        self.decay = decay
        self.num_neighbor = num_neighbor
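
    # In PGN, the update direction is a convex combination of the gradient at a
    # randomly sampled neighbor x_near and the gradient at a one-step look-ahead
    # point x_next, averaged over num_neighbor samples:
    #     g_avg = (1/N) * sum_i [(1 - gamma) * g(x_near_i) + gamma * g(x_next_i)]
    # where x_next_i = x_near_i - alpha * g(x_near_i) / mean(|g(x_near_i)|).
    # Penalizing the look-ahead gradient steers the attack toward flat local maxima.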
    def get_averaged_gradient(self, data, delta, label, **kwargs):
        """
        Calculate the averaged updated gradient
        """
        averaged_gradient = 0
        for _ in range(self.num_neighbor):
            # Randomly sample a point in the zeta-neighborhood of the current adversarial example
            x_near = self.transform(data + delta + torch.zeros_like(delta).uniform_(-self.zeta, self.zeta).to(self.device))

            # Calculate the output and the loss of x_near
            logits = self.get_logits(x_near)
            loss = self.get_loss(logits, label)

            # Calculate the gradient at x_near w.r.t. delta
            g_1 = self.get_grad(loss, delta)

            # Compute the predicted (look-ahead) point x_next, one normalized step along the descent direction
            x_next = self.transform(x_near + self.alpha*(-g_1 / (torch.abs(g_1).mean(dim=(1,2,3), keepdim=True))))

            # Calculate the output and the loss of x_next
            logits = self.get_logits(x_next)
            loss = self.get_loss(logits, label)

            # Calculate the gradient at x_next w.r.t. delta
            g_2 = self.get_grad(loss, delta)

            # Accumulate the combined gradient
            averaged_gradient += (1-self.gamma)*g_1 + self.gamma*g_2

        return averaged_gradient / self.num_neighbor
    def forward(self, data, label, **kwargs):
        """
        The attack procedure for PGN

        Arguments:
            data: (N, C, H, W) tensor for input images
            label: (N,) tensor for ground-truth labels if untargeted, otherwise targeted labels
        """
        if self.targeted:
            assert len(label) == 2
            label = label[1]  # the second element is the targeted label tensor
        data = data.clone().detach().to(self.device)
        label = label.clone().detach().to(self.device)

        # Initialize adversarial perturbation
        delta = self.init_delta(data)

        momentum, averaged_gradient = 0, 0
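        # Each iteration estimates the gradient averaged over random neighbors,
        # folds it into the momentum (assuming the MI-FGSM-style accumulation
        # implemented by get_momentum in the Attack base class), and takes a
        # normalized step of size alpha on delta.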
        for _ in range(self.epoch):
            # Calculate the averaged gradient
            averaged_gradient = self.get_averaged_gradient(data, delta, label)

            # Calculate the momentum
            momentum = self.get_momentum(averaged_gradient, momentum)

            # Update the adversarial perturbation
            delta = self.update_delta(delta, data, momentum, self.alpha)

        return delta.detach()
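

# Usage sketch. The import path is hypothetical, and it is assumed here that
# Attack.__call__ dispatches to forward() and that inputs are normalized to [0, 1];
# both depend on how this repository wires up its attacks:
#
#     from transferattack.gradient.pgn import PGN
#
#     attacker = PGN(model_name='resnet18')
#     delta = attacker(data, label)            # data: (N, C, H, W), label: (N,)
#     adv_images = (data + delta).clamp(0, 1)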