diff --git a/experiments/attack_defense_test.py b/experiments/attack_defense_test.py
index b9a26ed..441e6b3 100644
--- a/experiments/attack_defense_test.py
+++ b/experiments/attack_defense_test.py
@@ -1,9 +1,12 @@
 import torch
+import copy
 import warnings
 
 from torch import device
+from models_builder.attack_defense_manager import FrameworkAttackDefenseManager
+from models_builder.attack_defense_metric import AttackMetric, DefenseMetric
 from models_builder.models_utils import apply_decorator_to_graph_layers
 from src.aux.utils import POISON_ATTACK_PARAMETERS_PATH, POISON_DEFENSE_PARAMETERS_PATH, EVASION_ATTACK_PARAMETERS_PATH, \
     EVASION_DEFENSE_PARAMETERS_PATH
@@ -14,18 +17,22 @@ from attacks.QAttack import qattack
 from defense.JaccardDefense import jaccard_def
 from attacks.metattack import meta_gradient_attack
+from attacks.CLGA import CLGA_gpt
 from defense.GNNGuard import gnnguard
 
 
-def test_attack_defense():
-    my_device = device('cuda' if torch.cuda.is_available() else 'cpu')
+def test_attack_defense(d='Cora', m='gin_2', a_e=None, d_e=None, a_p=None, d_p=None):
+    # my_device = device('cuda' if torch.cuda.is_available() else 'cpu')
+    my_device = device('cpu')
 
     full_name = None
     # full_name = ("multiple-graphs", "TUDataset", 'MUTAG')
     # full_name = ("single-graph", "custom", 'karate')
-    full_name = ("single-graph", "Planetoid", 'Cora')
-    # full_name = ("single-graph", "Amazon", 'Photo')
+    if d == 'Cora':
+        full_name = ("single-graph", "Planetoid", 'Cora')
+    elif d == 'Photo':
+        full_name = ("single-graph", "Amazon", 'Photo')
     # full_name = ("single-graph", "Planetoid", 'CiteSeer')
     # full_name = ("multiple-graphs", "TUDataset", 'PROTEINS')
@@ -60,7 +67,12 @@ def test_attack_defense():
 
     # print(data.train_mask)
 
-    gnn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn')
+    if m == 'gcn_2':
+        gnn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn')
+    elif m == 'gcn_3':
+        gnn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn_gcn')
+    elif m == 'gin_2':
+        gnn = model_configs_zoo(dataset=dataset, model_name='gin_gin')
     # gnn = model_configs_zoo(dataset=dataset, model_name='gat_gcn_sage_gcn_gcn')
     # gnn = model_configs_zoo(dataset=dataset, model_name='gcn_gcn_lin')
     # gnn = model_configs_zoo(dataset=dataset, model_name='test_gnn')
@@ -102,8 +114,8 @@ def test_attack_defense():
         modification=ModelModificationConfig(model_ver_ind=0, epochs=steps_epochs)
     )
 
-    save_model_flag = False
-    # save_model_flag = True
+    # save_model_flag = False
+    save_model_flag = True
 
     # data.x = data.x.float()
     gnn_model_manager.gnn.to(my_device)
@@ -120,11 +132,12 @@ def test_attack_defense():
     # )
 
     metafull_poison_attack_config = ConfigPattern(
-        _class_name="MetaAttackFull",
+        _class_name="MetaAttackApprox",
         _import_path=POISON_ATTACK_PARAMETERS_PATH,
         _config_class="PoisonAttackConfig",
         _config_kwargs={
-            "num_nodes": dataset.dataset.x.shape[0]
+            "num_nodes": dataset.dataset.x.shape[0],
+            "lambda_": 0,
         }
     )
@@ -153,7 +166,7 @@ def test_attack_defense():
         _import_path=POISON_DEFENSE_PARAMETERS_PATH,
         _config_class="PoisonDefenseConfig",
         _config_kwargs={
-            "threshold": 0.05,
+            "threshold": 0.4,
         }
     )
@@ -250,7 +263,26 @@ def test_attack_defense():
         _import_path=EVASION_ATTACK_PARAMETERS_PATH,
         _config_class="EvasionAttackConfig",
         _config_kwargs={
-            "epsilon": 0.1 * 1,
+            "epsilon": 0.01,
         }
     )
+
+    clga_poison_attack_config = ConfigPattern(
+        _class_name="CLGAAttack",
+        _import_path=POISON_ATTACK_PARAMETERS_PATH,
+        _config_class="PoisonAttackConfig",
+        _config_kwargs={
+            "num_nodes": dataset.dataset.x.shape[0],
+            "feature_shape": dataset.dataset.x.shape[1]
+        }
+    )
+
+    fgsm_evasion_attack_config1 = ConfigPattern(
+        _class_name="FGSM",
+        _import_path=EVASION_ATTACK_PARAMETERS_PATH,
+        _config_class="EvasionAttackConfig",
+        _config_kwargs={
+            "epsilon": 0.1,
+        }
+    )
     at_evasion_defense_config = ConfigPattern(
@@ -259,40 +291,77 @@ def test_attack_defense():
         _import_path=EVASION_DEFENSE_PARAMETERS_PATH,
         _config_class="EvasionDefenseConfig",
         _config_kwargs={
             "attack_name": None,
-            "attack_config": fgsm_evasion_attack_config0
+            "attack_config": fgsm_evasion_attack_config1
         }
     )
 
     # gnn_model_manager.set_poison_attacker(poison_attack_config=random_poison_attack_config)
     # gnn_model_manager.set_poison_defender(poison_defense_config=gnnguard_poison_defense_config)
-    # gnn_model_manager.set_evasion_attacker(evasion_attack_config=fgsm_evasion_attack_config)
-    # gnn_model_manager.set_evasion_defender(evasion_defense_config=autoencoder_evasion_defense_config)
+    if a_p is not None:
+        if a_p == 'metaattack':
+            gnn_model_manager.set_poison_attacker(poison_attack_config=metafull_poison_attack_config)
+        elif a_p == 'clga':
+            gnn_model_manager.set_poison_attacker(poison_attack_config=clga_poison_attack_config)
+    if d_p is not None:
+        if d_p == 'gnnguard':
+            gnn_model_manager.set_poison_defender(poison_defense_config=gnnguard_poison_defense_config)
+        elif d_p == 'jaccard':
+            gnn_model_manager.set_poison_defender(poison_defense_config=jaccard_poison_defense_config)
+    if a_e is not None:
+        if a_e == 'fgsm':
+            gnn_model_manager.set_evasion_attacker(evasion_attack_config=fgsm_evasion_attack_config)
+        elif a_e == 'nettack':
+            gnn_model_manager.set_evasion_attacker(evasion_attack_config=netattack_evasion_attack_config)
+    if d_e is not None:
+        if d_e == 'at':
+            gnn_model_manager.set_evasion_defender(evasion_defense_config=at_evasion_defense_config)
 
     warnings.warn("Start training")
     dataset.train_test_split()
 
-    try:
-        raise FileNotFoundError()
-        # gnn_model_manager.load_model_executor()
-    except FileNotFoundError:
-        gnn_model_manager.epochs = gnn_model_manager.modification.epochs = 0
-        train_test_split_path = gnn_model_manager.train_model(gen_dataset=dataset, steps=steps_epochs,
-                                                              save_model_flag=save_model_flag,
-                                                              metrics=[Metric("F1", mask='train', average=None)])
+    for i in range(2):
+        adm = FrameworkAttackDefenseManager(
+            gen_dataset=copy.deepcopy(dataset),
+            gnn_manager=gnn_model_manager,
+        )
+        # adm.evasion_defense_pipeline(
+        #     steps=steps_epochs,
+        #     save_model_flag=save_model_flag,
+        #     metrics_attack=[AttackMetric("ASR"), AttackMetric("AuccAttackDiff"),],
+        #     metrics_defense=[DefenseMetric("AuccDefenseCleanDiff"), DefenseMetric("AuccDefenseAttackDiff"), ],
+        #     mask='test'
+        # )
+        adm.poison_defense_pipeline(
+            steps=steps_epochs,
+            save_model_flag=save_model_flag,
+            metrics_attack=[AttackMetric("ASR"), AttackMetric("AuccAttackDiff"), ],
+            metrics_defense=[DefenseMetric("AuccDefenseCleanDiff"), DefenseMetric("AuccDefenseAttackDiff"), ],
+            mask='test'
+        )
 
-    if train_test_split_path is not None:
-        dataset.save_train_test_mask(train_test_split_path)
-        train_mask, val_mask, test_mask, train_test_sizes = torch.load(train_test_split_path / 'train_test_split')[
-                                                            :]
-        dataset.train_mask, dataset.val_mask, dataset.test_mask = train_mask, val_mask, test_mask
-        data.percent_train_class, data.percent_test_class = train_test_sizes
-
-    warnings.warn("Training was successful")
-
-    metric_loc = gnn_model_manager.evaluate_model(
-        gen_dataset=dataset, metrics=[Metric("F1", mask='test', average='macro'),
-                                      Metric("Accuracy", mask='test')])
-    print(metric_loc)
+    #
+    # try:
+    #     raise FileNotFoundError()
+    #     # gnn_model_manager.load_model_executor()
+    # except FileNotFoundError:
+    #     gnn_model_manager.epochs = gnn_model_manager.modification.epochs = 0
+    #     train_test_split_path = gnn_model_manager.train_model(gen_dataset=dataset, steps=steps_epochs,
+    #                                                           save_model_flag=save_model_flag,
+    #                                                           metrics=[Metric("F1", mask='train', average=None)])
+    #
+    #     if train_test_split_path is not None:
+    #         dataset.save_train_test_mask(train_test_split_path)
+    #         train_mask, val_mask, test_mask, train_test_sizes = torch.load(train_test_split_path / 'train_test_split')[
+    #                                                             :]
+    #         dataset.train_mask, dataset.val_mask, dataset.test_mask = train_mask, val_mask, test_mask
+    #         data.percent_train_class, data.percent_test_class = train_test_sizes
+    #
+    #     warnings.warn("Training was successful")
+    #
+    #     metric_loc = gnn_model_manager.evaluate_model(
+    #         gen_dataset=dataset, metrics=[Metric("F1", mask='test', average='macro'),
+    #                                       Metric("Accuracy", mask='test')])
+    #     print(metric_loc)
 
 
 def test_meta():
@@ -678,28 +747,65 @@ def test_jaccard():
         _import_path=EVASION_ATTACK_PARAMETERS_PATH,
         _config_class="EvasionAttackConfig",
         _config_kwargs={
-            "epsilon": 0.007 * 1,
+            "epsilon": 0.005,
         }
     )
-    # evasion_defense_config = ConfigPattern(
-    #     _class_name="JaccardDefender",
-    #     _import_path=EVASION_DEFENSE_PARAMETERS_PATH,
-    #     _config_class="EvasionDefenseConfig",
-    #     _config_kwargs={
-    #     }
-    # )
+    fgsm_evasion_attack_config1 = ConfigPattern(
+        _class_name="FGSM",
+        _import_path=EVASION_ATTACK_PARAMETERS_PATH,
+        _config_class="EvasionAttackConfig",
+        _config_kwargs={
+            "epsilon": 0.01,
+        }
+    )
+    at_evasion_defense_config = ConfigPattern(
+        _class_name="AdvTraining",
+        _import_path=EVASION_DEFENSE_PARAMETERS_PATH,
+        _config_class="EvasionDefenseConfig",
+        _config_kwargs={
+            "attack_name": None,
+            "attack_config": fgsm_evasion_attack_config1
+        }
+    )
+
+    gradientregularization_evasion_defense_config = ConfigPattern(
+        _class_name="GradientRegularizationDefender",
+        _import_path=EVASION_DEFENSE_PARAMETERS_PATH,
+        _config_class="EvasionDefenseConfig",
+        _config_kwargs={
+            "regularization_strength": 0.1 * 500
+        }
+    )
+
     poison_defense_config = ConfigPattern(
         _class_name="JaccardDefender",
         _import_path=POISON_DEFENSE_PARAMETERS_PATH,
         _config_class="PoisonDefenseConfig",
         _config_kwargs={
+            'threshold': 0.4
         }
     )
+
+    node_idxs = [random.randint(0, 500) for _ in range(20)]
+
+    netattackgroup_evasion_attack_config = ConfigPattern(
+        _class_name="NettackGroupEvasionAttacker",
+        _import_path=EVASION_ATTACK_PARAMETERS_PATH,
+        _config_class="EvasionAttackConfig",
+        _config_kwargs={
+            "node_idxs": node_idxs,  # Nodes for attack
+            "n_perturbations": 50,
+            "perturb_features": True,
+            "perturb_structure": True,
+            "direct": True,
+            "n_influencers": 10
+        }
+    )
 
     # gnn_model_manager.set_poison_attacker(poison_attack_config=poison_attack_config)
-    gnn_model_manager.set_poison_defender(poison_defense_config=poison_defense_config)
-    gnn_model_manager.set_evasion_attacker(evasion_attack_config=evasion_attack_config)
-    # gnn_model_manager.set_evasion_defender(evasion_defense_config=evasion_defense_config)
+    # gnn_model_manager.set_poison_defender(poison_defense_config=poison_defense_config)
+    gnn_model_manager.set_evasion_attacker(evasion_attack_config=netattackgroup_evasion_attack_config)
+    # gnn_model_manager.set_evasion_defender(evasion_defense_config=gradientregularization_evasion_defense_config)
 
     warnings.warn("Start training")
     dataset.train_test_split()
@@ -723,6 +829,8 @@ def test_jaccard():
 
     warnings.warn("Training was successful")
 
+    mask_loc = Metric.create_mask_by_target_list(y_true=dataset.labels, target_list=node_idxs)
+
     metric_loc = gnn_model_manager.evaluate_model(
         gen_dataset=dataset, metrics=[Metric("F1", mask='train', average='macro'),
                                       Metric("Accuracy", mask='train')])
@@ -733,6 +841,11 @@ def test_jaccard():
                                       Metric("Accuracy", mask='test')])
     print("TEST", metric_loc)
 
+    metric_loc = gnn_model_manager.evaluate_model(
+        gen_dataset=dataset, metrics=[Metric("F1", mask=mask_loc, average='macro'),
+                                      Metric("Accuracy", mask=mask_loc)])
+    print(f"NODE IDXS: {node_idxs}", metric_loc)
+
 
 def test_adv_training():
     from defense.evasion_defense import AdvTraining
@@ -1005,11 +1118,28 @@ def test_pgd():
     print(f"After PGD attack on graph (MUTAG dataset): {info_after_pgd_attack_on_graph}")
 
 
+def exp_pipeline():
+    dataset_grid = ['Cora']
+    # model_grid = ['gcn_2', 'gcn_3', 'gin_2']
+    model_grid = ['gin_2']
+    attack_grid_evasion = ['fgsm', 'nettack']
+    attack_grid_poison = ['clga']
+    defense_grid_evasion = []
+    defense_grid_poison = ['jaccard']
+
+    for d in dataset_grid:
+        for m in model_grid:
+            for a_p in attack_grid_poison:
+                for d_p in defense_grid_poison:
+                    test_attack_defense(d, m, a_p=a_p, d_p=d_p)
+
+
 if __name__ == '__main__':
     import random
-    random.seed(10)
-    test_attack_defense()
+    # random.seed(10)
+    # test_attack_defense()
+    exp_pipeline()
     # torch.manual_seed(5000)
     # test_gnnguard()
     # test_jaccard()
diff --git a/metainfo/poison_attack_parameters.json b/metainfo/poison_attack_parameters.json
index 4f5da26..1cc7601 100644
--- a/metainfo/poison_attack_parameters.json
+++ b/metainfo/poison_attack_parameters.json
@@ -15,6 +15,18 @@
     "train_iters": ["Train iters (surrogate)", "int", 200, {"min": 0, "step": 1}, "Trainig iterations for surrogate model"],
     "attack_structure": ["Attack structure", "bool", true, {}, "whether change graph structure with attack or not"],
     "attack_features": ["Attack features", "bool", false, {}, "whether change node features with attack or not"]
+  },
+  "CLGAAttack": {
+    "learning_rate": ["Learning Rate", "float", 0.01, {"min": 0.0001, "max": 0.1, "step": 0.001}, "Learning rate for model optimization"],
+    "num_hidden": ["Hidden Units", "int", 256, {"min": 16, "max": 1024, "step": 16}, "Number of hidden units in the GCN encoder"],
+    "num_proj_hidden": ["Projection Units", "int", 32, {"min": 16, "max": 128, "step": 16}, "Number of units in the projection head"],
+    "activation": ["Activation Function", "str", "prelu", ["prelu", "relu", "tanh", "sigmoid"], "Activation function for the GCN encoder"],
+    "drop_edge_rate_1": ["Drop Edge Rate (View 1)", "float", 0.3, {"min": 0.0, "max": 1.0, "step": 0.01}, "Probability of dropping edges in the first augmented view"],
+    "drop_edge_rate_2": ["Drop Edge Rate (View 2)", "float", 0.4, {"min": 0.0, "max": 1.0, "step": 0.01}, "Probability of dropping edges in the second augmented view"],
+    "tau": ["Temperature coeff", "float", 0.4, {"min": 0.1, "max": 1.0, "step": 0.1}, "Temperature parameter for contrastive loss"],
+    "num_epochs": ["Number of Epochs", "int", 3000, {"min": 100, "max": 10000, "step": 100}, "Number of training epochs for the attack"],
+    "weight_decay": ["Weight Decay", "float", 1e-5, {"min": 1e-6, "max": 1e-3, "step": 1e-5}, "Weight decay (L2 regularization) coefficient"],
+    "drop_scheme": ["Drop Scheme", "str", "degree", ["degree", "pr", "evc", "uniform"], "Scheme for dropping edges or features"]
   }
 }
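For illustration, the CLGAAttack parameters registered above are consumed through the same ConfigPattern mechanism that experiments/attack_defense_test.py already uses. A minimal sketch, not part of the patch, reusing ConfigPattern, POISON_ATTACK_PARAMETERS_PATH, dataset and gnn_model_manager from that script; the overridden values are only examples, and the defaults come from the JSON entry above:

    # Hypothetical override of a few CLGAAttack defaults; mirrors clga_poison_attack_config above.
    clga_poison_attack_config = ConfigPattern(
        _class_name="CLGAAttack",
        _import_path=POISON_ATTACK_PARAMETERS_PATH,
        _config_class="PoisonAttackConfig",
        _config_kwargs={
            "num_nodes": dataset.dataset.x.shape[0],
            "feature_shape": dataset.dataset.x.shape[1],
            "num_epochs": 500,          # JSON default is 3000
            "tau": 0.4,
            "drop_edge_rate_1": 0.3,
            "drop_edge_rate_2": 0.4,
        }
    )
    gnn_model_manager.set_poison_attacker(poison_attack_config=clga_poison_attack_config)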
diff --git a/src/attacks/CLGA/CLGA.py b/src/attacks/CLGA/CLGA.py
new file mode 100644
index 0000000..68a4ed2
--- /dev/null
+++ b/src/attacks/CLGA/CLGA.py
@@ -0,0 +1,64 @@
+import torch
+import random
+from torch.nn import functional as F
+from attacks.poison_attacks import PoisonAttacker
+
+from torch_geometric.utils import to_dense_adj
+
+
+class CLGAAttack(PoisonAttacker):
+    name = "CLGAAttack"
+
+    def __init__(self, num_nodes, feature_shape, encoder, augmentation_set, threshold, device="cpu"):
+        super().__init__()
+        self.num_nodes = num_nodes
+        self.feature_shape = feature_shape
+        self.encoder = encoder  # Differentiable encoder (e.g., GCN)
+        self.augmentation_set = augmentation_set  # Set of augmentation methods
+        self.threshold = threshold  # Maximum number of edge changes
+        self.device = device
+
+        self.modified_adj = None
+        self.augmented_graph = None
+
+    def attack(self, adj_matrix, features):
+        """
+        Execute the CLGA attack on the graph to maximize contrastive loss.
+        """
+        adj_matrix = to_dense_adj(adj_matrix).squeeze()
+        current_adj = adj_matrix.clone().to(self.device)
+        for iteration in range(self.threshold):
+            gradients_sum = torch.zeros_like(current_adj)
+
+            for _ in range(len(self.augmentation_set)):
+                # Generate augmented views
+                t1, t2 = random.sample(self.augmentation_set, 2)
+                adj_view1, features_view1 = t1(current_adj, features)
+                adj_view2, features_view2 = t2(current_adj, features)
+
+                # Forward pass and compute contrastive loss
+                embeddings1 = self.encoder(adj_view1, features_view1)
+                embeddings2 = self.encoder(adj_view2, features_view2)
+                loss = self.contrastive_loss(embeddings1, embeddings2)
+
+                # Backpropagate to compute gradients
+                adj_grad1 = torch.autograd.grad(loss, adj_view1, retain_graph=True)[0]
+                adj_grad2 = torch.autograd.grad(loss, adj_view2, retain_graph=True)[0]
+                gradients_sum += adj_grad1 + adj_grad2
+
+            # Flip the edge with the largest gradient
+            max_gradient_index = torch.argmax(gradients_sum.abs())
+            row, col = divmod(max_gradient_index, current_adj.shape[1])
+            current_adj[row, col] = 1 - current_adj[row, col]  # Flip edge
+            current_adj[col, row] = current_adj[row, col]  # Ensure symmetry
+
+        # Save updated adjacency
+        self.modified_adj = current_adj.detach()
+
+    def contrastive_loss(self, embeddings1, embeddings2):
+        """
+        Compute the contrastive loss based on two embeddings.
+        """
+        pos_loss = F.cosine_similarity(embeddings1, embeddings2).mean()
+        neg_loss = F.cosine_similarity(embeddings1, embeddings2.roll(shifts=1, dims=0)).mean()
+        return -pos_loss + neg_loss
diff --git a/src/attacks/CLGA/CLGA_gpt.py b/src/attacks/CLGA/CLGA_gpt.py
new file mode 100644
index 0000000..44081bf
--- /dev/null
+++ b/src/attacks/CLGA/CLGA_gpt.py
@@ -0,0 +1,175 @@
+import torch
+from torch_geometric.utils import dropout_adj, dense_to_sparse
+from attacks.poison_attacks import PoisonAttacker
+from attacks.CLGA.differentiable_models.gcn import GCN
+from attacks.CLGA.differentiable_models.model import GRACE
+
+from tqdm import tqdm
+from torch_geometric.utils import to_dense_adj
+from torch_geometric.nn import MessagePassing
+from models_builder.models_utils import apply_decorator_to_graph_layers
+
+
+class CLGAAttack(PoisonAttacker):
+    name = "CLGAAttack"
+
+    def __init__(
+            self, num_nodes, feature_shape, learning_rate=0.01, num_hidden=256, num_proj_hidden=32, activation="prelu",
+            drop_edge_rate_1=0.3, drop_edge_rate_2=0.4, tau=0.4, num_epochs=3000, weight_decay=1e-5,
+            drop_scheme="degree", device="cpu"
+    ):
+        super().__init__()
+        self.num_nodes = num_nodes
+        self.feature_shape = feature_shape
+        self.learning_rate = learning_rate
+        self.num_hidden = num_hidden
+        self.num_proj_hidden = num_proj_hidden
+        self.activation = activation
+        self.drop_edge_rate_1 = drop_edge_rate_1
+        self.drop_edge_rate_2 = drop_edge_rate_2
+        self.tau = tau
+        self.num_epochs = num_epochs
+        self.weight_decay = weight_decay
+        self.drop_scheme = drop_scheme
+        self.device = device
+
+        self.modified_adj = None
+        self.model = None
+        self.optimizer = None
+
+    def drop_edge(self, edge_index, p):
+        """
+        Perform edge dropout based on the chosen scheme.
+        """
+        return dropout_adj(edge_index, p=p)[0]
+
+    def train_gcn(self, data):
+        """
+        Train the GCN model with augmented graphs.
+        """
+        self.model.train()
+        self.optimizer.zero_grad()
+        edge_index_1 = self.drop_edge(data.edge_index, self.drop_edge_rate_1)
+        edge_index_2 = self.drop_edge(data.edge_index, self.drop_edge_rate_2)
+        x_1 = data.x.clone()
+        x_2 = data.x.clone()
+
+        z1 = self.model(x_1, edge_index_1)
+        z2 = self.model(x_2, edge_index_2)
+
+        loss = self.model.loss(z1, z2)
+        loss.backward()
+        self.optimizer.step()
+        return loss.item()
+
+    def compute_gradient(self, data):
+        """
+        Compute gradients of the contrastive loss w.r.t. adjacency matrix.
+        """
+        self.model.eval()
+        edge_index_1 = self.drop_edge(data.edge_index, self.drop_edge_rate_1)
+        edge_index_2 = self.drop_edge(data.edge_index, self.drop_edge_rate_2)
+
+        size_1 = edge_index_1.shape[1]
+        size_2 = edge_index_2.shape[1]
+
+        # adj_dense_1 = torch.sparse.FloatTensor(
+        #     edge_index_1, torch.ones(edge_index_1.shape[1], device=self.device),
+        #     (self.num_nodes, self.num_nodes)
+        # ).to_dense().requires_grad_(True)
+        #
+        # adj_dense_2 = torch.sparse.FloatTensor(
+        #     edge_index_2, torch.ones(edge_index_2.shape[1], device=self.device),
+        #     (self.num_nodes, self.num_nodes)
+        # ).to_dense().requires_grad_(True)
+
+        # z1 = self.model(data.x, adj_dense_1)
+        # z2 = self.model(data.x, adj_dense_2)
+
+        z1 = self.model(data.x, edge_index_1)
+        z2 = self.model(data.x, edge_index_2)
+
+        # edge_index_1.requires_grad = True
+        # edge_index_2.requires_grad = True
+
+        loss = self.model.loss(z1, z2)
+        loss.backward()
+
+        # grad = torch.zeros_like()
+        grad = 0
+        for name, layer in self.model.encoder.named_children():
+            if isinstance(layer, MessagePassing):
+                # print(f"{name}: {layer.get_message_gradients()}")
+                for l_name, l_grad in layer.get_message_gradients().items():
+                    grad = l_grad
+
+        max_size = max(size_1, size_2)
+        max_edge = edge_index_1 if size_1 > size_2 else edge_index_2
+        return grad, max_size, max_edge
+        # return edge_index_1.grad, edge_index_2.grad
+
+    def attack(self, gen_dataset):
+        """
+        Execute the CLGA attack.
+        """
+        self.model = GRACE(
+            encoder=GCN(self.feature_shape, self.num_hidden, 'prelu'),
+            num_hidden=self.num_hidden,
+            num_proj_hidden=self.num_proj_hidden,
+            tau=self.tau
+        ).to(self.device)
+
+        apply_decorator_to_graph_layers(self.model)
+        apply_decorator_to_graph_layers(self.model.encoder)
+
+        self.optimizer = torch.optim.Adam(
+            self.model.parameters(),
+            lr=self.learning_rate,
+            weight_decay=self.weight_decay
+        )
+
+        perturbed_edges = [[], []]
+
+        # adj = torch.sparse.FloatTensor(
+        #     gen_dataset.dataset.data.edge_index, torch.ones(gen_dataset.dataset.data.edge_index.shape[1], device=self.device),
+        #     (self.num_nodes, self.num_nodes)
+        # ).to_dense()
+
+        adj = to_dense_adj(gen_dataset.dataset.data.edge_index).squeeze()
+
+        edge_index_set = set([(int(x), int(y)) for x, y in zip(gen_dataset.dataset.data.edge_index[0], gen_dataset.dataset.data.edge_index[0])])
+
+        for epoch in tqdm(range(self.num_epochs)):
+            self.train_gcn(gen_dataset.dataset.data)
+
+            # grad_1, grad_2 = self.compute_gradient(gen_dataset.dataset.data)
+            # grad_sum = grad_1 + grad_2
+            grad_sum, max_edge, edge_index_mutated = self.compute_gradient(gen_dataset.dataset.data)
+            grad_sum = grad_sum.sum(axis=1)
+            grad_sum = grad_sum[:max_edge]
+
+            max_grad_index = torch.argmax(torch.abs(grad_sum))
+            # row, col = divmod(max_grad_index.item(), self.num_nodes)
+            i = int(edge_index_mutated[0, max_grad_index])
+            j = int(edge_index_mutated[1, max_grad_index])
+
+            if (i, j) in edge_index_set:
+                if grad_sum[max_grad_index] <= 0:
+                    perturbed_edges[0].append(i)
+                    perturbed_edges[1].append(j)
+            else:
+                if grad_sum[max_grad_index] > 0:
+                    perturbed_edges[0].append(i)
+                    perturbed_edges[1].append(j)
+
+            # if grad_sum[row, col] > 0 and adj[row, col] == 0:
+            #     adj[row, col] = 1
+            #     adj[col, row] = 1
+            # elif grad_sum[row, col] < 0 and adj[row, col] == 1:
+            #     adj[row, col] = 0
+            #     adj[col, row] = 0
+
+            # perturbed_edges.append((row, col))
+        # gen_dataset.dataset.data.edge_index = dense_to_sparse(adj)[0]
+
+        gen_dataset.dataset.data.edge_index = torch.tensor(perturbed_edges)
+        return
diff --git a/src/attacks/CLGA/differentiable_models/gcn.py b/src/attacks/CLGA/differentiable_models/gcn.py
new file mode 100644
index 0000000..05d9e06
--- /dev/null
+++ b/src/attacks/CLGA/differentiable_models/gcn.py
@@ -0,0 +1,83 @@
+import torch
+import torch.nn as nn
+from attacks.metattack import utils
+
+# differentiable
+# class GCN(nn.Module):
+#     def __init__(self, in_ft, out_ft, act, dropout=0, bias=True):
+#         super(GCN, self).__init__()
+#         self.fc1 = nn.Linear(in_ft, 2*out_ft, bias=False)
+#         self.dropout = nn.Dropout(p=dropout)
+#         self.fc2 = nn.Linear(2*out_ft, out_ft, bias=False)
+#         self.act = nn.PReLU() if act == 'prelu' else act
+#
+#         if bias:
+#             self.bias1 = nn.Parameter(torch.FloatTensor(2*out_ft))
+#             self.bias1.data.fill_(0.0)
+#             self.bias2 = nn.Parameter(torch.FloatTensor(out_ft))
+#             self.bias2.data.fill_(0.0)
+#         else:
+#             self.register_parameter('bias1', None)
+#             self.register_parameter('bias2', None)
+#
+#         for m in self.modules():
+#             self.weights_init(m)
+#
+#     def weights_init(self, m):
+#         if isinstance(m, nn.Linear):
+#             torch.nn.init.xavier_uniform_(m.weight.data)
+#             if m.bias is not None:
+#                 m.bias.data.fill_(0.0)
+#
+#     # Shape of seq: (nodes, features)
+#     def forward(self, seq, adj, sparse=False):
+#         adj_norm = utils.normalize_adj_tensor(adj, sparse=sparse)
+#         seq_fts1 = self.fc1(seq)
+#         if sparse:
+#             out1 = torch.spmm(adj_norm, seq_fts1)
+#         else:
+#             out1 = torch.mm(adj_norm, seq_fts1)
+#         if self.bias1 is not None:
+#             out1 += self.bias1
+#         out1 = self.act(out1)
+#         out1 = self.dropout(out1)
+#
+#         seq_fts2 = self.fc2(out1)
+#         if sparse:
+#             out2 = torch.spmm(adj_norm, seq_fts2)
+#         else:
+#             out2 = torch.mm(adj_norm, seq_fts2)
+#         if self.bias2 is not None:
+#             out2 += self.bias2
+#         return self.act(out2)
+
+from torch_geometric.nn import GCNConv
+
+
+class GCN(nn.Module):
+    def __init__(self, in_ft, out_ft, act='prelu', dropout=0.0, bias=True):
+        super(GCN, self).__init__()
+        self.conv1 = GCNConv(in_ft, 2 * out_ft, bias=bias)
+        self.conv2 = GCNConv(2 * out_ft, out_ft, bias=bias)
+        self.dropout = nn.Dropout(p=dropout)
+        self.act = nn.PReLU() if act == 'prelu' else nn.ReLU() if act == 'relu' else nn.Identity()
+
+    def forward(self, x, edge_index):
+        """
+        Forward pass of the GCN.
+
+        Args:
+            x (Tensor): Input feature matrix of shape [num_nodes, num_features].
+            edge_index (Tensor): Edge indices of shape [2, num_edges].
+
+        Returns:
+            Tensor: Node embeddings of shape [num_nodes, out_ft].
+        """
+        # First GCN layer
+        x = self.conv1(x, edge_index)
+        x = self.act(x)
+        x = self.dropout(x)
+
+        # Second GCN layer
+        x = self.conv2(x, edge_index)
+        x = self.act(x)
+        return x
diff --git a/src/attacks/CLGA/differentiable_models/model.py b/src/attacks/CLGA/differentiable_models/model.py
new file mode 100644
index 0000000..ec2e45e
--- /dev/null
+++ b/src/attacks/CLGA/differentiable_models/model.py
@@ -0,0 +1,72 @@
+from typing import Optional
+
+import torch
+import torch.nn.functional as F
+
+
+# differentiable version
+class GRACE(torch.nn.Module):
+    def __init__(self, encoder, num_hidden: int, num_proj_hidden: int, tau: float = 0.5):
+        super(GRACE, self).__init__()
+        self.encoder = encoder
+        self.tau: float = tau
+
+        self.fc1 = torch.nn.Linear(num_hidden, num_proj_hidden)
+        self.fc2 = torch.nn.Linear(num_proj_hidden, num_hidden)
+
+        self.num_hidden = num_hidden
+
+    def forward(self, x, adj):
+        return self.encoder(x, adj)
+
+    def projection(self, z: torch.Tensor) -> torch.Tensor:
+        z = F.elu(self.fc1(z))
+        return self.fc2(z)
+
+    def sim(self, z1: torch.Tensor, z2: torch.Tensor):
+        z1 = F.normalize(z1)
+        z2 = F.normalize(z2)
+        return torch.mm(z1, z2.t())
+
+    def semi_loss(self, z1: torch.Tensor, z2: torch.Tensor):
+        f = lambda x: torch.exp(x / self.tau)
+        refl_sim = f(self.sim(z1, z1))
+        between_sim = f(self.sim(z1, z2))
+
+        return -torch.log(between_sim.diag() / (refl_sim.sum(1) + between_sim.sum(1) - refl_sim.diag()))
+
+    def batched_semi_loss(self, z1: torch.Tensor, z2: torch.Tensor, batch_size: int):
+        # Space complexity: O(BN) (semi_loss: O(N^2))
+        device = z1.device
+        num_nodes = z1.size(0)
+        num_batches = (num_nodes - 1) // batch_size + 1
+        f = lambda x: torch.exp(x / self.tau)
+        indices = torch.arange(0, num_nodes).to(device)
+        losses = []
+
+        for i in range(num_batches):
+            mask = indices[i * batch_size:(i + 1) * batch_size]
+            refl_sim = f(self.sim(z1[mask], z1))  # [B, N]
+            between_sim = f(self.sim(z1[mask], z2))  # [B, N]
+
+            losses.append(-torch.log(between_sim[:, i * batch_size:(i + 1) * batch_size].diag()
+                                     / (refl_sim.sum(1) + between_sim.sum(1)
+                                        - refl_sim[:, i * batch_size:(i + 1) * batch_size].diag())))
+
+        return torch.cat(losses)
+
+    def loss(self, z1: torch.Tensor, z2: torch.Tensor, mean: bool = True, batch_size: Optional[int] = None):
+        h1 = self.projection(z1)
+        h2 = self.projection(z2)
+
+        if batch_size is None:
+            l1 = self.semi_loss(h1, h2)
+            l2 = self.semi_loss(h2, h1)
+        else:
+            l1 = self.batched_semi_loss(h1, h2, batch_size)
+            l2 = self.batched_semi_loss(h2, h1, batch_size)
+
+        ret = (l1 + l2) * 0.5
+        ret = ret.mean() if mean else ret.sum()
+
+        return ret
diff --git a/src/attacks/metattack/meta_gradient_attack.py b/src/attacks/metattack/meta_gradient_attack.py
index 94c1e2f..ff5bd15 100644
--- a/src/attacks/metattack/meta_gradient_attack.py
+++ b/src/attacks/metattack/meta_gradient_attack.py
@@ -295,6 +295,7 @@ def attack(self, gen_dataset, attack_budget=0.05, ll_constraint=True, ll_cutoff=
                 self.feature_changes.data[row_idx][col_idx] += (-2 * modified_features[row_idx][col_idx] + 1)
 
         if self.attack_structure:
+            print("TEST2")
             self.modified_adj = self.get_modified_adj(ori_adj).detach()
         if self.attack_features:
             self.modified_features = self.get_modified_features(ori_features).detach()
@@ -329,7 +330,7 @@ def inner_train(self, features, adj_norm, idx_train, idx_unlabeled, labels):
             self.b_velocities[ix] = self.b_velocities[ix].detach()
             self.b_velocities[ix].requires_grad = True
 
-        for j in range(self.attack_iters):
+        for j in tqdm(range(self.attack_iters)):
             hidden = features
             for ix, w in enumerate(self.weights):
                 b = self.biases[ix] if self.with_bias else 0
diff --git a/src/defense/JaccardDefense/jaccard_def.py b/src/defense/JaccardDefense/jaccard_def.py
index 277133b..a1925f0 100644
--- a/src/defense/JaccardDefense/jaccard_def.py
+++ b/src/defense/JaccardDefense/jaccard_def.py
@@ -61,8 +61,8 @@ def defense(self, gen_dataset, **kwargs):
         return gen_dataset
 
     def jaccard_index(self, x, u, v):
-        im1 = x[u,:].numpy().astype(bool)
-        im2 = x[v,:].numpy().astype(bool)
+        im1 = x[u,:].cpu().numpy().astype(bool)
+        im2 = x[v,:].cpu().numpy().astype(bool)
         intersection = np.logical_and(im1, im2)
         union = np.logical_or(im1, im2)
         return intersection.sum() / float(union.sum())
\ No newline at end of file
diff --git a/src/models_builder/gnn_models.py b/src/models_builder/gnn_models.py
index e4c642f..6f40645 100644
--- a/src/models_builder/gnn_models.py
+++ b/src/models_builder/gnn_models.py
@@ -984,11 +984,16 @@ def train_on_batch(
     ) -> torch.Tensor:
         loss = None
         if hasattr(batch, "edge_weight"):
+            if batch.edge_weight is not None:
+                batch.edge_weight.cpu()
             weight = batch.edge_weight
         else:
             weight = None
         if task_type == "single-graph":
             self.optimizer.zero_grad()
+            batch.x.cpu()
+            batch.edge_index.cpu()
+
             logits = self.gnn(batch.x, batch.edge_index, weight)
             loss = self.loss_function(logits, batch.y)
             if self.clip is not None:
diff --git a/src/models_builder/models_zoo.py b/src/models_builder/models_zoo.py
index 3cf351a..27b744e 100644
--- a/src/models_builder/models_zoo.py
+++ b/src/models_builder/models_zoo.py
@@ -114,6 +114,111 @@ def model_configs_zoo(
         )
     )
 
+    gin_gin = FrameworkGNNConstructor(
+        model_config=ModelConfig(
+            structure=ModelStructureConfig(
+                [
+                    {
+                        'label': 'n',
+                        'layer': {
+                            'layer_name': 'GINConv',
+                            'layer_kwargs': None,
+                            'gin_seq': [
+                                {
+                                    'layer': {
+                                        'layer_name': 'Linear',
+                                        'layer_kwargs': {
+                                            'in_features': dataset.num_node_features,
+                                            'out_features': 16,
+                                        },
+                                    },
+                                    'batchNorm': {
+                                        'batchNorm_name': 'BatchNorm1d',
+                                        'batchNorm_kwargs': {
+                                            'num_features': 16,
+                                            'eps': 1e-05,
+                                        }
+                                    },
+                                    'activation': {
+                                        'activation_name': 'ReLU',
+                                        'activation_kwargs': None,
+                                    },
+                                },
+                                {
+                                    'layer': {
+                                        'layer_name': 'Linear',
+                                        'layer_kwargs': {
+                                            'in_features': 16,
+                                            'out_features': 16,
+                                        },
+                                    },
+                                    'batchNorm': {
+                                        'batchNorm_name': 'BatchNorm1d',
+                                        'batchNorm_kwargs': {
+                                            'num_features': 16,
+                                            'eps': 1e-05,
+                                        }
+                                    },
+                                    'activation': {
+                                        'activation_name': 'ReLU',
+                                        'activation_kwargs': None,
+                                    },
+                                },
+                            ],
+                        },
+                        'activation': {
+                            'activation_name': 'ReLU',
+                            'activation_kwargs': None,
+                        },
+                    },
+
+                    {
+                        'label': 'n',
+                        'layer': {
+                            'layer_name': 'GINConv',
+                            'layer_kwargs': None,
+                            'gin_seq': [
+                                {
+                                    'layer': {
+                                        'layer_name': 'Linear',
+                                        'layer_kwargs': {
+                                            'in_features': 16,
+                                            'out_features': 16,
+                                        },
+                                    },
+                                    'batchNorm': {
+                                        'batchNorm_name': 'BatchNorm1d',
+                                        'batchNorm_kwargs': {
+                                            'num_features': 16,
+                                            'eps': 1e-05,
+                                        }
+                                    },
+                                    'activation': {
+                                        'activation_name': 'ReLU',
+                                        'activation_kwargs': None,
+                                    },
+                                },
+                                {
+                                    'layer': {
+                                        'layer_name': 'Linear',
+                                        'layer_kwargs': {
+                                            'in_features': 16,
+                                            'out_features': dataset.num_classes,
+                                        },
+                                    },
+                                },
+                            ],
+                        },
+                        'activation': {
+                            'activation_name': 'LogSoftmax',
+                            'activation_kwargs': None,
+                        },
+                    },
+                ]
+            )
+        )
+    )
+
     gat_gat = FrameworkGNNConstructor(
         model_config=ModelConfig(
             structure=ModelStructureConfig(
@@ -414,6 +519,59 @@ def model_configs_zoo(
         )
     )
 
+    gcn_gcn_gcn = FrameworkGNNConstructor(
+        model_config=ModelConfig(
+            structure=ModelStructureConfig(
+                [
+                    {
+                        'label': 'n',
+                        'layer': {
+                            'layer_name': 'GCNConv',
+                            'layer_kwargs': {
+                                'in_channels': dataset.num_node_features,
+                                'out_channels': 16,
+                            },
+                        },
+                        'activation': {
+                            'activation_name': 'ReLU',
+                            'activation_kwargs': None,
+                        },
+                    },
+
+                    {
+                        'label': 'n',
+                        'layer': {
+                            'layer_name': 'GCNConv',
+                            'layer_kwargs': {
+                                'in_channels': 16,
+                                'out_channels': 16,
+                            },
+                        },
+                        'activation': {
+                            'activation_name': 'ReLU',
+                            'activation_kwargs': None,
+                        },
+                    },
+
+                    {
+                        'label': 'n',
+                        'layer': {
+                            'layer_name': 'GCNConv',
+                            'layer_kwargs': {
+                                'in_channels': 16,
+                                'out_channels': dataset.num_classes,
+                            },
+                        },
+                        'activation': {
+                            'activation_name': 'LogSoftmax',
+                            'activation_kwargs': None,
+                        },
+                    },
+                ]
+            )
+        )
+    )
+
     gcn_gcn_no_self_loops = FrameworkGNNConstructor(
         model_config=ModelConfig(
             structure=ModelStructureConfig(