TimeEval · SebastianSchmidl · Jun 30, 2023 · Jun 30, 2023 · Jul 4, 2023 · Jul 11, 2023
diff --git a/0-base-images/python3-base/Dockerfile b/0-base-images/python3-base/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.7.9-slim-buster
+FROM python:3.10-slim
 
 LABEL maintainer="[email protected]"
 

diff --git a/0-base-images/python3-base/requirements.txt b/0-base-images/python3-base/requirements.txt
@@ -1,5 +1,5 @@
-numpy==1.20.0
-pandas==1.2.1
-matplotlib==3.3.4
-scipy==1.6.0
-scikit-learn==0.24.1
+numpy>=1.20.0
+pandas>=1.2.1
+matplotlib>=3.3.4
+scipy>=1.6.0
+scikit-learn>=0.24.1
diff --git a/0-base-images/python3-torch/Dockerfile b/0-base-images/python3-torch/Dockerfile
@@ -1,5 +1,5 @@
-FROM registry.gitlab.hpi.de/akita/i/python3-base
+FROM registry.gitlab.hpi.de/akita/i/python3-base:0.2.6
 
 LABEL maintainer="[email protected]"
 
-RUN pip install --no-cache-dir torch==1.7.1
+RUN pip install --no-cache-dir torch==1.13.1
diff --git a/README.md b/README.md
@@ -42,6 +42,7 @@ The namespace prefix (repository) for the built Docker images is `registry.gitla
 | [fft](./fft) | `registry.gitlab.hpi.de/akita/i/fft` | python 3.7 | [`registry.gitlab.hpi.de/akita/i/python3-base`](./0-base-images/python3-base) | unsupervised | univariate |
 | [generic_rf](./generic_rf) | `registry.gitlab.hpi.de/akita/i/generic_rf` | python 3.7 | [`registry.gitlab.hpi.de/akita/i/python3-base`](./0-base-images/python3-base) | semi-supervised | univariate |
 | [generic_xgb](./generic_xgb) | `registry.gitlab.hpi.de/akita/i/generic_xgb` | python 3.7 | [`registry.gitlab.hpi.de/akita/i/python3-base`](./0-base-images/python3-base) | semi-supervised | univariate |
+| [gdn](./gdn) | `registry.gitlab.hpi.de/akita/i/gdn` | python 3.10 | [`registry.gitlab.hpi.de/akita/i/python3-torch`](./0-base-images/python3-torch) -> [`registry.gitlab.hpi.de/akita/i/python3-base`](./0-base-images/python3-base) | semi-supervised | multivariate |
 | [grammarviz3](./grammarviz3) | `registry.gitlab.hpi.de/akita/i/grammarviz3` | Java| [`registry.gitlab.hpi.de/akita/i/java-base`](./0-base-images/java-base) | unsupervised | univariate |
 | [grammarviz3_multi](./grammarviz3_multi) | `registry.gitlab.hpi.de/akita/i/grammarviz3_multi` | Java| [`registry.gitlab.hpi.de/akita/i/java-base`](./0-base-images/java-base) | unsupervised | multivariate |
 | [hbos](./hbos) | `registry.gitlab.hpi.de/akita/i/hbos` | python 3.7 | [`registry.gitlab.hpi.de/akita/i/pyod`](./0-base-images/pyod) -> [`registry.gitlab.hpi.de/akita/i/python3-base`](./0-base-images/python3-base) | unsupervised | multivariate |
@@ -178,11 +179,9 @@ Follow the below steps to test your algorithm using Docker (examples assume that
    docker run --rm \
        -v $(pwd)/1-data:/data:ro \
        -v $(pwd)/2-results:/results:rw \
-   #    -e LOCAL_UID=<current user id> \
-   #    -e LOCAL_GID=<current groupid> \
-     registry.gitlab.hpi.de/akita/i/<your_algorithm>:latest execute-algorithm '{
+     registry.gitlab.hpi.de/akita/i/gdn:0.2.6 execute-algorithm '{
        "executionType": "train",
-       "dataInput": "/data/dataset.csv",
+       "dataInput": "/data/multi-dataset.csv",
        "dataOutput": "/results/anomaly_scores.ts",
        "modelInput": "/results/model.pkl",
        "modelOutput": "/results/model.pkl",

diff --git a/gdn/Dockerfile b/gdn/Dockerfile
@@ -0,0 +1,18 @@
+FROM registry.gitlab.hpi.de/akita/i/python3-torch:0.2.6
+
+LABEL maintainer="[email protected]"
+
+ENV ALGORITHM_MAIN="/app/algorithm.py"
+
+# compiler toolchain (presumably for building native wheels from requirements.txt); '&&' aborts the layer on the first failing step
+COPY requirements.txt /app/
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends gcc g++ python3-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+RUN pip install --no-cache-dir -r /app/requirements.txt
+
+COPY algorithm.py /app/
+COPY GDN /app/GDN
+# overwrite torch_geometric's dataloader with the patched copy; the target path is tied to the base image's Python 3.10
+COPY GDN/dataloader_fix.py /usr/local/lib/python3.10/site-packages/torch_geometric/data/dataloader.py
diff --git a/gdn/GDN/LICENSE b/gdn/GDN/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 d-ailin
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/gdn/GDN/README.md b/gdn/GDN/README.md
@@ -0,0 +1,79 @@
+# GDN
+
+Code implementation for: [Graph Neural Network-Based Anomaly Detection in Multivariate Time Series (AAAI'21)](https://arxiv.org/pdf/2106.06947.pdf)
+
+
+# Installation
+### Requirements
+* Python >= 3.6
+* cuda == 10.2
+* [Pytorch==1.5.1](https://pytorch.org/)
+* [PyG: torch-geometric==1.5.0](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)
+
+### Install packages
+```
+    # run after installing correct Pytorch package
+    bash install.sh
+```
+
+### Quick Start
+Run to check if the environment is ready
+```
+    bash run.sh cpu msl
+    # or with gpu
+    bash run.sh <gpu_id> msl    # e.g. bash run.sh 1 msl
+```
+
+
+# Usage
+We use part of the MSL dataset (see [telemanom](https://github.com/khundman/telemanom)) as a demo example.
+
+## Data Preparation
+```
+# put your dataset under data/ directory with the same structure shown in the data/msl/
+
+data
+ |-msl
+ | |-list.txt    # the feature names, one feature per line
+ | |-train.csv   # training data
+ | |-test.csv    # test data
+ |-your_dataset
+ | |-list.txt
+ | |-train.csv
+ | |-test.csv
+ | ...
+
+```
+
+### Notices:
+* The first column in each .csv file is treated as the index column.
+* The column order in the .csv files doesn't need to match the order in list.txt; the data columns are rearranged according to the order in list.txt.
+* test.csv must have a column named "attack" that contains the ground-truth label (0: normal, 1: attacked).
+
+## Run
+```
+    # using gpu
+    bash run.sh <gpu_id> <dataset>
+
+    # or using cpu
+    bash run.sh cpu <dataset>
+```
+You can change the run parameters in run.sh.
+
+# Others
+SWaT and WADI datasets can be requested from [iTrust](https://itrust.sutd.edu.sg/)
+
+
+# Citation
+If you find this repo or our work useful for your research, please consider citing the paper
+```
+@inproceedings{deng2021graph,
+  title={Graph neural network-based anomaly detection in multivariate time series},
+  author={Deng, Ailin and Hooi, Bryan},
+  booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
+  volume={35},
+  number={5},
+  pages={4027--4035},
+  year={2021}
+}
+```
diff --git a/gdn/GDN/__init__.py b/gdn/GDN/__init__.py
diff --git a/gdn/GDN/dataloader_fix.py b/gdn/GDN/dataloader_fix.py
@@ -0,0 +1,118 @@
+# fixed version of original dataloader in torch_geometric/data/dataloader.py
+# last import guarantees overload of original version
+import torch.utils.data
+from torch.utils.data.dataloader import default_collate
+
+from torch_geometric.data import Data, Batch
+from torch._six import string_classes
+
+
+int_classes = (bool, int)
+
+# NOTE: This overrides the default dataloader from torch_geometric to fix an issue
+class Collater(object):
+    """Callable that merges a list of samples into a single mini-batch.
+
+    Args:
+        follow_batch (list or tuple): Passed through to
+            ``Batch.from_data_list`` when the samples are ``Data`` objects.
+    """
+
+    def __init__(self, follow_batch):
+        self.follow_batch = follow_batch
+
+    def collate(self, batch):
+        """Collate ``batch`` based on the type of its first element.
+
+        ``Data`` objects become a ``Batch``, tensors go through
+        ``default_collate``, numbers become tensors, strings are returned
+        unchanged, and mappings, named tuples and sequences are collated
+        recursively.
+
+        Args:
+            batch (list): Non-empty list of samples of the same type.
+
+        Returns:
+            The collated mini-batch; its type depends on the sample type.
+
+        Raises:
+            TypeError: If the first element has an unsupported type.
+        """
+        # ``container_abcs`` was referenced below but never imported in this
+        # module, so the Mapping/Sequence branches raised NameError. Bind it
+        # to the standard-library ABCs locally.
+        import collections.abc as container_abcs
+
+        elem = batch[0]
+        if isinstance(elem, Data):
+            return Batch.from_data_list(batch, self.follow_batch)
+        elif isinstance(elem, torch.Tensor):
+            return default_collate(batch)
+        elif isinstance(elem, float):
+            return torch.tensor(batch, dtype=torch.float)
+        elif isinstance(elem, int_classes):
+            return torch.tensor(batch)
+        elif isinstance(elem, string_classes):
+            return batch
+        elif isinstance(elem, container_abcs.Mapping):
+            return {key: self.collate([d[key] for d in batch]) for key in elem}
+        elif isinstance(elem, tuple) and hasattr(elem, '_fields'):
+            # named tuple: rebuild the same type from the collated fields
+            return type(elem)(*(self.collate(s) for s in zip(*batch)))
+        elif isinstance(elem, container_abcs.Sequence):
+            return [self.collate(s) for s in zip(*batch)]
+
+        raise TypeError('DataLoader found invalid type: {}'.format(type(elem)))
+
+    def __call__(self, batch):
+        return self.collate(batch)
+
+
+class DataLoader(torch.utils.data.DataLoader):
+    r"""Mini-batch loader for a :class:`torch_geometric.data.dataset`.
+
+    Samples are merged by a :class:`Collater` built from ``follow_batch``.
+
+    Args:
+        dataset (Dataset): Source dataset to draw samples from.
+        batch_size (int, optional): Number of samples per batch.
+            (default: :obj:`1`)
+        shuffle (bool, optional): Reshuffle the data at every epoch when
+            :obj:`True`. (default: :obj:`False`)
+        follow_batch (list or tuple, optional): Keys for which assignment
+            batch vectors are created. (default: :obj:`[]`)
+        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
+    """
+
+    def __init__(self, dataset, batch_size=1, shuffle=False, follow_batch=[],
+                 **kwargs):
+        collater = Collater(follow_batch)
+        super().__init__(dataset, batch_size, shuffle, collate_fn=collater,
+                         **kwargs)
+
+
+class DataListLoader(torch.utils.data.DataLoader):
+    r"""Loader that returns each batch of a
+    :class:`torch_geometric.data.dataset` as a plain python list.
+
+    .. note::
+
+        Intended for multi-gpu use together with
+        :class:`torch_geometric.nn.DataParallel`.
+
+    Args:
+        dataset (Dataset): Source dataset to draw samples from.
+        batch_size (int, optional): Number of samples per batch.
+            (default: :obj:`1`)
+        shuffle (bool, optional): Reshuffle the data at every epoch when
+            :obj:`True`. (default: :obj:`False`)
+        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
+    """
+
+    def __init__(self, dataset, batch_size=1, shuffle=False, **kwargs):
+        # identity collate: hand the sampled items back as an unmerged list
+        keep_as_list = lambda data_list: data_list
+        super().__init__(dataset, batch_size, shuffle,
+                         collate_fn=keep_as_list, **kwargs)
+
+
+class DenseCollater(object):
+    """Callable that stacks every attribute of a list of data objects."""
+
+    def collate(self, data_list):
+        """Return a ``Batch`` with each key of the first item collated.
+
+        The keys are taken from ``data_list[0]``; for every key the values of
+        all items are merged with ``default_collate``.
+        """
+        merged = Batch()
+        for attr in data_list[0].keys:
+            values = [sample[attr] for sample in data_list]
+            merged[attr] = default_collate(values)
+        return merged
+
+    def __call__(self, batch):
+        return self.collate(batch)
+
+
+class DenseDataLoader(torch.utils.data.DataLoader):
+    r"""Mini-batch loader for a :class:`torch_geometric.data.dataset` that
+    collates samples with :class:`DenseCollater`.
+
+    .. note::
+
+        Every graph in the dataset needs to have the same shape for each of
+        its attributes, so this loader should only be used with *dense*
+        adjacency matrices.
+
+    Args:
+        dataset (Dataset): Source dataset to draw samples from.
+        batch_size (int, optional): Number of samples per batch.
+            (default: :obj:`1`)
+        shuffle (bool, optional): Reshuffle the data at every epoch when
+            :obj:`True`. (default: :obj:`False`)
+        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
+    """
+
+    def __init__(self, dataset, batch_size=1, shuffle=False, **kwargs):
+        dense_collate = DenseCollater()
+        super().__init__(dataset, batch_size, shuffle,
+                         collate_fn=dense_collate, **kwargs)
diff --git a/gdn/GDN/datasets/TimeDataset.py b/gdn/GDN/datasets/TimeDataset.py
@@ -0,0 +1,78 @@
+import torch
+from torch.utils.data import Dataset, DataLoader
+
+import torch.nn.functional as F
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+import numpy as np
+
+
+class TimeDataset(Dataset):
+    """Sliding-window dataset built from a list of rows.
+
+    ``raw_data[:-1]`` is used as the feature rows and ``raw_data[-1]`` as the
+    labels. Each sample consists of the ``config['slide_win']`` columns
+    before position ``i`` (input), the column at ``i`` (target) and the label
+    at ``i``.
+
+    Args:
+        raw_data: Sequence whose last entry holds the labels and whose other
+            entries hold the feature rows.
+        edge_index: Tensor-like object returned (cast with ``.long()``) with
+            every sample.
+        mode (str): ``'train'`` advances the window by
+            ``config['slide_stride']``; any other value advances it by one.
+        config (dict): Must provide ``'slide_win'`` and ``'slide_stride'``.
+    """
+
+    def __init__(self, raw_data, edge_index, mode='train', config = None):
+        self.raw_data = raw_data
+        self.config = config
+        self.edge_index = edge_index
+        self.mode = mode
+
+        # every entry except the last is a feature row; the last is the labels
+        features = torch.tensor(raw_data[:-1]).double()
+        targets = torch.tensor(raw_data[-1]).double()
+
+        self.x, self.y, self.labels = self.process(features, targets)
+
+    def __len__(self):
+        return len(self.x)
+
+    def process(self, data, labels):
+        """Cut ``data`` into windows and return ``(x, y, labels)`` tensors."""
+        window = self.config['slide_win']
+        stride = self.config['slide_stride']
+
+        # unpacking also enforces that ``data`` is two-dimensional
+        _, total_time_len = data.shape
+
+        if self.mode == 'train':
+            positions = range(window, total_time_len, stride)
+        else:
+            positions = range(window, total_time_len)
+
+        x = torch.stack([data[:, i - window:i] for i in positions]).contiguous()
+        y = torch.stack([data[:, i] for i in positions]).contiguous()
+        picked_labels = torch.Tensor([labels[i] for i in positions]).contiguous()
+
+        return x, y, picked_labels
+
+    def __getitem__(self, idx):
+        window_values = self.x[idx].double()
+        target = self.y[idx].double()
+        edges = self.edge_index.long()
+        label = self.labels[idx].double()
+        return window_values, target, label, edges
+
+
+
+
+