-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
127 lines (103 loc) · 4.91 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import pandas as pd
import os
import logging
from prelogad.DeepSVDD.src.deepSVDD import DeepSVDD
from prelogad.DeepSVDD.src.datasets.main import load_dataset
from tqdm import tqdm
import logging
from postprocess import RAG
import yaml
from utils.evaluator import evaluate
import torch
# Load runtime configuration from config.yaml.
with open('config.yaml', 'r') as file:
    configs = yaml.safe_load(file)

# Expose OpenAI credentials to downstream clients via environment variables.
api_key = configs['api_key']
os.environ["OPENAI_API_BASE"] = configs['api_base']
os.environ["OPENAI_API_KEY"] = api_key

# Make sure the output directory exists before the log file handler opens it.
output_dir = './output'
os.makedirs(output_dir, exist_ok=True)

# Root logger at INFO, mirrored to ./output/runtime.log with timestamps.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
runtime_log_handler = logging.FileHandler('./output/runtime.log')
runtime_log_handler.setLevel(logging.INFO)
runtime_log_handler.setFormatter(log_formatter)
logger.addHandler(runtime_log_handler)
def train_deepsvdd(train_data_path):
    """Train a DeepSVDD model on normal logs, or load a previously saved one.

    Behavior is driven by the module-level ``configs`` dict: when
    ``configs['is_train']`` is false, the cached model at
    ``./output/model.tar`` is loaded instead of retraining.

    Args:
        train_data_path: Path to the structured training-log CSV.

    Returns:
        str: Path to the saved (or loaded) model checkpoint.
    """
    # Bug fix: model_path was previously assigned only inside the training
    # branch, so `return model_path` raised NameError when is_train was false.
    model_path = './output/model.tar'
    os.makedirs('./output', exist_ok=True)
    deep_SVDD = DeepSVDD('soft-boundary')
    deep_SVDD.set_network("mlp")
    if not configs['is_train']:
        # Reuse the cached checkpoint; skip the autoencoder weights.
        deep_SVDD.load_model(model_path=model_path, load_ae=False)
        logger.info('Loading model from %s', model_path)
    else:
        # Build the training dataset from structured logs.
        train_dataset = load_dataset(data_path=train_data_path, encoder_path=configs['encoder_path'])
        # Optional autoencoder pretraining before the SVDD objective.
        if configs['is_pretrain']:
            deep_SVDD.pretrain(train_dataset,
                               optimizer_name=configs['optimizer_name'],
                               lr=configs['lr'],
                               n_epochs=configs['n_epochs'],
                               lr_milestones=configs['lr_milestones'],
                               batch_size=configs['batch_size'],
                               weight_decay=configs['weight_decay'],
                               device=configs['device'],
                               n_jobs_dataloader=configs['n_jobs_dataloader'])
        deep_SVDD.train(train_dataset,
                        optimizer_name=configs['optimizer_name'],
                        lr=configs['lr'],
                        n_epochs=configs['n_epochs'],
                        lr_milestones=configs['lr_milestones'],
                        batch_size=configs['batch_size'],
                        weight_decay=configs['weight_decay'],
                        device=configs['device'],
                        n_jobs_dataloader=configs['n_jobs_dataloader'])
        # Save results, model, and configuration.
        deep_SVDD.save_results(export_json='./output/results.json')
        deep_SVDD.save_model(export_model=model_path, save_ae=False)
    return model_path
def anomaly_detection(model_path, test_data_path):
    """Score the test logs with a trained DeepSVDD model and save anomalies.

    Args:
        model_path: Path to the trained DeepSVDD checkpoint.
        test_data_path: Path to the structured test-log CSV.

    Returns:
        tuple[str, list]: (path of the CSV containing the rows flagged as
        anomalous, list of their LineId values).
    """
    logger.info("start testing....")
    deep_SVDD = DeepSVDD('soft-boundary')
    deep_SVDD.set_network("mlp")
    deep_SVDD.load_model(model_path=model_path, load_ae=False)
    # Bug fix: log the path actually loaded rather than a hard-coded literal.
    logger.info('Loading model from %s', model_path)
    test_dataset = load_dataset(data_path=test_data_path, encoder_path=configs['encoder_path'])
    # NOTE(review): device is hard-coded to 'cpu' here while training uses
    # configs['device'] — confirm this is intentional.
    anomalys, _ = deep_SVDD.test(test_dataset, device='cpu', n_jobs_dataloader=configs['n_jobs_dataloader'])
    # Each test result item starts with the log line's LineId.
    anomaly_lineid_list = [item[0] for item in tqdm(anomalys, desc='saving anomaly LineIds to list')]
    output_file = 'output/anomaly_logs_detc_by_svdd.csv'
    # Persist the rows DeepSVDD flagged as anomalous.
    df_test = pd.read_csv(test_data_path)
    pos_df = df_test[df_test["LineId"].isin(anomaly_lineid_list)]
    pos_df.to_csv(output_file, index=False)
    return output_file, anomaly_lineid_list
def main():
    """Run the pipeline: split data, train/load DeepSVDD, detect, post-process, evaluate."""
    logger.info(configs)

    # Chronological split: the first train_ratio fraction becomes training data.
    full_df = pd.read_csv(configs['log_structed_path'])
    split_idx = int(configs['train_ratio'] * len(full_df))
    train_df = full_df[:split_idx]
    # One-class training: keep only lines labeled normal ('-').
    train_df = train_df[train_df['Label'] == '-']
    test_df = full_df[split_idx:]

    dataset_dir = f"./dataset/{configs['dataset_name']}"
    train_log_structed_path = f"{dataset_dir}/train_log_structured.csv"
    test_log_structed_path = f"{dataset_dir}/test_log_structured.csv"
    train_df.to_csv(train_log_structed_path, index=False)
    test_df.to_csv(test_log_structed_path, index=False)

    # Train DeepSVDD (or load the cached model) on the normal training logs.
    model_path = train_deepsvdd(train_log_structed_path)

    # Flag anomalous lines in the held-out test split.
    anomaly_logs_path, anomaly_lineid_list = anomaly_detection(model_path, test_log_structed_path)

    # Optional RAG post-processing refines the detected anomaly set.
    if configs['is_rag']:
        rag_poster = RAG.RAGPostProcessor(configs, train_data_path=train_log_structed_path, logger=logger)
        anomaly_lineid_list = rag_poster.post_process(anomaly_logs_path, test_log_structed_path)

    # Print final results.
    evaluate(configs, test_log_structed_path, anomaly_lineid_list, logger)


if __name__ == '__main__':
    main()