converter_alt.py (forked from SphericalKat/electrical-lstm)
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 15 23:06:54 2020
@author: Tanmay Thakur
"""
import pandas as pd
import numpy as np
import pickle
from scipy import stats
from sklearn.preprocessing import StandardScaler
data = pd.read_excel('/run/media/sphericalkat/archive/datasets/features.xlsx')
data.to_csv('/run/media/sphericalkat/archive/datasets/power-quality-meter.csv', sep=',')
print(list(data.columns.values))

# Drop timestamp columns and the cos phi values, as the latter have little to no effect on modelling
input_data = data.drop(['Date', 'Time', 'Cos Phi AN Avg', 'Cos Phi BN Avg', 'Cos Phi CN Avg', 'Cos Phi Total Avg'], axis=1)

# Remove outliers: keep only rows where every feature lies within 3 standard deviations of its mean
input_data = input_data[(np.abs(stats.zscore(input_data)) < 3).all(axis=1)]

# Persist the feature names so downstream scripts can label predictions
with open('labels.pickle', 'wb+') as pickle_out:
    pickle.dump(list(input_data.columns.values), pickle_out)

# Fit the scaler on the first 75% of the rows only (the training split), then transform everything
scaler = StandardScaler()
scaler.fit(input_data[:3 * len(input_data) // 4])
copy = scaler.transform(input_data)
timestep = 10
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) time series as a supervised learning table with
    n_in lag columns (t-n_in ... t-1) and n_out lead columns (t ... t+n_out-1)."""
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()

    # Input sequence (t-n_in, ..., t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]

    # Forecast sequence (t, t+1, ..., t+n_out-1)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # Drop rows containing NaNs introduced by the shifting
    if dropnan:
        agg.dropna(inplace=True)
    return agg
train = series_to_supervised(copy).values
print(train.shape)

# Build sliding windows: each sample is `timestep` consecutive rows of the lagged (t-1)
# columns; the target is taken from the current-time (t) columns at the window's first row
X_train = []
y_train = []
for i in range(timestep, len(input_data) - 1):
    X_train.append(train[i - timestep:i, :len(input_data.columns)])
    y_train.append(train[i - timestep, len(input_data.columns):])
X_train, y_train = np.array(X_train), np.array(y_train)
# Save the training arrays and the fitted scaler for the model-training script
data_dump = X_train, y_train
with open('dict.pickle', 'wb') as pickle_out:
    pickle.dump(data_dump, pickle_out)

with open('scaler.pickle', 'wb') as pickle_out:
    pickle.dump(scaler, pickle_out)
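
# A minimal sketch of how a downstream training script might consume these
# artifacts (an assumption for illustration, not part of this repo; only the
# file names written above are taken from this script):
#
#     import pickle
#
#     with open('dict.pickle', 'rb') as f:
#         X_train, y_train = pickle.load(f)
#     with open('scaler.pickle', 'rb') as f:
#         scaler = pickle.load(f)
#     with open('labels.pickle', 'rb') as f:
#         labels = pickle.load(f)
#
#     # Shapes follow from the window construction above:
#     # X_train -> (samples, timestep, n_features), y_train -> (samples, n_features)
#     print(X_train.shape, y_train.shape, labels)
#
#     # scaler.inverse_transform(...) can map scaled predictions back to original units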