-
Notifications
You must be signed in to change notification settings - Fork 0
/
final2.txt
49 lines (39 loc) · 1.76 KB
/
final2.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# IMPORT UTILITIES
from math import sqrt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import statistics
# Train a linear-regression model that predicts the SCORE column from the
# remaining columns of the CSV below, then report RMSE and a
# tolerance-based accuracy on a held-out 20% test split.

# Upper bound on the number of rows (samples) drawn from the cleaned data.
featuresize = 735000
X = pd.read_csv(r'D:\Desktop\data.csv', low_memory=False)
X.dropna(inplace=True)  # drop rows containing NaN/missing values
# Randomly sample rows without replacement. DataFrame.sample raises
# ValueError when n exceeds the number of available rows, so cap n at the
# number of rows that survived dropna().
X = X.sample(n=min(featuresize, len(X)))
XShuffle = shuffle(X)
XShuffle = pd.get_dummies(XShuffle)  # one-hot encode non-numeric columns
y = np.array(XShuffle['SCORE'])  # target to predict, converted to an array
yShape = y.shape
# normalize() works on 2-D input, so treat the labels as a single row and
# scale that row to unit norm.
# NOTE(review): this divides every label by the norm of the *whole* label
# vector (train and test together), so label magnitudes shrink as the sample
# count grows and the fixed tolerance below is sample-size dependent --
# confirm this is the intended scaling.
normalized_labels = normalize(y.reshape(1, -1), axis=1)
y = normalized_labels.reshape(yShape)  # restore the original label shape
XShuffle = XShuffle.drop('SCORE', axis=1)  # remove the target from the features
XShuffle = np.array(XShuffle)  # features as an array
train_features, test_features, train_labels, test_labels = train_test_split(
    XShuffle, y, test_size=0.2)

# Fit ordinary least-squares linear regression on the training split.
regressor = LinearRegression(n_jobs=-1)
regressor.fit(train_features, train_labels)

# Evaluate on the held-out split.
predictions = regressor.predict(test_features)
rms = sqrt(mean_squared_error(test_labels, predictions))
# A prediction counts as correct when it lies within `tolerance` of the label.
tolerance = 0.0015
accuracy = (np.abs(predictions - test_labels) <= tolerance).mean()
print("Root Mean Square Error: ", round(rms, 8))
print("Tolerance is: ", tolerance)
print('Accuracy:', round(accuracy * 100, 2), '%.')
print("###################################")