diff --git a/3_Regression/README.md b/3_Regression/README.md index f5c6d87..4688013 100644 --- a/3_Regression/README.md +++ b/3_Regression/README.md @@ -12,6 +12,8 @@ [6. Random Forest Regression](Random_Forest_Regression) +[7. Ridge Regression](Ridge_Regression) + ## Comparing Regression Models and their performance ### R Squared Intution for Simple Linear Regression diff --git a/3_Regression/Ridge_Regression/python/.idea/.gitignore b/3_Regression/Ridge_Regression/python/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/3_Regression/Ridge_Regression/python/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/3_Regression/Ridge_Regression/python/.idea/.name b/3_Regression/Ridge_Regression/python/.idea/.name new file mode 100644 index 0000000..87f8a1b --- /dev/null +++ b/3_Regression/Ridge_Regression/python/.idea/.name @@ -0,0 +1 @@ +ridge_regression.py \ No newline at end of file diff --git a/3_Regression/Ridge_Regression/python/.idea/inspectionProfiles/Project_Default.xml b/3_Regression/Ridge_Regression/python/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..36e63aa --- /dev/null +++ b/3_Regression/Ridge_Regression/python/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,38 @@ + + + + \ No newline at end of file diff --git a/3_Regression/Ridge_Regression/python/.idea/inspectionProfiles/profiles_settings.xml b/3_Regression/Ridge_Regression/python/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/3_Regression/Ridge_Regression/python/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/3_Regression/Ridge_Regression/python/.idea/misc.xml b/3_Regression/Ridge_Regression/python/.idea/misc.xml new file mode 
class RidgeRegression:
    """Linear regression with an L2 penalty, trained by batch gradient descent.

    The objective minimised is ``(sum((Y - Y_hat) ** 2) + l2_penality *
    sum(W ** 2)) / m``; the bias term ``b`` is not penalised.

    Args:
        learning_rate: Step size applied to every gradient update.
        iterations: Number of full-batch gradient steps performed by ``fit``.
        l2_penality: Weight of the L2 penalty (spelling kept for
            compatibility with existing callers).
    """

    def __init__(self, learning_rate, iterations, l2_penality):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.l2_penality = l2_penality

    def fit(self, X, Y):
        """Fit the weights ``W`` and bias ``b`` on the training data.

        Args:
            X: Feature matrix of shape (m, n); a 1-D sequence is treated as a
                single feature column.
            Y: Targets of shape (m,) or (m, 1).

        Returns:
            The fitted model itself, so calls can be chained.

        Raises:
            ValueError: If the data is empty, ``X`` has more than two
                dimensions, or ``X`` and ``Y`` disagree on the sample count.
        """
        features = np.asarray(X, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, 1)
        if features.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (m, n)")

        # Flatten the targets: an (m, 1) column would otherwise broadcast
        # against the (m,) prediction into an (m, m) residual matrix and
        # silently corrupt the gradients.
        targets = np.asarray(Y, dtype=float).reshape(-1)

        if features.shape[0] == 0:
            raise ValueError("cannot fit on an empty dataset")
        if targets.shape[0] != features.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")

        # no_of_training_examples, no_of_features
        self.m, self.n = features.shape

        # weight initialization
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = features
        self.Y = targets

        # gradient descent learning
        for _ in range(self.iterations):
            self.update_weights()
        return self

    def update_weights(self):
        """Perform one full-batch gradient descent step on ``W`` and ``b``."""
        residual = self.Y - self.predict(self.X)

        # Gradients of the penalised squared error; only W carries the penalty.
        dW = (-2 * self.X.T.dot(residual)
              + 2 * self.l2_penality * self.W) / self.m
        db = -2 * np.sum(residual) / self.m

        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
        return self

    def predict(self, X):
        """Return the model output ``X . W + b`` for the given samples."""
        return np.asarray(X).dot(self.W) + self.b


def _split_dataset(X, Y):
    """Return the fixed train/test split shared by ``train`` and ``visualize``."""
    return train_test_split(X, Y, test_size=1 / 3, random_state=0)


def train(X, Y):
    """Fit a ``RidgeRegression`` on the training part of the dataset.

    Returns:
        A ``(model, X_test, Y_test)`` tuple holding the fitted model and the
        held-out part of the data.
    """
    X_train, X_test, Y_train, Y_test = _split_dataset(X, Y)
    model = RidgeRegression(iterations=1000,
                            learning_rate=0.01, l2_penality=1)
    model.fit(X_train, Y_train)
    return model, X_test, Y_test


def visualize(X, Y, Y_pred, col, model=None):
    """Plot the training data and the validation predictions.

    The dataset is split again with the same settings as ``train`` so that
    ``Y_pred`` lines up with the held-out samples. Only a single feature
    column is supported, because it is used as the x axis.

    Args:
        X: Full feature matrix with one feature column.
        Y: Full target vector.
        Y_pred: Predictions for the held-out samples, in split order.
        col: Sequence whose first two items label the x and y axes.
        model: Optional fitted model. When given, the training panel shows
            its fitted line; otherwise the observed training targets are
            connected in order of increasing x.
    """
    X_train, X_test, Y_train, Y_test = _split_dataset(X, Y)
    x_train = np.ravel(X_train)
    x_test = np.ravel(X_test)

    # Lines are drawn in order of increasing x; the split is shuffled, and
    # plotting in split order yields a zig-zag.
    train_order = np.argsort(x_train)
    test_order = np.argsort(x_test)

    if model is None:
        train_line = np.asarray(Y_train)
    else:
        train_line = np.asarray(model.predict(X_train))

    fig, axs = plt.subplots(2)
    axs[0].scatter(x_train, Y_train, color='blue')
    axs[0].plot(x_train[train_order], train_line[train_order], color='orange')
    axs[0].set_title('training')

    # Scatter the observed targets so they can be compared with the
    # predicted line.
    axs[1].scatter(x_test, Y_test, color='blue')
    axs[1].plot(x_test[test_order], np.asarray(Y_pred)[test_order],
                color='orange')
    axs[1].set_title('validation')

    for ax in axs.flat:
        ax.set(xlabel=col[0], ylabel=col[1])

    fig.tight_layout()
    plt.show()


# Driver code

def main():
    """Train on the salary dataset, print sample predictions and plot them."""
    # Importing dataset: every column but the last is a feature, the last
    # column is the target.
    df = pd.read_csv("../Salary_Data.csv")
    X = df.iloc[:, :-1].values
    Y = df.iloc[:, -1].values
    col = df.columns

    # training the regression model
    model, X_test, Y_test = train(X, Y)

    # Prediction on test set
    Y_pred = model.predict(X_test)
    print("Predicted values ", np.round(Y_pred[:3], 2))
    print("Real values ", Y_test[:3])
    print("Trained W ", round(model.W[0], 2))
    print("Trained b ", round(model.b, 2))

    # Visualization on test set
    visualize(X, Y, Y_pred, col, model=model)


if __name__ == "__main__":
    main()
plt.plot(X_train, regressor.predict(X_train), color='blue') + plt.title('Salary vs Experience (Training set)') + plt.xlabel('Years of Experience') + plt.ylabel('Salary') + plt.show() + + # Visualising the Test set results + plt.scatter(X_test, y_test, color='red') + plt.plot(X_train, regressor.predict(X_train), color='blue') + plt.title('Salary vs Experience (Test set)') + plt.xlabel('Years of Experience') + plt.ylabel('Salary') + plt.show() + + +if __name__ == '__main__': + main() diff --git a/README.md b/README.md index c22fb50..2662723 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ * [Support Vector Regression](3_Regression/Support_Vector_Regression) * [Decision Tree Regression](3_Regression/Decision_Tree_Regression) * [Random Forest Regression](3_Regression/Random_Forest_Regression) + * [Ridge Regression](3_Regression/Ridge_Regression) [**3. Classification**](4_Classification/README.md) * [Logistic Regression](4_Classification/Logistic_Regression) @@ -109,4 +110,4 @@ Check the official MIT License [here](LICENSE). - **[@pragyakapoor](https://github.com/pragyakapoor)** - \ No newline at end of file +