From f7cb797c637f85623f93c49af32f59a82c4ae2ac Mon Sep 17 00:00:00 2001 From: plan-do-break-fix Date: Wed, 26 May 2021 18:48:12 -0500 Subject: [PATCH] fix(docs): corrects common typos in quiz and project markdown files --- 2_R_Programming/projects/project1.md | 2 +- 3_Getting_and_Cleaning_Data/quizzes/quiz4.md | 2 +- 7_Regression_Models/quizzes/quiz4.md | 2 +- 9_Developing_Data_Products/quizzes/quiz4.md | 2 +- Stanford_Machine_Learning/Week3/week3quiz2.md | 2 +- Stanford_Machine_Learning/Week8/UnsupervisedLearningQuiz.md | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/2_R_Programming/projects/project1.md b/2_R_Programming/projects/project1.md index ef1b8776..674eddc2 100644 --- a/2_R_Programming/projects/project1.md +++ b/2_R_Programming/projects/project1.md @@ -197,7 +197,7 @@ corr <- function(directory, threshold = 0){ count = nrow(completeCases) ## Calculate and store the count of complete cases - ## if threshhold is reached + ## if threshold is reached if( count >= threshold ) { correlations = c(correlations, cor(completeCases$nitrate, completeCases$sulfate) ) } diff --git a/3_Getting_and_Cleaning_Data/quizzes/quiz4.md b/3_Getting_and_Cleaning_Data/quizzes/quiz4.md index c00ee5ea..49f1b370 100644 --- a/3_Getting_and_Cleaning_Data/quizzes/quiz4.md +++ b/3_Getting_and_Cleaning_Data/quizzes/quiz4.md @@ -33,7 +33,7 @@ Remove the commas from the GDP numbers in millions of dollars and average them. Original data sources: http://data.worldbank.org/data-catalog/GDP-ranking-table ```R # Removed the s from https to be compatible with windows computers. 
-# Skip first 5 rows and only read in relevent columns +# Skip first 5 rows and only read in relevant columns GDPrank <- data.table::fread('http://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FGDP.csv' , skip=5 , nrows=190 diff --git a/7_Regression_Models/quizzes/quiz4.md b/7_Regression_Models/quizzes/quiz4.md index c00ee5ea..49f1b370 100644 --- a/7_Regression_Models/quizzes/quiz4.md +++ b/7_Regression_Models/quizzes/quiz4.md @@ -33,7 +33,7 @@ Remove the commas from the GDP numbers in millions of dollars and average them. Original data sources: http://data.worldbank.org/data-catalog/GDP-ranking-table ```R # Removed the s from https to be compatible with windows computers. -# Skip first 5 rows and only read in relevent columns +# Skip first 5 rows and only read in relevant columns GDPrank <- data.table::fread('http://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FGDP.csv' , skip=5 , nrows=190 diff --git a/9_Developing_Data_Products/quizzes/quiz4.md b/9_Developing_Data_Products/quizzes/quiz4.md index c00ee5ea..49f1b370 100644 --- a/9_Developing_Data_Products/quizzes/quiz4.md +++ b/9_Developing_Data_Products/quizzes/quiz4.md @@ -33,7 +33,7 @@ Remove the commas from the GDP numbers in millions of dollars and average them. Original data sources: http://data.worldbank.org/data-catalog/GDP-ranking-table ```R # Removed the s from https to be compatible with windows computers. 
-# Skip first 5 rows and only read in relevent columns +# Skip first 5 rows and only read in relevant columns GDPrank <- data.table::fread('http://d396qusza40orc.cloudfront.net/getdata%2Fdata%2FGDP.csv' , skip=5 , nrows=190 diff --git a/Stanford_Machine_Learning/Week3/week3quiz2.md b/Stanford_Machine_Learning/Week3/week3quiz2.md index 36e21f73..c4b929ab 100644 --- a/Stanford_Machine_Learning/Week3/week3quiz2.md +++ b/Stanford_Machine_Learning/Week3/week3quiz2.md @@ -28,7 +28,7 @@ True or False | Statement | Explanation False | Using a very large value λ cannot hurt the performance of your hypothesis; the only reason we do not set to be too large is to avoid numerical problems. | Using a very large value of λ can lead to underfitting of the training set. False | Because regularization causes J(θ) to no longer be convex, gradient descent may not always converge to the global minimum (when λ > 0, and when using an appropriate learning rate α). | Regularized logistic regression and regularized linear regression are both convex, and thus gradient descent will still converge to the global minimum. True | Using too large a value of λ can cause your hypothesis to underfit the data.| A large value of results in a large λ regularization penalty and thus a strong preference for simpler models which can underfit the data. -False | Because logistic regression outputs values 0 <= h0 <= 1, its range of output values can only be "shrunk" slighly by regularization anyway, so regularization is generally not helpful for it. | None needed +False | Because logistic regression outputs values 0 <= h0 <= 1, its range of output values can only be "shrunk" slightly by regularization anyway, so regularization is generally not helpful for it. 
| None needed Question 4 ---------- diff --git a/Stanford_Machine_Learning/Week8/UnsupervisedLearningQuiz.md b/Stanford_Machine_Learning/Week8/UnsupervisedLearningQuiz.md index 2724f4e5..5c478d61 100644 --- a/Stanford_Machine_Learning/Week8/UnsupervisedLearningQuiz.md +++ b/Stanford_Machine_Learning/Week8/UnsupervisedLearningQuiz.md @@ -49,7 +49,7 @@ True or False | Statement | Explanation --- | --- | --- False | Once an example has been assigned to a particular centroid, it will never be reassigned to another centroid | Not sure yet True | A good way to initialize K-means is to select K (distinct) examples from the training set and set the cluster centroids equal to these selected examples. | This is the recommended method of initialization. -True | On every iteration of K-means, the cost funtion J(c(1), ..., c(m), μ1, ..., μk (the distortion function) should either stay the same or decrease; in particular, it should not increase | True +True | On every iteration of K-means, the cost function J(c(1), ..., c(m), μ1, ..., μk) (the distortion function) should either stay the same or decrease; in particular, it should not increase | True False | K-Means will always give the same results regardless of the initialization of the centroids. | K-means is sensitive to different initializations, which is why you should run it multiple times from different random initializations True | For some datasets, the "right" or "correct" value of K (the number of clusters) can be ambiguous, and hard even for a human expert looking carefully at the data to decide. | Look at an elbow curve for an example. It can often be ambiguous. True | If we are worried about K-means getting stuck in bad local optima, one way to ameliorate (reduce) this problem is if we try using multiple random initializations. | None needed