diff --git a/ex3/test_ex3.m b/ex3/test_ex3.m
index 5d18339..acfb6ef 100644
--- a/ex3/test_ex3.m
+++ b/ex3/test_ex3.m
@@ -1,8 +1,17 @@
-% !test test_sanity()
+%!test test_sanity()
-% !test test_oneVsAll()
+%!test test_lrCostFunction()
-% !test test_predictOneVsAll()
+%!test test_oneVsAll_converged()
+
+% Beware! Results of fmincg may vary!
+% You can try fminunc or a larger number of iterations.
+% In times of war, the value of the cosine can reach 3 or even 4 (it's a joke).
+%!test test_oneVsAll()
+
+%!test test_predictOneVsAll()
+
+%!test test_predict()
 
 % These tests use a non-standard predict() which returns [p,k] instead of just [p] so
 % that the costs are returned and can be unit-tested. Calls to p = predict() will still
@@ -11,4 +20,4 @@
 
 %!test test_nn()
 
-%!test test_nn_cm_2()
\ No newline at end of file
+%!test test_nn_cm_2()
diff --git a/ex3/test_lrCostFunction.m b/ex3/test_lrCostFunction.m
new file mode 100644
index 0000000..d0d0588
--- /dev/null
+++ b/ex3/test_lrCostFunction.m
@@ -0,0 +1,93 @@
+function test_lrCostFunction()
+    tolerance = 1e-3;
+
+    % Test 1, w/ lambda = 3
+    % From TA Colin Beckingham, at https://class.coursera.org/ml-005/forum/thread?thread_id=943#post-4593
+    theta = [2; 1; -9];
+    X = magic(3);
+    y = [1; -0.2; 3];
+    lambda = 3;
+
+    J_exp = 50.971;
+    grad_exp = [-6.1081 -7.1932 -12.3540]'; % gradients are column vectors
+
+    [J, grad] = lrCostFunction(theta, X, y, lambda);
+    assert(J, J_exp, tolerance);
+    assert(grad, grad_exp, tolerance);
+
+    % Test 2, w/ lambda = 0.1
+    % From TA Colin Beckingham, at https://class.coursera.org/ml-005/forum/thread?thread_id=943#post-4666
+    theta = [2; 1; -9];
+    X = magic(3);
+    y = [1; -0.2; 3];
+    lambda = 0.1;
+
+    J_exp = 11.338;
+    grad_exp = [-6.1081 -8.1598 -3.6540]'; % gradients are column vectors
+
+    [J, grad] = lrCostFunction(theta, X, y, lambda);
+    assert(J, J_exp, tolerance);
+    assert(grad, grad_exp, tolerance);
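+
+    % For reference: a minimal sketch of what these tests assume
+    % lrCostFunction computes (the vectorized, regularized logistic
+    % regression cost, with theta(1), the bias weight, excluded from
+    % the regularization term):
+    %
+    %   m = length(y);
+    %   h = sigmoid(X * theta);
+    %   J = (-y' * log(h) - (1 - y)' * log(1 - h)) / m ...
+    %       + lambda / (2 * m) * sum(theta(2:end) .^ 2);
+    %   grad = (X' * (h - y)) / m;
+    %   grad(2:end) = grad(2:end) + lambda / m * theta(2:end);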
+
+    % Basically the same test as test_costfunction_minimal, but with varying
+    % values of lambda; theta cannot be all zeros (there would be nothing to
+    % penalize), and the x0 (ones) column has been added.
+    epsilon = 1e-4;
+
+    X = [1 1 1;
+         1 2 2]; % minimal 2D feature matrix
+    y = [0 1]'; % at least one negative and one positive example
+    theta = [0 1 1]'; % need non-zero values for theta
+
+    J_exp = [1.0725 1.5725 2.0725];
+    grad_exp = [
+        0.43141 0.43141 0.43141;
+        0.42241 0.92241 1.42241;
+        0.42241 0.92241 1.42241 ];
+
+    % lambda = 0 --> no regularization
+    [J0, grad0] = lrCostFunction(theta, X, y, 0.0);
+    [J1, grad1] = lrCostFunction(theta, X, y, 1.0);
+    [J2, grad2] = lrCostFunction(theta, X, y, 2.0);
+    assert([J0 J1 J2], J_exp, epsilon);
+    assert([grad0 grad1 grad2], grad_exp, epsilon);
+
+    % non-zero theta(1); validates that the code isn't
+    % including theta(1) in the regularization term
+    theta = [1 1 1]';
+
+    J_exp = [1.5277 2.0277 2.5277];
+    grad_exp = [
+        0.47294 0.47294 0.47294;
+        0.46959 0.96959 1.46959;
+        0.46959 0.96959 1.46959 ];
+
+    [J0, grad0] = lrCostFunction(theta, X, y, 0.0);
+    [J1, grad1] = lrCostFunction(theta, X, y, 1.0);
+    [J2, grad2] = lrCostFunction(theta, X, y, 2.0);
+    assert([J0 J1 J2], J_exp, epsilon);
+    assert([grad0 grad1 grad2], grad_exp, epsilon);
+
+    % https://class.coursera.org/ml-005/forum/thread?thread_id=1425
+    [J grad] = lrCostFunction([0.6 0.3 0.2]', [1.1 0.1 1; 1.5 1.3 2], [1 1]', 2);
+    assert(J, 0.32170, epsilon);
+    assert(grad, [-0.27694 0.18419 -0.10133]', epsilon);
+
+    [J grad] = lrCostFunction([0.6 0.3 0.2]', [1.1 0.1 1; 1.5 1.3 2], [1 1]', 0);
+    assert(J, 0.25670, epsilon);
+    assert(grad, [-0.27694 -0.11581 -0.30133]', epsilon);
+
+    theta = [0.6 ; 0.3 ; 0.2];
+    X = [1.1 0.1 1.0 ; 1.5 1.3 2.0];
+    y = [0 ; 0];
+    [J grad] = lrCostFunction(theta, X, y, 0.1);
+    assert(J, 1.5499, epsilon);
+    assert(grad, [1.02306 0.59919 1.20867]', epsilon);
+
+    [c g] = lrCostFunction([2.3 1.9 0.5]', [0.1 3.1 1.2; 1.8 0.9 0.7; 3.2 -1.4 6.7], [1 2 0]', 0.3);
+    assert(c, 0.81085, epsilon);
+    assert(g, [0.46507 -0.57837 2.04833]', epsilon);
+
+endfunction
diff --git a/ex3/test_oneVsAll_converged.m b/ex3/test_oneVsAll_converged.m
new file mode 100644
index 0000000..063524f
--- /dev/null
+++ b/ex3/test_oneVsAll_converged.m
@@ -0,0 +1,16 @@
+function test_oneVsAll_converged()
+% these tests give the same results with either fmincg or fminunc
+
+    epsilon = 1e-3;
+
+    X = [0 1 2 2 1 0 3 4 5 5 4 3]';
+    y = [1 1 1 1 1 1 2 2 2 2 2 2]';
+    num_labels = 2;
+    lambda = 0.001;
+    all_theta = oneVsAll(X, y, num_labels, lambda);
+    assert(all_theta, [26.226 -10.490; -26.226 10.490], epsilon);
+
+    all_theta = oneVsAll([0 1 2 2 1 0 1 3 4 5 5 4 3]', [1 1 1 1 1 1 2 2 2 2 2 2 2]', 2, 1);
+    assert(all_theta, [2.3840 -1.1426; -2.3840 1.1426], epsilon);
+
+endfunction
diff --git a/ex3/test_predict.m b/ex3/test_predict.m
new file mode 100644
index 0000000..fea6547
--- /dev/null
+++ b/ex3/test_predict.m
@@ -0,0 +1,19 @@
+function test_predict()
+
+    % c = predict(Theta1, Theta2, X);
+    c = predict(ones(3, 8), ones(1, 4), ones(9, 7));
+    assert(c, ones(9, 1));
+
+    c = predict(ones(3, 8), ones(1, 4), magic(7));
+    assert(c, ones(7, 1));
+
+    c = predict(-1/170*ones(3, 8), -1/3*ones(1, 4), magic(7));
+    assert(c, ones(7, 1));
+
+    c = predict(-1/170*ones(3, 8), -1/3*ones(2, 4), magic(7));
+    assert(c, ones(7, 1));
+
+    c = predict(-1/170*ones(3, 8), -1/3*ones(3, 4), magic(7));
+    assert(c, ones(7, 1));
+
+endfunction
diff --git a/ex3/test_predictOneVsAll.m b/ex3/test_predictOneVsAll.m
index f0b3151..226e647 100644
--- a/ex3/test_predictOneVsAll.m
+++ b/ex3/test_predictOneVsAll.m
@@ -1,6 +1,6 @@
 function test_predictOneVsAll ()
     epsilon = 1e-3;
-    
+
     % learning three classes
     %
     % x < 1.5 => 1
@@ -10,15 +10,29 @@ function test_predictOneVsAll ()
     X = [0 1 2 3 4 5]';
     y = [1 1 2 2 3 3]'; % direct classification
     num_labels = 3;
-    
+
     all_theta = oneVsAll(X, y, num_labels, 0);
     assert(size(all_theta), [3 2]);
-    
+
     yy = predictOneVsAll(all_theta, X);
     assert(yy, y);
-    
+
     % predict ones we haven't seen yet (should match the original formula)
     X = [-10 2.5 10.0]';
     assert(predictOneVsAll(all_theta, X), [1 2 3]');
-    
+
+
+    % https://class.coursera.org/ml-005/forum/thread?thread_id=1425
+    all_theta = oneVsAll([0.1 3.1 1.2; 1.8 0.9 0.7; 3.2 -1.4 6.7], [1 2 0]', 3, 0.3);
+    p = predictOneVsAll(all_theta, [0.1 3.1 1.2; 1.8 0.9 0.7; 3.2 -1.4 6.7]);
+    assert(p, [ 1 2 2 ]');
+
+    all_theta = oneVsAll([0 1 2 2 1 0 1 3 4 5 5 4 3]', [1 1 1 1 1 1 2 2 2 2 2 2 2]', 2, 1);
+    p = predictOneVsAll(all_theta, [0 1 2 2 1 0 1 3 4 5 5 4 3]');
+    assert(p, [1 1 1 1 1 1 1 2 2 2 2 2 2]');
+
+    all_theta = oneVsAll([0 1 2 2 1 0 3 4 5 5 4 3]', [1 1 1 1 1 1 2 2 2 2 2 2]', 2, 0.001);
+    p = predictOneVsAll(all_theta, [0 1 2 2 1 0 3 4 5 5 4 3]');
+    assert(p, [1 1 1 1 1 1 2 2 2 2 2 2]');
+
 endfunction
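+
+% For reference: a minimal sketch of the decoding step these assertions
+% assume, where each row of all_theta holds one class's parameters and a
+% bias column of ones is prepended to X (sigmoid is monotonic, so taking
+% the max over the raw scores would pick the same class):
+%   [~, p] = max(sigmoid([ones(rows(X), 1) X] * all_theta'), [], 2);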