diff --git a/ex3/test_ex3.m b/ex3/test_ex3.m
index 5d18339..acfb6ef 100644
--- a/ex3/test_ex3.m
+++ b/ex3/test_ex3.m
@@ -1,8 +1,17 @@
-% !test test_sanity()
+%!test test_sanity()
-% !test test_oneVsAll()
+%!test test_lrCostFunction()
-% !test test_predictOneVsAll()
+%!test test_oneVsAll_converged()
+
+% Beware! Results of fmincg may vary!
+% You can try fminunc or a larger number of iterations.
+% In times of war, the value of the cosine can reach 3 or even 4 (it's a joke).
+%!test test_oneVsAll()
+
+%!test test_predictOneVsAll()
+
+%!test test_predict()
 
 % These tests use a non-standard predict() which returns [p,k] instead of just [p] so
 % that the costs are returned and can be unit-tested. Calls to p = predict() will still
@@ -11,4 +20,4 @@
 
 %!test test_nn()
 
-%!test test_nn_cm_2()
\ No newline at end of file
+%!test test_nn_cm_2()
diff --git a/ex3/test_lrCostFunction.m b/ex3/test_lrCostFunction.m
new file mode 100644
index 0000000..d0d0588
--- /dev/null
+++ b/ex3/test_lrCostFunction.m
@@ -0,0 +1,93 @@
+function test_lrCostFunction()
+    tolerance = 1e-3;
+
+    % Test 1, w/ lambda = 3
+    % From TA Colin Beckingham, at https://class.coursera.org/ml-005/forum/thread?thread_id=943#post-4593
+    theta = [2; 1; -9];
+    X = magic(3);
+    y = [1; -0.2; 3];
+    lambda = 3;
+
+    J_exp = 50.971;
+    grad_exp = [-6.1081 -7.1932 -12.3540]'; % gradients are column vectors
+
+    [J, grad] = lrCostFunction(theta, X, y, lambda);
+    assert(J, J_exp, tolerance);
+    assert(grad, grad_exp, tolerance);
+
+    % Test 2, w/ lambda = 0.1
+    % From TA Colin Beckingham, at https://class.coursera.org/ml-005/forum/thread?thread_id=943#post-4666
+    theta = [2; 1; -9];
+    X = magic(3);
+    y = [1; -0.2; 3];
+    lambda = 0.1;
+
+    J_exp = 11.338;
+    grad_exp = [-6.1081 -8.1598 -3.6540]'; % gradients are column vectors
+
+    [J, grad] = lrCostFunction(theta, X, y, lambda);
+    assert(J, J_exp, tolerance);
+    assert(grad, grad_exp, tolerance);
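+
+    % For reference: a minimal sketch of what these tests assume
+    % lrCostFunction computes (the vectorized, regularized logistic
+    % regression cost, with theta(1), the bias weight, excluded from
+    % the regularization term):
+    %
+    %   m = length(y);
+    %   h = sigmoid(X * theta);
+    %   J = (-y' * log(h) - (1 - y)' * log(1 - h)) / m ...
+    %       + lambda / (2 * m) * sum(theta(2:end) .^ 2);
+    %   grad = (X' * (h - y)) / m;
+    %   grad(2:end) = grad(2:end) + lambda / m * theta(2:end);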
+
+    % Basically the same test as test_costfunction_minimal, but with varying
+    % values of lambda; theta cannot be all zeros (there would be nothing to
+    % penalize), and the x0 (ones) column has been added.
+    epsilon = 1e-4;
+
+    X = [1 1 1;
+         1 2 2]; % minimal 2D feature matrix
+    y = [0 1]'; % at least one negative and one positive example
+    theta = [0 1 1]'; % need non-zero values for theta
+
+    J_exp = [1.0725 1.5725 2.0725];
+    grad_exp = [
+        0.43141 0.43141 0.43141;
+        0.42241 0.92241 1.42241;
+        0.42241 0.92241 1.42241 ];
+
+    % lambda = 0 --> no regularization
+    [J0, grad0] = lrCostFunction(theta, X, y, 0.0);
+    [J1, grad1] = lrCostFunction(theta, X, y, 1.0);
+    [J2, grad2] = lrCostFunction(theta, X, y, 2.0);
+    assert([J0 J1 J2], J_exp, epsilon);
+    assert([grad0 grad1 grad2], grad_exp, epsilon);
+
+    % non-zero theta(1); validates that the code isn't
+    % including theta(1) in the regularization term
+    theta = [1 1 1]';
+
+    J_exp = [1.5277 2.0277 2.5277];
+    grad_exp = [
+        0.47294 0.47294 0.47294;
+        0.46959 0.96959 1.46959;
+        0.46959 0.96959 1.46959 ];
+
+    [J0, grad0] = lrCostFunction(theta, X, y, 0.0);
+    [J1, grad1] = lrCostFunction(theta, X, y, 1.0);
+    [J2, grad2] = lrCostFunction(theta, X, y, 2.0);
+    assert([J0 J1 J2], J_exp, epsilon);
+    assert([grad0 grad1 grad2], grad_exp, epsilon);
+
+    % https://class.coursera.org/ml-005/forum/thread?thread_id=1425
+    [J grad] = lrCostFunction([0.6 0.3 0.2]', [1.1 0.1 1; 1.5 1.3 2], [1 1]', 2);
+    assert(J, 0.32170, epsilon);
+    assert(grad, [-0.27694 0.18419 -0.10133]', epsilon);
+
+    [J grad] = lrCostFunction([0.6 0.3 0.2]', [1.1 0.1 1; 1.5 1.3 2], [1 1]', 0);
+    assert(J, 0.25670, epsilon);
+    assert(grad, [-0.27694 -0.11581 -0.30133]', epsilon);
+
+    theta = [0.6 ; 0.3 ; 0.2];
+    X = [1.1 0.1 1.0 ; 1.5 1.3 2.0];
+    y = [0 ; 0];
+    [J grad] = lrCostFunction(theta, X, y, 0.1);
+    assert(J, 1.5499, epsilon);
+    assert(grad, [1.02306 0.59919 1.20867]', epsilon);
+
+    [c g] = lrCostFunction([2.3 1.9 0.5]', [0.1 3.1 1.2; 1.8 0.9 0.7; 3.2 -1.4 6.7], [1 2 0]', 0.3);
+    assert(c, 0.81085, epsilon);
+    assert(g, [0.46507 -0.57837 2.04833]', epsilon);
+
+endfunction
diff --git a/ex3/test_oneVsAll_converged.m b/ex3/test_oneVsAll_converged.m
new file mode 100644
index 0000000..063524f
--- /dev/null
+++ b/ex3/test_oneVsAll_converged.m
@@ -0,0 +1,16 @@
+function test_oneVsAll_converged()
+% these tests give the same results with either fmincg or fminunc
+
+    epsilon = 1e-3;
+
+    X = [0 1 2 2 1 0 3 4 5 5 4 3]';
+    y = [1 1 1 1 1 1 2 2 2 2 2 2]';
+    num_labels = 2;
+    lambda = 0.001;
+    all_theta = oneVsAll(X, y, num_labels, lambda);
+    assert(all_theta, [26.226 -10.490; -26.226 10.490], epsilon);
+
+    all_theta = oneVsAll([0 1 2 2 1 0 1 3 4 5 5 4 3]', [1 1 1 1 1 1 2 2 2 2 2 2 2]', 2, 1);
+    assert(all_theta, [2.3840 -1.1426; -2.3840 1.1426], epsilon);
+
+endfunction
diff --git a/ex3/test_predict.m b/ex3/test_predict.m
new file mode 100644
index 0000000..fea6547
--- /dev/null
+++ b/ex3/test_predict.m
@@ -0,0 +1,19 @@
+function test_predict()
+
+    % c = predict(Theta1, Theta2, X);
+    c = predict(ones(3, 8), ones(1, 4), ones(9, 7));
+    assert(c, ones(9, 1));
+
+    c = predict(ones(3, 8), ones(1, 4), magic(7));
+    assert(c, ones(7, 1));
+
+    c = predict(-1/170*ones(3, 8), -1/3*ones(1, 4), magic(7));
+    assert(c, ones(7, 1));
+
+    c = predict(-1/170*ones(3, 8), -1/3*ones(2, 4), magic(7));
+    assert(c, ones(7, 1));
+
+    c = predict(-1/170*ones(3, 8), -1/3*ones(3, 4), magic(7));
+    assert(c, ones(7, 1));
+
+endfunction
diff --git a/ex3/test_predictOneVsAll.m b/ex3/test_predictOneVsAll.m
index f0b3151..226e647 100644
--- a/ex3/test_predictOneVsAll.m
+++ b/ex3/test_predictOneVsAll.m
@@ -1,6 +1,6 @@
 function test_predictOneVsAll ()
     epsilon = 1e-3;
-    
+
     % learning three classes
     %
     % x < 1.5 => 1
@@ -10,15 +10,29 @@ function test_predictOneVsAll ()
     X = [0 1 2 3 4 5]';
     y = [1 1 2 2 3 3]'; % direct classification
     num_labels = 3;
-    
+
     all_theta = oneVsAll(X, y, num_labels, 0);
     assert(size(all_theta), [3 2]);
-    
+
     yy = predictOneVsAll(all_theta, X);
     assert(yy, y);
-    
+
     % predict ones we haven't seen yet (should match the original formula)
     X = [-10 2.5 10.0]';
     assert(predictOneVsAll(all_theta, X), [1 2 3]');
-    
+
+
+    % https://class.coursera.org/ml-005/forum/thread?thread_id=1425
+    all_theta = oneVsAll([0.1 3.1 1.2; 1.8 0.9 0.7; 3.2 -1.4 6.7], [1 2 0]', 3, 0.3);
+    p = predictOneVsAll(all_theta, [0.1 3.1 1.2; 1.8 0.9 0.7; 3.2 -1.4 6.7]);
+    assert(p, [ 1 2 2 ]');
+
+    all_theta = oneVsAll([0 1 2 2 1 0 1 3 4 5 5 4 3]', [1 1 1 1 1 1 2 2 2 2 2 2 2]', 2, 1);
+    p = predictOneVsAll(all_theta, [0 1 2 2 1 0 1 3 4 5 5 4 3]');
+    assert(p, [1 1 1 1 1 1 1 2 2 2 2 2 2]');
+
+    all_theta = oneVsAll([0 1 2 2 1 0 3 4 5 5 4 3]', [1 1 1 1 1 1 2 2 2 2 2 2]', 2, 0.001);
+    p = predictOneVsAll(all_theta, [0 1 2 2 1 0 3 4 5 5 4 3]');
+    assert(p, [1 1 1 1 1 1 2 2 2 2 2 2]');
+
 endfunction
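+
+% For reference: a minimal sketch of the decoding step these assertions
+% assume, where each row of all_theta holds one class's parameters and a
+% bias column of ones is prepended to X (sigmoid is monotonic, so taking
+% the max over the raw scores would pick the same class):
+%   [~, p] = max(sigmoid([ones(rows(X), 1) X] * all_theta'), [], 2);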