diff --git a/ex4/ex4.m b/ex4/ex4.m
index 7a86d86..c5fcc9e 100644
--- a/ex4/ex4.m
+++ b/ex4/ex4.m
@@ -183,10 +183,10 @@ fprintf('\nTraining Neural Network... \n')
 
 % After you have completed the assignment, change the MaxIter to a larger
 % value to see how more training helps.
-options = optimset('MaxIter', 50);
+options = optimset('MaxIter', 100);
 
 % You should also try different values of lambda
-lambda = 1;
+lambda = 100;
 
 % Create "short hand" for the cost function to be minimized
 costFunction = @(p) nnCostFunction(p, ...
diff --git a/ex4/nnCostFunction.m b/ex4/nnCostFunction.m
index 91da23a..de06a9e 100644
--- a/ex4/nnCostFunction.m
+++ b/ex4/nnCostFunction.m
@@ -39,6 +39,21 @@ Theta2_grad = zeros(size(Theta2));
 % cost function computation is correct by verifying the cost
 % computed in ex4.m
 %
+
+yy = zeros(m, num_labels);
+for i = 1:m
+  yy(i, y(i)) = 1;
+endfor
+
+a1 = [ones(m, 1), X];
+z2 = a1*Theta1';
+a2 = [ones(m, 1), sigmoid(z2)];
+z3 = a2*Theta2';
+a3 = hx = sigmoid(z3);
+J = sum(sum(-yy.*log(hx)-(1.-yy).*log(1.-hx)))/m;
+J += lambda*(sum(sum(Theta1(:, 2:end).^2)) + sum(sum(Theta2(:, 2:end).^2)))/(2*m);
+
+
 % Part 2: Implement the backpropagation algorithm to compute the gradients
 % Theta1_grad and Theta2_grad. You should return the partial derivatives of
 % the cost function with respect to Theta1 and Theta2 in Theta1_grad and
@@ -54,6 +69,15 @@ Theta2_grad = zeros(size(Theta2));
 % over the training examples if you are implementing it for the
 % first time.
 %
+
+
+delta3 = a3 - yy;
+delta2 = delta3*Theta2.*[ones(m, 1), sigmoidGradient(z2)];
+delta2 = delta2(:, 2:end);
+
+Theta2_grad += (delta3'*a2)/m;
+Theta1_grad += (delta2'*a1)/m;
+
 % Part 3: Implement regularization with the cost function and gradients.
 %
 % Hint: You can implement this around the code for
@@ -62,25 +86,11 @@ Theta2_grad = zeros(size(Theta2));
 % and Theta2_grad from Part 2.
 %
 
+Theta2_grad(:, 2:end) += lambda*Theta2(:, 2:end)/m;
+Theta1_grad(:, 2:end) += lambda*Theta1(:, 2:end)/m;
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-% -------------------------------------------------------------
+% ----------------------------------------------------------
 
 % =========================================================================
 
diff --git a/ex4/randInitializeWeights.m b/ex4/randInitializeWeights.m
index ce1234a..09be53a 100644
--- a/ex4/randInitializeWeights.m
+++ b/ex4/randInitializeWeights.m
@@ -19,11 +19,13 @@ W = zeros(L_out, 1 + L_in);
 % Note: The first column of W corresponds to the parameters for the bias unit
 %
 
+epsilon = sqrt(6)/sqrt(L_in+L_out);
+W = rand(L_out, L_in+1)*2*epsilon - epsilon;
 
-
-
-
-
+%disp('epsilon : ');
+%disp(epsilon);
+%disp('first W : ');
+%disp(W);
 
 
 
diff --git a/ex4/sigmoidGradient.m b/ex4/sigmoidGradient.m
index 37ec0db..a7d85ba 100644
--- a/ex4/sigmoidGradient.m
+++ b/ex4/sigmoidGradient.m
@@ -15,7 +15,7 @@ g = zeros(size(z));
 
 
 
-
+g = sigmoid(z).*(1.-sigmoid(z));
 
 
 
diff --git a/ex4/token.mat b/ex4/token.mat
new file mode 100644
index 0000000..275f4f4
--- /dev/null
+++ b/ex4/token.mat
@@ -0,0 +1,15 @@
+# Created by Octave 4.2.1, Tue May 23 04:07:14 2017 GMT
+# name: email
+# type: sq_string
+# elements: 1
+# length: 16
+mjjo53@gmail.com
+
+
+# name: token
+# type: sq_string
+# elements: 1
+# length: 16
+0XLX4A6W04BJycYT
+
+