diff --git a/ex4/ex4.m b/ex4/ex4.m
index 7a86d86..c5fcc9e 100644
--- a/ex4/ex4.m
+++ b/ex4/ex4.m
@@ -183,10 +183,10 @@ fprintf('\nTraining Neural Network... \n')
 
 %  After you have completed the assignment, change the MaxIter to a larger
 %  value to see how more training helps.
-options = optimset('MaxIter', 50);
+options = optimset('MaxIter', 100);  % iteration limit handed to the optimizer via the options struct
 
 %  You should also try different values of lambda
-lambda = 1;
+lambda = 100;  % NOTE(review): 100x the previous value of 1; this scales the weight penalty in nnCostFunction -- confirm it is intended and not a leftover experiment
 
 % Create "short hand" for the cost function to be minimized
 costFunction = @(p) nnCostFunction(p, ...
diff --git a/ex4/nnCostFunction.m b/ex4/nnCostFunction.m
index 91da23a..de06a9e 100644
--- a/ex4/nnCostFunction.m
+++ b/ex4/nnCostFunction.m
@@ -39,6 +39,21 @@ Theta2_grad = zeros(size(Theta2));
 %         cost function computation is correct by verifying the cost
 %         computed in ex4.m
 %
+
+yy = zeros(m, num_labels);  % one-hot targets, filled in below
+for i = 1:m
+  yy(i, y(i)) = 1;  % row i gets a single 1 in the column given by its label
+end
+
+a1 = [ones(m, 1), X];  % prepend the bias column to the inputs
+z2 = a1*Theta1';
+a2 = [ones(m, 1), sigmoid(z2)];  % hidden activations with bias column
+z3 = a2*Theta2';
+a3 = sigmoid(z3);
+hx = a3;  % network output, used as the hypothesis in the cost
+J = sum(sum(-yy.*log(hx) - (1 - yy).*log(1 - hx)))/m;  % cross-entropy averaged over the m examples
+J = J + lambda*(sum(sum(Theta1(:, 2:end).^2)) + sum(sum(Theta2(:, 2:end).^2)))/(2*m);  % L2 penalty, first (bias) columns excluded
+
 % Part 2: Implement the backpropagation algorithm to compute the gradients
 %         Theta1_grad and Theta2_grad. You should return the partial derivatives of
 %         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
@@ -54,6 +69,15 @@ Theta2_grad = zeros(size(Theta2));
 %               over the training examples if you are implementing it for the 
 %               first time.
 %
+
+
+delta3 = a3 - yy;  % output-layer error: activation minus one-hot target
+% Hidden-layer error; Theta2's first (bias) column is excluded because z2 has no entry for the bias unit.
+delta2 = (delta3*Theta2(:, 2:end)).*sigmoidGradient(z2);
+
+Theta2_grad = Theta2_grad + (delta3'*a2)/m;
+Theta1_grad = Theta1_grad + (delta2'*a1)/m;
+
 % Part 3: Implement regularization with the cost function and gradients.
 %
 %         Hint: You can implement this around the code for
@@ -62,25 +86,11 @@ Theta2_grad = zeros(size(Theta2));
 %               and Theta2_grad from Part 2.
 %
 
+Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + lambda*Theta2(:, 2:end)/m;  % first (bias) column is left unregularized
+Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + lambda*Theta1(:, 2:end)/m;  % same treatment for the first layer
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-% -------------------------------------------------------------
+% ----------------------------------------------------------
 
 % =========================================================================
 
diff --git a/ex4/randInitializeWeights.m b/ex4/randInitializeWeights.m
index ce1234a..09be53a 100644
--- a/ex4/randInitializeWeights.m
+++ b/ex4/randInitializeWeights.m
@@ -19,11 +19,13 @@ W = zeros(L_out, 1 + L_in);
 % Note: The first column of W corresponds to the parameters for the bias unit
 %
 
+epsilon = sqrt(6)/sqrt(L_in+L_out);  % half-width of the init range, shrinking as the layer's input + output unit count grows
+W = rand(L_out, L_in+1)*2*epsilon - epsilon;  % rescale rand's (0, 1) output to (-epsilon, epsilon); the extra column is for the bias unit
 
-
-
-
-
+%disp('epsilon : ');
+%disp(epsilon);
+%disp('first W : ');
+%disp(W);
 
 
 
diff --git a/ex4/sigmoidGradient.m b/ex4/sigmoidGradient.m
index 37ec0db..a7d85ba 100644
--- a/ex4/sigmoidGradient.m
+++ b/ex4/sigmoidGradient.m
@@ -15,7 +15,7 @@ g = zeros(size(z));
 
 
 
-
+s = sigmoid(z); g = s.*(1 - s);  % element-wise sigmoid derivative; sigmoid is evaluated once and reused
 
 
 
diff --git a/ex4/token.mat b/ex4/token.mat
new file mode 100644
index 0000000..275f4f4
--- /dev/null
+++ b/ex4/token.mat
@@ -0,0 +1,15 @@
+# Created by Octave 4.2.1, Tue May 23 04:07:14 2017 GMT <unknown@unknown>
+# name: email
+# type: sq_string
+# elements: 1
+# length: 16
+mjjo53@gmail.com
+
+
+# name: token
+# type: sq_string
+# elements: 1
+# length: 16
+0XLX4A6W04BJycYT
+
+