From 442944ca19229d18b01b4e658a8646144a519b93 Mon Sep 17 00:00:00 2001
From: mjjo <myeongjin_jo@solomonsc.com>
Date: Fri, 17 Aug 2018 17:32:18 +0900
Subject: [PATCH] Rework linear_regression3 loop; add linear_regression4 and Loader/Stepper helpers

---
 linear_regression3.py | 47 +++++++++++++++++++------------
 linear_regression4.py | 52 ++++++++++++++++++++++++++++++++++
 regressions.pyproj    |  5 +++-
 utility.py            | 65 +++++++++++++++++++++++++++++++++++++------
 4 files changed, 141 insertions(+), 28 deletions(-)
 create mode 100644 linear_regression4.py
diff --git a/linear_regression3.py b/linear_regression3.py
index 35b7a4a..e5ff676 100644
--- a/linear_regression3.py
+++ b/linear_regression3.py
@@ -4,38 +4,62 @@ import pandas as pd
 import utility
 
-df = utility.load_data()
-y_data = df.values[:, 0]
+# load_data() is moved onto utility.Loader by this patch, so call it there.
+df = utility.Loader.load_data()
 x_data = df.values[:, 1:]
+y_data = df.values[:, 0]
+# w is float32 and tf.matmul needs matching dtypes; df.values is upcast to
+# float64 when the frame mixes float32 columns with the integer ones column.
+x_data = x_data.astype(np.float32)
+y_data = y_data.astype(np.float32)
 m, n = x_data.shape
 
 
 import tensorflow as tf
-y = tf.Variable(y_data)
 x = tf.Variable(x_data)
+y = tf.Variable(y_data)
 w = tf.Variable(tf.zeros((n, 1)))
 
-a = 0.1
+a = 0.001
 
-#h = tf.matmul(x, w)
-#cost = tf.reduce_mean(tf.square(h - y))
-#optimizer = tf.train.GradientDescentOptimizer(a)
-#train = optimizer.minimize(cost)
+step_no = 0
+sum_ = 0
 
-a = tf.Variable([[1], [2], [3]])
-b = tf.Variable([1,2,3])
+print_gap = 1.0e-3
+break_gap = 1.0e-10
+
+# Build the graph once, outside the loop: creating ops per iteration grows the
+# graph without bound, and rebinding w to a tensor never updates the variable.
+h = tf.matmul(x, w)
+diff = h-tf.reshape(y, h.shape)
+cost = a*tf.reduce_mean(diff**2, axis=0)/2
+gradient = a*tf.matmul(diff, x, True)
+# assign_sub updates the variable itself and yields the post-update w.
+new_w = tf.assign_sub(w, tf.reshape(gradient, w.shape))
+fetches = (h, cost, gradient, tf.reshape(new_w, [-1]))
 
 with tf.Session() as sess:
 	sess.run(tf.global_variables_initializer())
 
-	#k = sess.run(a*b)
-	#kk = sess.run(b*a)
-	##kkk = sess.run(tf.matmul(a, b))
-	#kkkk = sess.run(tf.matmul(b, a))
+	while True:
+		# h, cost and gradient feed the update, so they predate it; values[3] is w after it.
+		values = sess.run(fetches)
 
-	h = tf.matmul(x, w)
-	cost = tf.reduce_mean((h-y)**2)/2
-	gradient = (h-y)*x
+		# Reduce over every component: the builtin max()/sum() iterate the rows of
+		# the (1, n) gradient, so the old [0] index only ever looked at element [0][0].
+		grad_abs = np.abs(values[2])
+		max__ = grad_abs.max()
+		sum_ += grad_abs.sum()
+		if sum_ >= print_gap:
+			print('{}: {} ({})'.format(step_no, values[1], values[3]))
+			sum_ -= print_gap
+
+		# Also stop on inf/NaN: a diverged gradient never compares below break_gap.
+		if max__ < break_gap or not np.isfinite(max__):
+			break
+
+		step_no += 1
+
+	print('{}: {} ({})'.format(step_no, values[1], values[3]))
+	print('{}'.format(np.c_[values[0], y_data]))
 
-	values = sess.run((h, cost, gradient))
 
-	print(values)
diff --git a/linear_regression4.py b/linear_regression4.py
new file mode 100644
index 0000000..06a8fbd
--- /dev/null
+++ b/linear_regression4.py
@@ -0,0 +1,51 @@
+# Fit y = X.w by minimizing the mean squared error with tf.train.GradientDescentOptimizer;
+# utility.Stepper decides when to print progress and when to stop.
+import numpy as np
+import pandas as pd
+
+import utility
+
+df = utility.Loader.load_data()
+x_data = df.values[:, 1:]
+y_data = df.values[:, 0]
+m, n = x_data.shape
+
+
+import tensorflow as tf
+x = tf.placeholder(tf.float32)
+y = tf.placeholder(tf.float32)
+feed = {x: x_data, y: y_data.reshape((m, 1))}
+
+w = tf.Variable(tf.zeros((n, 1)))
+
+a = tf.Variable(0.0002)
+h = tf.matmul(x, w)
+cost = tf.reduce_mean(tf.square(h-y))
+opt = tf.train.GradientDescentOptimizer(a)
+train = opt.minimize(cost)
+
+step_no = 0
+
+stepper = utility.Stepper()
+
+with tf.Session() as sess:
+	sess.run(tf.global_variables_initializer())
+
+	while True:
+		sess.run(train, feed_dict=feed)
+
+		# Evaluate the post-update cost once and reuse it for the stepper and the log line.
+		cost_v = sess.run(cost, feed_dict=feed)
+		stepper.add_step(cost_v)
+		if stepper.is_print_turn():
+			print('{}: {}'.format(step_no, cost_v))
+
+		if stepper.is_break_turn():
+			break
+
+		step_no += 1
+
+	h_v = sess.run(h, feed_dict=feed)
+
+	print('{}: {}'.format(step_no, cost_v))
+	print('{}'.format(np.c_[h_v, y_data]))
diff --git a/regressions.pyproj b/regressions.pyproj
index fd53e5f..b50f2f9 100644
--- a/regressions.pyproj
+++ b/regressions.pyproj
@@ -5,7 +5,7 @@
     <SchemaVersion>2.0</SchemaVersion>
     <ProjectGuid>db253b3a-f559-48b8-9804-846029a6ebef</ProjectGuid>
     <ProjectHome>.</ProjectHome>
-    <StartupFile>linear_regression3.py</StartupFile>
+    <StartupFile>linear_regression4.py</StartupFile>
     <SearchPath>
     </SearchPath>
     <WorkingDirectory>.</WorkingDirectory>
@@ -32,6 +32,9 @@
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="linear_regression.py" />
+    <Compile Include="linear_regression4.py">
+      <SubType>Code</SubType>
+    </Compile>
     <Compile Include="test.py">
       <SubType>Code</SubType>
     </Compile>
diff --git a/utility.py b/utility.py
index b9bd463..1318cce 100644
--- a/utility.py
+++ b/utility.py
@@ -2,16 +2,108 @@ import numpy as np
 import pandas as pd
 
 
-def load_data():
-	df = pd.read_csv('data/sample.txt', delimiter=',', header=None).astype(np.float32)
+class Loader:
+	"""Groups the data-loading helper used by the regression scripts."""
+
+	@staticmethod
+	def load_data():
+		"""Read data/train.csv and return a float32 DataFrame ordered (y, ones, x).
+
+		The columns are labelled 1, 2, 0: the 'y' column, a constant 1.0 column
+		and the 'x' column, so callers take column 0 of .values as the target.
+		"""
+		#df = pd.read_csv('data/sample.txt', delimiter=',', header=None).astype(np.float32)
 
-	#df = pd.read_csv('data/ex1data1.txt', delimiter=',', header=None).astype(np.float32)
+		#df = pd.read_csv('data/ex1data1.txt', delimiter=',', header=None).astype(np.float32)
 
-	#df = pd.read_csv('data/train.csv', delimiter=',', comment='#').astype(np.float32)
-	#df[0] = df['x']
-	#df[1] = df['y']
+		df = pd.read_csv('data/train.csv', delimiter=',', comment='#').astype(np.float32)
+		df[0] = df['x']
+		df[1] = df['y']
 
-	df[2] = pd.Series([1]*len(df[0]))
-	df = df.reindex(columns=[1, 2, 0])
+		# Keep the ones column float32 as well: an integer column here makes
+		# df.values upcast the whole matrix to float64.
+		df[2] = np.ones(len(df), dtype=np.float32)
+		df = df.reindex(columns=[1, 2, 0])
 
-	return df
+		return df
+
+
+# Keep the old module-level entry point working for callers such as linear_regression3.py.
+load_data = Loader.load_data
+
+
+class Stepper:
+	"""Tracks successive cost values and flags when a training loop should log or stop.
+
+	Call add_step() once per iteration with the current cost, then query
+	is_print_turn() and is_break_turn().
+	"""
+
+	def __init__(self, print_gap=1.0e-2, break_gap=1.0e-9):
+		"""Create a stepper.
+
+		print_gap: accumulated absolute cost change that triggers a print turn.
+		break_gap: absolute cost change below which the loop is told to stop.
+		"""
+		# None until a cost has been recorded, so the first step is not treated as a jump from 0.
+		self._prev_cost = None
+		self._cost = None
+		self._cost_diff = 0
+
+		self._serial_divergence_cnt = 0
+
+		self.accumulator = 0
+
+		self._print_gap = print_gap
+		self._break_gap = break_gap
+		self._b_print_turn = False
+		self._b_break_turn = False
+
+	def add_step(self, step):
+		"""Record the latest cost and recompute the print/break flags for this iteration."""
+		self._b_print_turn = False
+		self._b_break_turn = False
+
+		self._prev_cost = self._cost
+		self._cost = step
+
+		# A NaN/inf cost fails every comparison below, so stop here instead of looping forever.
+		if not np.isfinite(step):
+			self._b_break_turn = True
+			return
+
+		if self._prev_cost is None:
+			# First observation: nothing to compare against yet; just report the starting cost.
+			self._cost_diff = 0
+			self._b_print_turn = True
+			return
+
+		self._cost_diff = self._cost - self._prev_cost
+
+		# Accumulate the magnitude of the change: signed diffs cancel out and do not
+		# measure how far the cost has moved since the last print.
+		self.accumulator += abs(self._cost_diff)
+		if self.accumulator >= self._print_gap:
+			self._b_print_turn = True
+			# Keep only the remainder so one large jump gives one print, not a backlog of them.
+			self.accumulator = self.accumulator % self._print_gap if self._print_gap > 0 else 0
+
+		# Three cost increases in a row are treated as divergence.
+		if self._cost_diff > 0:
+			self._serial_divergence_cnt += 1
+			if self._serial_divergence_cnt >= 3:
+				self._b_break_turn = True
+		else:
+			self._serial_divergence_cnt = 0
+
+		# A change smaller than break_gap is treated as convergence.
+		if abs(self._cost_diff) < self._break_gap:
+			self._b_break_turn = True
+
+	def is_print_turn(self):
+		"""Return True when the last add_step() call asked for a progress print."""
+		return self._b_print_turn
+
+	def is_break_turn(self):
+		"""Return True when the last add_step() call asked the loop to stop."""
+		return self._b_break_turn