no message

2018-08-13 17:58:08 +09:00
parent 0ae676dd96
commit e3bf92dab2
3 changed files with 44 additions and 19 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 .vs/
+__pycache__/
--- a/regression2.py
+++ b/regression2.py
@@ -6,39 +6,63 @@ import pandas as pd


 def load_data():
-	df = pd.read_csv('data/sample.txt', sep=',', header=None)
+	"""Load data/sample.txt and return a DataFrame with columns ordered [1, 2, 0]; column 2 is all ones."""
+	df = pd.read_csv('data/sample.txt', delimiter=',', header=None)
+	# Alternative datasets (disabled). The column of ones is added once, after this list.
+	#df = pd.read_csv('data/ex1data1.txt', delimiter=',', header=None)
+	#df[2] = pd.Series([1]*len(df[0]))
+
+	#df = pd.read_csv('data/train.csv', delimiter=',', comment='#')
+	#df[0] = df['x']
+	#df[1] = df['y']

 	df[2] = pd.Series([1]*len(df[0]))
 	df = df.reindex(columns=[1, 2, 0])

 	return df

-def get_cost():
-	
-	pass
-
-def get_gradient():
-	pass
-
+def feature_scaling(x):
+	"""Standardize the columns of x; constant columns (std 0) pass through unchanged. Returns (scaled, mean, std)."""
+	mean, std = x.mean(0), x.std(0)
+	mean[std==0] = 0  # do not centre constant columns, otherwise a column of ones turns into zeros
+	std[std==0] = 1  # same columns: avoid dividing by zero
+	return (x-mean)/std, mean, std


 df = load_data()
-
-
 y = df.values[:, 0]
 x = df.values[:, 1:]
 m, n = x.shape
 w = np.zeros(n)
-a = 0.001
+a = 1.0e-3  # step size multiplied into every update of w

+x, mean, std = feature_scaling(x)

 sum_ = 0
 gap = 0.001
-
+break_gap = 1.0e-20  # stop once every component of the update is smaller than this
+trycnt = 0
 while True:
 	h = x@w
 	cost = ((h-y)**2).mean()/2
-	gradient = a*(h-y)@x
+	gradient = a*(h-y)@x/m  # averaged over the m rows and already scaled by a
+	w -= gradient

+	sum_ += abs(gradient.sum())
+	if sum_ >= gap:
+		print('[{}] {} ({})'.format(trycnt, cost, w))
+		sum_ -= gap

+	trycnt += 1
+	if abs(gradient).max() < break_gap:  # largest magnitude, not |largest signed value|
+		break
+
+	if not np.isfinite(cost) or not np.isfinite(sum_):
+		break
+
+print('[{}] {} ({})'.format(trycnt, cost, w))
+#print('x: {}'.format(x[:10]))
+#print('y: {}'.format(y[:10]))
+#print('h: {}'.format(h[:10]))
+print(np.c_[x[:10], y[:10], h[:10]])
 pass
--- a/regressions.py
+++ b/regressions.py
@@ -46,7 +46,7 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -

 #data = np.loadtxt('data/ex1data1.txt', delimiter=',')
 # train excercize
-#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
+data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
 # auto mpg
 #df = pd.read_csv('data/auto-mpg.data', sep='\t')
 #df = df.drop(df.loc[df['horsepower'] == '?'].index)
@@ -57,8 +57,8 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -
 #df = df.convert_objects(convert_numeric=True)
 #data = df.values

-df = pd.read_csv('data/mlr01.csv', sep=',')
-data = df.values
+#df = pd.read_csv('data/mlr01.csv', sep=',')
+#data = df.values



@@ -76,7 +76,7 @@ is_2d = (m == 2)

 if is_2d == True:
 	g = graph.Graph()
-	g.draw_variable(x_init, y)
+	g.draw_variable(x[:, 1], y)

 alpha = 0.001

@@ -97,13 +97,13 @@ while True:
 		print("{} : {}".format(iter, cost))
 		
 		if is_2d == True:
-			g.draw_line(x_init, h)
+			g.draw_line(x[:, 1], h)

 	iter += 1

 	if max(abs(gradient)) < 1e-5:
 		if is_2d:
-			g.draw_line(x_init, h)
+			g.draw_line(x[:, 1], h)
 		break
 print('iteration: {}'.format(iter))