no message

This commit is contained in:
mjjo
2018-08-13 17:58:08 +09:00
parent 0ae676dd96
commit e3bf92dab2
3 changed files with 44 additions and 19 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
.vs/ .vs/
__pycache__/

View File

@@ -6,39 +6,63 @@ import pandas as pd
def load_data(): def load_data():
df = pd.read_csv('data/sample.txt', sep=',', header=None) df = pd.read_csv('data/sample.txt', delimiter=',', header=None)
df[2] = pd.Series([1]*len(df[0]))
#df = pd.read_csv('data/ex1data1.txt', delimiter=',', header=None)
#df[2] = pd.Series([1]*len(df[0]))
#df = pd.read_csv('data/train.csv', delimiter=',', comment='#')
#df[0] = df['x']
#df[1] = df['y']
df[2] = pd.Series([1]*len(df[0])) df[2] = pd.Series([1]*len(df[0]))
df = df.reindex(columns=[1, 2, 0]) df = df.reindex(columns=[1, 2, 0])
return df return df
def get_cost(): def feature_scaling(x):
mean = x.mean(0)
pass std = x.std(0)
std[std==0] = 1
def get_gradient(): x = (x-mean)/std
pass return x, mean, std
df = load_data() df = load_data()
y = df.values[:, 0] y = df.values[:, 0]
x = df.values[:, 1:] x = df.values[:, 1:]
m, n = x.shape m, n = x.shape
w = np.zeros(n) w = np.zeros(n)
a = 0.001 a = 1.0e-3
x, mean, std = feature_scaling(x)
sum_ = 0 sum_ = 0
gap = 0.001 gap = 0.001
break_gap = 1.0e-20
trycnt = 0
while True: while True:
h = x@w h = x@w
cost = ((h-y)**2).mean()/2 cost = ((h-y)**2).mean()/2
gradient = a*(h-y)@x gradient = a*(h-y)@x/m
w -= gradient
sum_ += abs(gradient.sum())
if sum_ >= gap:
print('[{}] {} ({})'.format(trycnt, cost, w))
sum_ -= gap
trycnt += 1
if abs(gradient.max()) < break_gap:
break
if not np.isfinite(cost) or not np.isfinite(sum_):
break
print('[{}] {} ({})'.format(trycnt, cost, w))
#print('x: {}'.format(x[:10]))
#print('y: {}'.format(y[:10]))
#print('h: {}'.format(h[:10]))
print(np.c_[x[:10], y[:10], h[:10]])
pass pass

View File

@@ -46,7 +46,7 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -
#data = np.loadtxt('data/ex1data1.txt', delimiter=',') #data = np.loadtxt('data/ex1data1.txt', delimiter=',')
# train exercise # train exercise
#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#') data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
# auto mpg # auto mpg
#df = pd.read_csv('data/auto-mpg.data', sep='\t') #df = pd.read_csv('data/auto-mpg.data', sep='\t')
#df = df.drop(df.loc[df['horsepower'] == '?'].index) #df = df.drop(df.loc[df['horsepower'] == '?'].index)
@@ -57,8 +57,8 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -
#df = df.convert_objects(convert_numeric=True) #df = df.convert_objects(convert_numeric=True)
#data = df.values #data = df.values
df = pd.read_csv('data/mlr01.csv', sep=',') #df = pd.read_csv('data/mlr01.csv', sep=',')
data = df.values #data = df.values
@@ -76,7 +76,7 @@ is_2d = (m == 2)
if is_2d == True: if is_2d == True:
g = graph.Graph() g = graph.Graph()
g.draw_variable(x_init, y) g.draw_variable(x[:, 1], y)
alpha = 0.001 alpha = 0.001
@@ -97,13 +97,13 @@ while True:
print("{} : {}".format(iter, cost)) print("{} : {}".format(iter, cost))
if is_2d == True: if is_2d == True:
g.draw_line(x_init, h) g.draw_line(x[:, 1], h)
iter += 1 iter += 1
if max(abs(gradient)) < 1e-5: if max(abs(gradient)) < 1e-5:
if is_2d: if is_2d:
g.draw_line(x_init, h) g.draw_line(x[:, 1], h)
break break
print('iteration: {}'.format(iter)) print('iteration: {}'.format(iter))