진행 중

This commit is contained in:
mjjo
2018-08-16 18:16:27 +09:00
parent 6bec719ce3
commit ff4406e8ff
5 changed files with 67 additions and 3 deletions

113
linear_regression.py Normal file
View File

@@ -0,0 +1,113 @@
import numpy as np
import graph
import pandas as pd
#import tensorflow as tf
def normalize_feature(x: np.ndarray) -> tuple:
    """Standardize each column of *x* to zero mean and unit variance.

    Parameters
    ----------
    x : 2-D array of shape (n_samples, n_features).

    Returns
    -------
    (x_norm, mu, std) : the normalized data plus the per-column mean and
        standard deviation that were used, so new samples can be scaled
        the same way.  (The original annotated these as ``float``, but
        they are per-column arrays; ``np.array`` is also a factory
        function, not a type — annotations corrected to ``np.ndarray``.)
    """
    mu = x.mean(0)
    std = x.std(0)
    # Guard constant columns: a zero std would cause division by zero,
    # so treat them as std 1 (their normalized value becomes 0).
    std[std == 0] = 1
    x_norm = (x - mu) / std
    return x_norm, mu, std
def add_column(x: np.ndarray) -> np.ndarray:
    """Prepend a bias column of ones: shape (n, m) -> (n, m + 1).

    The original reshaped ``x`` to ``(n, m)`` first, but ``n, m`` were
    read from ``x.shape`` itself, so the reshape was a no-op — removed.
    """
    n = x.shape[0]
    return np.c_[np.ones((n, 1)), x]
def split_data(x, y):
    """Randomly split (x, y) into train/test with a 20% test fraction.

    Returns (train_x, test_x, train_y, test_y).

    Bug fix: the original used ``np.random.choice(m, test_size)``, which
    samples WITH replacement, so duplicate indices could make the test
    set smaller than 20% while ``np.delete`` removed fewer train rows
    than intended.  ``replace=False`` guarantees distinct indices.
    """
    m, n = x.shape
    test_size = int(m * 0.2)
    test_indices = np.random.choice(m, test_size, replace=False)
    train_x = np.delete(x, test_indices, axis=0)
    test_x = x[test_indices]
    train_y = np.delete(y, test_indices, axis=0)
    test_y = y[test_indices]
    return train_x, test_x, train_y, test_y
def get_regularization_term(w: np.ndarray, wlambda: float) -> float:
    """L2 penalty for the cost function: lambda * sum(w[1:]**2) / (2*len(w)).

    Consistency fix: ``get_derived_regularization_term`` zeroes the bias
    gradient (index 0), i.e. the bias is not regularized, but the
    original cost here still penalized ``w[0]``.  The bias weight is now
    excluded so cost and gradient describe the same objective.

    NOTE(review): dividing by ``len(w)`` (weight count) rather than the
    sample count is unusual but kept, as the gradient term uses the same
    convention.
    """
    m = len(w)
    return wlambda * (w[1:] ** 2).sum() / (2 * m)
def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -> np.array:
    """Gradient of the L2 penalty, pre-scaled by the learning rate *alpha*.

    The bias component (index 0) is not regularized, so its entry is
    zero; every other entry is ``alpha * wlambda * w_j / len(w)``.
    """
    # Scale the whole vector at once (this allocates a fresh array, so
    # the caller's w is never mutated), then blank out the bias slot.
    term = (alpha * wlambda / len(w)) * w
    term[0] = 0
    return term
# ---------------------------------------------------------------------------
# Driver script: load a dataset, fit linear regression by batch gradient
# descent with L2 regularization, then report predictions on the test split.
# ---------------------------------------------------------------------------

# --- Data loading ----------------------------------------------------------
# Inline toy dataset (y = 3x + 1) kept for quick experiments:
#data = np.array([
#    [1, 3+1],
#    [2, 6+1],
#    [3, 9+1],
#    [4, 12+1],
#    [5, 15+1],
#    ])
data = np.loadtxt('data/sample.txt', delimiter=',')
#data = np.loadtxt('data/ex1data1.txt', delimiter=',')
# train exercise
#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
# auto mpg
#df = pd.read_csv('data/auto-mpg.data', sep='\t')
#df = df.drop(df.loc[df['horsepower'] == '?'].index)
#df = df.convert_objects(convert_numeric=True)
#data = df.values
# Airfoil Self-Noise Data Set
#df = pd.read_csv('data/airfoil_self_noise.dat', sep='\t')
#df = df.convert_objects(convert_numeric=True)
#data = df.values
#df = pd.read_csv('data/mlr01.csv', sep=',')
#data = df.values

# --- Preprocessing ---------------------------------------------------------
x_init = data[:, 0:-1]   # every column but the last is a feature
y_init = data[:, -1]     # last column is the target
x, mu, std = normalize_feature(x_init)
x = add_column(x)        # prepend the bias column of ones
x, test_x, y, test_y = split_data(x, y_init)
n, m = x.shape
w = np.zeros(m)

# With a single feature (bias + one column) the fit can be plotted live.
is_2d = (m == 2)
if is_2d:
    g = graph.Graph()
    g.draw_variable(x[:, 1], y)

# --- Gradient descent ------------------------------------------------------
alpha = 0.001        # learning rate
iteration = 0        # renamed from `iter`, which shadowed the builtin
sum_gradient = 0     # accumulated |update|, used only to throttle output
display_gap = 0.5
wlambda = 0.001      # L2 regularization strength

while True:
    h = x @ w
    cost = ((h - y) ** 2).mean() + get_regularization_term(w, wlambda)
    # NOTE: "gradient" here is already scaled by alpha — it is the full
    # update step, not the raw derivative.
    gradient = alpha * ((h - y) * x.T).mean(1) + get_derived_regularization_term(w, wlambda, alpha)
    w -= gradient
    # Only print/plot after display_gap worth of movement has accumulated,
    # to avoid flooding the console on small steps.
    sum_gradient += abs(gradient.sum())
    if sum_gradient >= display_gap:
        sum_gradient -= display_gap
        print("{} : {}".format(iteration, cost))
        if is_2d:
            g.draw_line(x[:, 1], h)
    iteration += 1
    # Converged once every component of the update step is tiny.
    if max(abs(gradient)) < 1e-5:
        if is_2d:
            g.draw_line(x[:, 1], h)
        break

print('iteration: {}'.format(iteration))

# --- Evaluation on the held-out split --------------------------------------
for i in range(len(test_x)):
    h = test_x[i] @ w
    print('h: {:.2f} / y: {}'.format(h, test_y[i]))