진행 중

This commit is contained in:
mjjo
2018-08-16 18:16:27 +09:00
parent 6bec719ce3
commit ff4406e8ff
5 changed files with 67 additions and 3 deletions

113
linear_regression.py Normal file
View File

@@ -0,0 +1,113 @@
import numpy as np
import graph
import pandas as pd
#import tensorflow as tf
def normalize_feature(x: np.ndarray) -> tuple:
    """Standardize each column of *x* to zero mean and unit variance.

    Parameters
    ----------
    x : 2-D array of shape (n_samples, n_features).

    Returns
    -------
    (x_norm, mu, std) : the normalized data plus the per-column mean and
        standard deviation that were used, so new samples can be scaled
        the same way.  (The original annotated these as ``float``, but
        they are per-column arrays; ``np.array`` is also a factory
        function, not a type — annotations corrected to ``np.ndarray``.)
    """
    mu = x.mean(0)
    std = x.std(0)
    # Guard constant columns: a zero std would cause division by zero,
    # so treat them as std 1 (their normalized value becomes 0).
    std[std == 0] = 1
    x_norm = (x - mu) / std
    return x_norm, mu, std
def add_column(x: np.ndarray) -> np.ndarray:
    """Prepend a bias column of ones: shape (n, m) -> (n, m + 1).

    The original reshaped ``x`` to ``(n, m)`` first, but ``n, m`` were
    read from ``x.shape`` itself, so the reshape was a no-op — removed.
    """
    n = x.shape[0]
    return np.c_[np.ones((n, 1)), x]
def split_data(x, y):
    """Randomly split (x, y) into train/test with a 20% test fraction.

    Returns (train_x, test_x, train_y, test_y).

    Bug fix: the original used ``np.random.choice(m, test_size)``, which
    samples WITH replacement, so duplicate indices could make the test
    set smaller than 20% while ``np.delete`` removed fewer train rows
    than intended.  ``replace=False`` guarantees distinct indices.
    """
    m, n = x.shape
    test_size = int(m * 0.2)
    test_indices = np.random.choice(m, test_size, replace=False)
    train_x = np.delete(x, test_indices, axis=0)
    test_x = x[test_indices]
    train_y = np.delete(y, test_indices, axis=0)
    test_y = y[test_indices]
    return train_x, test_x, train_y, test_y
def get_regularization_term(w: np.ndarray, wlambda: float) -> float:
    """L2 penalty for the cost function: lambda * sum(w[1:]**2) / (2*len(w)).

    Consistency fix: ``get_derived_regularization_term`` zeroes the bias
    gradient (index 0), i.e. the bias is not regularized, but the
    original cost here still penalized ``w[0]``.  The bias weight is now
    excluded so cost and gradient describe the same objective.

    NOTE(review): dividing by ``len(w)`` (weight count) rather than the
    sample count is unusual but kept, as the gradient term uses the same
    convention.
    """
    m = len(w)
    return wlambda * (w[1:] ** 2).sum() / (2 * m)
def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -> np.array:
    """Gradient of the L2 penalty, pre-scaled by the learning rate *alpha*.

    The bias component (index 0) is not regularized, so its entry is
    zero; every other entry is ``alpha * wlambda * w_j / len(w)``.
    """
    # Scale the whole vector at once (this allocates a fresh array, so
    # the caller's w is never mutated), then blank out the bias slot.
    term = (alpha * wlambda / len(w)) * w
    term[0] = 0
    return term
# ---------------------------------------------------------------------------
# Driver script: load a dataset, fit linear regression by batch gradient
# descent with L2 regularization, then report predictions on the test split.
# ---------------------------------------------------------------------------

# --- Data loading ----------------------------------------------------------
# Inline toy dataset (y = 3x + 1) kept for quick experiments:
#data = np.array([
#    [1, 3+1],
#    [2, 6+1],
#    [3, 9+1],
#    [4, 12+1],
#    [5, 15+1],
#    ])
data = np.loadtxt('data/sample.txt', delimiter=',')
#data = np.loadtxt('data/ex1data1.txt', delimiter=',')
# train exercise
#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
# auto mpg
#df = pd.read_csv('data/auto-mpg.data', sep='\t')
#df = df.drop(df.loc[df['horsepower'] == '?'].index)
#df = df.convert_objects(convert_numeric=True)
#data = df.values
# Airfoil Self-Noise Data Set
#df = pd.read_csv('data/airfoil_self_noise.dat', sep='\t')
#df = df.convert_objects(convert_numeric=True)
#data = df.values
#df = pd.read_csv('data/mlr01.csv', sep=',')
#data = df.values

# --- Preprocessing ---------------------------------------------------------
x_init = data[:, 0:-1]   # every column but the last is a feature
y_init = data[:, -1]     # last column is the target
x, mu, std = normalize_feature(x_init)
x = add_column(x)        # prepend the bias column of ones
x, test_x, y, test_y = split_data(x, y_init)
n, m = x.shape
w = np.zeros(m)

# With a single feature (bias + one column) the fit can be plotted live.
is_2d = (m == 2)
if is_2d:
    g = graph.Graph()
    g.draw_variable(x[:, 1], y)

# --- Gradient descent ------------------------------------------------------
alpha = 0.001        # learning rate
iteration = 0        # renamed from `iter`, which shadowed the builtin
sum_gradient = 0     # accumulated |update|, used only to throttle output
display_gap = 0.5
wlambda = 0.001      # L2 regularization strength

while True:
    h = x @ w
    cost = ((h - y) ** 2).mean() + get_regularization_term(w, wlambda)
    # NOTE: "gradient" here is already scaled by alpha — it is the full
    # update step, not the raw derivative.
    gradient = alpha * ((h - y) * x.T).mean(1) + get_derived_regularization_term(w, wlambda, alpha)
    w -= gradient
    # Only print/plot after display_gap worth of movement has accumulated,
    # to avoid flooding the console on small steps.
    sum_gradient += abs(gradient.sum())
    if sum_gradient >= display_gap:
        sum_gradient -= display_gap
        print("{} : {}".format(iteration, cost))
        if is_2d:
            g.draw_line(x[:, 1], h)
    iteration += 1
    # Converged once every component of the update step is tiny.
    if max(abs(gradient)) < 1e-5:
        if is_2d:
            g.draw_line(x[:, 1], h)
        break

print('iteration: {}'.format(iteration))

# --- Evaluation on the held-out split --------------------------------------
for i in range(len(test_x)):
    h = test_x[i] @ w
    print('h: {:.2f} / y: {}'.format(h, test_y[i]))