# (removed duplicated file-metadata residue: "113 lines / 2.4 KiB / Python")
import numpy as np
|
|
import graph
|
|
import pandas as pd
|
|
#import tensorflow as tf
|
|
|
|
|
|
def normalize_feature(x: np.ndarray) -> tuple:
    """Z-score normalize each column of *x*.

    Parameters
    ----------
    x : np.ndarray, shape (n_samples, n_features)
        Raw feature matrix.

    Returns
    -------
    (x_norm, mu, std)
        The normalized matrix plus the per-column mean and standard
        deviation, so new samples can be transformed the same way.
        (The original annotated the return as ``(np.array, float, float)``,
        but ``mu`` and ``std`` are per-column arrays, and ``np.array`` is
        the constructor function, not a type.)
    """
    mu = x.mean(0)
    std = x.std(0)
    # Constant columns have std == 0; substitute 1 to avoid division by
    # zero (their normalized value then becomes exactly 0 for every row).
    std[std == 0] = 1
    x_norm = (x - mu) / std
    return x_norm, mu, std
|
|
|
|
def add_column(x: np.ndarray) -> np.ndarray:
    """Prepend a bias column of ones to the feature matrix.

    *x* is expected to be 2-D with shape (n_samples, n_features); the
    result has shape (n_samples, n_features + 1).
    """
    n = x.shape[0]
    # x is already (n, m), so the original's reshape((n, m)) was a no-op.
    return np.c_[np.ones((n, 1)), x]
|
|
|
|
def split_data(x, y):
    """Randomly split (x, y) into 80% train / 20% test partitions.

    Returns (train_x, test_x, train_y, test_y).  Not seeded: every call
    produces a different split.
    """
    m = x.shape[0]
    test_size = int(m * 0.2)
    # replace=False guarantees distinct indices.  The original sampled
    # WITH replacement, so the test set could contain duplicate rows and
    # len(train) + len(test) would not equal m.
    test_indices = np.random.choice(m, test_size, replace=False)

    train_x = np.delete(x, test_indices, axis=0)
    test_x = x[test_indices]
    train_y = np.delete(y, test_indices, axis=0)
    test_y = y[test_indices]

    return train_x, test_x, train_y, test_y
|
|
|
|
def get_regularization_term(w: np.ndarray, wlambda: float) -> float:
    """L2 (ridge) penalty added to the displayed cost.

    The bias weight w[0] is excluded, matching
    get_derived_regularization_term, which also leaves the bias
    unregularized (its first gradient entry is 0).  The original summed
    over ALL of w, so the printed cost disagreed with the gradient that
    was actually applied.

    NOTE(review): the divisor uses len(w) (number of weights) rather than
    the number of training samples; both functions share this convention,
    so it is kept — it only rescales wlambda.
    """
    m = len(w)
    return wlambda * (w[1:] ** 2).sum() / (2 * m)
|
|
|
|
def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -> np.array:
    """Gradient contribution of the L2 penalty, pre-scaled by the
    learning rate *alpha*.

    The first entry is 0 so the bias weight w[0] is never regularized;
    each remaining entry j is alpha * wlambda * w[j] / len(w).
    """
    weight_count = len(w)
    penalty_gradient = (alpha * wlambda / weight_count) * w[1:]
    return np.concatenate(([0.0], penalty_gradient))
|
|
|
|
|
|
#data = np.array([
|
|
# [1,3+1],
|
|
# [2,6+1],
|
|
# [3,9+1],
|
|
# [4,12+1],
|
|
# [5,15+1],
|
|
# ])
|
|
|
|
#data = np.loadtxt('data/ex1data1.txt', delimiter=',')
|
|
# train excercize
|
|
#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
|
|
# auto mpg
|
|
#df = pd.read_csv('data/auto-mpg.data', sep='\t')
|
|
#df = df.drop(df.loc[df['horsepower'] == '?'].index)
|
|
#df = df.convert_objects(convert_numeric=True)
|
|
#data = df.values
|
|
# Airfoil Self-Noise Data Set
|
|
#df = pd.read_csv('data/airfoil_self_noise.dat', sep='\t')
|
|
#df = df.convert_objects(convert_numeric=True)
|
|
#data = df.values
|
|
|
|
# Load the dataset; the last column is the target, all preceding columns
# are features.
df = pd.read_csv('data/mlr01.csv', sep=',')
data = df.values


x_init = data[:, 0:-1]
y_init = data[:, -1]

# Z-score normalize the features, then prepend the bias column of ones.
x, mu, std = normalize_feature(x_init)
x = add_column(x)
# NOTE(review): the split happens AFTER normalization, so the test set's
# statistics leak into mu/std — acceptable for a demo script, but worth
# confirming if this is reused.
x, test_x, y, test_y = split_data(x, y_init)
n, m = x.shape

# One weight per column (bias + features), initialized to zero.
w = np.zeros(m)
|
|
|
|
# With a single feature plus the bias column (m == 2) the fit can be
# plotted as a 2-D line.
is_2d = (m == 2)

if is_2d:  # was `is_2d == True`; comparing a bool to True is redundant
    g = graph.Graph()
    # NOTE(review): x_init holds ALL rows but y only the training rows
    # after split_data — confirm graph.draw_variable tolerates the
    # length mismatch (likely this predates the train/test split).
    g.draw_variable(x_init, y)

alpha = 0.001      # learning rate

iter = 0           # iteration counter (NOTE(review): shadows builtin iter)
sum_gradient = 0   # accumulated |gradient|, used to throttle progress output
display_gap = 0.5  # report each time sum_gradient crosses this threshold
wlambda = 0.001    # L2 regularization strength
|
|
|
|
# Batch gradient descent until the weight updates become negligible.
while True:
    # Predictions for the whole training set.
    h = x@w
    # Mean-squared-error cost plus the L2 penalty (display only).
    cost = ((h - y)**2).mean() + get_regularization_term(w, wlambda)
    # Batch gradient, pre-scaled by the learning rate alpha (the derived
    # regularization term is also pre-scaled — see its definition).
    gradient = alpha*((h - y)*x.T).mean(1) + get_derived_regularization_term(w, wlambda, alpha)
    w -= gradient

    # Throttle console/plot updates: only report each time the
    # accumulated gradient magnitude crosses display_gap.
    sum_gradient += abs(gradient.sum())
    if sum_gradient >= display_gap:
        sum_gradient -= display_gap
        print("{} : {}".format(iter, cost))

        if is_2d:  # was `is_2d == True`
            # NOTE(review): x_init has all rows but h only the training
            # rows — confirm graph.draw_line handles the mismatch.
            g.draw_line(x_init, h)

    iter += 1

    # Converged once every single weight update is tiny.
    if max(abs(gradient)) < 1e-5:
        if is_2d:
            g.draw_line(x_init, h)
        break
print('iteration: {}'.format(iter))
|
|
|
|
# Report prediction vs. actual target for every held-out test sample.
# (zip replaces the original `for i in range(len(test_x))` index loop.)
for features, target in zip(test_x, test_y):
    h = features@w
    print('h: {:.2f} / y: {}'.format(h, target))
|