initial commit
This commit is contained in:
112
regressions.py
Normal file
112
regressions.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import numpy as np
|
||||
import graph
|
||||
import pandas as pd
|
||||
#import tensorflow as tf
|
||||
|
||||
|
||||
def normalize_feature(x: np.ndarray) -> tuple:
    """Z-score normalize each column of x.

    Returns (x_norm, mu, std) where mu and std are the per-column mean
    and standard deviation, so callers can apply the identical scaling
    to new data later.  (Original annotation claimed float returns; they
    are arrays, one entry per column.)
    """
    mu = x.mean(0)
    std = x.std(0)
    # Constant columns have std == 0; force 1 so the division below is
    # safe (such columns normalize to all zeros).
    std[std == 0] = 1
    x_norm = (x - mu) / std
    return x_norm, mu, std
|
||||
|
||||
def add_column(x: np.ndarray) -> np.ndarray:
    """Prepend a bias column of ones to the 2-D design matrix x.

    The original reshaped x to (n, m) before stacking, but x already
    has that shape, so the reshape was a no-op and is dropped.
    """
    n = x.shape[0]
    return np.c_[np.ones((n, 1)), x]
|
||||
|
||||
def split_data(x, y):
    """Randomly split (x, y) into train/test sets with a 20% test fraction.

    Returns (train_x, test_x, train_y, test_y).  Sampling is without
    replacement so the test set contains exactly int(0.2 * m) distinct
    rows and train/test never overlap.
    """
    m = x.shape[0]
    test_size = int(m * 0.2)
    # replace=False fixes a bug: np.random.choice samples WITH
    # replacement by default, which could pick the same test row twice,
    # shrinking the effective test set and inflating the train set.
    test_indices = np.random.choice(m, test_size, replace=False)

    train_x = np.delete(x, test_indices, axis=0)
    test_x = x[test_indices]
    train_y = np.delete(y, test_indices, axis=0)
    test_y = y[test_indices]

    return train_x, test_x, train_y, test_y
|
||||
|
||||
def get_regularization_term(w: np.ndarray, wlambda: float) -> float:
    """L2 penalty added to the cost.

    Excludes the bias weight w[0], for consistency with
    get_derived_regularization_term, which pins the bias gradient
    component to zero (np.r_[0, ...]).  The original penalized w[0] in
    the cost while never updating it through the penalty gradient.
    """
    # NOTE(review): divides by the weight count, not the sample count as
    # in the textbook formulation -- kept to preserve the interface.
    m = len(w)
    return wlambda * (w[1:]**2).sum() / (2*m)
|
||||
|
||||
def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -> np.array:
    """Gradient contribution of the L2 penalty, pre-scaled by alpha.

    The bias component (index 0) is never regularized, so its entry in
    the returned vector is always zero.
    """
    num_weights = len(w)
    penalty_grad = alpha * wlambda * w[1:] / num_weights
    return np.concatenate(([0], penalty_grad))
|
||||
|
||||
|
||||
# --- Dataset selection -------------------------------------------------
# Alternative datasets tried during development; uncomment one to switch.
#
# Tiny hand-rolled linear dataset:
# data = np.array([
#     [1, 3+1],
#     [2, 6+1],
#     [3, 9+1],
#     [4, 12+1],
#     [5, 15+1],
# ])
#
# data = np.loadtxt('data/ex1data1.txt', delimiter=',')
#
# Train exercise:
# data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
#
# Auto MPG:
# df = pd.read_csv('data/auto-mpg.data', sep='\t')
# df = df.drop(df.loc[df['horsepower'] == '?'].index)
# df = df.convert_objects(convert_numeric=True)
# data = df.values
#
# Airfoil Self-Noise Data Set:
# df = pd.read_csv('data/airfoil_self_noise.dat', sep='\t')
# df = df.convert_objects(convert_numeric=True)
# data = df.values

# Active dataset: multiple-linear-regression sample CSV.
df = pd.read_csv('data/mlr01.csv', sep=',')
data = df.values
|
||||
|
||||
|
||||
|
||||
# --- Feature preparation ----------------------------------------------
# Last column is the target; everything before it is a feature.
x_init = data[:, 0:-1]
y_init = data[:, -1]

x, mu, std = normalize_feature(x_init)
x = add_column(x)  # prepend bias column
x, test_x, y, test_y = split_data(x, y_init)
n, m = x.shape

w = np.zeros(m)

# With a single feature plus bias the fit can be drawn in 2-D.
is_2d = (m == 2)

if is_2d:  # idiom fix: was `if is_2d == True`
    g = graph.Graph()
    # NOTE(review): x_init still holds ALL rows while y holds only the
    # training rows after split_data, so their lengths differ -- confirm
    # graph.Graph tolerates this, or plot the training slice instead.
    g.draw_variable(x_init, y)

# --- Hyperparameters ---------------------------------------------------
alpha = 0.001        # learning rate
wlambda = 0.001      # L2 regularization strength
display_gap = 0.5    # print progress once this much |gradient| accumulates

iteration = 0        # renamed from `iter`, which shadowed the builtin
sum_gradient = 0

# --- Batch gradient descent; loop until the gradient vanishes ----------
while True:
    h = x @ w
    cost = ((h - y)**2).mean() + get_regularization_term(w, wlambda)
    # The learning rate alpha is folded into the gradient here, so the
    # weight update below is simply a subtraction.
    gradient = alpha*((h - y)*x.T).mean(1) + get_derived_regularization_term(w, wlambda, alpha)
    w -= gradient

    # Throttled progress output: report each time the accumulated
    # absolute gradient sum crosses display_gap.
    sum_gradient += abs(gradient.sum())
    if sum_gradient >= display_gap:
        sum_gradient -= display_gap
        print("{} : {}".format(iteration, cost))

        if is_2d:
            g.draw_line(x_init, h)

    iteration += 1

    # Converged when every gradient component is tiny.
    if max(abs(gradient)) < 1e-5:
        if is_2d:
            g.draw_line(x_init, h)
        break
print('iteration: {}'.format(iteration))

# --- Evaluate on the held-out test rows --------------------------------
for i in range(len(test_x)):
    h = test_x[i] @ w
    print('h: {:.2f} / y: {}'.format(h, test_y[i]))
|
||||
Reference in New Issue
Block a user