import numpy as np
import graph
import pandas as pd
#import tensorflow as tf


def normalize_feature(x: np.ndarray) -> (np.ndarray, np.ndarray, np.ndarray):
    """Standardize each feature column to zero mean / unit variance.

    Returns (x_norm, mu, std) so the identical transform can be applied
    to new data later.  Constant columns get std forced to 1 to avoid a
    division by zero (their normalized value is then simply 0).
    """
    mu = x.mean(0)
    std = x.std(0)
    std[std == 0] = 1  # constant column: avoid 0/0
    x_norm = (x - mu) / std
    return x_norm, mu, std


def add_column(x: np.ndarray) -> np.ndarray:
    """Prepend a bias column of ones so that w[0] acts as the intercept."""
    n = x.shape[0]
    # The original reshape((n, m)) was a no-op and has been dropped.
    return np.c_[np.ones((n, 1)), x]


def split_data(x: np.ndarray, y: np.ndarray):
    """Randomly split (x, y) into ~80% train / 20% test rows.

    Bug fix: sample the test indices WITHOUT replacement.  The previous
    np.random.choice default (replace=True) could draw the same row
    several times, producing duplicated test rows and a train/test
    partition whose sizes did not add up to the full dataset.
    """
    m = x.shape[0]
    test_size = int(m * 0.2)
    test_indices = np.random.choice(m, test_size, replace=False)
    train_x = np.delete(x, test_indices, axis=0)
    test_x = x[test_indices]
    train_y = np.delete(y, test_indices, axis=0)
    test_y = y[test_indices]
    return train_x, test_x, train_y, test_y


def get_regularization_term(w: np.ndarray, wlambda: float) -> float:
    """L2 (ridge) penalty added to the cost: lambda * ||w[1:]||^2 / (2m).

    Bug fix: the bias weight w[0] is now excluded, matching
    get_derived_regularization_term, which already zeroed the bias
    component of the gradient — previously the two functions disagreed.
    """
    m = len(w)
    return wlambda * (w[1:] ** 2).sum() / (2 * m)


def get_derived_regularization_term(w: np.ndarray, wlambda: float, alpha: float) -> np.ndarray:
    """Gradient of the L2 penalty, pre-scaled by the learning rate alpha.

    The leading 0 keeps the bias weight w[0] unregularized, per the usual
    ridge-regression convention.
    """
    m = len(w)
    return np.r_[0, alpha * wlambda * w[1:] / m]


# --- Alternative datasets tried during development (kept for reference) ---
#data = np.array([[1, 4], [2, 7], [3, 10], [4, 13], [5, 16]])
#data = np.loadtxt('data/ex1data1.txt', delimiter=',')
# train exercise
#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
# auto mpg (drop rows with '?' horsepower, coerce to numeric)
#df = pd.read_csv('data/auto-mpg.data', sep='\t')
#df = df.drop(df.loc[df['horsepower'] == '?'].index)
#df = df.convert_objects(convert_numeric=True)
#data = df.values
# Airfoil Self-Noise Data Set
#df = pd.read_csv('data/airfoil_self_noise.dat', sep='\t')
#df = df.convert_objects(convert_numeric=True)
#data = df.values

df = pd.read_csv('data/mlr01.csv', sep=',')
data = df.values

# Last column is the target, everything before it the features.
x_init = data[:, 0:-1]
y_init = data[:, -1]

x, mu, std = normalize_feature(x_init)
x = add_column(x)
x, test_x, y, test_y = split_data(x, y_init)

n, m = x.shape
w = np.zeros(m)

# With a single feature (bias + feature => m == 2) the fit can be plotted live.
is_2d = (m == 2)
if is_2d:
    g = graph.Graph()
    # NOTE(review): x_init holds ALL rows while y holds only the training
    # rows after split_data — the lengths differ.  Whether graph.Graph
    # tolerates that is not visible from here; confirm against graph.py.
    g.draw_variable(x_init, y)

alpha = 0.001      # learning rate
wlambda = 0.001    # L2 regularization strength
display_gap = 0.5  # print/redraw each time the accumulated step size passes this
iteration = 0      # renamed from 'iter', which shadowed the builtin
sum_gradient = 0

# Batch gradient descent: iterate until every gradient component is tiny.
while True:
    h = x @ w
    cost = ((h - y) ** 2).mean() + get_regularization_term(w, wlambda)
    # Data-term gradient is pre-scaled by alpha, as is the penalty term.
    gradient = alpha * ((h - y) * x.T).mean(1) + get_derived_regularization_term(w, wlambda, alpha)
    w -= gradient
    sum_gradient += abs(gradient.sum())
    if sum_gradient >= display_gap:
        sum_gradient -= display_gap
        print("{} : {}".format(iteration, cost))
        if is_2d:
            g.draw_line(x_init, h)
    iteration += 1
    if max(abs(gradient)) < 1e-5:
        if is_2d:
            g.draw_line(x_init, h)
        break

print('iteration: {}'.format(iteration))

# Evaluate on the held-out rows; test_x already went through the same
# normalize/add_column pipeline before the split, so a plain dot product
# with the trained weights is the correct prediction.
for i in range(len(test_x)):
    h = test_x[i] @ w
    print('h: {:.2f} / y: {}'.format(h, test_y[i]))