From e3bf92dab20d504dd2b12e7f97a05157356d3b0b Mon Sep 17 00:00:00 2001 From: mjjo Date: Mon, 13 Aug 2018 17:58:08 +0900 Subject: [PATCH] no message --- .gitignore | 1 + regression2.py | 50 +++++++++++++++++++++++++++++++++++++------------- regressions.py | 12 ++++++------ 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 88dbff1..c230a68 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .vs/ +__pycache__/ diff --git a/regression2.py b/regression2.py index 6ed81bc..74df33c 100644 --- a/regression2.py +++ b/regression2.py @@ -6,39 +6,63 @@ import pandas as pd def load_data(): - df = pd.read_csv('data/sample.txt', sep=',', header=None) + df = pd.read_csv('data/sample.txt', delimiter=',', header=None) + df[2] = pd.Series([1]*len(df[0])) + + #df = pd.read_csv('data/ex1data1.txt', delimiter=',', header=None) + #df[2] = pd.Series([1]*len(df[0])) + + #df = pd.read_csv('data/train.csv', delimiter=',', comment='#') + #df[0] = df['x'] + #df[1] = df['y'] df[2] = pd.Series([1]*len(df[0])) df = df.reindex(columns=[1, 2, 0]) return df -def get_cost(): - - pass - -def get_gradient(): - pass - +def feature_scaling(x): + mean = x.mean(0) + std = x.std(0) + std[std==0] = 1 + x = (x-mean)/std + return x, mean, std df = load_data() - - y = df.values[:, 0] x = df.values[:, 1:] m, n = x.shape w = np.zeros(n) -a = 0.001 +a = 1.0e-3 +x, mean, std = feature_scaling(x) sum_ = 0 gap = 0.001 - +break_gap = 1.0e-20 +trycnt = 0 while True: h = x@w cost = ((h-y)**2).mean()/2 - gradient = a*(h-y)@x + gradient = a*(h-y)@x/m + w -= gradient + sum_ += abs(gradient.sum()) + if sum_ >= gap: + print('[{}] {} ({})'.format(trycnt, cost, w)) + sum_ -= gap + trycnt += 1 + if abs(gradient.max()) < break_gap: + break + + if not np.isfinite(cost) or not np.isfinite(sum_): + break + +print('[{}] {} ({})'.format(trycnt, cost, w)) +#print('x: {}'.format(x[:10])) +#print('y: {}'.format(y[:10])) +#print('h: {}'.format(h[:10])) +print(np.c_[x[:10], y[:10], h[:10]]) pass \ No newline at end of file diff --git a/regressions.py b/regressions.py index 0b90513..30fd30f 100644 --- a/regressions.py +++ b/regressions.py @@ -46,7 +46,7 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) - #data = np.loadtxt('data/ex1data1.txt', delimiter=',') # train excercize -#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#') +data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#') # auto mpg #df = pd.read_csv('data/auto-mpg.data', sep='\t') #df = df.drop(df.loc[df['horsepower'] == '?'].index) @@ -57,8 +57,8 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) - #df = df.convert_objects(convert_numeric=True) #data = df.values -df = pd.read_csv('data/mlr01.csv', sep=',') -data = df.values +#df = pd.read_csv('data/mlr01.csv', sep=',') +#data = df.values @@ -76,7 +76,7 @@ is_2d = (m == 2) if is_2d == True: g = graph.Graph() - g.draw_variable(x_init, y) + g.draw_variable(x[:, 1], y) alpha = 0.001 @@ -97,13 +97,13 @@ while True: print("{} : {}".format(iter, cost)) if is_2d == True: - g.draw_line(x_init, h) + g.draw_line(x[:, 1], h) iter += 1 if max(abs(gradient)) < 1e-5: if is_2d: - g.draw_line(x_init, h) + g.draw_line(x[:, 1], h) break print('iteration: {}'.format(iter))