no message

This commit is contained in:
mjjo
2018-08-13 17:58:08 +09:00
parent 0ae676dd96
commit e3bf92dab2
3 changed files with 44 additions and 19 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
.vs/ .vs/
__pycache__/

View File

@@ -6,39 +6,63 @@ import pandas as pd
def load_data(): def load_data():
df = pd.read_csv('data/sample.txt', sep=',', header=None) df = pd.read_csv('data/sample.txt', delimiter=',', header=None)
df[2] = pd.Series([1]*len(df[0]))
#df = pd.read_csv('data/ex1data1.txt', delimiter=',', header=None)
#df[2] = pd.Series([1]*len(df[0]))
#df = pd.read_csv('data/train.csv', delimiter=',', comment='#')
#df[0] = df['x']
#df[1] = df['y']
df[2] = pd.Series([1]*len(df[0])) df[2] = pd.Series([1]*len(df[0]))
df = df.reindex(columns=[1, 2, 0]) df = df.reindex(columns=[1, 2, 0])
return df return df
def get_cost(): def feature_scaling(x):
mean = x.mean(0)
pass std = x.std(0)
std[std==0] = 1
def get_gradient(): x = (x-mean)/std
pass return x, mean, std
df = load_data() df = load_data()
y = df.values[:, 0] y = df.values[:, 0]
x = df.values[:, 1:] x = df.values[:, 1:]
m, n = x.shape m, n = x.shape
w = np.zeros(n) w = np.zeros(n)
a = 0.001 a = 1.0e-3
x, mean, std = feature_scaling(x)
sum_ = 0 sum_ = 0
gap = 0.001 gap = 0.001
break_gap = 1.0e-20
trycnt = 0
while True: while True:
h = x@w h = x@w
cost = ((h-y)**2).mean()/2 cost = ((h-y)**2).mean()/2
gradient = a*(h-y)@x gradient = a*(h-y)@x/m
w -= gradient
sum_ += abs(gradient.sum())
if sum_ >= gap:
print('[{}] {} ({})'.format(trycnt, cost, w))
sum_ -= gap
trycnt += 1
if abs(gradient.max()) < break_gap:
break
if not np.isfinite(cost) or not np.isfinite(sum_):
break
print('[{}] {} ({})'.format(trycnt, cost, w))
#print('x: {}'.format(x[:10]))
#print('y: {}'.format(y[:10]))
#print('h: {}'.format(h[:10]))
print(np.c_[x[:10], y[:10], h[:10]])
pass pass

View File

@@ -46,7 +46,7 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -
#data = np.loadtxt('data/ex1data1.txt', delimiter=',') #data = np.loadtxt('data/ex1data1.txt', delimiter=',')
# train exercise # train exercise
#data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#') data = np.loadtxt('data/train.csv', delimiter=',', skiprows=1, comments='#')
# auto mpg # auto mpg
#df = pd.read_csv('data/auto-mpg.data', sep='\t') #df = pd.read_csv('data/auto-mpg.data', sep='\t')
#df = df.drop(df.loc[df['horsepower'] == '?'].index) #df = df.drop(df.loc[df['horsepower'] == '?'].index)
@@ -57,8 +57,8 @@ def get_derived_regularization_term(w: np.array, wlambda: float, alpha: float) -
#df = df.convert_objects(convert_numeric=True) #df = df.convert_objects(convert_numeric=True)
#data = df.values #data = df.values
df = pd.read_csv('data/mlr01.csv', sep=',') #df = pd.read_csv('data/mlr01.csv', sep=',')
data = df.values #data = df.values
@@ -76,7 +76,7 @@ is_2d = (m == 2)
if is_2d == True: if is_2d == True:
g = graph.Graph() g = graph.Graph()
g.draw_variable(x_init, y) g.draw_variable(x[:, 1], y)
alpha = 0.001 alpha = 0.001
@@ -97,13 +97,13 @@ while True:
print("{} : {}".format(iter, cost)) print("{} : {}".format(iter, cost))
if is_2d == True: if is_2d == True:
g.draw_line(x_init, h) g.draw_line(x[:, 1], h)
iter += 1 iter += 1
if max(abs(gradient)) < 1e-5: if max(abs(gradient)) < 1e-5:
if is_2d: if is_2d:
g.draw_line(x_init, h) g.draw_line(x[:, 1], h)
break break
print('iteration: {}'.format(iter)) print('iteration: {}'.format(iter))