68 lines
1.3 KiB
Python
68 lines
1.3 KiB
Python
import warnings
|
|
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
def load_data(path='data/sample.txt'):
    """Load a headerless CSV data set and append a constant column.

    The file is read without a header row, so pandas labels the columns
    by position (``0``, ``1``, ...). A column labelled ``2`` holding the
    constant ``1`` is added, and the frame is returned with its columns
    ordered ``[1, 2, 0]``.

    Args:
        path: Location of the comma-separated data file. Defaults to the
            sample data set, so existing ``load_data()`` calls behave as
            before.

    Returns:
        A ``pandas.DataFrame`` whose columns, in order, are ``1``, ``2``
        (all ones) and ``0``.
    """
    df = pd.read_csv(path, delimiter=',', header=None)
    # A scalar assignment broadcasts to every row, so the column is filled
    # correctly whatever the frame's length or index.
    df[2] = 1
    return df.reindex(columns=[1, 2, 0])
|
|
|
|
def feature_scaling(x):
    """Standardize the columns of ``x`` using statistics taken along axis 0.

    Each column has its mean subtracted and is divided by its standard
    deviation. Columns with zero standard deviation are passed through
    unchanged: their reported mean is ``0`` and their reported deviation
    is ``1``, so ``(x - mean) / std`` always reproduces the scaled output.

    Args:
        x: Array supporting ``.mean(0)`` and ``.std(0)``; the calling
            script passes a 2-D NumPy array of shape (samples, features).

    Returns:
        A tuple ``(scaled, mean, std)`` holding the scaled data and the
        per-column offset and divisor that were applied.
    """
    mean = x.mean(0)
    std = x.std(0)
    constant = std == 0
    # A constant column has no spread to normalise. Dividing by zero would
    # yield NaN/inf, and subtracting its mean would turn an intercept
    # column of ones into zeros (disabling the intercept), so apply the
    # identity transform to such columns instead.
    std[constant] = 1
    mean[constant] = 0
    return (x - mean) / std, mean, std
|
|
|
|
|
|
# Fit a linear model to the loaded data with batch gradient descent.
df = load_data()
# load_data() orders the columns [1, 2, 0]: the first column is the value
# being predicted and the remaining columns form the design matrix.
y = df.values[:, 0]
x = df.values[:, 1:]
m, n = x.shape
w = np.zeros(n)  # weights, one per design-matrix column
a = 1.0e-3  # learning rate

x, mean, std = feature_scaling(x)

sum_ = 0  # step size accumulated since the last progress line
gap = 0.001  # accumulated step size that triggers a progress line
break_gap = 1.0e-20  # stop once no weight moves by more than this
# The tolerance above is tiny and may sit below floating-point rounding
# noise, in which case it would never trigger; cap the iteration count so
# the loop always terminates.
max_iter = 1000000
trycnt = 0
while trycnt < max_iter:
    h = x @ w
    cost = ((h - y) ** 2).mean() / 2
    # Already multiplied by the learning rate, i.e. this is the update step.
    gradient = a * (h - y) @ x / m
    w -= gradient

    # Sum per-component magnitudes so steps of opposite sign cannot cancel.
    sum_ += np.abs(gradient).sum()
    if sum_ >= gap:
        print('[{}] {} ({})'.format(trycnt, cost, w))
        sum_ -= gap

    trycnt += 1
    # Converged only when the largest step in magnitude is negligible; the
    # signed maximum would ignore large negative components.
    if np.abs(gradient).max() < break_gap:
        break

    # Diverged (overflow or NaN): further iterations cannot recover.
    if not np.isfinite(cost) or not np.isfinite(sum_):
        break

print('[{}] {} ({})'.format(trycnt, cost, w))
# Show the first rows side by side: scaled inputs, expected value, and the
# prediction from the last evaluated iteration.
print(np.c_[x[:10], y[:10], h[:10]])