Files
regressions/regression2.py
2018-08-13 17:58:08 +09:00

68 lines
1.3 KiB
Python

# Suppress the harmless "numpy.dtype size changed" binary-compatibility
# warning some numpy/pandas build combinations emit on import; the filter
# must be installed before pandas is imported.
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import numpy as np
import pandas as pd
def load_data():
    """Load the training set and append a constant bias column.

    Reads ``data/sample.txt`` as a headerless CSV, adds column 2 as a
    column of ones (the intercept term), and reorders the columns to
    ``[1, 2, 0]`` so that ``values[:, 0]`` is the target (second CSV
    field) and ``values[:, 1:]`` is ``[bias, first CSV field]``.

    Returns:
        pandas.DataFrame with columns in the order [1, 2, 0].
    """
    df = pd.read_csv('data/sample.txt', delimiter=',', header=None)
    # Bias column of ones for the intercept weight.
    # (The original assigned this twice, at two different points; once is enough.)
    df[2] = pd.Series([1] * len(df[0]))
    df = df.reindex(columns=[1, 2, 0])
    return df
def feature_scaling(x):
    """Standardize each column of ``x`` to zero mean and unit spread.

    Columns with zero standard deviation (e.g. the constant bias column)
    keep a divisor of 1, so they pass through unscaled instead of
    producing a division by zero.

    Returns:
        (scaled_x, mean, std) — the per-column mean and (adjusted) std
        are returned so callers can apply the same transform elsewhere.
    """
    mu = x.mean(0)
    sigma = x.std(0)
    sigma = np.where(sigma == 0, 1, sigma)
    return (x - mu) / sigma, mu, sigma
# --- Batch gradient-descent linear regression -------------------------------
df = load_data()
y = df.values[:, 0]   # target column
x = df.values[:, 1:]  # feature columns (includes the constant bias column)
m, n = x.shape
w = np.zeros(n)       # weight vector, one entry per feature
a = 1.0e-3            # learning rate
x, mean, std = feature_scaling(x)

sum_ = 0              # accumulated |step| used only to throttle progress printing
gap = 0.001           # print one progress line each time sum_ crosses this
break_gap = 1.0e-20   # convergence threshold on the largest step component
trycnt = 0
while True:
    h = x @ w                          # current predictions
    cost = ((h - y) ** 2).mean() / 2   # half mean-squared error
    gradient = a * (h - y) @ x / m     # update step = learning_rate * dJ/dw
    w -= gradient
    sum_ += abs(gradient.sum())
    if sum_ >= gap:
        print('[{}] {} ({})'.format(trycnt, cost, w))
        sum_ -= gap
    trycnt += 1
    # BUGFIX: converge on the largest absolute component. The original used
    # abs(gradient.max()), which ignores large *negative* components and
    # could stop long before convergence.
    if np.abs(gradient).max() < break_gap:
        break
    if not np.isfinite(cost) or not np.isfinite(sum_):
        break  # diverged (NaN/inf) -- stop instead of looping forever
print('[{}] {} ({})'.format(trycnt, cost, w))
print(np.c_[x[:10], y[:10], h[:10]])