# Quadratic polynomial regression on click data, fitted with stochastic
# gradient descent.
import matplotlib.pyplot as plt
import numpy as np
# Load the training data. The first row of the CSV is skipped (header) and
# every remaining field is parsed as an integer.
train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:, 0]  # column 0: model input
train_y = train[:, 1]  # column 1: regression target
# Mean and standard deviation of the raw training inputs. They are computed
# once so that any later input is scaled with the same statistics.
mu = train_x.mean()
sigma = train_x.std()


def standardize(x):
    """Return *x* centred on ``mu`` and scaled by ``sigma`` (z-score).

    Works element-wise on scalars and NumPy arrays alike. The module-level
    ``mu`` and ``sigma`` are read at call time.
    """
    return (x - mu) / sigma


# Standardized training inputs used for fitting and plotting.
train_z = standardize(train_x)
def to_matrix(x):
    """Build the design matrix for a quadratic model.

    Args:
        x: 1-D NumPy array of (standardized) inputs.

    Returns:
        Array of shape ``(x.size, 3)`` whose columns are ``1``, ``x`` and
        ``x ** 2``.
    """
    # vstack produces one row per feature; transpose to one row per sample.
    return np.vstack([np.ones(x.size), x, x ** 2]).T


# Design matrix of the training set.
X = to_matrix(train_z)
# Model parameters, one per design-matrix column, initialised with uniform
# random values in [0, 1).
theta = np.random.rand(3)


def f(x):
    """Return the model prediction ``x . theta``.

    ``x`` may be a single feature row of length 3 or a matrix with one
    feature row per sample. The module-level ``theta`` is looked up at call
    time, so predictions follow any later rebinding of ``theta``.
    """
    return np.dot(x, theta)


def MSE(x, y):
    """Return the mean squared error of ``f(x)`` against the targets ``y``.

    Args:
        x: Design matrix with one sample per row; ``x.shape[0]`` is used as
            the sample count.
        y: Target values, one per row of ``x``.
    """
    return (1 / x.shape[0]) * np.sum((y - f(x)) ** 2)
# Learning rate.
ETA = 1e-3

# Decrease of the training error over the most recent epoch. It starts above
# the stopping threshold so the loop body runs at least once.
diff = 1

# Number of completed epochs.
count = 0

# Progress-line template; it never changes, so it is built once outside the
# loop.
log = '第 {} 次 : theta = {}, 差值 = {:.4f}'

# Stochastic gradient descent: repeat epochs until the error improves by no
# more than 1e-2. Note that an epoch which *raises* the error makes ``diff``
# negative and therefore also ends the loop.
error = MSE(X, train_y)
while diff > 1e-2:
    # Visit the samples in a fresh random order every epoch.
    p = np.random.permutation(X.shape[0])
    for x, y in zip(X[p, :], train_y[p]):
        # Update the parameters from a single sample.
        theta = theta - ETA * (f(x) - y) * x

    # Compare the full-dataset error with the previous epoch's.
    current_error = MSE(X, train_y)
    diff = error - current_error
    error = current_error

    count += 1
    print(log.format(count, theta, diff))
# Plot the standardized training points together with the fitted curve,
# evaluated on 100 evenly spaced points in [-3, 3].
x = np.linspace(-3, 3, 100)
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(to_matrix(x)))
plt.show()