预测函数 - sigmoid 函数
\[
f_\theta(x) = \frac{1}{1 + \exp(-\theta^Tx)}
\]
决策边界
\[
y = \begin{cases}
1 & (\theta^Tx \geq 0)\\
0 & (\theta^Tx < 0)
\end{cases}
\]
目标函数 - 似然函数
\[
L(\theta) =
\prod_{i=1}^{n}{P(y^{(i)}=1|x^{(i)})^{y^{(i)}}P(y^{(i)}=0|x^{(i)})^{1-y^{(i)}}}
\]
对数似然函数
\[
\log L(\theta) = \sum_{i=1}^{n}{(y^{(i)}\log
f_\theta(x^{(i)})+(1-y^{(i)})\log (1-f_\theta(x^{(i)})))}
\]
参数更新表达式
\[
\theta_j := \theta_j - \eta\sum_{i=1}^{n}{(f_\theta(x^{(i)}) -
y^{(i)})x_j^{(i)}}
\]
线性可分问题
\[
\theta^Tx = \theta_0x_0 + \theta_1x_1 + \theta_2x_2 = \theta_0 +
\theta_1x_1 + \theta_2x_2 = 0
\]
\[
x_2 = -\frac{\theta_0 + \theta_1x_1}{\theta_2}
\]
# Logistic regression on a linearly separable data set,
# trained with batch gradient descent.
import numpy as np
import matplotlib.pyplot as plt

# Training data: columns 0-1 are the features, column 2 is the 0/1 label.
train = np.loadtxt('images2.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Random initial parameters: bias plus one weight per feature.
theta = np.random.rand(3)

# Standardize the features to zero mean and unit variance.
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)

def standardize(x):
    """Return x scaled by the training-set mean and standard deviation."""
    return (x - mu) / sigma

train_z = standardize(train_x)

def to_matrix(x):
    """Prepend a column of ones (the bias term x0) to the feature matrix."""
    x0 = np.ones([x.shape[0], 1])
    return np.hstack([x0, x])

X = to_matrix(train_z)

def f(x):
    """Sigmoid prediction function f_theta(x) = 1 / (1 + exp(-theta^T x))."""
    return 1 / (1 + np.exp(-np.dot(x, theta)))

ETA = 1e-3    # learning rate
epoch = 5000  # number of full passes over the data

for count in range(1, epoch + 1):
    # Batch update: one gradient step computed over all samples at once.
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    print('第 {} 次 : theta = {}'.format(count, theta))

# Scatter the two classes, then draw the learned decision boundary
# x2 = -(theta0 + theta1 * x1) / theta2.
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
x1 = np.linspace(-2, 2, 100)
plt.plot(x1, -(theta[0] + theta[1] * x1) / theta[2], linestyle='dashed')
plt.show()
线性不可分问题
\[
\theta^Tx = \theta_0x_0 + \theta_1x_1 + \theta_2x_2 + \theta_3x_1^2 =
\theta_0 + \theta_1x_1 + \theta_2x_2 + \theta_3x_1^2 = 0
\]
\[
x_2 = -\frac{\theta_0 + \theta_1x_1 + \theta_3x_1^2}{\theta_2}
\]
# Logistic regression on a linearly inseparable data set: a quadratic
# feature x1^2 is added so the decision boundary can curve.
import numpy as np
import matplotlib.pyplot as plt

# Training data: columns 0-1 are the features, column 2 is the 0/1 label.
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Four parameters: bias, two linear weights, and the x1^2 weight.
theta = np.random.rand(4)

# Standardize the features to zero mean and unit variance.
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)

def standardize(x):
    """Return x scaled by the training-set mean and standard deviation."""
    return (x - mu) / sigma

train_z = standardize(train_x)

def to_matrix(x):
    """Build the design matrix [1, x1, x2, x1^2]."""
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:, 0, np.newaxis] ** 2
    return np.hstack([x0, x, x3])

X = to_matrix(train_z)

def f(x):
    """Sigmoid prediction function f_theta(x) = 1 / (1 + exp(-theta^T x))."""
    return 1 / (1 + np.exp(-np.dot(x, theta)))

ETA = 1e-3    # learning rate
epoch = 5000  # number of full passes over the data

for count in range(1, epoch + 1):
    # Batch update: one gradient step computed over all samples at once.
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    print('第 {} 次 : theta = {}'.format(count, theta))

# Scatter the two classes, then draw the curved decision boundary
# x2 = -(theta0 + theta1 * x1 + theta3 * x1^2) / theta2.
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(x1, x2, linestyle='dashed')
plt.show()
随机梯度下降法的实现
# Stochastic gradient descent version of the polynomial logistic
# regression: parameters are updated one randomly ordered sample at a time.
import numpy as np
import matplotlib.pyplot as plt

# Training data: columns 0-1 are the features, column 2 is the 0/1 label.
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Four parameters: bias, two linear weights, and the x1^2 weight.
theta = np.random.rand(4)

# Standardize the features to zero mean and unit variance.
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)

def standardize(x):
    """Return x scaled by the training-set mean and standard deviation."""
    return (x - mu) / sigma

train_z = standardize(train_x)

def to_matrix(x):
    """Build the design matrix [1, x1, x2, x1^2]."""
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:, 0, np.newaxis] ** 2
    return np.hstack([x0, x, x3])

X = to_matrix(train_z)

def f(x):
    """Sigmoid prediction function f_theta(x) = 1 / (1 + exp(-theta^T x))."""
    return 1 / (1 + np.exp(-np.dot(x, theta)))

ETA = 1e-3    # learning rate
epoch = 5000  # number of full passes over the data

for count in range(1, epoch + 1):
    # Visit the samples in a fresh random order each epoch, updating
    # theta after every single sample (stochastic gradient descent).
    p = np.random.permutation(X.shape[0])
    for x, y in zip(X[p, :], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x
    print('第 {} 次 : theta = {}'.format(count, theta))

# Scatter the two classes, then draw the curved decision boundary
# x2 = -(theta0 + theta1 * x1 + theta3 * x1^2) / theta2.
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(x1, x2, linestyle='dashed')
plt.show()