# [機器學習]邏輯迴歸公式推導及其梯度下降法的Python實現

## 從線性模型推導

$$
\log\left(\frac{p}{1-p}\right) = wx
$$

$$
p = \frac{\exp(wx)}{1+\exp(wx)} = \sigma(wx)
$$

## 極大似然法估計與交叉熵

$$
\prod_{1}^{N} [p(x_i)]^{y_i}\,[1-p(x_i)]^{1-y_i}
$$

$$
L(w) = \sum_{1}^{N} \left[\, y_i \log p(x_i) + (1-y_i)\log\bigl(1-p(x_i)\bigr) \,\right]
$$

## 梯度下降法最優化

$$
L = \sum_{i}^{N} -y_i \log \sigma(wx_i) - (1-y_i)\log\bigl(1-\sigma(wx_i)\bigr)
$$

$$
\begin{aligned}
\frac{\partial L}{\partial (wx_i)}
&= \frac{\partial L}{\partial \sigma(wx_i)} \cdot \frac{\partial \sigma(wx_i)}{\partial (wx_i)} \\
&= \left[ -y_i \frac{1}{\sigma(wx_i)} - (1-y_i)\frac{-1}{1-\sigma(wx_i)} \right] \sigma(wx_i)\bigl(1-\sigma(wx_i)\bigr) \\
&= -y_i\bigl(1-\sigma(wx_i)\bigr) + (1-y_i)\,\sigma(wx_i) \\
&= -y_i + y_i\,\sigma(wx_i) + \sigma(wx_i) - y_i\,\sigma(wx_i) \\
&= \sigma(wx_i) - y_i
\end{aligned}
$$

$$
\begin{aligned}
\frac{\partial L}{\partial w}
&= \sum_{i}^{N} \frac{\partial L}{\partial (wx_i)} \frac{\partial (wx_i)}{\partial w} \\
&= \sum_{i}^{N} \bigl(\sigma(wx_i) - y_i\bigr) \times x_i
\end{aligned}
$$

## Python實現（批量梯度下降法）

# Batch gradient descent for logistic regression on the first two iris
# classes (samples 0-99 cover targets 0 and 1, a binary problem).
from sklearn import datasets
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from numpy.linalg import inv

# BUG FIX: `iris` was referenced below but never loaded.
iris = datasets.load_iris()
X = iris.data[:100, :]                     # 100 samples x 4 features
y = iris.target[:100].reshape((100, -1))   # 0/1 labels as a column vector


def logit(x):
    """Sigmoid function: sigma(x) = 1 / (1 + exp(-x))."""
    # BUG FIX: restored the `+` lost in extraction (`1   np.exp(-x)`).
    return 1. / (1 + np.exp(-x))


m, n = X.shape
alpha = 0.0065                        # learning rate (step size)
w = np.random.random((n, 1))          # weight vector, random initialisation
maxCycles = 30                        # number of gradient-descent iterations
J = pd.Series(np.arange(maxCycles, dtype=float))  # per-iteration loss record

for i in range(maxCycles):
    h = logit(np.dot(X, w))           # predicted probabilities sigma(Xw)
    # Average cross-entropy loss over the m = 100 samples.
    # BUG FIX: restored the missing `+` between the two loss terms.
    J[i] = -(1 / 100.) * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    error = h - y                     # dL/d(wx_i) = sigma(wx_i) - y_i
    # BUG FIX: `grad` was used but never computed.
    # dL/dw = sum_i (sigma(wx_i) - y_i) * x_i, i.e. X^T (h - y).
    grad = np.dot(X.T, error)
    w -= alpha * grad                 # negative-gradient step minimises J

print(w)                              # BUG FIX: Python 3 print (was `print w`)
J.plot()
plt.show()

• 2018.07.26

• 2018.07.26

• 2018.07.26