# 1- 線性迴歸代價函式：

$$
J(\theta)=\frac{1}{2m}\sum_{i=1}^{m}\left(y_{i}-h_{\theta}(x_{i})\right)^{2}
$$

# 2- 邏輯迴歸代價函式：

$$
L(\theta)=\prod_{i=1}^{m}P(y_{i}\mid x_{i};\theta)=\prod_{i=1}^{m}\left(h_{\theta}(x_{i})\right)^{y_{i}}\left(1-h_{\theta}(x_{i})\right)^{1-y_{i}}
$$

$$
l(\theta)=\log L(\theta)=\sum_{i=1}^{m}\left(y_{i}\log h_{\theta}(x_{i})+(1-y_{i})\log\left(1-h_{\theta}(x_{i})\right)\right)
$$

$$
J(\theta)=-\frac{1}{m}\,l(\theta)
$$

1) 梯度下降中 θ 的更新過程，走梯度方向的反方向：

$$
\theta_{j}:=\theta_{j}-\alpha\frac{\partial}{\partial\theta_{j}}J(\theta)
$$

$$
\begin{aligned}
\frac{\partial}{\partial\theta_{j}}J(\theta)
&=-\frac{1}{m}\sum_{i=1}^{m}\left(y_{i}\frac{1}{h_{\theta}(x_{i})}\frac{\partial}{\partial\theta_{j}}h_{\theta}(x_{i})-(1-y_{i})\frac{1}{1-h_{\theta}(x_{i})}\frac{\partial}{\partial\theta_{j}}h_{\theta}(x_{i})\right)\\
&=-\frac{1}{m}\sum_{i=1}^{m}\left(y_{i}\frac{1}{g(\theta^{T}x_{i})}-(1-y_{i})\frac{1}{1-g(\theta^{T}x_{i})}\right)\frac{\partial}{\partial\theta_{j}}g(\theta^{T}x_{i})\\
&=-\frac{1}{m}\sum_{i=1}^{m}\left(y_{i}\frac{1}{g(\theta^{T}x_{i})}-(1-y_{i})\frac{1}{1-g(\theta^{T}x_{i})}\right)g(\theta^{T}x_{i})\left(1-g(\theta^{T}x_{i})\right)\frac{\partial}{\partial\theta_{j}}\theta^{T}x_{i}\\
&=-\frac{1}{m}\sum_{i=1}^{m}\left(y_{i}\left(1-g(\theta^{T}x_{i})\right)-(1-y_{i})\,g(\theta^{T}x_{i})\right)x_{i}^{j}\\
&=-\frac{1}{m}\sum_{i=1}^{m}\left(y_{i}-g(\theta^{T}x_{i})\right)x_{i}^{j}\\
&=\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}(x_{i})-y_{i}\right)x_{i}^{j}
\end{aligned}
$$

$$
\left(\frac{f(x)}{g(x)}\right)'=\frac{g(x)f'(x)-f(x)g'(x)}{g^{2}(x)}
$$

$$
\left(e^{x}\right)'=e^{x}
$$

$$
\begin{aligned}
\frac{\partial}{\partial\theta_{j}}g(\theta^{T}x_{i})
&=-\frac{e^{-\theta^{T}x_{i}}}{\left(1+e^{-\theta^{T}x_{i}}\right)^{2}}\frac{\partial}{\partial\theta_{j}}\left((-1)\,\theta^{T}x_{i}\right)\\
&=\frac{1}{1+e^{-\theta^{T}x_{i}}}\cdot\frac{e^{-\theta^{T}x_{i}}}{1+e^{-\theta^{T}x_{i}}}\frac{\partial}{\partial\theta_{j}}\theta^{T}x_{i}\\
&=g(\theta^{T}x_{i})\left(1-g(\theta^{T}x_{i})\right)\frac{\partial}{\partial\theta_{j}}\theta^{T}x_{i}
\end{aligned}
$$

$$
\theta_{j}:=\theta_{j}-\alpha\frac{1}{m}\sum_{i=1}^{m}\left(h_{\theta}(x_{i})-y_{i}\right)x_{i}^{j}
$$