mirror of
https://github.com/janishutz/eth-summaries.git
synced 2026-01-11 13:38:24 +00:00
345 lines
14 KiB
TeX
345 lines
14 KiB
TeX
\subsection{Partial Derivatives}
|
|
|
|
\begin{subbox}{Partial Derivative}
|
|
\smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad 1 \leq i \leq n,\quad x_0 \in X$}
|
|
$$\dfd{i}(x_{0}) := g'(x_{0,i})$$
|
|
\smalltext{for $g: \{ t \in \R \sep (x_{0, 1}, \ldots,\ t\ ,\ldots, x_{0, n}) \in X \} \to \R$}
|
|
$$ g(t) := \underbrace{f(x_{0,1}, \ldots, x_{0,i-1},\ t\ , x_{0, i+1},\ldots,x_{0, n})}_{ \text{ Freeze all }x_{0, k} \text{ except one } x_{0, i} \to t}$$
|
|
\end{subbox}
|
|
|
|
\notation $\dfd{i}(x_0) = \sdfd{i}(x_0) =\ssdfd{i}(x_0)$
|
|
|
|
\lemma \textbf{Properties of Partial Derivatives}\\
|
|
\smalltext{Assuming $\sdfd{i} \text{ and } \partial_{x_i} g \text{ exist }$:}
|
|
|
|
$
|
|
\begin{array}{ll}
|
|
(i) & \partial x_i (f+g) = \partial x_i f + \partial x_i g \\
|
|
(ii) & \partial x_i (fg) = \partial x_i (f)g + \partial x_i (g)f\quad \text{ if } m=1\\
|
|
(iii) & \partial x_i \Bigl(\displaystyle\frac{f}{g}\Bigr) = \displaystyle\frac{\partial x_i(f)g - \partial x_i(g)f}{g^2}\quad \text{ if } g(x) \neq 0\ \forall x \in X\\
|
|
\end{array}
|
|
$\\
|
|
\subtext{$X \subset \R^n \text{ open},\quad f,g: X \to \R^m,\quad 1 \leq i \leq n$}
|
|
|
|
\begin{subbox}{The Jacobian}
|
|
\smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ with partial derivatives existing}$}
|
|
$$
|
|
\textbf{J}_f(x) := \begin{bmatrix}
|
|
\partial x_1 f_1(x) & \partial x_2 f_1(x) & \cdots & \partial x_n f_1(x) \\
|
|
\partial x_1 f_2(x) & \partial x_2 f_2(x) & \cdots & \partial x_n f_2(x) \\
|
|
\vdots & \vdots & \ddots & \vdots \\
|
|
\partial x_1 f_m(x) & \partial x_2 f_m(x) & \cdots & \partial x_n f_m(x)
|
|
\end{bmatrix}
|
|
$$
|
|
\end{subbox}
|
|
\subtext{Think of $f$ as a vector of $f_i$, then $\textbf{J}_f$ is that vector stretched for all $x_j$}
|
|
|
|
\definition \textbf{Gradient} $\nabla f(x_0) := \begin{bmatrix}
|
|
\partial x_1 f(x_0) \\
|
|
\vdots \\
|
|
\partial x_n f(x_0)
|
|
\end{bmatrix} = \textbf{J}_f(x_0)^\top$\\
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R$, i.e. \textit{must} map to $1$ dimension}
|
|
|
|
\remark $\nabla f$ points in the direction of greatest increase.
|
|
\subtext{This generalizes that in $\R$, $\text{sgn}(f')$ shows if $f$ increases/decreases}
|
|
|
|
\definition \textbf{Divergence} $\text{div}(f)(x_0) := \text{Tr}\bigl(\textbf{J}_f(x_0)\bigr)$\\
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^n,\quad \textbf{J}_f \text{ exists}$}
|
|
|
|
\subsection{The Differential}
|
|
|
|
\smalltext{
|
|
Partial derivatives don't provide a good approx. of $f$, unlike in the $1$-dimensional case. The \textit{differential} is a linear map which replicates this purpose in $\R^n$.
|
|
}
|
|
|
|
\begin{subbox}{Differentiability in $\R^n$ \& the Differential}
|
|
\smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad u: \R^n \to \R^m \text{ linear map}$}
|
|
$$
|
|
df(x_0) := u
|
|
$$
|
|
If $f$ is differentiable at $x_0 \in X$ with $u$ s.t.
|
|
$$
|
|
\underset{x \neq x_0 \to x_0}{\lim} \frac{1}{\big\| x - x_0 \big\|}\Biggl( f(x) - f(x_0) - u(x - x_0) \Biggr) = 0
|
|
$$
|
|
\end{subbox}
|
|
\subtext{Similarly, $f$ is differentiable on $X$ if this holds for all $x_0 \in X$}
|
|
|
|
\lemma \textbf{Properties of Differentiable Functions}
|
|
|
|
$
|
|
\begin{array}{ll}
|
|
(i) & \text{Continuous on } X \\
|
|
(ii) & \forall i \leq m, j \leq n:\quad \partial_{x_j}f_i \text{ exists} \\
|
|
(iii) & m=1:\quad \partial_{x_i} f(x_0) = a_i \\
|
|
& \text{for:}\quad u(x_1,\ldots,x_n) = a_1x_1 + \cdots + a_nx_n
|
|
\end{array}
|
|
$
|
|
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ differentiable on } X$}
|
|
|
|
\lemma \textbf{Preservation of Differentiability}
|
|
|
|
$
|
|
\begin{array}{ll}
|
|
(i) & f + g \text{ is differentiable: } d(f+g)=df+dg \\
|
|
(ii) & fg \text{ is differentiable, if } m=1 \\
|
|
(iii) & \displaystyle\frac{f}{g}\ \text{ is differentiable, if } m=1,\ g(x) \neq 0\ \forall x \in X
|
|
\end{array}
|
|
$
|
|
|
|
\subtext{$X \subset \R^n \text{ open},\quad f,g: X \to \R^m \text{ differentiable on }X$}
|
|
|
|
\lemma \textbf{Cont. Partial Derivatives imply Differentiability}
|
|
|
|
if all $\partial_{x_j} f_i$ exist and are continuous:
|
|
$$
|
|
f \text{ differentiable on } X,\quad df(x_0) = \textbf{J}_f(x_0)
|
|
$$
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m$}
|
|
|
|
\lemma \textbf{Chain Rule} $\quad g \circ f \text{ is differentiable on } X$
|
|
\begin{align*}
|
|
& d(g \circ f)(x_0) &= dg\bigl( f(x_0) \bigr) \circ df(x_0) \\
|
|
& \textbf{J}_{g \circ f}(x_0) &= \textbf{J}_g\bigl( f(x_0) \bigr) \cdot \textbf{J}_f(x_0)
|
|
\end{align*}
|
|
\subtext{$X \subset \R^n \text{ open},\quad Y \subset \R^m \text{ open},\quad f: X \to Y, g: Y \to \R^p, f,g \text{ diff.-able}$}
|
|
|
|
\definition \textbf{Tangent Space}
|
|
$$
|
|
T_f(x_0) := \Bigl\{ (x,y) \in \R^n \times \R^m \sep y = f(x_0) + u(x-x_0) \Bigr\}
|
|
$$
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ diff.-able},\quad x_0 \in X,\quad u = df(x_0)$}
|
|
|
|
\definition \textbf{Directional Derivative}
|
|
$$
|
|
D_v f(x_0) = \underset{t \neq 0 \to 0}{\lim} \frac{f(x_0 + tv) - f(x_0)}{t}
|
|
$$
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad v \neq 0 \in \R^n,\quad x_0 \in X$}
|
|
|
|
\lemma \textbf{Directional Derivatives for Diff.-able Functions}
|
|
$$
|
|
D_vf(x_0) = df(x_0)(v) = \textbf{J}_f(x_0) \cdot v
|
|
$$
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ diff.-able},\quad v \neq 0 \in \R^n,\quad x_0 \in X$}
|
|
|
|
\remark $D_vf$ is linear w.r.t $v$, so: $D_{v_1 + v_2}f = D_{v_1}f + D_{v_2}f$
|
|
|
|
\remark $D_vf(x_0) = \nabla f(x_0) \cdot v = \big\| \nabla f(x_0) \big\| \big\| v \big\| \cos(\theta)$\\
|
|
\subtext{In the case $f: X \to \R$, where $\theta$ is the angle between $v$ and $\nabla f(x_0)$}
|
|
|
|
\newpage
|
|
\subsection{Higher Derivatives}
|
|
|
|
\definition \textbf{Differentiability Classes}
|
|
\begin{align*}
|
|
& f \in C^1(X;\R^m) &\iffdef& f \text{ diff.-able on } X, \text{ all } \partial_{x_j} f_i \text{ continuous} \\
|
|
& f \in C^k(X;\R^m) &\iffdef& f \text{ diff.-able on } X, \text{ all } \partial_{x_j} f_i \in C^{k-1} \\
|
|
& f \in C^\infty(X;\R^m) &\iffdef& f \in C^k(X;\R^m)\ \forall k \geq 1
|
|
\end{align*}
|
|
\subtext{$X \subset \R^n \text{ open},\quad f:X\to\R^m$}
|
|
|
|
\lemma Polynomials, Trig. functions and $\exp$ are in $C^\infty$
|
|
|
|
\lemma \textbf{Operations preserve Differentiability Classes}
|
|
|
|
$
|
|
\begin{array}{lcll}
|
|
(i) & f + g & \in C^k \\
|
|
(ii) & fg & \in C^k & \text{ if } m=1 \\
|
|
(iii) & \displaystyle\frac{f}{g} & \in C^k & \text{ if } m=1, g(x) \neq 0\ \forall x \in X
|
|
\end{array}
|
|
$\\
|
|
\subtext{$f,g \in C^k$}
|
|
|
|
\lemma \textbf{Composition preserves Differentiability Classes}
|
|
$$
|
|
g \circ f \in C^k
|
|
$$
|
|
\subtext{$f \in C^k,\quad f(X) \subset Y,\quad Y \subset \R^m \text{ open},\quad g: Y \to \R^p,\quad g \in C^k$}
|
|
|
|
\begin{subbox}{Partial Derivatives commute in $C^k$}
|
|
\smalltext{$k \geq 2,\quad X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad f \in C^k$}
|
|
$$
|
|
\forall x,y:\quad \partial_{x,y}f = \partial_{y,x}f
|
|
$$
|
|
\smalltext{This generalizes for $\partial_{x_1,\ldots,x_n}f$.}
|
|
\end{subbox}
|
|
|
|
\remark Linearity of Partial Derivatives
|
|
$$
|
|
\partial_x^m(af_1 + bf_2) = a\partial_x^mf_1 + b\partial_x^mf_2
|
|
$$
|
|
\subtext{Assuming both $\partial_x^m f_{1,2}$ exist.}
|
|
|
|
\definition \textbf{Laplace Operator}
|
|
$$
|
|
\Delta f := \text{div}\bigl( \nabla f(x) \bigr) = \sum_{i=1}^{n} \frac{\partial}{\partial x_i}\Bigl( \frac{\partial f}{\partial x_i} \Bigr) = \sum_{i=1}^{n} \frac{\partial^2f}{\partial x_i^2}
|
|
$$
|
|
|
|
\begin{subbox}{The Hessian}
|
|
\smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X$}
|
|
$$
|
|
\textbf{H}_f(x_0) := \begin{bmatrix}
|
|
\partial_{1,1}f(x_0) & \partial_{2,1}f(x_0) & \cdots & \partial_{n,1}f(x_0) \\
|
|
\partial_{1,2}f(x_0) & \partial_{2,2}f(x_0) & \cdots & \partial_{n,2}f(x_0) \\
|
|
\vdots & \vdots & \ddots & \vdots \\
|
|
\partial_{1,n}f(x_0) & \partial_{2,n}f(x_0) & \cdots & \partial_{n,n}f(x_0)
|
|
\end{bmatrix}
|
|
$$
|
|
Where $\bigl( \textbf{H}_f(x) \bigr)_{i,j} = \partial_{x_i,x_j}f(x)$
|
|
\end{subbox}
|
|
\subtext{Note that $f: X \to \R$, i.e. $\textbf{H}_f$ only exists for $1$-dimensionally valued $f$}
|
|
|
|
\notation $\textbf{H}_f(x) = \text{Hess}_f(x) = \nabla^2f(x)$
|
|
|
|
\remark $\textbf{H}_f(x_0)$ is symmetric: $\bigl( \textbf{H}_f(x_0) \bigr)_{i,j} = \bigl( \textbf{H}_f(x_0) \bigr)_{j, i}$
|
|
|
|
\definition \textbf{Polar Coordinates}
|
|
\begin{align*}
|
|
g(r,\theta) &= \bigl(r \cos(\theta), r \sin(\theta)\bigr) \\
|
|
\textbf{J}_g(r,\theta) &= \begin{bmatrix}
|
|
\cos(\theta) & -r \sin(\theta) \\
|
|
\sin(\theta) & r \cos(\theta) \\
|
|
\end{bmatrix} \\
|
|
\partial_xf &= \cos(\theta)\partial_rf-\frac{1}{r}\sin(\theta)\partial_\theta f \\
|
|
\partial_yf &= \sin(\theta)\partial_rf+\frac{1}{r}\cos(\theta)\partial_\theta f
|
|
\end{align*}
|
|
\subtext{$(r,\theta) \in (0,+\infty) \times \R,\quad \det(\textbf{J}_g) = r$}
|
|
|
|
\subsection{Taylor Polynomials}
|
|
|
|
% Full definition of taylor poly
|
|
|
|
% \begin{subbox}{Taylor Polynomials}
|
|
% \smalltext{$k \geq 1,\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$}
|
|
% \begin{align*}
|
|
% & T_kf(y;x_0) := f(x_0) + \sum_{i=0}^{n}\frac{\partial f}{\partial x_i}(x_0)y_i + \cdots \\
|
|
% & + \sum_{m_1 + \cdots + m_n}^{}\frac{1}{m_1!\cdots m_n!}\frac{\partial^kf}{\partial x_1^{m1} \cdots \partial x_n^{m_n}}(x_0)y_1^{m_1}\cdots y_n^{m_n}
|
|
% \end{align*}
|
|
% \smalltext{Where the last sum ranges over $n$-tuples in $\Z_{\geq 0}$ that sum to $k$}
|
|
% \end{subbox}
|
|
|
|
\begin{multicols}{2}
|
|
\definition $|m| := \sum_{i=1}^{n} m_i$
|
|
|
|
\definition $m! := m_1!\cdots m_n!$
|
|
|
|
\definition $y^m := y_1^{m_1}\cdots y_n^{m_n}$
|
|
\end{multicols}
|
|
\subtext{for $m = (m_1,\ldots,m_n),\quad y = (y_1,\ldots,y_n)$}
|
|
|
|
\begin{subbox}{Taylor Polynomials}
|
|
\smalltext{$k \geq 1,\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$}
|
|
$$
|
|
T_kf(y;x_0) := \sum_{|m| \leq k}^{}\frac{1}{m!}\partial_x^m f(x_0)y^m
|
|
$$
|
|
\end{subbox}
|
|
|
|
\lemma \textbf{Taylor Approximation}
|
|
$$
|
|
\underset{x \neq x_0 \to x_0}{\lim}\frac{E_kf(x;x_0)}{\big\|x-x_0\big\|^k} = 0
|
|
$$
|
|
\smalltext{Where $f(x) = T_kf(x-x_0;x_0) + E_kf(x;x_0)$}\\
|
|
\subtext{$k \geq 1,\quad X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$}
|
|
|
|
\remark Taylor polynomials of degree $1,2$:
|
|
\begin{align*}
|
|
& T_1f(y;x_0) = f(x_0) + \nabla f(x_0)\cdot y \\
|
|
& T_2f(y;x_0) = f(x_0) + \nabla f(x_0) \cdot y + \frac{1}{2} \Bigl( y^\top \cdot \textbf{H}_f(x_0) \cdot y\Bigr)
|
|
\end{align*}
|
|
|
|
\method Calculating $T_kf(y;x_0)$ also yields $\textbf{H}_f$ for $k \geq 2$.
|
|
\begin{align*}
|
|
& T_2f((x,y);(x_0,y_0)) = \ldots + ax^2 + by^2 + cxy \\
|
|
& \implies \textbf{H}_f(x_0,y_0) = \begin{bmatrix}
|
|
2a & c \\
|
|
c & 2b
|
|
\end{bmatrix}
|
|
\end{align*}
|
|
|
|
\method Taylor Polynomials can be found by combination.
|
|
|
|
\begin{footnotesize}
|
|
\textbf{Example:} $f(x,y) = \underbrace{e^{y^4}}_\text{1} + \underbrace{\sin(xy)}_\text{2} + \underbrace{2xy^2}_\text{3} - \underbrace{\ln(x^2+1)}_\text{4},\quad k = 3$
|
|
\begin{enumerate}
|
|
\item $e^x \approx 1 + x + \frac{x^2}{2} + \frac{x^3}{6} \implies e^{y^4} \approx 1 + y^4 + \frac{y^8}{2} + \frac{y^{12}}{6}$\\
|
|
\color{gray} Since $k=3$, discarding all terms with $\deg > 3$ yields: $e^{y^4} \approx 1$ \color{black}
|
|
\item $\sin(x) \approx x - \frac{x^3}{6} \implies \sin(xy) \approx xy$
|
|
\item $2xy^2 \approx 2xy^2\quad$ \color{gray}(Since it's already a polynomial, $\deg = 3$)\color{black}
|
|
\item $\ln(x+1) \approx x - \frac{x^2}{2} + \frac{x^3}{3} \implies \ln(x^2 + 1) \approx x^2$
|
|
\end{enumerate}
|
|
Thus: $f(x,y) \approx 1 + xy + 2xy^2 - x^2 = T_3f\Bigl((x,y);(0,0)\Bigr)$
|
|
\end{footnotesize}
|
|
|
|
\newpage
|
|
\subsection{Critical Points}
|
|
|
|
\lemma \textbf{Local Maxima \& Minima}
|
|
$$
|
|
\begin{rcases*}
|
|
f(y) \leq f(x_0)\ \forall y \text{ close} \\
|
|
f(y) \geq f(x_0)\ \forall y \text{ close}
|
|
\end{rcases*}\quad \frac{\partial f}{\partial x_i}(x_0) = 0\ \ \forall i \leq n
|
|
$$
|
|
\subtext{In other words: $df(x_0) = \nabla f(x_0) = 0$}\\
|
|
\subtext{$f: X \to \R,\quad X \subset \R^n \text{ open}, f \text{ diff.-able}$}
|
|
|
|
\definition \textbf{Critical Point}\\
|
|
$$
|
|
x_0 \in X \text{ is critical } \iffdef \nabla f(x_0) = 0
|
|
$$
|
|
\subtext{$X \subset \R^n \text{ open}, f: X \to \R \text{ diff.-able}$}
|
|
|
|
\remark \textbf{Existence of Maxima/Minima}\\
|
|
Extrema don't \textit{have to} exist if $X$ is open; they are only guaranteed to exist if $X$ is compact (and $f$ continuous).\\
|
|
\subtext{However, for compact sets, the lemma above no longer applies.}
|
|
|
|
\method \textbf{Critical points on Compact Sets}\\
|
|
Decompose $X = X' \cup B$, s.t. $X'$ is open, $B$ is a \textit{boundary}.
|
|
\begin{enumerate}
|
|
\item Find critical points in $X'$
|
|
\item Check if any $x \in B$ is a maximum/minimum
|
|
\end{enumerate}
|
|
|
|
\definition \textbf{Non-degenerate Critical Point}
|
|
$$
|
|
x_0 \in X \text{ non-deg.} \iffdef \det\Bigl(\textbf{H}_f(x_0)\Bigr) \neq 0
|
|
$$
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X \text{ is critical}$}
|
|
|
|
\lemma \textbf{Definiteness of the Hessian}
|
|
\begin{align*}
|
|
&\textbf{H}_f(x_0) \text{ positive definite} &\implies x_0 \text{ is a local min.} \\
|
|
&\textbf{H}_f(x_0) \text{ negative definite} &\implies x_0 \text{ is a local max.} \\
|
|
&\textbf{H}_f(x_0) \text{ indefinite} &\implies x_0 \text{ is a saddle point.}
|
|
\end{align*}
|
|
\subtext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X \text{ non-deg. critical}$}
|
|
|
|
% The nice tikz code below is a tightened version of code from Janis Hutz' Summary.
|
|
|
|
\method \textbf{Determining Definiteness for $2 \times 2$ Matrices}
|
|
\begin{center}
|
|
\begin{tikzpicture}[node distance = 1cm and 0.4cm, >={Stealth[round]}]
|
|
\node (det) {$\det(A)$};
|
|
\node (indef) [below=of det] {indefinite};
|
|
\node (tr0) [right=of det] {$\text{Tr}(A)$};
|
|
\node (posdef) [above right=of tr0, yshift=-0.5cm] {pos. def.};
|
|
\node (negdef) [below right=of tr0, yshift=+0.5cm] {neg. def.};
|
|
\node (tr1) [left=of det] {$\text{Tr}(A)$};
|
|
\node (possemdef) [above left=of tr1, yshift=-0.5cm] {p. semi-def.};
|
|
\node (negsemdef) [below left=of tr1, yshift=+0.5cm] {n. semi-def.};
|
|
\node (zero) [below=of tr1] {$A$ is zero};
|
|
|
|
\path[->]
|
|
% Level 0
|
|
(det) edge node [above] {pos.} (tr0)
|
|
(det) edge node [above] {$0$} (tr1)
|
|
(det) edge node [right] {neg.} (indef)
|
|
(tr0) edge node [left] {pos.} (posdef)
|
|
(tr0) edge node [left] {neg.} (negdef)
|
|
(tr1) edge node [right] {pos.} (possemdef)
|
|
(tr1) edge node [right] {neg.} (negsemdef)
|
|
(tr1) edge node [right] {$0$} (zero);
|
|
\end{tikzpicture}
|
|
\end{center} |