\subsection{Partial Derivatives} \begin{subbox}{Partial Derivative} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad 1 \leq i \leq n,\quad x_0 \in X$} $$\dfd{i}(x_{0}) := g'(x_{0,i})$$ \smalltext{for $g: \{ t \in \R \sep (x_{0, 1}, \ldots,\ t\ ,\ldots, x_{0, n}) \in X \} \to \R$} $$ g(t) := \underbrace{f(x_{0,1}, \ldots, x_{0,i-1},\ t\ , x_{0, i+1},\ldots,x_{0, n})}_{ \text{ Freeze all }x_{0, k} \text{ except one } x_{0, i} \to t}$$ \end{subbox} \notation $\dfd{i}(x_0) = \sdfd{i}(x_0) =\ssdfd{i}(x_0)$ \lemma \textbf{Properties of Partial Derivatives}\\ \smalltext{Assuming $\sdfd{i} \text{ and } \partial_{x_i} g \text{ exist }$:} $ \begin{array}{ll} (i) & \partial x_i (f+g) = \partial x_i f + \partial x_i g \\ (ii) & \partial x_i (fg) = \partial x_i (f)g + \partial x_i (g)f\quad \text{ if } m=1\\ (iii) & \partial x_i \Bigl(\displaystyle\frac{f}{g}\Bigr) = \displaystyle\frac{\partial x_i(f)g - \partial x_i(g)f}{g^2}\quad \text{ if } g(x) \neq 0\ \forall x \in X\\ \end{array} $\\ \subtext{$X \subset \R^n \text{ open},\quad f,g: X \to \R^m,\quad 1 \leq i \leq n$} \begin{subbox}{The Jacobian} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ with partial derivatives existing}$} $$ \textbf{J}_f(x) := \begin{bmatrix} \partial x_1 f_1(x) & \partial x_2 f_1(x) & \cdots & \partial x_n f_1(x) \\ \partial x_1 f_2(x) & \partial x_2 f_2(x) & \ddots & \vdots \\ \vdots & \vdots & \ddots & \vdots \\ \partial x_1 f_m(x) & \partial x_2 f_m(x) & \cdots & \partial x_n f_m(x) \end{bmatrix} $$ \end{subbox} \subtext{Think of $f$ as a vector of $f_i$, then $\textbf{J}_f$ is that vector stretched for all $x_j$} \definition \textbf{Gradient} $\nabla f(x_0) := \begin{bmatrix} \partial x_1 f(x_0) \\ \vdots \\ \partial x_n f(x_0) \end{bmatrix} = \textbf{J}_f(x_0)^\top$\\ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R$, i.e. \textit{must} map to $1$ dimension} \remark $\nabla f$ points in the direction of greatest increase.
\subtext{This generalizes that in $\R$, $\text{sgn}(f')$ shows if $f$ increases/decreases} \definition \textbf{Divergence} $\text{div}(f)(x_0) := \text{Tr}\bigl(\textbf{J}_f(x_0)\bigr)$\\ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^n,\quad \textbf{J}_f \text{ exists}$} \subsection{The Differential} \smalltext{ Partial derivatives don't provide a good approx. of $f$, unlike in the $1$-dimensional case. The \textit{differential} is a linear map which replicates this purpose in $\R^n$. } \begin{subbox}{Differentiability in $\R^n$ \& the Differential} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad u: \R^n \to \R^m \text{ linear map}$} $$ df(x_0) := u $$ If $f$ is differentiable at $x_0 \in X$ with $u$ s.t. $$ \underset{x \neq x_0 \to x_0}{\lim} \frac{1}{\big\| x - x_0 \big\|}\Biggl( f(x) - f(x_0) - u(x - x_0) \Biggr) = 0 $$ \end{subbox} \subtext{Similarly, $f$ is differentiable if this holds for all $x_0 \in X$} \lemma \textbf{Properties of Differentiable Functions} $ \begin{array}{ll} (i) & \text{Continuous on } X \\ (ii) & \forall i \leq m, j \leq n:\quad \partial_{x_j}f_i \text{ exists} \\ (iii) & m=1:\quad \partial_{x_i} f(x_0) = a_i \\ & \text{for:}\quad u(x_1,\ldots,x_n) = a_1x_1 + \cdots + a_nx_n \end{array} $ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ differentiable on } X$} \lemma \textbf{Preservation of Differentiability} $ \begin{array}{ll} (i) & f + g \text{ is differentiable: } d(f+g)=df+dg \\ (ii) & fg \text{ is differentiable, if } m=1 \\ (iii) & \displaystyle\frac{f}{g}\ \text{ is differentiable, if } m=1,\ g(x) \neq 0\ \forall x \in X \end{array} $ \subtext{$X \subset \R^n \text{ open},\quad f,g: X \to \R^m \text{ differentiable on }X$} \lemma \textbf{Cont.
Partial Derivatives imply Differentiability} if all $\partial_{x_j} f_i$ exist and are continuous: $$ f \text{ differentiable on } X,\quad df(x_0) = \textbf{J}_f(x_0) $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m$} \lemma \textbf{Chain Rule} $\quad g \circ f \text{ is differentiable on } X$ \begin{align*} & d(g \circ f)(x_0) &= dg\bigl( f(x_0) \bigr) \circ df(x_0) \\ & \textbf{J}_{g \circ f}(x_0) &= \textbf{J}_g\bigl( f(x_0) \bigr) \cdot \textbf{J}_f(x_0) \end{align*} \subtext{$X \subset \R^n \text{ open},\quad Y \subset \R^m \text{ open},\quad f: X \to Y, g: Y \to \R^p, f,g \text{ diff.-able}$} \definition \textbf{Tangent Space} $$ T_f(x_0) := \Bigl\{ (x,y) \in \R^n \times \R^m \sep y = f(x_0) + u(x-x_0) \Bigr\} $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ diff.-able},\quad x_0 \in X,\quad u = df(x_0)$} \definition \textbf{Directional Derivative} $$ D_v f(x_0) = \underset{t \neq 0 \to 0}{\lim} \frac{f(x_0 + tv) - f(x_0)}{t} $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad v \neq 0 \in \R^n,\quad x_0 \in X$} \lemma \textbf{Directional Derivatives for Diff.-able Functions} $$ D_vf(x_0) = df(x_0)(v) = \textbf{J}_f(x_0) \cdot v $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ diff.-able},\quad v \neq 0 \in \R^n,\quad x_0 \in X$} \remark $D_vf$ is linear w.r.t $v$, so: $D_{v_1 + v_2}f = D_{v_1}f + D_{v_2}f$ \remark $D_vf(x_0) = \nabla f(x_0) \cdot v = \big\| \nabla f(x_0) \big\| \big\| v \big\| \cos(\theta)$\\ \subtext{In the case $f: X \to \R$, where $\theta$ is the angle between $v$ and $\nabla f(x_0)$} \newpage \subsection{Higher Derivatives} \definition \textbf{Differentiability Classes} \begin{align*} & f \in C^1(X;\R^m) &\iffdef& f \text{ diff.-able on } X, \text{ all } \partial_{x_j} f_i \text{ exist and are cont.} \\ & f \in C^k(X;\R^m) &\iffdef& f \text{ diff.-able on } X, \text{ all } \partial_{x_j} f_i \in C^{k-1} \\ & f \in C^\infty(X;\R^m) &\iffdef& f \in C^k(X;\R^m)\ \forall k \geq 1 \end{align*} \subtext{$X
\subset \R^n \text{ open},\quad f:X\to\R^m$} \lemma Polynomials, Trig. functions and $\exp$ are in $C^\infty$ \lemma \textbf{Operations preserve Differentiability Classes} $ \begin{array}{lcll} (i) & f + g & \in C^k \\ (ii) & fg & \in C^k & \text{ if } m=1 \\ (iii) & \displaystyle\frac{f}{g} & \in C^k & \text{ if } m=1, g(x) \neq 0\ \forall x \in X \end{array} $\\ \subtext{$f,g \in C^k$} \lemma \textbf{Composition preserves Differentiability Classes} $$ g \circ f \in C^k $$ \subtext{$f \in C^k,\quad f(X) \subset Y,\quad Y \subset \R^m \text{ open},\quad g: Y \to \R^p,\quad g \in C^k$} \begin{subbox}{Partial Derivatives commute in $C^k$} \smalltext{$k \geq 2,\quad X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad f \in C^k$} $$ \forall i,j:\quad \partial_{x_i,x_j}f = \partial_{x_j,x_i}f $$ \smalltext{This generalizes for $\partial_{x_1,\ldots,x_n}f$.} \end{subbox} \remark Linearity of Partial Derivatives $$ \partial_x^m(af_1 + bf_2) = a\partial_x^mf_1 + b\partial_x^mf_2 $$ \subtext{Assuming both $\partial_x^m f_{1,2}$ exist.} \definition \textbf{Laplace Operator} $$ \Delta f := \text{div}\bigl( \nabla f(x) \bigr) = \sum_{i=1}^{n} \frac{\partial}{\partial x_i}\Bigl( \frac{\partial f}{\partial x_i} \Bigr) = \sum_{i=1}^{n} \frac{\partial^2f}{\partial x_i^2} $$ \begin{subbox}{The Hessian} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X$} $$ \textbf{H}_f(x_0) := \begin{bmatrix} \partial_{1,1}f(x_0) & \partial_{2,1}f(x_0) & \cdots & \partial_{n,1}f(x_0) \\ \partial_{1,2}f(x_0) & \partial_{2,2}f(x_0) & \cdots & \partial_{n,2}f(x_0) \\ \vdots & \vdots & \ddots & \vdots \\ \partial_{1,n}f(x_0) & \partial_{2,n}f(x_0) & \cdots & \partial_{n,n}f(x_0) \end{bmatrix} $$ Where $\bigl( \textbf{H}_f(x) \bigr)_{i,j} = \partial_{x_i,x_j}f(x)$ \end{subbox} \subtext{Note that $f: X \to \R$, i.e.
$\textbf{H}_f$ only exists for $1$-dimensionally valued $f$} \notation $\textbf{H}_f(x) = \text{Hess}_f(x) = \nabla^2f(x)$ \remark $\textbf{H}_f(x_0)$ is symmetric: $\bigl( \textbf{H}_f(x_0) \bigr)_{i,j} = \bigl( \textbf{H}_f(x_0) \bigr)_{j, i}$ \definition \textbf{Polar Coordinates} \begin{align*} g(r,\theta) &= \bigl(r \cos(\theta), r \sin(\theta)\bigr) \\ \textbf{J}_g(r,\theta) &= \begin{bmatrix} \cos(\theta) & -r \sin(\theta) \\ \sin(\theta) & r \cos(\theta) \\ \end{bmatrix} \\ \partial_xf &= \cos(\theta)\partial_rf-\frac{1}{r}\sin(\theta)\partial_\theta f \\ \partial_yf &= \sin(\theta)\partial_rf+\frac{1}{r}\cos(\theta)\partial_\theta f \end{align*} \subtext{$(r,\theta) \in (0,+\infty) \times \R,\quad \det(\textbf{J}_g) = r$} \subsection{Taylor Polynomials} % Full definition of taylor poly % \begin{subbox}{Taylor Polynomials} % \smalltext{$k \geq 1,\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$} % \begin{align*} % & T_kf(y;x_0) := f(x_0) + \sum_{i=0}^{n}\frac{\partial f}{\partial x_i}(x_0)y_i + \cdots \\ % & + \sum_{m_1 + \cdots + m_n}^{}\frac{1}{m_1!\cdots m_n!}\frac{\partial^kf}{\partial x_1^{m1} \cdots \partial x_n^{m_n}}(x_0)y_1^{m_1}\cdots y_n^{m_n} % \end{align*} % \smalltext{Where the last sum ranges over $n$-tuples in $\Z_{\geq 0}$ that sum to $k$} % \end{subbox} \begin{multicols}{2} \definition $|m| := \sum_{i=1}^{n} m_i$ \definition $m!
:= m_1!\cdots m_n!$ \definition $y^m := y_1^{m_1}\cdots y_n^{m_n}$ \end{multicols} \subtext{for $m = (m_1,\ldots,m_n),\quad y = (y_1,\ldots,y_n)$} \begin{subbox}{Taylor Polynomials} \smalltext{$k \geq 1,\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$} $$ T_kf(y;x_0) := \sum_{|m| \leq k}^{}\frac{1}{m!}\partial_x^m f(x_0)y^m $$ \end{subbox} \lemma \textbf{Taylor Approximation} $$ \underset{x \neq x_0 \to x_0}{\lim}\frac{E_kf(x;x_0)}{\big\|x-x_0\big\|^k} = 0 $$ \smalltext{Where $f(x) = T_kf(x-x_0;x_0) + E_kf(x;x_0)$}\\ \subtext{$k \geq 1,\quad X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$} \remark Taylor polynomials of degree $1,2$: \begin{align*} & T_1f(y;x_0) = f(x_0) + \nabla f(x_0)\cdot y \\ & T_2f(y;x_0) = f(x_0) + \nabla f(x_0) \cdot y + \frac{1}{2} \Bigl( y^\top \cdot \textbf{H}_f(x_0) \cdot y\Bigr) \end{align*} \method Calculating $T_kf(y;x_0)$ also yields $\textbf{H}_f$ for $k \geq 2$. \begin{align*} & T_2f((x,y);(x_0,y_0)) = \ldots + ax^2 + by^2 + cxy \\ & \implies \textbf{H}_f(x_0,y_0) = \begin{bmatrix} 2a & c \\ c & 2b \end{bmatrix} \end{align*} \method Taylor Polynomials can be found by combination.
\begin{footnotesize} \textbf{Example:} $f(x,y) = \underbrace{e^{y^4}}_\text{1} + \underbrace{\sin(xy)}_\text{2} + \underbrace{2xy^2}_\text{3} - \underbrace{\ln(x^2+1)}_\text{4},\quad k = 3$ \begin{enumerate} \item $e^x \approx 1 + x + \frac{x^2}{2} + \frac{x^3}{6} \implies e^{y^4} \approx 1 + y^4 + \frac{y^8}{2} + \frac{y^{12}}{6}$\\ \color{gray} Since $k=3$, discarding all terms with $\deg > 3$ yields: $e^{y^4} \approx 1$ \color{black} \item $\sin(x) \approx x - \frac{x^3}{6} \implies \sin(xy) \approx xy$ \item $2xy^2 \approx 2xy^2\quad$ \color{gray}(Since it's already a polynomial, $\deg = 3$)\color{black} \item $\ln(x+1) \approx x - \frac{x^2}{2} + \frac{x^3}{3} \implies \ln(x^2 + 1) \approx x^2$ \end{enumerate} Thus: $f(x,y) \approx 1 + xy + 2xy^2 - x^2 = T_3f\Bigl((x,y);(0,0)\Bigr)$ \end{footnotesize} \newpage \subsection{Critical Points} \lemma \textbf{Local Maxima \& Minima} $$ \begin{rcases*} f(y) \leq f(x_0)\ \forall y \text{ close} \\ f(y) \geq f(x_0)\ \forall y \text{ close} \end{rcases*}\implies\quad \frac{\partial f}{\partial x_i}(x_0) = 0\ \ \forall i \leq n $$ \subtext{In other words: $df(x_0) = \nabla f(x_0) = 0$}\\ \subtext{$f: X \to \R,\quad X \subset \R^n \text{ open}, f \text{ diff.-able}$} \definition \textbf{Critical Point}\\ $$ x_0 \in X \text{ is critical } \iffdef \nabla f(x_0) = 0 $$ \subtext{$X \subset \R^n \text{ open}, f: X \to \R \text{ diff.-able}$} \remark \textbf{Existence of Maxima/Minima}\\ Don't \textit{have to} exist if $X$ is open, only if $X$ is compact.\\ \subtext{However, for compact sets, the lemma above no longer applies.} \method \textbf{Critical points on Compact Sets}\\ Decompose $X = X' \cup B$, s.t. $X'$ is open, $B$ is a \textit{boundary}.
\begin{enumerate} \item Find critical points in $X'$ \item Check if any $x \in B$ is a maximum/minimum \end{enumerate} \definition \textbf{Non-degenerate Critical Point} $$ x_0 \in X \text{ non-deg.} \iffdef \det\Bigl(\textbf{H}_f(x_0)\Bigr) \neq 0 $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X \text{ is critical}$} \lemma \textbf{Definiteness of the Hessian} \begin{align*} &\textbf{H}_f(x_0) \text{ positive definite} &\implies x_0 \text{ is a local min.} \\ &\textbf{H}_f(x_0) \text{ negative definite} &\implies x_0 \text{ is a local max.} \\ &\textbf{H}_f(x_0) \text{ indefinite} &\implies x_0 \text{ is a saddle point.} \end{align*} \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X \text{ non-deg. critical}$} % The nice tikz code below is a tightened version of code from Janis Hutz' Summary. \method \textbf{Determining Definiteness for $2 \times 2$ Matrices} \begin{center} \begin{tikzpicture}[node distance = 1cm and 0.4cm, >={Stealth[round]}] \node (det) {$\det(A)$}; \node (indef) [below=of det] {indefinite}; \node (tr0) [right=of det] {$\text{Tr}(A)$}; \node (posdef) [above right=of tr0, yshift=-0.5cm] {pos. def.}; \node (negdef) [below right=of tr0, yshift=+0.5cm] {neg. def.}; \node (tr1) [left=of det] {$\text{Tr}(A)$}; \node (possemdef) [above left=of tr1, yshift=-0.5cm] {p. semi-def.}; \node (negsemdef) [below left=of tr1, yshift=+0.5cm] {n. semi-def.}; \node (zero) [below=of tr1] {$A$ is zero}; \path[->] % Level 0 (det) edge node [above] {pos.} (tr0) (det) edge node [above] {$0$} (tr1) (det) edge node [right] {neg.} (indef) (tr0) edge node [left] {pos.} (posdef) (tr0) edge node [left] {neg.} (negdef) (tr1) edge node [right] {pos.} (possemdef) (tr1) edge node [right] {neg.} (negsemdef) (tr1) edge node [right] {$0$} (zero); \end{tikzpicture} \end{center}