\subsection{Partial Derivatives} \begin{subbox}{Partial Derivative} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad 1 \leq i \leq n,\quad x_0 \in X$} $$\dfd{i}(x_{0}) := g'(x_{0,i})$$ \smalltext{for $g: \{ t \in \R \sep (x_{0, 1}, \ldots,\ t\ ,\ldots, x_{0, n}) \in X \} \to \R$} $$ g(t) := \underbrace{f(x_{0,1}, \ldots, x_{0,i-1},\ t\ , x_{0, i+1},\ldots,x_{0, n})}_{ \text{ Freeze all }x_{0, k} \text{ except one } x_{0, i} \to t}$$ \end{subbox} \notation $\dfd{i}(x_0) = \sdfd{i}(x_0) =\ssdfd{i}(x_0)$ \lemma \textbf{Properties of Partial Derivatives}\\ \smalltext{Assuming $\sdfd{i} \text{ and } \partial_{x_i} g \text{ exist }$:} $ \begin{array}{ll} (i) & \partial x_i (f+g) = \partial x_i f + \partial x_i g \\ (ii) & \partial x_i (fg) = \partial x_i (f)g + \partial x_i (g)f\quad \text{ if } m=1\\ (iii) & \partial x_i \Bigl(\displaystyle\frac{f}{g}\Bigr) = \displaystyle\frac{\partial x_i(f)g - \partial x_i(g)f}{g^2}\quad \text{ if } g(x) \neq 0\ \forall x \in X\\ \end{array} $\\ \subtext{$X \subset \R^n \text{ open},\quad f,g: X \to \R^m,\quad 1 \leq i \leq n$} \begin{subbox}{The Jacobian} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ with partial derivatives existing}$} $$ \textbf{J}_f(x) := \begin{bmatrix} \partial x_1 f_1(x) & \partial x_2 f_1(x) & \cdots & \partial x_n f_1(x) \\ \partial x_1 f_2(x) & \partial x_2 f_2(x) & \ddots & \vdots \\ \vdots & \vdots & \ddots & \vdots \\ \partial x_1 f_m(x) & \partial x_2 f_m(x) & \cdots & \partial x_n f_m(x) \end{bmatrix} $$ \end{subbox} \subtext{Think of $f$ as a vector of $f_i$, then $\textbf{J}_f$ is that vector stretched for all $x_j$} \definition \textbf{Gradient} $\nabla f(x_0) := \begin{bmatrix} \partial x_1 f(x_0) \\ \vdots \\ \partial x_n f(x_0) \end{bmatrix} = \textbf{J}_f(x_0)^\top$\\ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R$, i.e. \textit{must} map to $1$ dimension} \remark $\nabla f$ points in the direction of greatest increase.
\subtext{This generalizes that in $\R$, $\text{sgn}(f')$ shows if $f$ increases/decreases} \definition \textbf{Divergence} $\text{div}(f)(x_0) := \text{Tr}\bigl(\textbf{J}_f(x_0)\bigr)$\\ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^n,\quad \textbf{J}_f \text{ exists}$} \subsection{The Differential} \smalltext{ Partial derivatives don't provide a good approx. of $f$, unlike in the $1$-dimensional case. The \textit{differential} is a linear map which replicates this purpose in $\R^n$. } \begin{subbox}{Differentiability in $\R^n$ \& the Differential} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad u: \R^n \to \R^m \text{ linear map}$} $$ df(x_0) := u $$ If $f$ is differentiable at $x_0 \in X$ with $u$ s.t. $$ \underset{x \neq x_0 \to x_0}{\lim} \frac{1}{\big\| x - x_0 \big\|}\Biggl( f(x) - f(x_0) - u(x - x_0) \Biggr) = 0 $$ \end{subbox} \subtext{Similarly, $f$ is differentiable if this holds for all $x_0 \in X$} \lemma \textbf{Properties of Differentiable Functions} $ \begin{array}{ll} (i) & \text{Continuous on } X \\ (ii) & \forall i \leq m, j \leq n:\quad \partial_{x_j}f_i \text{ exists} \\ (iii) & m=1:\quad \partial_{x_i} f(x_0) = a_i \\ & \text{for:}\quad u(x_1,\ldots,x_n) = a_1x_1 + \cdots + a_nx_n \end{array} $ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ differentiable on } X$} \lemma \textbf{Preservation of Differentiability} $ \begin{array}{ll} (i) & f + g \text{ is differentiable: } d(f+g)=df+dg \\ (ii) & fg \text{ is differentiable, if } m=1 \\ (iii) & \displaystyle\frac{f}{g}\ \text{ is differentiable, if } m=1,\ g(x) \neq 0\ \forall x \in X \end{array} $ \subtext{$X \subset \R^n \text{ open},\quad f,g: X \to \R^m \text{ differentiable on }X$} \lemma \textbf{Cont.
Partial Derivatives imply Differentiability} if all $\partial_{x_j} f_i$ exist and are continuous: $$ f \text{ differentiable on } X,\quad df(x_0) = \textbf{J}_f(x_0) $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m$} \lemma \textbf{Chain Rule} $\quad g \circ f \text{ is differentiable on } X$ \begin{align*} & d(g \circ f)(x_0) &= dg\bigl( f(x_0) \bigr) \circ df(x_0) \\ & \textbf{J}_{g \circ f}(x_0) &= \textbf{J}_g\bigl( f(x_0) \bigr) \cdot \textbf{J}_f(x_0) \end{align*} \subtext{$X \subset \R^n \text{ open},\quad Y \subset \R^m \text{ open},\quad f: X \to Y, g: Y \to \R^p, f,g \text{ diff.-able}$} \definition \textbf{Tangent Space} $$ T_f(x_0) := \Bigl\{ (x,y) \in \R^n \times \R^m \sep y = f(x_0) + u(x-x_0) \Bigr\} $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ diff.-able},\quad x_0 \in X,\quad u = df(x_0)$} \definition \textbf{Directional Derivative} $$ D_v f(x_0) = \underset{t \neq 0 \to 0}{\lim} \frac{f(x_0 + tv) - f(x_0)}{t} $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad v \neq 0 \in \R^n,\quad x_0 \in X$} \lemma \textbf{Directional Derivatives for Diff.-able Functions} $$ D_vf(x_0) = df(x_0)(v) = \textbf{J}_f(x_0) \cdot v $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R^m \text{ diff.-able},\quad v \neq 0 \in \R^n,\quad x_0 \in X$} \remark $D_vf$ is linear w.r.t $v$, so: $D_{v_1 + v_2}f = D_{v_1}f + D_{v_2}f$ \remark $D_vf(x_0) = \nabla f(x_0) \cdot v = \big\| \nabla f(x_0) \big\| \big\| v \big\| \cos(\theta)$\\ \subtext{In the case $f: X \to \R$, where $\theta$ is the angle between $v$ and $\nabla f(x_0)$} \newpage \subsection{Higher Derivatives} \definition \textbf{Differentiability Classes} \begin{align*} & f \in C^1(X;\R^m) &\iffdef& f \text{ diff.-able on } X, \text{ all } \partial_{x_j} f_i \text{ exist and are cont.} \\ & f \in C^k(X;\R^m) &\iffdef& f \text{ diff.-able on } X, \text{ all } \partial_{x_j} f_i \in C^{k-1} \\ & f \in C^\infty(X;\R^m) &\iffdef& f \in C^k(X;\R^m)\ \forall k \geq 1 \end{align*} \subtext{$X
\subset \R^n \text{ open},\quad f:X\to\R^m$} \lemma Polynomials, Trig. functions and $\exp$ are in $C^\infty$ \lemma \textbf{Operations preserve Differentiability Classes} $ \begin{array}{lcll} (i) & f + g & \in C^k \\ (ii) & fg & \in C^k & \text{ if } m=1 \\ (iii) & \displaystyle\frac{f}{g} & \in C^k & \text{ if } m=1, g(x) \neq 0\ \forall x \in X \end{array} $\\ \subtext{$f,g \in C^k$} \lemma \textbf{Composition preserves Differentiability Classes} $$ g \circ f \in C^k $$ \subtext{$f \in C^k,\quad f(X) \subset Y,\quad Y \subset \R^m \text{ open},\quad g: Y \to \R^p,\quad g \in C^k$} \begin{subbox}{Partial Derivatives commute in $C^k$} \smalltext{$k \geq 2,\quad X \subset \R^n \text{ open},\quad f: X \to \R^m,\quad f \in C^k$} $$ \forall i,j:\quad \partial_{x_i,x_j}f = \partial_{x_j,x_i}f $$ \smalltext{This generalizes for $\partial_{x_1,\ldots,x_n}f$.} \end{subbox} \remark Linearity of Partial Derivatives $$ \partial_x^m(af_1 + bf_2) = a\partial_x^mf_1 + b\partial_x^mf_2 $$ \subtext{Assuming both $\partial_x^m f_{1,2}$ exist.} \definition \textbf{Laplace Operator} $$ \Delta f := \text{div}\bigl( \nabla f(x) \bigr) = \sum_{i=1}^{n} \frac{\partial}{\partial x_i}\Bigl( \frac{\partial f}{\partial x_i} \Bigr) = \sum_{i=1}^{n} \frac{\partial^2f}{\partial x_i^2} $$ \begin{subbox}{The Hessian} \smalltext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X$} $$ \textbf{H}_f(x_0) := \begin{bmatrix} \partial_{1,1}f(x_0) & \partial_{2,1}f(x_0) & \cdots & \partial_{n,1}f(x_0) \\ \partial_{1,2}f(x_0) & \partial_{2,2}f(x_0) & \cdots & \partial_{n,2}f(x_0) \\ \vdots & \vdots & \ddots & \vdots \\ \partial_{1,n}f(x_0) & \partial_{2,n}f(x_0) & \cdots & \partial_{n,n}f(x_0) \end{bmatrix} $$ Where $\bigl( \textbf{H}_f(x) \bigr)_{i,j} = \partial_{x_i,x_j}f(x)$ \end{subbox} \subtext{Note that $f: X \to \R$, i.e.
$\textbf{H}_f$ only exists for $1$-dimensionally valued $f$} \notation $\textbf{H}_f(x) = \text{Hess}_f(x) = \nabla^2f(x)$ \remark $\textbf{H}_f(x_0)$ is symmetric: $\bigl( \textbf{H}_f(x_0) \bigr)_{i,j} = \bigl( \textbf{H}_f(x_0) \bigr)_{j, i}$ \definition \textbf{Polar Coordinates} \begin{align*} g(r,\theta) &= \bigl(r \cos(\theta), r \sin(\theta)\bigr) \\ \textbf{J}_g(r,\theta) &= \begin{bmatrix} \cos(\theta) & -r \sin(\theta) \\ \sin(\theta) & r \cos(\theta) \\ \end{bmatrix} \\ \partial_xf &= \cos(\theta)\partial_rf-\frac{1}{r}\sin(\theta)\partial_\theta f \\ \partial_yf &= \sin(\theta)\partial_rf+\frac{1}{r}\cos(\theta)\partial_\theta f \end{align*} \subtext{$(r,\theta) \in (0,+\infty) \times \R,\quad \det(\textbf{J}_g) = r$} \subsection{Taylor Polynomials} % Full definition of taylor poly % \begin{subbox}{Taylor Polynomials} % \smalltext{$k \geq 1,\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$} % \begin{align*} % & T_kf(y;x_0) := f(x_0) + \sum_{i=0}^{n}\frac{\partial f}{\partial x_i}(x_0)y_i + \cdots \\ % & + \sum_{m_1 + \cdots + m_n}^{}\frac{1}{m_1!\cdots m_n!}\frac{\partial^kf}{\partial x_1^{m1} \cdots \partial x_n^{m_n}}(x_0)y_1^{m_1}\cdots y_n^{m_n} % \end{align*} % \smalltext{Where the last sum ranges over $n$-tuples in $\Z_{\geq 0}$ that sum to $k$} % \end{subbox} \begin{multicols}{2} \definition $|m| := \sum_{i=1}^{n} m_i$ \definition $m!
:= m_1!\cdots m_n!$ \definition $y^m := y_1^{m_1}\cdots y_n^{m_n}$ \end{multicols} \subtext{for $m = (m_1,\ldots,m_n),\quad y = (y_1,\ldots,y_n)$} \begin{subbox}{Taylor Polynomials} \smalltext{$k \geq 1,\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$} $$ T_kf(y;x_0) := \sum_{|m| \leq k}^{}\frac{1}{m!}\partial_x^m f(x_0)y^m $$ \end{subbox} \lemma \textbf{Taylor Approximation} $$ \underset{x \neq x_0 \to x_0}{\lim}\frac{E_kf(x;x_0)}{\big\|x-x_0\big\|^k} = 0 $$ \smalltext{Where $f(x) = T_kf(x-x_0;x_0) + E_kf(x;x_0)$}\\ \subtext{$k \geq 1,\quad X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^k,\quad x_0 \in X$} \remark Taylor polynomials of degree $1,2$: \begin{align*} & T_1f(y;x_0) = f(x_0) + \nabla f(x_0)\cdot y \\ & T_2f(y;x_0) = f(x_0) + \nabla f(x_0) \cdot y + \frac{1}{2} \Bigl( y^\top \cdot \textbf{H}_f(x_0) \cdot y\Bigr) \end{align*} \method Calculating $T_kf(y;x_0)$ also yields $\textbf{H}_f$ for $k \geq 2$. \begin{align*} & T_2f((x,y);(x_0,y_0)) = \ldots + ax^2 + by^2 + cxy \\ & \implies \textbf{H}_f(x_0,y_0) = \begin{bmatrix} 2a & c \\ c & 2b \end{bmatrix} \end{align*} \method Taylor Polynomials can be found by combination.
\begin{footnotesize} \textbf{Example:} $f(x,y) = \underbrace{e^{y^4}}_\text{1} + \underbrace{\sin(xy)}_\text{2} + \underbrace{2xy^2}_\text{3} - \underbrace{\ln(x^2+1)}_\text{4},\quad k = 3$ \begin{enumerate} \item $e^x \approx 1 + x + \frac{x^2}{2} + \frac{x^3}{6} \implies e^{y^4} \approx 1 + y^4 + \frac{y^8}{2} + \frac{y^{12}}{6}$\\ \color{gray} Since $k=3$, discarding all terms with $\deg > 3$ yields: $e^{y^4} \approx 1$ \color{black} \item $\sin(x) \approx x - \frac{x^3}{6} \implies \sin(xy) \approx xy$ \item $2xy^2 \approx 2xy^2\quad$ \color{gray}(Since it's already a polynomial, $\deg = 3$)\color{black} \item $\ln(x+1) \approx x - \frac{x^2}{2} + \frac{x^3}{3} \implies \ln(x^2 + 1) \approx x^2$ \end{enumerate} Thus: $f(x,y) \approx 1 + xy + 2xy^2 - x^2 = T_3f\Bigl((x,y);(0,0)\Bigr)$ \end{footnotesize} \newpage \subsection{Critical Points} \lemma \textbf{Local Maxima \& Minima} $$ \begin{rcases*} f(y) \leq f(x_0)\ \forall y \text{ close} \\ f(y) \geq f(x_0)\ \forall y \text{ close} \end{rcases*}\implies\quad \frac{\partial f}{\partial x_i}(x_0) = 0\ \ \forall i \leq n $$ \subtext{In other words: $df(x_0) = \nabla f(x_0) = 0$}\\ \subtext{$f: X \to \R,\quad X \subset \R^n \text{ open}, f \text{ diff.-able}$} \definition \textbf{Critical Point}\\ $$ x_0 \in X \text{ is critical } \iffdef \nabla f(x_0) = 0 $$ \subtext{$X \subset \R^n \text{ open}, f: X \to \R \text{ diff.-able}$} \remark \textbf{Existence of Maxima/Minima}\\ Don't \textit{have to} exist if $X$ is open, only if $X$ is compact.\\ \subtext{However, for compact sets, the lemma above no longer applies.} \method \textbf{Critical points on Compact Sets}\\ Decompose $X = X' \cup B$, s.t. $X'$ is open, $B$ is a \textit{boundary}.
\begin{enumerate} \item Find critical points in $X'$ \item Check if any $x \in B$ is a maximum/minimum \end{enumerate} \definition \textbf{Non-degenerate Critical Point} $$ x_0 \in X \text{ non-deg.} \iffdef \det\Bigl(\textbf{H}_f(x_0)\Bigr) \neq 0 $$ \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X \text{ is critical}$} \lemma \textbf{Definiteness of the Hessian} \begin{align*} &\textbf{H}_f(x_0) \text{ positive definite} &\implies x_0 \text{ is a local min.} \\ &\textbf{H}_f(x_0) \text{ negative definite} &\implies x_0 \text{ is a local max.} \\ &\textbf{H}_f(x_0) \text{ indefinite} &\implies x_0 \text{ is a saddle point.} \end{align*} \subtext{$X \subset \R^n \text{ open},\quad f: X \to \R,\quad f \in C^2,\quad x_0 \in X \text{ non-deg. critical}$} % The nice tikz code below is a tightened version of code from Janis Hutz' Summary. \method \textbf{Determining Definiteness for $2 \times 2$ Matrices} \begin{center} \begin{tikzpicture}[node distance = 1cm and 0.4cm, >={Stealth[round]}] \node (det) {$\det(A)$}; \node (indef) [below=of det] {indefinite}; \node (tr0) [right=of det] {$\text{Tr}(A)$}; \node (posdef) [above right=of tr0, yshift=-0.5cm] {pos. def.}; \node (negdef) [below right=of tr0, yshift=+0.5cm] {neg. def.}; \node (tr1) [left=of det] {$\text{Tr}(A)$}; \node (possemdef) [above left=of tr1, yshift=-0.5cm] {p. semi-def.}; \node (negsemdef) [below left=of tr1, yshift=+0.5cm] {n. semi-def.}; \node (zero) [below=of tr1] {$A$ is zero}; \path[->] % Level 0 (det) edge node [above] {pos.} (tr0) (det) edge node [above] {$0$} (tr1) (det) edge node [right] {neg.} (indef) (tr0) edge node [left] {pos.} (posdef) (tr0) edge node [left] {neg.} (negdef) (tr1) edge node [right] {pos.} (possemdef) (tr1) edge node [right] {neg.} (negsemdef) (tr1) edge node [right] {$0$} (zero); \end{tikzpicture} \end{center}