\newsection
\subsection{The differential}
\setLabelNumber{all}{2}
\compactdef{Differentiable function} Let $X \subseteq \R^n$ be open, $f: X \rightarrow \R^m$ a function, $u : \R^n \rightarrow \R^m$ a linear map and $x_0 \in X$. Then $f$ is differentiable at $x_0$ with differential $u$ if
$\displaystyle \lim_{\elementstack{x \rightarrow x_0}{x \neq x_0}} \frac{f(x) - f(x_0) - u(x - x_0)}{||x - x_0||} = 0$ where the limit is in $\R^m$.
We denote $\dx f(x_0) = u$.
If $f$ is differentiable at every $x_0 \in X$, then $f$ is differentiable on $X$.

% ────────────────────────────────────────────────────────────────────
\stepLabelNumber{all}
\shortproposition
\rmvspace Let $f: X \rightarrow \R^m$ be differentiable on $X$
\begin{itemize}[noitemsep]
    \item $f$ is continuous on $X$
    \item $f$ admits partial derivatives on $X$ with respect to each variable
    \item Assume $m = 1$, let $x_0 \in X$ and let $u(x_1, \ldots, x_n) = a_1 x_1 + \ldots + a_n x_n$ be diff. of $f$ at $x_0$.
          Then $\partial_{x_i} f(x_0) = a_i$ for $1 \leq i \leq n$
\end{itemize}
\rmvspace
% ────────────────────────────────────────────────────────────────────
\stepLabelNumber{all}
\shortproposition Let $f, g : X \rightarrow \R^m$ be differentiable with $X \subseteq \R^n$ open
\rmvspace
\begin{itemize}[noitemsep]
    \item The function $f + g$ is differentiable with differential $\dx (f + g) = \dx f + \dx g$. If $m = 1$, then $fg$ is differentiable
    \item If $m = 1$ and if $g(x) \neq 0$ for all $x \in X$, then $f \div g$ is differentiable
\end{itemize}
\rmvspace
% ────────────────────────────────────────────────────────────────────
\shortproposition If $f$ as above has all partial derivatives on $X$ and if they are all continuous on $X$, then $f$ is differentiable on $X$.
The \bi{differential is the Jacobi Matrix of $f$ at $x_0$}.
This implies that most elementary functions are differentiable.\\
% ────────────────────────────────────────────────────────────────────
\compactproposition{Chain Rule} Let $X \subseteq \R^n$ and $Y \subseteq \R^m$ both be open and let $f: X \rightarrow Y$ and $g : Y \rightarrow \R^p$ both be differentiable.
Then $g \circ f$ is differentiable on $X$ and for any $x_0 \in X$, its differential is given by
$\dx (g \circ f)(x_0) = \dx g(f(x_0)) \circ \dx f(x_0)$.
The Jacobi matrix is $J_{g \circ f}(x_0) = J_g(f(x_0)) J_f(x_0)$ (RHS is a matrix product, i.e. multiply rows of first with cols of second matrix)

\bi{For tasks} where we are given the value of a gradient at a certain point, as well as the function
(which may not be given explicitly, but instead individually for each component),
we can compute the partial derivative using the chain rule as follows:
$\frac{\partial g}{\partial \phi} = \frac{\partial g}{\partial x} \cdot \frac{\partial x}{\partial \phi}
+ \frac{\partial g}{\partial y} \cdot \frac{\partial y}{\partial \phi} + \frac{\partial g}{\partial z} \cdot \frac{\partial z}{\partial \phi}$
where all $\frac{\partial g}{\partial x}$, etc.\ are known from the gradient and the other elements can be computed quickly from the known equations.
The chain rule for higher or lower dimensional functions is as one would expect from the above formula.

Finally, evaluate $\frac{\partial g}{\partial \phi}$ at the required points and compute the result.

% ────────────────────────────────────────────────────────────────────
\setLabelNumber{all}{11}
\compactdef{Tangent space} The graph of the affine linear approximation $g(x) = f(x_0) + u(x - x_0)$, or the set
\rmvspace
\begin{align*}
    \{ (x, y) \in \R^n \times \R^m : y = f(x_0) + u(x - x_0) \}
\end{align*}

\drmvspace
\shade{gray}{Computing the tangent space} Also called the \bi{Tangent plane} in 3D.
We only need to compute $g(x) = f(x_0) + J_f(x_0) \cdot (x - x_0)$, where both $x$ and $x_0$ are vectors (and $x_0$ is the point at which we compute the tangent space).
All there is left to do is state the space: $\{ (x, y) \in \R^n \times \R^m \mid y = g(x) \}$

% ────────────────────────────────────────────────────────────────────
\stepLabelNumber{all}
\compactdef{Directional derivative} $f$ has directional derivative $w \in \R^m$ at $x_0 \in X$ in the direction of $v \in \R^n$,
if the function $g$ defined on the set $I = \{ t \in \R : x_0 + tv \in X \}$ by $g(t) = f(x_0 + tv)$ has a derivative at $t = 0$ and this derivative is equal to $w$\\
% ────────────────────────────────────────────────────────────────────
\shortremark Because $X$ is open, the set $I$ contains an open interval $]-\delta, \delta[$ for some $\delta > 0$.\\
% ────────────────────────────────────────────────────────────────────
\shortproposition Let $f$ as previously be differentiable. Then for any $x_0 \in X$ and non-zero $v \in \R^n$,
$f$ has a directional derivative at $x_0$ in the direction of $v$, given by $\dx f(x_0)(v)$\\
% ────────────────────────────────────────────────────────────────────
\shortremark The values of the above directional derivative are linear with respect to the vector $v$.
Suppose we know the dir. der. $w_1$ and $w_2$ in directions $v_1$ and $v_2$, then the directional derivative in direction $v_1 + v_2$ is $w_1 + w_2$

\shade{gray}{Computing a directional derivative} Always normalize the vector! We can compute a directional derivative using the limit definition $\limit{h}{0} \frac{f(x_0 + hv) - f(x_0)}{h}$
or using a $1$-dimensional helper function $g: h \mapsto f(x_0 + hv)$, calculating the derivative of it and evaluating $g'(0)$. That corresponds to the directional derivative.
E.g. for function $f: (x, y) \mapsto x^2 + y^2$ and direction $v = (v_1, v_2)$, we have $g: h \mapsto (x_0 + h v_1)^2 + (y_0 + h v_2)^2$.
An \bi{easy option} is to use this property: $D_v f(x_0) = J_f(x_0) \cdot v = \nabla f(x_0) \cdot v$ (the last equality for $m = 1$)

\rmvspace
\begin{center}
    \begin{tikzpicture}[node distance = 0.5cm and 0.5cm, >={Classical TikZ Rightarrow[width=7pt]}]
        \node (contdiff) {$f$ cont. diff.};
        \node (diff) [below=of contdiff] {$f$ differentiable};
        \node (cont) [below=of diff] {$f$ continuous};
        \node (diffcont) [right=of contdiff] {All $\partial_j f_i$ continuous};
        \node (diffex) [below=of diffcont] {All $\partial_j f_i$ exist};
        \node (notes) at (-5, -1) {Red arrows indicate no implication};

        \draw[arrows = ->, double distance = 1.5pt] (contdiff) -- (diff);
        \draw[arrows = ->, double distance = 1.5pt] (diff) -- (cont);
        \draw[arrows = <->, double distance = 1.5pt] (contdiff) -- (diffcont);
        \draw[arrows = ->, double distance = 1.5pt] (diffcont) -- (diffex);
        \draw[arrows = ->, double distance = 1.5pt, transform canvas={yshift=0.2cm}] (diff) -- (diffex);
        \draw[arrows = ->, double distance = 1.5pt, transform canvas={yshift=-0.2cm}, color=red] (diffex) -- (diff);
        \draw[arrows = ->, double distance = 1.5pt, color=red] (diffex) -- (cont);
    \end{tikzpicture}
\end{center}
\drmvspace