[AMR] Vision intro

2026-07-27 21:29:09 +02:00 · 2026-04-20 16:41:38 +02:00
parent 27ed2b62a9
commit eeb3198417
5 changed files with 60 additions and 0 deletions
@@ -71,4 +71,10 @@
 \input{parts/03_multi-sensor-estimation/06_extended-kalman-filter.tex}
 % \input{parts/03_multi-sensor-estimation/}
 \section{SLAM to Spatial AI}
 \input{parts/04_vision/00_keypoints.tex}
 \input{parts/04_vision/01_bootstrapping.tex}
 \input{parts/04_vision/02_place-recognition.tex}
 % \input{parts/04_vision/}
 \end{document}
@@ -0,0 +1,18 @@
 \subsection{Keypoints}
 \bi{Corner det.} $SSD(\Delta_x, \Delta_y) \approx [\Delta_x \; \Delta_y] \mat{M} [\Delta_x \; \Delta_y]^\top$
 with $\mat{M} = \mat{R}^\top \text{diag}(\lambda_1, \lambda_2) \mat{R}$; $\lambda_i$ E.V. of $M$;
 $R = \det(M) - \kappa \cdot \text{trace}(M)^2 = \lambda_1\lambda_2 - \kappa(\lambda_1 + \lambda_2)^2$;
 $\displaystyle M = \sum_{x, y \in P} \begin{bmatrix}
        I_x^2   & I_x I_y \\
        I_x I_y & I_y^2
    \end{bmatrix}$
 \shade{gray}{Blob Detection} ($I$ is the image)
 \bi{Laplacian of Gaussian} (LoG): $L = g(x, y, t) \ast I(x, y)$.
 Then apply Laplacian Operator $\nabla_\text{norm}^2 L = t\left( \frac{\partial^2 L}{\partial x^2} + \frac{\partial^2 L}{\partial y^2} \right)$
 \bi{Diff. of Gaussians} (DoG): $\Delta L = L(x, y, t) - L(x, y, kt)$
 \bi{SIFT Detector} \bi{(1)} Subsample + Blur \bi{(2)} DoG on each res. image \bi{(3)} Keypoints extrema in DoG pyramid
@@ -0,0 +1,33 @@
 \subsection{Bootstrapping}
 \bi{PnP Problem} {\scriptsize Persp. n-P.}
 Find sol. for camera pose \textit{directly}
 \bi{RANSAC} {\scriptsize RANdom SAmple Consensus} for find. outliers \& correct
 \bi{Stereo Triang.} Given two rays (known poses for points in 2D).
 Find good point in 3D. Fast sol: \bi{Midpoint Method}:
 \bi{1} Find p. along ray w/ min. dist (Lin. Least Squares)
 \rmvspace[0.7]
 \[
    \vec{\lambda}\! =\! [\lambda_1 \; \lambda_2]^\top\! = \! \argmin{} ||({_W}\vec{t}_{C_2} + \lambda_2 {_W}\vec{e}_2) - ({_W}\vec{t}_{C_1} + \lambda_1 {_W}\vec{e}_1)||^2
 \]
 \rmvspace[1]
 \bi{2} Solve normal equation $\mat{A} \vec{\lambda} = \vec{b}$ with $\vec{q} = -{_W}\vec{e}^\top_1 {_W}\vec{e}_2$:
 \rmvspace[0.7]
 \[
    \mat{A} = \begin{bmatrix}
        1       & \vec{q} \\
        \vec{q} & 1
    \end{bmatrix}
    \quad
    \vec{b} = \begin{bmatrix}
        \vec{e}_1^\top \cdot ({_W}\vec{t}_{C_2} - {_W}\vec{t}_{C_1}) \\
        -\vec{e}_2^\top \cdot ({_W}\vec{t}_{C_2} - {_W}\vec{t}_{C_1})
    \end{bmatrix}
 \]
 \rmvspace[0.7]
 \bi{3} Pick midp. ${_W}\vec{t}_P \! = \! 0.5(\tau_1 \! + \! \tau_2)$; $\tau_n \! = \! {_W}\vec{t}_{C_n} + \lambda_n{_W}\vec{e}_n$
@@ -0,0 +1,3 @@
 \subsection{Place Recognition}
 Idea: Build vocab from ``visual words'' (in Training).
 \bi{Runtime} Detect keypoints; Extract descriptors; Build histogram; Query DB for similarity; If no match, insert into DB, else use the match to compute the pose, e.g.\ with RANSAC