mirror of
https://github.com/janishutz/eth-summaries.git
synced 2026-01-11 01:18:27 +00:00
[SPCA] Restructuring, finish memory management in C, start dynamic memory management section
This commit is contained in:
@@ -14,6 +14,13 @@ struct MyStruct {
|
||||
int el2;
|
||||
};
|
||||
|
||||
// Like structs, but can only hold one of the values at a time: all members
// share the same storage, so writing one member overwrites the others.
|
||||
union MyUnion {
|
||||
int ival;
|
||||
float fval;
|
||||
char *sval;
|
||||
};
|
||||
|
||||
int fun( int j ) {
|
||||
static int i = 0; // Persists across calls of fun
|
||||
short my_var = 1; // Block scoped (deallocated when going out of scope)
|
||||
@@ -27,7 +34,10 @@ int main( int argc, char *argv[] ) {
|
||||
}
|
||||
struct MyStruct test; // Allocate memory on stack for struct
|
||||
struct MyStruct *test_p = &test; // Pointer to memory where test resides
|
||||
test.el1 = 1; // Direct element access
|
||||
test_p->el2 = 2; // Via pointer
|
||||
struct MyStruct test2;
|
||||
union MyUnion my_uval; // Work exactly like structs for access
|
||||
test.el1 = 1; // Direct element access
|
||||
test_p->el2 = 2; // Via pointer
|
||||
test2 = test; // Copies the struct
|
||||
return 0;
|
||||
}
|
||||
|
||||
22
semester3/spca/code-examples/00_c/02_memory/00_memory.c
Normal file
22
semester3/spca/code-examples/00_c/02_memory/00_memory.c
Normal file
@@ -0,0 +1,22 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Demonstrates the basic heap API: malloc, calloc, realloc and free.
 *
 * Returns EXIT_SUCCESS if every allocation succeeded and EXIT_FAILURE
 * otherwise. Every block obtained so far is released on all paths, so the
 * example itself does not leak.
 */
int main( int argc, char *argv[] ) {
    long *arr = (long *) malloc( 10 * sizeof( long ) ); // Allocate on heap
    if ( arr == NULL ) // Check if successful
        return EXIT_FAILURE;
    arr[ 0 ] = 5;

    long *arr2;
    if ( ( arr2 = (long *) calloc( 10, sizeof( long ) ) ) == NULL ) {
        free( arr ); // Do not leak the first block on the error path
        return EXIT_FAILURE; // Same as above, but fewer lines and memory zeroed
    }

    // Reallocate memory (to change size). Always use new pointer and do check!
    // If realloc fails it returns NULL and the old block stays allocated, so
    // assigning the result directly to arr2 would lose (leak) that block.
    long *resized = (long *) realloc( arr2, 15 * sizeof( long ) );
    if ( resized == NULL ) {
        free( arr2 ); // Old block is still valid and still ours to free
        free( arr );
        return EXIT_FAILURE;
    }
    arr2 = resized; // Only now replace the old pointer

    free( arr ); // Deallocate the memory
    arr = NULL; // Best practice: NULL pointer
    free( arr2 ); // *Can* omit NULLing pointer because end

    return EXIT_SUCCESS;
}
|
||||
@@ -0,0 +1,15 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
// Deliberately broken example: each commented statement below demonstrates a
// different memory error. It exists to illustrate mistakes - do not copy it.
int main( int argc, char **argv ) {
|
||||
int a[ 2 ];
|
||||
int *b = malloc( 2 * sizeof( int ) ), *c;
|
||||
a[ 2 ] = 5; // assign past the end of an array
|
||||
b[ 0 ] += 2; // assume malloc zeroes out memory
|
||||
c = b + 3; // mess up your pointer arithmetic
|
||||
free( &( a[ 0 ] ) ); // pass pointer to free() that wasn't malloc'ed
|
||||
free( b );
|
||||
free( b ); // double-free the same block
|
||||
b[ 0 ] = 5; // use a free()'d pointer
|
||||
// and many more!
|
||||
return 0;
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
\newpage
|
||||
\subsubsection{Declarations}
|
||||
We have already seen a few examples for how \texttt{C} handles declarations.
|
||||
In concept they are similar (and scoping works the same) to most other \texttt{C}-like programming languages, including \texttt{Java}.
|
||||
\inputcodewithfilename{c}{code-examples/00_c/00_basics/}{02_declarations.c}
|
||||
|
||||
A peculiarity of \texttt{C} is that the bit-count is not defined by the language, but rather the hardware it is compiled for.
|
||||
\begin{fullTable}{llll}{\texttt{C} data type & typical 32-bit & ia32 & x86-64}{Comparison of byte-sizes for each datatype on different architectures}
|
||||
\texttt{char} & 1 & 1 & 1 \\
|
||||
\texttt{short} & 2 & 2 & 2 \\
|
||||
\texttt{int} & 4 & 4 & 4 \\
|
||||
\texttt{long} & 4 & 4 & 8 \\
|
||||
\texttt{long long} & 8 & 8 & 8 \\
|
||||
\texttt{float} & 4 & 4 & 4 \\
|
||||
\texttt{double} & 4 & 8 & 8 \\
|
||||
\texttt{long double} & 8 & 10/12 & 16 \\
|
||||
\end{fullTable}
|
||||
|
||||
\drmvspace
|
||||
By default, integers in \lC\ are \texttt{signed}, to declare an unsigned integer, use \texttt{unsigned int}.
|
||||
Since it is hard and annoying to remember the number of bytes that are in each data type, \texttt{C99} has introduced the extended integer types,
|
||||
which can be imported from \texttt{stdint.h} and are of form \texttt{int<bit count>\_t} and \texttt{uint<bit count>\_t},
|
||||
where we substitute the \texttt{<bit count>} with the number of bits (have to correspond to a valid type of course).
|
||||
|
||||
Another notable difference of \texttt{C} compared to other languages is that \texttt{C} doesn't natively have a \texttt{boolean} type,
|
||||
by convention a \texttt{short} is used to represent it, where any non-zero value means \texttt{true} and \texttt{0} means \texttt{false}.
|
||||
Since boolean types are quite handy, the \texttt{!} syntax for negation turns any non-zero value of any integer type into zero and vice-versa.
|
||||
\texttt{C99} has added support for a bool type via \texttt{stdbool.h}, which however is still an integer.
|
||||
|
||||
Notably, \texttt{C} doesn't have a very rigid type system and lower bit-count types are implicitly cast to higher bit-count data types, i.e.
|
||||
if you add a \texttt{short} and an \texttt{int}, the \texttt{short} is cast to \texttt{short} (bits 16-31 are set to $0$) and the two are added.
|
||||
Explicit casting between almost all types is also supported.
|
||||
Some will force a change of bit representation, but most won't (notably, when casting to and from \texttt{float}-like types, minus to \texttt{void})
|
||||
|
||||
Another important feature is that every \lC\ statement is also an expression, see above code block for example.
|
||||
|
||||
The \texttt{void} type has \bi{no} value and is used for untyped pointers and declaring functions with no return value
|
||||
|
||||
It is also possible to define a custom type using \texttt{typedef <type it represents> <name of the new type>}
|
||||
86
semester3/spca/parts/01_c/01_basics/02_declarations.tex
Normal file
86
semester3/spca/parts/01_c/01_basics/02_declarations.tex
Normal file
@@ -0,0 +1,86 @@
|
||||
\newpage
|
||||
\subsubsection{Declarations}
|
||||
We have already seen a few examples for how \texttt{C} handles declarations.
|
||||
In concept they are similar (and scoping works the same) to most other \texttt{C}-like programming languages, including \texttt{Java}.
|
||||
|
||||
\inputcodewithfilename{c}{code-examples/00_c/00_basics/}{02_declarations.c}
|
||||
|
||||
\newpage
|
||||
A peculiarity of \texttt{C} is that the bit-count is not defined by the language, but rather the hardware it is compiled for.
|
||||
\rmvspace
|
||||
|
||||
\begin{fullTable}{llll}{\texttt{C} data type & typical 32-bit & ia32 & x86-64}{Comparison of byte-sizes for each datatype on different architectures}
|
||||
\texttt{char} & 1 & 1 & 1 \\
|
||||
\texttt{short} & 2 & 2 & 2 \\
|
||||
\texttt{int} & 4 & 4 & 4 \\
|
||||
\texttt{long} & 4 & 4 & 8 \\
|
||||
\texttt{long long} & 8 & 8 & 8 \\
|
||||
\texttt{float} & 4 & 4 & 4 \\
|
||||
\texttt{double} & 8 & 8 & 8 \\
|
||||
\texttt{long double} & 8 & 10/12 & 16 \\
|
||||
\end{fullTable}
|
||||
|
||||
\drmvspace
|
||||
\warn{Type format} Be however aware that this table uses the \texttt{LP64} format for the x86-64 sizes
|
||||
and this is the format all UNIX-Systems use (i.e. Linux, BSD, Darwin (the Mac Kernel)).
|
||||
64 bit Windows however uses \texttt{LLP64}, i.e. \texttt{int} and \texttt{long} have the same size (32) and \texttt{long long} and pointers are 64 bit.
|
||||
|
||||
|
||||
\content{Integers} By default, integers in \lC\ are \texttt{signed}, to declare an unsigned integer, use \texttt{unsigned int}.
|
||||
Since it is hard and annoying to remember the number of bytes that are in each data type, \texttt{C99} has introduced the extended integer types,
|
||||
which can be imported from \texttt{stdint.h} and are of form \texttt{int<bit count>\_t} and \texttt{uint<bit count>\_t},
|
||||
where we substitute the \texttt{<bit count>} with the number of bits (have to correspond to a valid type of course).
|
||||
|
||||
|
||||
\content{Booleans} Another notable difference of \texttt{C} compared to other languages is that \texttt{C} doesn't natively have a \texttt{boolean} type,
|
||||
by convention a \texttt{short} is used to represent it, where any non-zero value means \texttt{true} and \texttt{0} means \texttt{false}.
|
||||
Since boolean types are quite handy, the \texttt{!} syntax for negation turns any non-zero value of any integer type into zero and vice-versa.
|
||||
\texttt{C99} has added support for a bool type via \texttt{stdbool.h}, which however is still an integer.
|
||||
|
||||
|
||||
\content{Implicit casts} Notably, \texttt{C} doesn't have a very rigid type system and lower bit-count types are implicitly cast to higher bit-count data types, i.e.
|
||||
if you add a \texttt{short} and an \texttt{int}, the \texttt{short} is converted to \texttt{int} (sign-extended, i.e. bits 16-31 are filled with copies of the sign bit) and the two are added.
|
||||
Explicit casting between almost all types is also supported.
|
||||
Some will force a change of bit representation, but most won't (notably, when casting to and from \texttt{float}-like types, minus to \texttt{void})
|
||||
|
||||
|
||||
\content{Expressions} In \lC, assignments are expressions (they yield the assigned value) and every expression can be used as a statement, see above code block for example.
|
||||
|
||||
|
||||
\content{Void} The \texttt{void} type has \bi{no} value and is used for untyped pointers and declaring functions with no return value
|
||||
|
||||
|
||||
\content{Structs} Are like classes in OOP, but they contain no logic.
|
||||
We can copy a struct by assignment and they behave just like everything else in \texttt{C} when used as an argument for functions
|
||||
in that they are passed by value and not by reference.
|
||||
You can of course pass it also by reference (like any other data type) by setting the argument to type \texttt{struct mystruct * name} and then calling the function using
|
||||
\texttt{func(\&test)} assuming \texttt{test} is the name of your struct
|
||||
|
||||
|
||||
\content{Typedef} We can define a custom type using \texttt{typedef <type it represents> <name of the new type>}.
|
||||
|
||||
You may also use \texttt{typedef} on structs using \texttt{typedef struct <struct tag> <name of the new alias>},
|
||||
you can thus instead of e.g. \verb|struct list_el my_list;| write \verb|list my_list;|, if you have used \verb|typedef struct list_el list;| before.
|
||||
It is even possible to do this:
|
||||
\drmvspace
|
||||
\begin{code}{c}
|
||||
typedef struct list_el {
|
||||
unsigned long val;
|
||||
struct list_el *next;
|
||||
} list_el;
|
||||
|
||||
struct list_el my_list;
|
||||
list_el my_other_list;
|
||||
\end{code}
|
||||
\rmvspace
|
||||
|
||||
\content{Namespaces}
|
||||
\lC\ has a few different namespaces, i.e. the same name can be used once in each namespace (e.g. you can have both \texttt{struct a} and \texttt{int a}).
|
||||
The following namespaces were covered:
|
||||
\rmvspace
|
||||
\begin{itemize}[noitemsep]
|
||||
\item Label names (used for \texttt{goto})
|
||||
\item Tags (for \texttt{struct}, \texttt{union} and \texttt{enum})
|
||||
\item Member names (one namespace for each \texttt{struct} and \texttt{union}; \texttt{enum} constants are ordinary identifiers and belong to the last group)
|
||||
\item Everything else mostly (types, variable names, etc, including typedef)
|
||||
\end{itemize}
|
||||
@@ -1,3 +1,4 @@
|
||||
\newpage
|
||||
\subsubsection{Operators}
|
||||
The list of operators in \lC\ is similar to the one of \texttt{Java}, etc.
|
||||
In Table \ref{tab:c-operators}, you can see an overview of the operators, sorted by precedence in descending order.
|
||||
@@ -18,7 +18,7 @@ The (Linux)-Kernel randomizes the address space to prevent some common exploits.
|
||||
Some pointer arithmetic has already appeared in section \ref{sec:c-arrays}, but same kind of content with better explanation can be found here
|
||||
\end{scriptsize}
|
||||
|
||||
Note that when doing pointer arithmetic, adding $1$ will move the pointer by \texttt{sizeof(type)} bits.
|
||||
\content{Pointer Arithmetic} Note that when doing pointer arithmetic, adding $1$ will move the pointer by \texttt{sizeof(type)} bytes.
|
||||
|
||||
You may use pointer arithmetic on whatever pointer you'd like (as long as it's not a null pointer).
|
||||
This means, you \textit{can} make an array wherever in memory you'd like.
|
||||
@@ -30,22 +30,24 @@ in the docs mention that one gets undefined behaviour if you do not do as it say
|
||||
As already seen in the section arrays (section \ref{sec:c-arrays}), we can use pointer arithmetic for accessing array elements.
|
||||
The array name is treated as a pointer to the first element of the array, except when:
|
||||
\begin{itemize}[noitemsep]
|
||||
\item it is operand of \texttt{sizeof} (return value is $n \cdot \texttt{ sizeof(type)}$ with $n$ the number of elements)
|
||||
\item it is operand of \texttt{sizeof} (return value is $n \cdot \texttt{sizeof(type)}$ with $n$ the number of elements)
|
||||
\item its address is taken (then \texttt{\&a == a})
|
||||
\item it is a string literal initializer. If we create a pointer \texttt{char *b = "String";} to a string literal in code,
|
||||
the \texttt{"String"} is stored in the code segment and if we modify the string through the pointer, we get undefined behaviour
|
||||
\end{itemize}
|
||||
\shade{orange}{Fun fact}: \texttt{A[i]} is always rewritten \texttt{*(A + i)} by compiler.
|
||||
\shade{purple}{Fun fact}: \texttt{A[i]} is always rewritten \texttt{*(A + i)} by compiler.
|
||||
|
||||
Another important aspect is passing by value or by reference.
|
||||
You can pass every data type by reference, you can not however pass an array by value.
|
||||
\content{Function arguments} Another important aspect is passing by value or by reference.
|
||||
You can pass every data type by reference, you can not however pass an array by value (as an array is treated as a pointer, see above).
|
||||
|
||||
Another interesting concept that \lC\ has to offer is body-less loops:
|
||||
\content{Body-less loops}
|
||||
\rmvspace
|
||||
\begin{code}{c}
|
||||
int x = 0;
|
||||
while ( x++ < 10 ); // This is (of course) not a useful snippet, but shows the concept
|
||||
\end{code}
|
||||
|
||||
\lC\ also has an option to pass functions as arguments to functions, called function pointers.
|
||||
A function is passed using the typical address syntax with the \verb|&| symbol is annotated as argument using \verb|type (* name)(type arg1, ...)|
|
||||
\content{Function pointers}
|
||||
A function can be passed as an argument to another function using the typical address syntax with the \verb|&| symbol. The parameter is declared using
|
||||
\verb|type (* name)(type arg1, ...)|
|
||||
and is called using \verb|(*func)(arg1, ...)|.
|
||||
26
semester3/spca/parts/01_c/03_memory/00_intro.tex
Normal file
26
semester3/spca/parts/01_c/03_memory/00_intro.tex
Normal file
@@ -0,0 +1,26 @@
|
||||
\subsection{Memory}
|
||||
In comparison to most other languages, \lC\ does not feature automatic memory management, but instead gives us full, manual control over memory.
|
||||
This of course has both advantages and disadvantages.
|
||||
|
||||
\rmvspace
|
||||
\inputcodewithfilename{c}{code-examples/00_c/02_memory/}{00_memory.c}
|
||||
\drmvspace
|
||||
|
||||
Notably, the argument \texttt{size\_t sz} for \texttt{malloc}, \texttt{calloc} and \texttt{realloc} is an \texttt{unsigned} integer of some size
|
||||
and differs depending on hardware and software platforms.
|
||||
|
||||
\texttt{malloc} keeps track of which blocks are allocated. If you give \texttt{free} a pointer that isn't the start of the memory region previously \texttt{malloc}'d,
|
||||
you get undefined behaviour.
|
||||
|
||||
\warn{Memory corruption} There are many ways to corrupt memory in \lC. The below code shows off a few of them:
|
||||
|
||||
\rmvspace
|
||||
\inputcodewithfilename{c}{code-examples/00_c/02_memory/}{01_mem-corruption.c}
|
||||
\drmvspace
|
||||
|
||||
\warn{Memory leaks} If we allocate memory, but never free it, we use more and more memory (old memory is inaccessible)
|
||||
|
||||
\content{Dynamic data structures} We build it using structs that have a pointer to another struct inside them.
|
||||
We have to allocate memory for each element and then add the pointer to another struct.
|
||||
For a generic dynamic data structure, make the element a \texttt{void} pointer.
|
||||
This in general is the concept used for functions operating on any data type.
|
||||
37
semester3/spca/parts/01_c/03_memory/01_allocation.tex
Normal file
37
semester3/spca/parts/01_c/03_memory/01_allocation.tex
Normal file
@@ -0,0 +1,37 @@
|
||||
\subsubsection{Dynamic Memory Allocation}
|
||||
Memory allocated with \texttt{malloc} is typically $8$- or $16$-byte aligned.
|
||||
|
||||
\content{Explicit vs. Implicit} In explicit memory management, the application does both the allocation \textit{and} deallocation of memory,
|
||||
whereas in implicit memory management, the application allocates the memory, but usually a \textit{Garbage Collector} (GC) frees it.
|
||||
|
||||
For some languages, like Rust, one would assume that it does implicit allocation, but Rust is a language using explicit management,
|
||||
it's just that the \textit{compiler} and not the programmer decides when to allocate and when to deallocate.
|
||||
|
||||
\warn{Assumptions in this course} We assume that memory is \bi{word} addressed (= 8 Bytes).
|
||||
|
||||
\content{Goals} The allocation should have the highest possible throughput and at the same time the best (i.e. lowest) possible memory utilization.
|
||||
This however is usually conflicting, so we have to balance the two.
|
||||
|
||||
\numberingOff
|
||||
\inlinedef \bi{Aggregate payload} $P_k$: All \texttt{malloc}'d stuff minus all \texttt{free}'d stuff
|
||||
|
||||
\inlinedef \bi{Current heap size} $H_k$: Monotonically non-decreasing. Grows when \texttt{sbrk} system call is issued.
|
||||
|
||||
\inlinedef \bi{Peak memory utilization} $U_k = (\max_{i < k} P_i) / H_k$
|
||||
|
||||
|
||||
A big problem for the \texttt{free} function is to know how much memory to free without being told the size of the block to be freed.
|
||||
This is just one of many other implementation issues:
|
||||
\begin{itemize}
|
||||
\item How do we keep track of the free blocks? I.e. where and how large are they?
|
||||
\item What do we do with the extra space of a block when allocating a smaller block?
|
||||
\item How do we pick a block?
|
||||
\item How do we reinsert a freed block into the heap?
|
||||
\end{itemize}
|
||||
This all leads to an issue known as \bi{fragmentation}
|
||||
|
||||
\inlinedef \bi{Internal Fragmentation}: If for a given block the payload (i.e. the requested size) is smaller than the block size.
|
||||
This depends on the pattern of previous requests and is thus easy to measure
|
||||
|
||||
\inlinedef \bi{External Fragmentation}: There is enough aggregate heap memory, but there isn't a single large enough free block available
|
||||
This depends on the pattern of future requests and is thus hard to measure
|
||||
Binary file not shown.
@@ -8,6 +8,9 @@
|
||||
\setFontType{sans}
|
||||
|
||||
\newcommand{\lC}{\texttt{C}}
|
||||
\newcommand{\content}[1]{\shade{blue}{#1}}
|
||||
\newcommand{\warn}[1]{\bg{orange}{#1}}
|
||||
\newcommand{\danger}[1]{\shade{red}{#1}}
|
||||
|
||||
\begin{document}
|
||||
\startDocument
|
||||
@@ -56,31 +59,50 @@
|
||||
% ╭────────────────────────────────────────────────╮
|
||||
% │ Content │
|
||||
% ╰────────────────────────────────────────────────╯
|
||||
% ── Intro to x86 asm ────────────────────────────────────────────────
|
||||
\newsection
|
||||
\section{Introduction}
|
||||
This summary tries to summarize everything that is important to know for this course.
|
||||
It aims to be a full replacement for the slides, but as with all my summaries, there may be missing or incorrect information in here,
|
||||
so use at your own risk. You have been warned!
|
||||
|
||||
The summary does \textit{not} follow the order the lecture does.
|
||||
This is to make related information appear more closely to each other than they have in the lecture and the summary assumes you have already seen
|
||||
the concepts in the lectures or elsewhere (or are willing to be thrown in the deep end).
|
||||
|
||||
The target semester for this summary is HS2025, so there might have been changes in your year.
|
||||
If there are changes and you'd like to update this summary, please open a pull request in the summary's repo at
|
||||
|
||||
\begin{center}
|
||||
\hlurl{https://github.com/janishutz/eth-summaries}
|
||||
\end{center}
|
||||
|
||||
|
||||
\newsection
|
||||
\section{x86 Assembly}
|
||||
\input{parts/00_asm/00_intro.tex}
|
||||
|
||||
% ── Intro to C ──────────────────────────────────────────────────────
|
||||
\newsection
|
||||
\section{The C Programming Language}
|
||||
\input{parts/00_c/00_intro.tex}
|
||||
\input{parts/00_c/01_basics/00_intro.tex}
|
||||
\input{parts/00_c/01_basics/01_control-flow.tex}
|
||||
\input{parts/00_c/01_basics/02_declarations.tex}
|
||||
\input{parts/00_c/01_basics/03_operators.tex}
|
||||
\input{parts/00_c/01_basics/04_arrays.tex}
|
||||
\input{parts/00_c/01_basics/05_strings.tex}
|
||||
\input{parts/00_c/01_basics/06_integers.tex}
|
||||
\input{parts/00_c/01_basics/07_pointers.tex}
|
||||
\input{parts/00_c/02_preprocessor.tex}
|
||||
|
||||
|
||||
% ── Intro to x86 asm ────────────────────────────────────────────────
|
||||
\newsection
|
||||
\section{x86 Assembly}
|
||||
\input{parts/01_asm/00_intro.tex}
|
||||
\input{parts/01_c/00_intro.tex}
|
||||
\input{parts/01_c/01_basics/00_intro.tex}
|
||||
\input{parts/01_c/01_basics/01_control-flow.tex}
|
||||
\input{parts/01_c/01_basics/02_declarations.tex}
|
||||
\input{parts/01_c/01_basics/03_operators.tex}
|
||||
\input{parts/01_c/01_basics/04_arrays.tex}
|
||||
\input{parts/01_c/01_basics/05_strings.tex}
|
||||
\input{parts/01_c/01_basics/06_integers.tex}
|
||||
\input{parts/01_c/01_basics/07_pointers.tex}
|
||||
\input{parts/01_c/02_preprocessor.tex}
|
||||
\input{parts/01_c/03_memory/00_intro.tex}
|
||||
\input{parts/01_c/03_memory/01_allocation.tex}
|
||||
|
||||
|
||||
% ── Hardware recap ──────────────────────────────────────────────────
|
||||
\newsection
|
||||
\section{Hardware}
|
||||
\input{parts/02_hw/00_intro.tex}
|
||||
\input{parts/03_hw/00_intro.tex}
|
||||
|
||||
Remember: Rust and the like have an \texttt{unsafe} block... \lC's equivalent to this is
|
||||
\begin{code}{c}
|
||||
|
||||
Reference in New Issue
Block a user