diff --git a/Bilder/fig_dsp_interrupt.jpg b/Bilder/fig_dsp_interrupt.jpg
new file mode 100644
index 0000000..a8ff7b6
Binary files /dev/null and b/Bilder/fig_dsp_interrupt.jpg differ
diff --git a/Bilder/fig_dsp_setup.jpg b/Bilder/fig_dsp_setup.jpg
new file mode 100644
index 0000000..716b2cb
Binary files /dev/null and b/Bilder/fig_dsp_setup.jpg differ
diff --git a/acronyms.aux b/acronyms.aux
index 237a34c..273b186 100644
--- a/acronyms.aux
+++ b/acronyms.aux
@@ -12,6 +12,10 @@
\newacro{NLMS}[\AC@hyperlink{NLMS}{NLMS}]{Normalized Least Mean Squares}
\newacro{RLS}[\AC@hyperlink{RLS}{RLS}]{Recursive Least Squares}
\newacro{MAC}[\AC@hyperlink{MAC}{MAC}]{multiply-accumulate}
+\newacro{DMA}[\AC@hyperlink{DMA}{DMA}]{Direct Memory Access}
+\newacro{PCM}[\AC@hyperlink{PCM}{PCM}]{Pulse Code Modulation}
+\newacro{ARM}[\AC@hyperlink{ARM}{ARM}]{Advanced RISC Machine}
+\newacro{RISC}[\AC@hyperlink{RISC}{RISC}]{Reduced Instruction Set Computer}
\@setckpt{acronyms}{
\setcounter{page}{5}
\setcounter{equation}{0}
diff --git a/acronyms.tex b/acronyms.tex
index ecc236b..f8748f8 100644
--- a/acronyms.tex
+++ b/acronyms.tex
@@ -13,4 +13,8 @@
\acro{NLMS}{Normalized Least Mean Squares}
\acro{RLS}{Recursive Least Squares}
\acro{MAC}{multiply-accumulate}
+ \acro{DMA}{Direct Memory Access}
+ \acro{PCM}{Pulse Code Modulation}
+ \acro{ARM}{Advanced RISC Machine}
+ \acro{RISC}{Reduced Instruction Set Computer}
\end{acronym}
\ No newline at end of file
diff --git a/chapter_04.aux b/chapter_04.aux
index b5eea11..1f278ff 100644
--- a/chapter_04.aux
+++ b/chapter_04.aux
@@ -1,27 +1,59 @@
\relax
-\@writefile{toc}{\contentsline {section}{\numberline {4}Hardware setup and low level simulations}{40}{}\protected@file@percent }
+\@writefile{toc}{\contentsline {section}{\numberline {4}DSP implementation and optimization of the ANR algorithm}{40}{}\protected@file@percent }
\acronymused{ANR}
\acronymused{ANR}
\acronymused{DSP}
\acronymused{ANR}
\acronymused{ANR}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Description of the low-power DSP}{40}{}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Description of the low-power DSP and it´s environment}{40}{}\protected@file@percent }
+\AC@undonewlabel{acro:ARM}
+\newlabel{acro:ARM}{{4.1}{40}{}{}{}}
+\acronymused{ARM}
\acronymused{DSP}
+\acronymused{ARM}
+\acronymused{DSP}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.1}Hardware overview}{40}{}\protected@file@percent }
+\acronymused{ARM}
+\acronymused{ARM}
+\acronymused{DSP}
+\acronymused{DSP}
+\acronymused{ANR}
\AC@undonewlabel{acro:ALU}
-\newlabel{acro:ALU}{{4.1}{40}{}{}{}}
+\newlabel{acro:ALU}{{4.1.1}{40}{}{}{}}
\acronymused{ALU}
\acronymused{DSP}
\acronymused{ALU}
\AC@undonewlabel{acro:MAC}
-\newlabel{acro:MAC}{{4.1}{40}{}{}{}}
+\newlabel{acro:MAC}{{4.1.1}{41}{}{}{}}
\acronymused{MAC}
\acronymused{DSP}
+\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.2}Communication between the ARM core and the DSP}{41}{}\protected@file@percent }
+\acronymused{ARM}
\acronymused{DSP}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Implementation of the ANR algorithm on the DSP}{41}{}\protected@file@percent }
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}First optimization approach: algorithm implementation}{41}{}\protected@file@percent }
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Second optimization approach: hybrid ANR algorithm}{41}{}\protected@file@percent }
+\acronymused{ARM}
+\acronymused{DSP}
+\acronymused{DSP}
+\acronymused{ARM}
+\@writefile{lof}{\contentsline {figure}{\numberline {32}{\ignorespaces Simplified visualization of the communication between the \ac {ARM} core and the \ac {DSP} core}}{41}{}\protected@file@percent }
+\acronymused{ARM}
+\acronymused{DSP}
+\newlabel{fig:fig_dsp_setup.jpg}{{32}{41}{}{}{}}
+\acronymused{ARM}
+\acronymused{DSP}
+\acronymused{DSP}
+\acronymused{ANR}
+\acronymused{ARM}
+\acronymused{DSP}
+\@writefile{lof}{\contentsline {figure}{\numberline {33}{\ignorespaces Simpliefied flowchart of the sample processing between the \ac {ARM} core and the \ac {DSP} core via interrupts and shared memory. For simplification, the registers of the \ac {DSP} core are not visualized.}}{42}{}\protected@file@percent }
+\acronymused{ARM}
+\acronymused{DSP}
+\acronymused{DSP}
+\newlabel{fig:fig_dsp_interrupt.jpg}{{33}{42}{}{}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Implementation of the ANR algorithm on the DSP}{42}{}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}First optimization approach: algorithm implementation}{42}{}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Second optimization approach: hybrid ANR algorithm}{42}{}\protected@file@percent }
\@setckpt{chapter_04}{
-\setcounter{page}{42}
+\setcounter{page}{43}
\setcounter{equation}{21}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
@@ -35,7 +67,7 @@
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
-\setcounter{figure}{31}
+\setcounter{figure}{33}
\setcounter{table}{0}
\setcounter{float@type}{16}
\setcounter{tabx@nest}{0}
@@ -156,7 +188,7 @@
\setcounter{lstnumber}{21}
\setcounter{FancyVerbLine}{0}
\setcounter{linenumber}{1}
-\setcounter{LN@truepage}{41}
+\setcounter{LN@truepage}{42}
\setcounter{FancyVerbWriteLine}{0}
\setcounter{FancyVerbBufferLine}{0}
\setcounter{FV@TrueTabGroupLevel}{0}
diff --git a/chapter_04.tex b/chapter_04.tex
index 6ad7415..4ae24b6 100644
--- a/chapter_04.tex
+++ b/chapter_04.tex
@@ -1,15 +1,30 @@
-\section{Hardware setup and low level simulations}
-This section aims to be the main part of this thesis. The first subchapters describes the hardware, on which the \ac{ANR} algorithm is implemented. The following subchapter describes the basic implementation of the \ac{ANR} algorithm on the hardware itself and shall provide the reader with a basic understanding of its efficiency, which shall serve as a baseline for the following optimiziations.\\
+\section{DSP implementation and optimization of the ANR algorithm}
+This section aims to be the main part of this thesis. The first subchapter describes the hardware on which the \ac{ANR} algorithm is implemented, including its environment, which serves as a link to the CI system itself. The following subchapter continues with the basic implementation of the \ac{ANR} algorithm on the hardware itself and shall provide the reader with a basic understanding of its efficiency, which shall serve as a baseline for the following optimizations.\\
During the third chapter, this initial implementation is further optimized in order to achieve an improved real-time performance on the \ac{DSP}. The last subchapter picks the final optimizations of the \ac{ANR} algorithm itself as a central theme, especially with respect to the capabilites of a hybrid \ac{ANR} approach.
-\subsection{Description of the low-power DSP}
-The \ac{DSP} used for the implementation is a 32-bit fixed-point processor primarily designed for audio signal-processing applications in low-power embedded systems. It is developed using a retargetable processor design methodology and is typically programmed in C. Its highly efficient C compiler produces optimized assembly code that is comparable in performance and quality to hand-written assembly.\\ \\
+\subsection{Description of the low-power DSP and its environment}
+This thesis considers a low-power system-on-chip architecture that integrates a general-purpose \ac{ARM} core with a dedicated \ac{DSP} core. The system combines the flexibility of an \ac{ARM}-based control processor with the computational efficiency of a specialized \ac{DSP}, splitting general computing tasks from real-time signal processing workloads.
+\subsubsection{Hardware overview}
+The \ac{ARM} core serves as the primary control unit of the system. It is responsible for high-level application logic, system configuration, peripheral management, and scheduling. Due to its general-purpose instruction set and extensive software ecosystem, the \ac{ARM} core is well suited for handling communication protocols, system state management, and interaction with external interfaces. Time-critical numerical processing is intentionally offloaded to the \ac{DSP} core in order to reduce computational load and power consumption on the control processor.\\ \\
+The \ac{DSP} used for the implementation is a 32-bit fixed-point processor primarily designed for audio signal-processing applications in low-power embedded systems. It is developed using a retargetable processor design methodology and is typically programmed in C. The compiler used is highly efficient and generates optimized assembly code, which is then translated into machine code to execute the \ac{ANR} algorithm on incoming samples.\\ \\
The processor is equipped with load/store architecture, meaning that, initially all operands need to be moved from the memory to the registers, before any operation can be performed. After this task is performed, the execution units (\ac{ALU} and multiplier) can perform their oeprations on the data and write back the results into the registers. Finally, the results need to be explicitly moved back to the memory.\\ \\
-The \ac{DSP} includes a three stage pipeline consisting of fetch, decode, and execute stages, aloowing for overlapping instruction execution and improved throughput.
-The architecture is optimized for high cycle efficiency when executing computationally intensive signal-processing workloads. It features a dual Harvard load store architecture and two seperate \ac{ALU}s, which enables the execution of two \ac{MAC} operations, two memory operations (load/store) and two pointer updates in a single prcoessor cycle.\\ \\
+The \ac{DSP} includes a three-stage pipeline consisting of fetch, decode, and execute stages, allowing for overlapping instruction execution and improved throughput. The architecture is optimized for high cycle efficiency when executing computationally intensive signal-processing workloads. It features a dual Harvard load/store architecture and two separate \ac{ALU}s, which enables the execution of two \ac{MAC} operations, two memory operations (load/store) and two pointer updates in a single processor cycle.\\ \\
The \ac{DSP} includes a set of registers, including
+\subsubsection{Communication between the ARM core and the DSP}
+Communication between the \ac{ARM} core and the \ac{DSP} is realized through a shared memory region accessible by both processing units. This shared memory enables the exchange of data without the need for separate communication protocols. Synchronization between the cores is achieved using interrupt-based signaling: the \ac{ARM} core initiates processing requests by triggering an interrupt on the \ac{DSP}, while the \ac{DSP} notifies the \ac{ARM} core upon completion of a task via a dedicated signaling mechanism. This approach ensures efficient coordination while minimizing active waiting and unnecessary power consumption.
+\begin{figure}[H]
+ \centering
+ \includegraphics[width=1.0\linewidth]{Bilder/fig_dsp_setup.jpg}
+ \caption{Simplified visualization of the communication between the \ac{ARM} core and the \ac{DSP} core}
+ \label{fig:fig_dsp_setup.jpg}
+\end{figure}
+\noindent The \ac{ARM} core receives the audio data from the CI system via a \ac{PCM} interface, which offers one input and one output register. An interrupt triggers the integrated \ac{DMA} controller, which transfers the audio data from the \ac{PCM} interface to a buffer in a predefined memory location. Once the buffer is filled with enough samples, another interrupt is triggered, notifying the \ac{DSP} core to start processing the audio data. The \ac{DSP} core then reads the audio samples from the shared memory, processes them using the implemented \ac{ANR} algorithm, and writes the processed samples back to an output buffer, also located in the shared memory. Finally, the \ac{ARM} core is notified via an interrupt from the \ac{DSP} core that the processing is complete; the \ac{DMA} controller then transfers the processed audio samples from the output buffer back to the \ac{PCM} interface for playback.\\ \\
+\begin{figure}[H]
+ \centering
+ \includegraphics[width=0.9\linewidth]{Bilder/fig_dsp_interrupt.jpg}
+ \caption{Simplified flowchart of the sample processing between the \ac{ARM} core and the \ac{DSP} core via interrupts and shared memory. For simplification, the registers of the \ac{DSP} core are not visualized.}
+ \label{fig:fig_dsp_interrupt.jpg}
+\end{figure}
-
-Advanced addressing modes — such as cyclic and bit-reversed addressing — facilitate efficient implementation of common \ac{DSP} algorithms. Additional architectural features include hardware-supported zero-overhead looping, nested loop structures, interrupt handling, power-management mechanisms, and on-chip debugging capabilities such as JTAG, breakpoints, and watchpoints. Overall, the architecture is designed to support both control-flow operations and high-throughput signal-processing tasks within low-power embedded environments.
\subsection{Implementation of the ANR algorithm on the DSP}
\subsection{First optimization approach: algorithm implementation}
\subsection{Second optimization approach: hybrid ANR algorithm}
diff --git a/drawio/fig_dsp_interrupt.drawio b/drawio/fig_dsp_interrupt.drawio
new file mode 100644
index 0000000..eead72b
--- /dev/null
+++ b/drawio/fig_dsp_interrupt.drawio
@@ -0,0 +1,237 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/drawio/fig_dsp_setup.drawio b/drawio/fig_dsp_setup.drawio
new file mode 100644
index 0000000..7ded0f9
--- /dev/null
+++ b/drawio/fig_dsp_setup.drawio
@@ -0,0 +1,91 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+