From 76ec091e34eee407a178455e8bb5371f91431084 Mon Sep 17 00:00:00 2001
From: Patrick Hangl <patrick.hangl@medel.com>
Date: Wed, 7 Jan 2026 16:36:49 +0100
Subject: [PATCH] =?UTF-8?q?DSP=20Code=20=C3=BCbernommen,=20angefangen=20zu?=
 =?UTF-8?q?=20kommentieren?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .vscode/c_cpp_properties.json                 |  22 +
 .vscode/settings.json                         |   3 +
 dsp_code/main.c                               | 154 ++++
 .../signalProcessing/include/signal_path.h    | 141 ++++
 dsp_code/signalProcessing/signal_path.c       | 785 ++++++++++++++++++
 5 files changed, 1105 insertions(+)
 create mode 100644 .vscode/c_cpp_properties.json
 create mode 100644 .vscode/settings.json
 create mode 100644 dsp_code/main.c
 create mode 100644 dsp_code/signalProcessing/include/signal_path.h
 create mode 100644 dsp_code/signalProcessing/signal_path.c

diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000..bc1a018
--- /dev/null
+++ b/.vscode/c_cpp_properties.json
@@ -0,0 +1,22 @@
+{
+    "configurations": [
+        {
+            "name": "Win32",
+            "includePath": [
+                "${workspaceFolder}/**",
+                "${workspaceFolder}/dsp_code/signalProcessing/include"
+            ],
+            "defines": [
+                "_DEBUG",
+                "UNICODE",
+                "_UNICODE"
+            ],
+            "windowsSdkVersion": "8.1",
+            "compilerPath": "C:/Strawberry/c/bin/gcc.exe",
+            "cStandard": "c17",
+            "cppStandard": "c++17",
+            "intelliSenseMode": "windows-msvc-x86"
+        }
+    ],
+    "version": 4
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..1f3f191
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "C_Cpp.default.compilerPath": "C:/Strawberry/c/bin/gcc.exe"
+}
\ No newline at end of file
diff --git a/dsp_code/main.c b/dsp_code/main.c
new file mode 100644
index 0000000..d3ca685
--- /dev/null
+++ b/dsp_code/main.c
@@ -0,0 +1,154 @@
+//#define SIMULATE
+#ifdef SIMULATE
+    #include <stdio.h>
+#endif
+
+#define BLOCK_LEN 1 // define block length for processing - currently only 1 is supported
+
+#include <stdint.h>
+#include "signalProcessing/include/signal_path.h"
+
+// Register and bit masks for the interrupts exchanged between the ARM and the LPDSP core
+#define CSS_CMD 0xC00004 
+#define CSS_CMD_0 (1<<0)
+#define CSS_CMD_1 (1<<1)
+
+// Shared memory between ARM and DSP
+#define INPUT_PORT0_ADD 0x800000  // fixed address of the input data in shared memory
+//#define INPUT_PORT1_ADD INPUT_PORT0_ADD + 2 //DMB - TODO(review): why is this commented out?
+#define OUTPUT_PORT_ADD (INPUT_PORT0_ADD + 16) // fixed address of the output data in shared memory, 16 bytes past the input address (PS: 2* for 2 channels)
+
+// Chess compiler specific: interrupt register used to notify the ARM of a completed operation
+volatile static unsigned char chess_storage(DMIO:CSS_CMD) CssCmdGen;
+
+// Flag set to 1 by isr0() when the ARM requests a calculation; main() clears it again
+static volatile int actionRequired; 
+
+// State and configuration structs of the two signal paths (declared in signal_path.h)
+static SingleSignalPath cSensorSignal;
+static SingleSignalPath accSensorSignal;
+
+// Switch between sample-wise and block-wise processing
+// Sample-wise processing: the shared-memory addresses are used directly
+// Block-wise processing: blocks copied and processed? Apparently not used so far
+#if BLOCK_LEN == 1
+static volatile int16_t chess_storage(DMB:INPUT_PORT0_ADD) intputPort[4]; //TODO: if BLOCK_LEN >1 is used, the data is interleaved: ch0ch1, ch0ch1 .... chess_storage(DMA % alignof(int)) ?
+//static volatile int16_t chess_storage(DMB:INPUT_PORT1_ADD) intputPort1[BLOCK_LEN];
+static volatile int16_t chess_storage(DMB:OUTPUT_PORT_ADD) outputPort[4];
+static volatile int16_t chess_storage(DMB) *inPtr0;
+static volatile int16_t chess_storage(DMB) *inPtr1;
+static volatile int16_t chess_storage(DMB) *outPtr;
+static volatile int16_t chess_storage(DMB) sample;
+static volatile int16_t chess_storage(DMB) *sample_ptr;
+#else
+// Input array for block processing, placed in DMA memory
+static int16_t chess_storage(DMA) intputPort[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) TODO: volatile?  chess_storage(DMA % alignof(int))
+//static int16_t chess_storage(DMA) intputPort1[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD)
+// Output array for block processing, placed in DMB memory
+static int16_t chess_storage(DMB) outputPort[BLOCK_LEN]; // chess_storage(DMB:OUTPUT_PORT_ADD) TODO: determine output port add
+#endif
+
+// isr0() is the interrupt service routine; extern "C" gives it C linkage.
+// property (isr) is Chess compiler specific and marks the function as an interrupt service routine.
+// When the interrupt fires, actionRequired is set to 1 so the main loop knows work is pending.
+extern "C" void isr0() property (isr) {
+	actionRequired = 1;
+	}
+
+#ifdef __chess__
+extern "C"
+#endif
+
+int main(void) {
+    // Output mode that is forwarded to calc() on every call
+    static OutputMode mode = OUTPUT_MODE_FIR_LMS;
+
+    // Set up both signal paths (C sensor and acceleration sensor) and the LMS filter through init().
+    // NOTE(review): sig_init_preemph_coef() only bypasses the biquad for exactly {1., 0., 0., 0., 0.};
+    // with the 0.75 gain below the biquad presumably stays active - confirm this is intended.
+
+    // biquad coefficient sets passed to init() as b_c and b_acc
+    // (one set per signal path)
+    double b0[5]={0.75, 0., 0., 0., 0.};
+    double b1[5]={0.75, 0., 0., 0., 0.};
+    int N_lms_fir_coeffs = MAX_FIR_COEFFS; // always test with max coeffs
+
+    // call the top-level init function
+    init(
+        &cSensorSignal, &accSensorSignal,
+        //&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs,
+        b0,
+        b1,
+        2,      // sample delay
+        2,
+        0.9,     // weight
+        0.9,
+        0.01,   // lms learning rate
+        N_lms_fir_coeffs // Number of lms fir coefficients
+    );
+
+    if (mode == OUTPUT_MODE_FIR){ // non-adaptive FIR: preload every coefficient with full scale / N
+        for (int i=0; i<N_lms_fir_coeffs; i++){
+            #ifdef LPDSP16
+            ptr_fir_lms_coeffs.ptr_start[i] = ((pow(2, 15)-1) /N_lms_fir_coeffs);
+            #else
+            ptr_fir_lms_coeffs.ptr_start[i] = ((pow(2, 31)-1) /N_lms_fir_coeffs);
+            #endif 
+        }
+    }
+
+    #ifdef SIMULATE // use the simulator with file I/O
+        FILE *fp1 = fopen("./test/testdata/input/chirp_disturber.txt", "r");
+        FILE *fp2 = fopen("./test/testdata/input/disturber.txt", "r");
+        FILE *fp3 = fopen("./test/testdata/output/out_simulated.txt", "w");
+        if (fp1 == NULL || fp2 == NULL || fp3 == NULL) { // a missing file must not reach fscanf/fprintf
+            if (fp1 != NULL) { fclose(fp1); }
+            if (fp2 != NULL) { fclose(fp2); }
+            if (fp3 != NULL) { fclose(fp3); }
+            return 1;
+        }
+
+        int d0, d1;
+        int input_ok = 1;
+
+        while (input_ok) {
+            for (int i=0; i<BLOCK_LEN; i++){
+                // Stop at the first sample pair that cannot be read. Polling feof() before reading
+                // would run the last pair through calc() a second time after the final newline.
+                if (fscanf(fp1, "%d", &d0) != 1 || fscanf(fp2, "%d", &d1) != 1) { input_ok = 0; break; }
+                intputPort[i] = (int16_t) d0;
+                intputPort[i+1] = (int16_t) d1;
+            }
+            if (!input_ok) { break; }
+
+            calc(&cSensorSignal, &accSensorSignal, mode, &intputPort[0], &intputPort[1], outputPort);
+            for (int i=0; i<BLOCK_LEN; i++){
+                fprintf(fp3, "%d\n", outputPort[i]);
+            }
+        }
+        fclose(fp1);
+        fclose(fp2);
+        fclose(fp3);
+
+     #else // how its done in hw
+        // enable the interrupts
+        enable_interrupts();
+        outPtr = &outputPort[1]; // start with second half of buffer
+        sample_ptr = &sample;
+               
+        /* Signal processing, triggered by an interrupt */
+        actionRequired = 0;
+        while (1){
+            CssCmdGen = CSS_CMD_1; // indicate going to sleep to the cm3
+            core_halt();
+            if (actionRequired == 1) {
+                CssCmdGen = CSS_CMD_0; // indicate wakeup to the cm3
+                actionRequired = 0;     
+                // publish the sample held in `sample` from the previous wake-up, then compute the next one
+                outPtr = cyclic_add(outPtr, 2, outputPort, 4);
+                *outPtr = *sample_ptr;
+                calc(&cSensorSignal, &accSensorSignal, mode, &intputPort[1], &intputPort[0], sample_ptr); // NOTE(review): input channels are swapped relative to the SIMULATE path - confirm
+            }
+        }
+    #endif
+}
\ No newline at end of file
diff --git a/dsp_code/signalProcessing/include/signal_path.h b/dsp_code/signalProcessing/include/signal_path.h
new file mode 100644
index 0000000..34bf6aa
--- /dev/null
+++ b/dsp_code/signalProcessing/include/signal_path.h
@@ -0,0 +1,141 @@
+#ifndef SIGNAL_PATH_H // include guard
+#define SIGNAL_PATH_H
+
+#include <math.h>
+#include <stdint.h>
+
+#define MAX_DELAY_SAMPS 16
+#if BLOCK_LEN > MAX_FIR_COEFFS
+    #error "BLOCK_LEN must be smaller than MAX_FIR_COEFFS"
+#endif
+#define BITSHIFT_16_TO_32 16
+
+static const int block_len=BLOCK_LEN; // TODO: save this an an cm3 accessible location
+
+#ifdef PLATFORM_GENERIC
+    typedef int64_t accum_t; // 64-bit accumulator on every host; `long int` is only 32 bits wide on LLP64 (Windows)
+    // Chess-specific storage/loop annotations expand to nothing on a generic host compiler
+    #define chess_storage(mem)
+    #define DMA
+    #define DMB
+    #define DMIO
+    #define chess_loop_range(a,b)
+    #define isr0(a)
+    #define chess_flatten_loop
+#endif
+
+typedef struct BufferPtr{ // pointer and length container describing one circular buffer
+    int buffer_len; // number of elements the cursor wraps over
+    int *ptr_start; // first element of the underlying storage
+    int *ptr_current; // cursor; moved with cyclic_add() by the sig_cirular_buffer_* helpers
+} BufferPtr;
+
+typedef struct BufferPtrDMB{
+    int buffer_len;
+    int chess_storage(DMB) *ptr_start;
+    int chess_storage(DMB) *ptr_current;
+} BufferPtrDMB;
+
+/* Struct storing the internal state and configuration of a single signal path with a biquad element, a scaling element and a delay */
+typedef struct SingleSignalPath{
+    int input_scale; // The scaling bitshift bits for the input signal
+    int x_nbit_bitshift; // The number of bits to scale the input signal
+    int preemph_activated; // 0 = biquad bypassed; cleared by initializing with coefficients {1., 0., 0., 0., 0.}
+    int b_preemph[5]; // Scaled preemphasis coefficients in the order {b0, b1, b2, a1, a2}
+    int _preemph_scale_nbits; // The number of bits used to scale the preemphasis filter coefficients
+    int _xd[2]; // biquad state: the two previous input samples (newest first)
+    int _yd[2]; // biquad state: the two previous output samples (newest first)
+    int  _delay_buffer[MAX_DELAY_SAMPS]; // The delay buffer for the given signal path // chess_storage(DMA)
+    BufferPtr delay_buffer; // The pointers to the delay buffer and actual used length
+    int n_delay_samps; // The delay for the given signal path in samples
+    int weight_actived; // 0 = weighting bypassed; cleared by initializing with weight 1.0
+    int weight; // The scaled weight for the given signal path
+    int _weight_scale_nbits; // The number of bits used to scale the weight
+} SingleSignalPath;
+
+
+/* Struct storing the internal state and configuration of an adaptive fir-lms filter */
+// typedef struct LmsFilter{
+//     int lms_mu; // The learning rate for the lms algorithm
+//     int lms_num_fir_coeffs; // Number of coefficients for the adaptive filter
+// #if BLOCK_LEN == 1
+//     //int _delay_line[MAX_FIR_COEFFS]; // The delay line for the adaptive filter //
+//     BufferDMB delay_line; // The pointer to the delay line
+//     //int chess_storage(DMB) *ptr_delay_line_current; // The pointer to the current position in the delay line
+// #else
+//     //int chess_storage(%(sizeof(long long))) _delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter
+//     BufferPtr delay_line; // The pointer to the delay line
+//     //int chess_storage(DMA) *ptr_delay_line_current; // The pointer to the current position in the delay line
+//     //int chess_storage(%(sizeof(long long))) fir_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter
+// #endif
+// } LmsFilter;
+// #if BLOCK_LEN == 1
+//     int fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter //
+// #else
+//     int chess_storage(%(sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter
+// #endif
+
+#if BLOCK_LEN == 1
+BufferPtr extern ptr_fir_lms_coeffs;
+BufferPtrDMB extern chess_storage(DMB) ptr_fir_lms_delay_line;
+int extern chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS];
+
+#else
+int extern chess_storage(DMA%(sizeof(long long))) fir_lms_delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter
+BufferPtr extern ptr_fir_lms_delay_line;
+BufferPtr extern ptr_fir_lms_coeffs;
+#endif
+
+//int extern chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter
+
+// typedef struct SignalPath{
+//     SingleSignalPath cSensorSignal;
+//     SingleSignalPath accSensorSignal;
+//     LmsFilter lms;
+//     volatile int chess_storage(DMIO:INPUT_PORT_ADD) input_port;
+//     int chess_storage(DMIO:OUTPUT_PORT_ADD) output_port;
+// } SignalPath;
+
+typedef enum OutputMode{
+    OUTPUT_MODE_C_SENSOR,
+    OUTPUT_MODE_ACC_SENSOR,
+    OUTPUT_MODE_FIR_LMS,
+    OUTPUT_MODE_FIR,
+    OUTPUT_MODE_FIR_LMS_LEAKY,
+}OutputMode;
+
+// void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits);
+// int sig_init_delay(SingleSignalPath *signal, int delay_samps);
+// void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits);
+// void sig_init_lms(LmsFilter *signal, double lms_mu, int lms_fir_num_coeffs, int scale_bits);
+// int inline sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x);
+// int inline sig_calc_biquad(SingleSignalPath *signal, int x); //TODO: inline ?
+// int inline sig_calc_weight(SingleSignalPath *signal, int x); //TODO: inline ?
+// int inline sig_calc_fir_lms_single(LmsFilter *signal, int d, int x); //TODO: inline ?
+
+//void adapt_coeffs_lpdsp32_single(LmsFilter chess_storage(DMB) *filter, int *fir_lms_coeffs, int out);
+//sig_calc_fir_lpdsp32_single(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs)
+
+
+
+// top level init and calc functions
+void init(
+        SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal,
+        //BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs,
+        double *b_c, double *b_acc, int delay_c, int delay_acc, double weight_c, double weight_acc, double lms_mu, int lms_fir_num_coeffs);
+void calc(
+        SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal,
+        //BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs,
+        OutputMode output_mode,
+        #if BLOCK_LEN != 1
+        int16_t *cSensor,
+        int16_t *accSensor,
+        #else
+        int16_t volatile chess_storage(DMB) *cSensor,
+        int16_t volatile chess_storage(DMB) *accSensor,
+        #endif
+        int16_t volatile chess_storage(DMB) *out_16
+        );
+
+#endif //SIGNAL_PATH_H
+
diff --git a/dsp_code/signalProcessing/signal_path.c b/dsp_code/signalProcessing/signal_path.c
new file mode 100644
index 0000000..d0cb702
--- /dev/null
+++ b/dsp_code/signalProcessing/signal_path.c
@@ -0,0 +1,785 @@
+#include "include/signal_path.h"
+
+/* Global variable declarations */
+static int counter=0;
+static int mu; // LMS step size; multiplied with the filter output in the adapt_coeffs_* functions
+
+#ifdef LPDSP16
+//static int leak=24576; //0.75
+//static int leak=29491; //0.9
+//static int leak=31129; //0.95 // no effect
+static int leak=32735; //0.999 (32735 / 2^15) // leak factor - garbled note, presumably "(1 - mu*alpha)"; confirm
+//static int leak=32766; //0.99999
+#else
+//static int leak=2145336164; //0.999 (2145336164 / 2^31) // leak factor - garbled note, presumably "(1 - mu*alpha)"; confirm
+static int leak=2147462173; //0.99999 (2147462173 / 2^31) // NOTE(review): the LPDSP16 build uses 0.999 - confirm the difference is intended
+#endif
+
+
+
+#if BLOCK_LEN == 1
+int chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS];
+BufferPtrDMB chess_storage(DMB) ptr_fir_lms_delay_line;
+BufferPtr ptr_fir_lms_coeffs;
+
+#else
+int chess_storage(DMA%(sizeof(long long))) fir_lms_delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter
+BufferPtr ptr_fir_lms_delay_line;
+BufferPtr ptr_fir_lms_coeffs;
+#endif
+
+int chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter
+
+
+#ifdef PLATFORM_GENERIC
+    // functions modelling the lpdsp32 functionality
+    accum_t fract_mult(int a, int b){ // generic model: plain widened product of a and b
+        // Widen through accum_t instead of long int, which is only 32 bits wide on LLP64
+        // hosts (e.g. 64-bit Windows), so the product always uses the accumulator width.
+        return (accum_t) b * (accum_t) a;
+    }
+    accum_t to_accum(int a){ // generic model: load a into the accumulator, scaled by 2^31
+        // Shift in accum_t rather than long int so the result uses the accumulator width.
+        return ((accum_t) a) << 31;
+    }
+    int rnd_saturate(accum_t a){
+        return a >> 31;
+    }
+    int extract_high(accum_t a){
+        return a >> 31;
+    }
+    void lldecompose(unsigned long long l, int* int1, int* int2){ // bits 63..32 -> *int2, bits 31..0 -> *int1
+        const unsigned long long upper_word = l >> 32;
+        *int2 = (int) upper_word; *int1 = (int) l;
+    }
+    uint64_t llcompose(int a, int b) {
+        // Pack two 32-bit words: b fills bits 63..32, the 32-bit pattern of a fills bits 31..0.
+        const uint64_t high_word = ((uint64_t) b) << 32;
+        const uint64_t low_word = (uint32_t) a;
+        return high_word | low_word;
+    }
+    // unsigned long long llcompose(int a, int b){
+    //     unsigned long long l;
+    //     l = a << 32;
+    //     l |= b;
+    //     return l;
+    //}
+    int* cyclic_add(int *ptr, int i_pp, int *ptr_start, int buffer_len){
+        // Generic model of the cyclic pointer update: move ptr by i_pp elements (negative = backwards)
+        // and wrap it inside [ptr_start, ptr_start + buffer_len). Returns the moved pointer.
+        // The step count is computed inline because abs() needs <stdlib.h>, which is not included here.
+        const int n_steps = (i_pp < 0) ? -i_pp : i_pp;
+        int *p_ptr=ptr;
+        for (int i=0; i < n_steps; i+=1){
+            if (i_pp > 0){ // forward step, wrapping from the end to the start
+                p_ptr++;
+                if (p_ptr >= ptr_start + buffer_len){ p_ptr=ptr_start; }
+            }
+            else{ // backward step; wrap before decrementing so no pointer below ptr_start is formed
+                if (p_ptr <= ptr_start){ p_ptr=ptr_start + (buffer_len -1); }
+                else{ p_ptr--; }
+            }
+        }
+        return p_ptr;
+    }
+#endif
+
+
+/*Round saturate with 16 bits return value */
+int static inline rnd_saturate16(accum_t acc){ //maybe int16_fast type?
+    acc = to_accum( // saturate
+        rnd_saturate(acc << 32)
+        );
+    return rnd_saturate(acc >> 16); //round
+}
+
+
+// Bind `buffer` to the storage at buffer_start_add, zero the used part and rewind the cursor.
+// Returns 0 when length < max_buffer_len and 1 otherwise (return values unchanged;
+// NOTE(review): length == max_buffer_len also yields 1 although it fits - confirm intent).
+int sig_init_buffer(BufferPtr *buffer, int *buffer_start_add, int length, int max_buffer_len) {
+    // Clamp to [0, max_buffer_len] (assumed to be the storage capacity) so an oversized request cannot write past it.
+    int used_len = (length > max_buffer_len) ? max_buffer_len : length;
+    if (used_len < 0) { used_len = 0; }
+    buffer->buffer_len = used_len;
+    buffer->ptr_start = buffer_start_add;
+    buffer->ptr_current = buffer_start_add;
+    for (int i = 0; i < used_len; i++) {
+        buffer_start_add[i] = 0;
+    }
+    return (length < max_buffer_len) ? 0 : 1;
+}
+
+// DMB variant: bind `buffer` to the storage at buffer_start_add, zero the used part and rewind the cursor.
+// Returns 0 when length < max_buffer_len and 1 otherwise (return values unchanged;
+// NOTE(review): length == max_buffer_len also yields 1 although it fits - confirm intent).
+int sig_init_buffer_DMB(BufferPtrDMB chess_storage(DMB) *buffer, int chess_storage(DMB) *buffer_start_add, int length, int max_buffer_len){
+    // Clamp to [0, max_buffer_len] (assumed to be the storage capacity) so an oversized request cannot write past it.
+    int used_len = (length > max_buffer_len) ? max_buffer_len : length;
+    if (used_len < 0) { used_len = 0; }
+    buffer->buffer_len = used_len;
+    buffer->ptr_start = buffer_start_add;
+    buffer->ptr_current = buffer_start_add;
+    for (int i = 0; i < used_len; i++) {
+        buffer_start_add[i] = 0;
+    }
+    return (length < max_buffer_len) ? 0 : 1;
+}
+
+void sig_cirular_buffer_ptr_increment(BufferPtr *buffer, int i_incr){
+    buffer->ptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len);
+}
+
+void sig_cirular_buffer_ptr_increment_DMB(BufferPtrDMB *buffer, int i_incr){
+    buffer->ptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len);
+}
+
+void sig_cirular_buffer_ptr_put_sample(BufferPtr *buffer, int sample){
+    *buffer->ptr_current = sample;
+    buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len);
+}
+
+void sig_cirular_buffer_ptr_put_sample_DMB(BufferPtrDMB chess_storage(DMB) *buffer, int sample){
+    *buffer->ptr_current = sample;
+    buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len);
+}
+
+void static inline sig_circular_buffer_ptr_put_block(BufferPtr *buffer, int* block){
+    // increment pointer to oldest block
+    //buffer->ptr_current = cyclic_add(buffer->ptr_current, BLOCK_LEN, buffer->ptr_start, buffer->buffer_len);
+    // load the next block
+    for (int i=0; i<BLOCK_LEN;  i+=2){
+        buffer->ptr_current[0] = block[i]; // TODO: use llcompose
+        buffer->ptr_current[1] = block[i+1];
+        buffer->ptr_current = cyclic_add(buffer->ptr_current, 2, buffer->ptr_start, buffer->buffer_len);
+    }
+}
+
+void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits) {
+    // Store the biquad coefficients {b0, b1, b2, a1, a2} as integers scaled by (2^scale_bits - 1).
+    // The identity set {1, 0, 0, 0, 0} only clears the activation flag; nothing else is written.
+    const int is_identity = (b0 == 1.) && (b1 == 0.) && (b2 == 0.) && (a1 == 0.) && (a2 == 0.);
+    if (is_identity) {
+        signal->preemph_activated = 0;
+        return;
+    }
+    const double coeffs[5] = {b0, b1, b2, a1, a2};
+    int full_scale = pow(2, scale_bits) - 1;
+    for (int k = 0; k < 5; k++) {
+        signal->b_preemph[k] = coeffs[k] * full_scale;
+    }
+    signal->preemph_activated = 1;
+    signal->_preemph_scale_nbits = scale_bits;
+}
+
+/*Initialization functions - make sure all of them were called to ensure functionality*/
+int sig_init_delay(SingleSignalPath *signal, int n_delay) {
+    return sig_init_buffer(&signal->delay_buffer, signal->_delay_buffer, n_delay, MAX_DELAY_SAMPS);
+}
+
+void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits) {
+    // A weight of exactly 1.0 switches the weighting off; otherwise store it scaled by (2^scale_nbits - 1).
+    if (weight == 1.) {
+        signal->weight_actived = 0;
+        return;
+    }
+    int full_scale = pow(2, scale_nbits) - 1;
+    signal->_weight_scale_nbits = scale_nbits;
+    signal->weight = weight * full_scale;
+    signal->weight_actived = 1;
+}
+
+/*Calculator functions for the given signal path*/
+/*Calculate one biquad filter element*/
+int sig_calc_biquad(SingleSignalPath *signal, int x) {
+    if (signal->preemph_activated == 0) {
+        return x;
+    }
+    accum_t sum =
+        fract_mult(x, signal->b_preemph[0]) + fract_mult(signal->_xd[0], signal->b_preemph[1]) +
+        fract_mult(signal->_xd[1], signal->b_preemph[2]) + fract_mult(signal->_yd[0], signal->b_preemph[3]) +
+        fract_mult(signal->_yd[1],signal->b_preemph[4]);
+    
+    #ifdef LPDSP16
+    int y = rnd_saturate16(sum << 1);
+    #else
+    int y = rnd_saturate(sum << 1);
+    #endif
+    
+    signal->_xd[1] = signal->_xd[0];
+    signal->_xd[0] = x;
+    signal->_yd[1] = signal->_yd[0];
+    signal->_yd[0] = y;
+    return y;
+}
+static inline int sig_get_delayed_sample(SingleSignalPath *signal) { // read the sample under the delay-line cursor without advancing it
+    return *signal->delay_buffer.ptr_current;
+}
+
+int sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x) {
+    // Swap x with the sample stored at the cursor, advance the cursor by one slot and return the old sample.
+    BufferPtr *line = &signal->delay_buffer;
+    if (line->buffer_len == 0) { return x; } // zero-length delay line: pass x straight through
+    const int delayed = *line->ptr_current;
+    *line->ptr_current = x;
+    line->ptr_current = cyclic_add(line->ptr_current, 1, line->ptr_start, line->buffer_len);
+    return delayed;
+}
+
+int sig_calc_weight(SingleSignalPath *signal, int x) {
+    if (signal->weight_actived == 0) {
+        return x;
+    }
+    accum_t acc = fract_mult(x, signal->weight);
+
+    return rnd_saturate(acc);
+}
+
+#if BLOCK_LEN!=1 // Block processing
+/*lpdsp32 fir filter example adapted from user guide
+#define NS 256 //No. of samples
+#define N 64 //No. of filter coefficients or No. of tap weights
+int chess_storage(DMB) y[NS]; //Output Signal
+int chess_storage(DMA %(sizeof(long long))) x[NS+N-1]; //Input Signal
+//Filter coefficients or tap weights
+int chess_storage(DMA %(sizeof(long long))) h[N];
+ */
+void sig_calc_fir_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int chess_storage(DMB) *out){
+//void fir(int *y, int *x, int *h)
+    static int chess_storage(DMA) *p_x; // pointer to the start of the last added block
+    static int chess_storage(DMA) *p_h; // pointer to the start of the filter coefficients
+    static int chess_storage(DMB) *p_y; // pointer to the output port
+
+    p_y = out;
+
+    int *px_start = ptr_fir_lms_delay_line->ptr_start;
+    int *ph_start = ptr_fir_lms_coeffs->ptr_current;
+    int delay_line_len = ptr_fir_lms_delay_line->buffer_len;
+    int n_coeff = ptr_fir_lms_coeffs->buffer_len;
+
+    int coef1, coef2;
+    int dat1, dat2;
+
+    for(unsigned int n=0; n<BLOCK_LEN; n+=2) chess_loop_range(1,){
+        //p_x = x + n;
+        p_x = cyclic_add(px_start, n, px_start, delay_line_len);
+        p_h = ph_start;
+
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)p_h), &coef1, &coef2);
+        #else
+            lldecompose(*((long long *)p_h), coef1, coef2);
+        #endif
+        p_h+=2;
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)p_x), &dat2, &dat1);
+        #else
+            lldecompose(*((long long *)p_x), dat2, dat1);
+        #endif
+        p_x = cyclic_add(p_x, -2, px_start, delay_line_len);
+
+        accum_t sum1 = fract_mult(dat1, coef1);
+        accum_t sum2 = fract_mult(dat2, coef1);
+        sum1 += fract_mult(dat2 , coef2);
+        sum1 = to_accum(rnd_saturate(sum1));
+        for(int k=2; k < n_coeff; k+=2) chess_loop_range(1,){
+            #ifdef PLATFORM_GENERIC
+                lldecompose(*((long long *)p_x), &dat2, &dat1);
+            #else
+                lldecompose(*((long long *)p_x), dat2, dat1);
+            #endif
+            p_x = cyclic_add(p_x, -2, px_start, delay_line_len);
+
+            sum2 += fract_mult(dat1, coef2);
+            sum2 = to_accum(rnd_saturate(sum2));
+
+            #ifdef PLATFORM_GENERIC
+                lldecompose(*((long long *)p_h), &coef1, &coef2);
+            #else
+                lldecompose(*((long long *)p_h), coef1, coef2);
+            #endif
+            p_h+=2;
+
+            sum1 += fract_mult(dat1, coef1);
+            sum2 += fract_mult(dat2, coef1);
+            sum1 += fract_mult(dat2, coef2);
+            sum1 = to_accum(rnd_saturate(sum1));
+        }
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)p_x), &dat2, &dat1);
+        #else
+            lldecompose(*((long long *)p_x), dat2, dat1);
+        #endif
+        sum2 += fract_mult(dat1, coef2);
+        sum2 = to_accum(rnd_saturate(sum2));
+
+        *p_y++ = extract_high(sum2);
+        *p_y++ = extract_high(sum1);
+    }
+}
+void sig_calc_fir_generic_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int chess_storage(DMB) *out){
+    static int chess_storage(DMA) *p_x; // pointer to the start of the last added block
+    static int chess_storage(DMA) *p_h; // pointer to the start of the filter coefficients
+    static int chess_storage(DMB) *p_y; // pointer to the output port
+
+    static int coef1, coef2;
+    static int dat1, dat2;
+
+    p_x = ptr_fir_lms_delay_line->ptr_current;
+    p_h = ptr_fir_lms_coeffs->ptr_current;
+    p_y = out;
+
+    for(int n=0; n<BLOCK_LEN; n+=2)
+    {
+        p_x = cyclic_add(ptr_fir_lms_delay_line->ptr_current, n, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even
+        accum_t sum = to_accum(0);
+        for(int k=0; k < ptr_fir_lms_coeffs->buffer_len; k+=2) chess_loop_range(1,)
+        {
+            sum += fract_mult(p_x[0] , p_h[k]);
+            sum += fract_mult(p_x[1] , p_h[k+1]);
+
+            sum = to_accum(rnd_saturate(sum));
+            p_x = cyclic_add(p_x, -2, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even
+        }
+    *p_y++ = extract_high(sum);
+    }
+}
+/* "out" is actually an input to the function and is the output of the fir_lms filter system*/
+void adapt_coeffs_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ // only works for even delay line sample pointers!!
+
+    int *p_x = ptr_fir_lms_delay_line->ptr_current; // pointer to the start of the last added block - TODO: doublecheck this - might be wrong because the pointer actually points to the end of the block!
+    int *p_x_start = ptr_fir_lms_delay_line->ptr_start;
+    int *p_h = ptr_fir_lms_coeffs->ptr_current; // pointer to the start of the filter coefficients
+    int delay_line_len = ptr_fir_lms_delay_line->buffer_len;
+    int n_coeff = ptr_fir_lms_coeffs->buffer_len;
+    int prod0, x0, x1, h0, h1;
+
+    // Calculate the first term of the coefficient adaption
+    accum_t acc_C = fract_mult(mu, out);
+    prod0 = rnd_saturate(acc_C);
+    //acc_D = fract_mult(mu, out1);
+    //prod1 = rnd_saturate(acc_C);
+    for (int i=0; i<n_coeff; i+=2) chess_loop_range(1, ){
+        // Calculate the coefficient wise adaption
+
+        // utilize dual load and dual pointer update
+        // load first sample and coefficient
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)p_h), &h0, &h1);
+        #else
+            lldecompose(*((long long *)p_h), h0, h1);
+        #endif
+
+        accum_t acc_A = to_accum(h0);
+        accum_t acc_B = to_accum(h1);
+
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)p_x), &x0, &x1);
+        #else
+            lldecompose(*((long long *)p_x), x0, x1);
+        #endif
+        p_x = cyclic_add(p_x, -2, p_x_start, delay_line_len); // can be done in increments of two, assuming the buffer pointer increment is even
+
+        // initialize accumulators with old coefficients, calculate the adaptions and accumulate
+        acc_A += fract_mult(prod0, x0); // TODO: This can be further optimized by using all 4 available accums!
+        acc_B += fract_mult(prod0, x1);
+        // update the current filter coefficients - dual rnd_sat; dual store
+        *((long long *)p_h) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));// load/store hazard ! - 1nop
+        p_h+=2;
+    }
+}
+#else
+
+int inline sig_calc_fir_lpdsp32_single(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs){
+
+    // Calculate the fir filter output on x to get the canceller
+    int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB)
+    int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start;
+    int *p_h = ptr_fir_lms_coeffs->ptr_current;
+    int delay_line_len = ptr_fir_lms_delay_line->buffer_len;
+    int n_coeff = ptr_fir_lms_coeffs->buffer_len;
+
+    int d0,d1,h0,h1;
+    accum_t acc1_A = to_accum(0);
+    accum_t acc1_B = to_accum(0);
+    accum_t acc1_C;
+
+    // iterate over the coefficients to calculate the filter on x - the canceller
+    /* Estimated cycles per 2 coefficients:
+    dual - load : 1
+    dual mac and dual load: 1
+    -> 48/2 * 2 = 48 cycles for 48 coefficents
+    */
+    for (int i=0; i < n_coeff; i+=2) chess_loop_range(1,){
+        // Use dual load and dual pointer update
+        d0 = *p_x0;
+        h0 = *p_h;
+        p_h++;
+        p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len);
+
+        d1 = *p_x0;
+        h1 = *p_h;
+        p_h++;
+        p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len);
+
+        acc1_A+=fract_mult(d0, h0);
+        acc1_B+=fract_mult(d1, h1);
+        #ifndef LPDSP16
+        acc1_A = to_accum(rnd_saturate(acc1_A));
+        acc1_B = to_accum(rnd_saturate(acc1_B));
+        #endif
+        
+    }
+    // Calculate the output sample
+    acc1_C = acc1_A + acc1_B;
+    //out32 = rnd_saturate(acc1_A);
+    #ifdef LPDSP16
+    return rnd_saturate16(acc1_C);
+    #else
+    return rnd_saturate(acc1_C);
+    #endif 
+}
+
+/*
+ * Coefficient adaptation for the single-sample path.
+ *
+ * A common step value is computed once as the rounded/saturated product
+ * mu * out. Then, for every pair of coefficients (loaded and stored together
+ * through one long long access, starting at the coefficient buffer's
+ * ptr_start), the step is multiplied with a delay-line sample and added to the
+ * coefficient; the rounded/saturated result is written back in place.
+ * The first coefficient of a pair uses the sample at ptr_current, the second
+ * the sample one cyclic step before it; both sample pointers then move back
+ * two positions per loop pass.
+ */
+void static inline adapt_coeffs_lpdsp32_single_v1(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){
+
+    int chess_storage(DMB) *line_base = ptr_fir_lms_delay_line->ptr_start;
+    int line_len = ptr_fir_lms_delay_line->buffer_len;
+    int num_taps = ptr_fir_lms_coeffs->buffer_len;
+
+    int chess_storage(DMA) *coeff_pair = ptr_fir_lms_coeffs->ptr_start; // load and store position
+    int chess_storage(DMB) *sample_a = ptr_fir_lms_delay_line->ptr_current;
+    int chess_storage(DMB) *sample_b = cyclic_add(sample_a, -1, line_base, line_len);
+
+    int step, coeff_a, coeff_b;
+    accum_t next_a, next_b;
+
+    // Step value shared by all coefficients: mu * out, rounded and saturated
+    accum_t step_acc = fract_mult(mu, out);
+    #ifdef LPDSP16
+    step = rnd_saturate16(step_acc);
+    #else
+    step = rnd_saturate(step_acc);
+    #endif
+
+    /* Original author's cycle estimate per 2 coefficients:
+       dual load coeffs: 1
+       single load tab value: 2
+       dual mac: 1
+       dual rnd_sat - store: 1
+       load/store hazard nop: 1 */
+    for (int tap = 0; tap < num_taps; tap += 2) chess_loop_range(1,){
+        // Fetch two neighbouring coefficients with a single wide load
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)coeff_pair), &coeff_a, &coeff_b);
+        #else
+            lldecompose(*((long long *)coeff_pair), coeff_a, coeff_b);
+        #endif
+
+        next_a = to_accum(coeff_a);
+        next_b = to_accum(coeff_b);
+
+        // TODO (from original): could be optimized further by using all 4 available accums?
+        #ifdef LPDSP16
+        next_a += fract_mult(step, *sample_a) << 16;
+        next_b += fract_mult(step, *sample_b) << 16;
+        #else
+        next_a += fract_mult(step, *sample_a);
+        next_b += fract_mult(step, *sample_b);
+        #endif
+
+        sample_a = cyclic_add(sample_a, -2, line_base, line_len);
+        sample_b = cyclic_add(sample_b, -2, line_base, line_len);
+
+        // Dual saturate and dual store back to the same location
+        // (original note: load/store hazard - 1 nop is needed)
+        *((long long *)coeff_pair) = llcompose(rnd_saturate(next_a), rnd_saturate(next_b));
+        coeff_pair += 2;
+    }
+}
+
+/*
+ * Leaky variant of the single-sample coefficient adaptation.
+ *
+ * Works like the non-leaky update, except that each coefficient is first
+ * multiplied with the file-level factor `leak` before the step * sample
+ * product is added. The step is the rounded/saturated product mu * out.
+ * Coefficients are handled in pairs through one long long load/store starting
+ * at the coefficient buffer's ptr_start; the paired samples come from
+ * ptr_current and one cyclic step before it, each moving back two positions
+ * per loop pass.
+ */
+void static inline adapt_coeffs_lpdsp32_single_leaky(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){
+
+    int chess_storage(DMB) *line_base = ptr_fir_lms_delay_line->ptr_start;
+    int line_len = ptr_fir_lms_delay_line->buffer_len;
+    int num_taps = ptr_fir_lms_coeffs->buffer_len;
+
+    int chess_storage(DMA) *coeff_pair = ptr_fir_lms_coeffs->ptr_start; // load and store position
+    int chess_storage(DMB) *sample_a = ptr_fir_lms_delay_line->ptr_current;
+    int chess_storage(DMB) *sample_b = cyclic_add(sample_a, -1, line_base, line_len);
+
+    int step, coeff_a, coeff_b;
+    accum_t next_a, next_b;
+
+    // Step value shared by all coefficients: mu * out, rounded and saturated
+    accum_t step_acc = fract_mult(mu, out);
+    #ifdef LPDSP16
+    step = rnd_saturate16(step_acc);
+    #else
+    step = rnd_saturate(step_acc);
+    #endif
+
+    for (int tap = 0; tap < num_taps; tap += 2) chess_loop_range(1,){
+        // Fetch two neighbouring coefficients with a single wide load
+        #ifdef PLATFORM_GENERIC
+            lldecompose(*((long long *)coeff_pair), &coeff_a, &coeff_b);
+        #else
+            lldecompose(*((long long *)coeff_pair), coeff_a, coeff_b);
+        #endif
+
+        // Leakage: scale the old coefficients by `leak`
+        next_a = fract_mult(coeff_a, leak);
+        next_b = fract_mult(coeff_b, leak);
+
+        // TODO (from original): could be optimized further by using all 4 available accums?
+        next_a += fract_mult(step, *sample_a);
+        next_b += fract_mult(step, *sample_b);
+
+        sample_a = cyclic_add(sample_a, -2, line_base, line_len);
+        sample_b = cyclic_add(sample_b, -2, line_base, line_len);
+
+        // Dual saturate and dual store back to the same location
+        // (original note: load/store hazard - 1 nop is needed)
+        #ifdef LPDSP16
+        *((long long *)coeff_pair) = llcompose(rnd_saturate16(next_a), rnd_saturate16(next_b));
+        #else
+        *((long long *)coeff_pair) = llcompose(rnd_saturate(next_a), rnd_saturate(next_b));
+        #endif
+        coeff_pair += 2;
+    }
+}
+
+/*
+ * Plain (one coefficient per pass) coefficient adaptation.
+ *
+ * Computes prod = rnd_saturate(mu * out) once and then, for every coefficient,
+ * stores rnd_saturate(coefficient + prod * sample). Samples are read from the
+ * delay line starting at ptr_current and stepping backwards with cyclic
+ * addressing.
+ *
+ * The loop is bounded by the coefficient buffer length, because it indexes the
+ * coefficient array; bounding it by the delay-line length would write past the
+ * coefficients whenever the delay line is the longer of the two buffers.
+ */
+void adapt_coeffs_generic_single(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){
+    int *p_h0 = ptr_fir_lms_coeffs->ptr_start; // coefficient load/store base
+    int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current;
+    int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start;
+    int delay_line_len = ptr_fir_lms_delay_line->buffer_len;
+    int n_coeff = ptr_fir_lms_coeffs->buffer_len;
+
+    int prod;
+    accum_t acc_A;
+
+    // Step value shared by all coefficients: mu * out, rounded and saturated
+    accum_t acc_C = fract_mult(mu, out);
+    prod = rnd_saturate(acc_C);
+
+    for (int i = 0; i < n_coeff; i++){
+        // coefficient + step * sample, rounded and saturated on store
+        acc_A = to_accum(p_h0[i]);
+        acc_A += fract_mult(prod, *p_x0);
+        p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len);
+        p_h0[i] = rnd_saturate(acc_A);
+    }
+}
+#endif
+
+/*
+ * Configure both sensor sub-paths and reset the adaptive FIR state.
+ *
+ * cSensorSignal / accSensorSignal: per-channel state that receives the
+ *     pre-emphasis coefficients, delay and weight.
+ * b_c / b_acc: five pre-emphasis coefficients per channel (elements 0..4 are read).
+ * delay_c / delay_acc: per-channel delay passed to sig_init_delay.
+ * weight_c / weight_acc: per-channel weight passed to sig_init_weight.
+ * lms_mu: adaptation step size; stored in the file-level `mu` scaled by
+ *     2^scale_bits - 1.
+ * lms_fir_num_coeffs: number of FIR coefficients to use.
+ *
+ * The FIR delay line and coefficient buffers are file-level objects
+ * (ptr_fir_lms_delay_line / ptr_fir_lms_coeffs), not parameters.
+ * NOTE(review): lms_fir_num_coeffs is not checked against MAX_FIR_COEFFS here -
+ * presumably the caller guarantees it; confirm.
+ */
+void init(
+    SingleSignalPath *cSensorSignal,
+    SingleSignalPath *accSensorSignal,
+    double *b_c,
+    double *b_acc,
+    int delay_c,
+    int delay_acc,
+    double weight_c,
+    double weight_acc,
+    double lms_mu,
+    int lms_fir_num_coeffs
+    ){
+    // Number of fractional bits used when converting doubles to fixed point
+    #ifdef LPDSP16
+    int scale_bits=15;
+    #else
+    int scale_bits=31;
+    #endif
+
+    // Initialize the cSensor signal subpath
+    sig_init_preemph_coef(cSensorSignal, b_c[0], b_c[1], b_c[2], b_c[3], b_c[4], scale_bits);
+    sig_init_delay(cSensorSignal, delay_c);
+    sig_init_weight(cSensorSignal, weight_c, scale_bits);
+
+    // Initialize the accSensor signal subpath.
+    // The weight uses scale_bits like the cSensor path; a hard-coded 31 would
+    // only be correct for builds without LPDSP16.
+    sig_init_preemph_coef(accSensorSignal, b_acc[0], b_acc[1], b_acc[2], b_acc[3], b_acc[4], scale_bits);
+    sig_init_delay(accSensorSignal, delay_acc);
+    sig_init_weight(accSensorSignal, weight_acc, scale_bits);
+
+    // Initialize the lms filter parameters: step size as a fixed-point value
+    int scale = pow(2, scale_bits) - 1;
+    mu = lms_mu * scale;
+
+    // Initialize the fir_lms buffers. In block mode the delay line is
+    // BLOCK_LEN entries longer than the coefficient buffer.
+    #if BLOCK_LEN == 1
+        sig_init_buffer_DMB(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs, MAX_FIR_COEFFS);
+        sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS);
+        int delay_line_len = lms_fir_num_coeffs;
+    #else
+        sig_init_buffer(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs + BLOCK_LEN, BLOCK_LEN + MAX_FIR_COEFFS);
+        sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS);
+        int delay_line_len = lms_fir_num_coeffs + BLOCK_LEN;
+    #endif
+
+    // Clear the coefficients and the complete configured delay line, so that a
+    // repeated init does not leave stale samples in the extra block-mode entries
+    for (int i = 0; i < lms_fir_num_coeffs; i++) {
+        ptr_fir_lms_coeffs.ptr_start[i] = 0;
+    }
+    for (int i = 0; i < delay_line_len; i++) {
+        ptr_fir_lms_delay_line.ptr_start[i] = 0;
+    }
+}
+
+/*
+ * Process one block of BLOCK_LEN samples and write BLOCK_LEN output samples.
+ *
+ * Original author's note: data d (cSensor) is signal + noise, x (accSensor) is
+ * the reference noise signal.
+ *
+ * cSensorSignal / accSensorSignal: per-channel state; only used when
+ *     PRE_FILTER is defined (biquad, delay and weight stages).
+ * output_mode: selects what is written to the output (see the switch below);
+ *     any value without its own case produces zeros.
+ * cSensor / accSensor: 16-bit input samples, BLOCK_LEN each.
+ * out_16: receives BLOCK_LEN 16-bit output samples.
+ *
+ * Outside LPDSP16 builds the inputs are shifted left by BITSHIFT_16_TO_32 on
+ * entry and the results are shifted right by the same amount before being
+ * rounded/saturated into out_16. The FIR delay line and coefficient buffers
+ * are the file-level objects ptr_fir_lms_delay_line / ptr_fir_lms_coeffs.
+ */
+void calc(
+    SingleSignalPath *cSensorSignal,
+    SingleSignalPath *accSensorSignal,
+    // (formerly a parameter) BufferPtrDMB *ptr_fir_lms_delay_line,
+    // (formerly a parameter) BufferPtr *ptr_fir_lms_coeffs,
+    OutputMode output_mode,
+    #if BLOCK_LEN != 1
+    int16_t *cSensor,
+    int16_t *accSensor,
+    #else
+    int16_t volatile chess_storage(DMB) *cSensor,
+    int16_t volatile chess_storage(DMB) *accSensor,
+    #endif
+
+    int16_t volatile chess_storage(DMB) *out_16 
+    
+    ){
+    // Working buffers; static, so they keep their storage between calls
+    static int chess_storage(DMA) c_block_pre[BLOCK_LEN];
+    static int chess_storage(DMA) acc_block_pre[BLOCK_LEN];
+    static int chess_storage(DMA) cSensor_32[BLOCK_LEN];
+    static int chess_storage(DMA) accSensor_32[BLOCK_LEN];
+    
+    static int chess_storage(DMB) acc_block_filt[BLOCK_LEN];
+    static int chess_storage(DMB) out_32[BLOCK_LEN];
+
+    // NOTE(review): these three pointers are not referenced again in this
+    // function, and p_acc_block_filt is initialized with acc_block_pre rather
+    // than acc_block_filt - confirm whether they are still needed.
+    static int chess_storage(DMA) *p_c_block_pre =c_block_pre;
+    static int chess_storage(DMA) *p_acc_block_filt =acc_block_pre;
+    static int chess_storage(DMB) *p_out_32=out_32;
+
+
+    // Copy the 16-bit inputs into the int working buffers
+    #ifdef LPDSP16
+    // LPDSP16: plain conversion, no shift
+    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
+        cSensor_32[i]=  (int) cSensor[i] ;
+        accSensor_32[i]= (int) accSensor[i];
+    }
+    
+    #else // not LPDSP16: shift the samples left by BITSHIFT_16_TO_32
+    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
+        cSensor_32[i] =  ((int) cSensor[i]) << BITSHIFT_16_TO_32;
+        accSensor_32[i] = ((int) accSensor[i]) << BITSHIFT_16_TO_32;
+    }    
+    #endif 
+  
+    // Optional per-channel pre-processing: pre-emphasis biquad, delay and weight.
+    // Only compiled when PRE_FILTER is defined; the define below is commented out.
+    //#define PRE_FILTER
+    #ifdef PRE_FILTER
+    int x_csensor_emph, x_accsensor_emph, x_csensor_emph_delay, x_accsensor_emph_delay;
+    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
+         // NOTE(review): the whole arrays cSensor_32 / accSensor_32 are passed
+         // here, not element [i] - confirm against the sig_calc_biquad signature.
+         x_csensor_emph = sig_calc_biquad(cSensorSignal, cSensor_32);
+         x_accsensor_emph = sig_calc_biquad(accSensorSignal, accSensor_32);
+         x_csensor_emph_delay = sig_delay_buffer_load_and_get(cSensorSignal, x_csensor_emph);
+         x_accsensor_emph_delay = sig_delay_buffer_load_and_get(accSensorSignal, x_accsensor_emph);
+         c_block_pre[i] = sig_calc_weight(cSensorSignal, x_csensor_emph_delay);
+         acc_block_pre[i] = sig_calc_weight(accSensorSignal, x_accsensor_emph_delay);
+    }
+    #else
+    // Without PRE_FILTER the converted inputs are passed through unchanged
+    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
+         c_block_pre[i] = cSensor_32[i];
+         acc_block_pre[i] = accSensor_32[i];
+    }
+    #endif
+    
+    // Calculate the output depending on the selected output mode
+    switch (output_mode)
+    {
+    case OUTPUT_MODE_C_SENSOR: // pass the cSensor channel through
+        for (uint32_t i=0; i<BLOCK_LEN; i++){
+            out_32[i] = c_block_pre[i];
+        }
+        break;
+    case OUTPUT_MODE_ACC_SENSOR: // pass the accSensor channel through
+        for (uint32_t i=0; i<BLOCK_LEN; i++){
+            out_32[i] = acc_block_pre[i];
+        }
+        break;
+    case OUTPUT_MODE_FIR: // output the FIR-filtered cSensor signal (no adaptation)
+        #if BLOCK_LEN == 1
+            // Put the current cSensor sample into the delay line
+            sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, c_block_pre[0]);
+            out_32[0] = sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs);
+
+        #else // Block processing
+            // Put the next block to the buffer
+            sig_circular_buffer_ptr_put_block(&ptr_fir_lms_delay_line, c_block_pre);
+            // Calculate the fir filter output
+            sig_calc_fir_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32);
+            // Increment the buffer pointer to get ready for the next block
+            //sig_cirular_buffer_ptr_increment(&lms->delay_line, BLOCK_LEN);
+        #endif
+        break;
+    case OUTPUT_MODE_FIR_LMS: // adaptive filter: FIR on accSensor, subtract from cSensor, adapt coefficients
+        #if BLOCK_LEN == 1
+            // Put the current accSensor sample into the delay line
+            sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]);
+            //*ptr_fir_lms_delay_line.ptr_current = acc_block_pre[0];
+            //ptr_fir_lms_delay_line.ptr_current = cyclic_add(ptr_fir_lms_delay_line.ptr_current, 1, ptr_fir_lms_delay_line.ptr_start, ptr_fir_lms_delay_line.buffer_len);
+
+            // Calculate the fir filter output on acc to get the canceller
+            acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs);
+            // Calculate the output signal as c_block_pre - acc_block_filt
+            out_32[0] = c_block_pre[0] - acc_block_filt[0];
+            //if (counter >= 0){ //TODO: implement this and make it configurable
+                // Calculate the coefficient adaptation, driven by the output sample
+            //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]);
+            adapt_coeffs_lpdsp32_single_v1(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]);
+                //counter=0;
+            // }
+            // else{
+            //     counter++;
+            // }
+        #else // Block processing
+            // Put the next block to the buffer
+            sig_circular_buffer_ptr_put_block(&ptr_fir_lms_delay_line, acc_block_pre);
+            // Calculate the fir filter output on acc to get the canceller
+            sig_calc_fir_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, acc_block_filt);
+
+            // Calculate the output signal as c_block_pre - acc_block_filt
+            for (int i=0; i<BLOCK_LEN; i++) chess_flatten_loop
+            {
+                //sig_cirular_buffer_ptr_put_sample(&lms->delay_line, acc_block_pre[i]);
+                //acc_block_filt[i]= sig_calc_fir_lpdsp32_single(lms);
+                out_32[i] = c_block_pre[i] - acc_block_filt[i]; // original note: 15 cycles with 4 samples/block
+            }
+            // Adapt the coefficients once per block.
+            // NOTE(review): the original comment spoke of the last sample in the
+            // block, but out_32[0] (the first) is what gets passed - confirm.
+            //adapt_coeffs_lpdsp32_single(lms, out_32[1]);
+            adapt_coeffs_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]);
+            // Increment the buffer pointer to get ready for the next block
+            //sig_cirular_buffer_ptr_increment(&lms->delay_line, BLOCK_LEN);
+        #endif
+        break;
+    case OUTPUT_MODE_FIR_LMS_LEAKY: // adaptive filter with leaky coefficient update
+        // NOTE(review): unlike the cases above there is no BLOCK_LEN guard here;
+        // only index 0 is processed and the single-sample helpers are used
+        // unconditionally - confirm this mode is meant for BLOCK_LEN == 1 only.
+        // Put the current accSensor sample into the delay line
+        sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]);
+
+        // Calculate the fir filter output on acc to get the canceller
+        acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs);
+        // Calculate the output signal as c_block_pre - acc_block_filt
+        out_32[0] = c_block_pre[0] - acc_block_filt[0];
+        //if (counter >= 0){ //TODO: implement this and make it configurable
+            // Calculate the coefficient adaptation, driven by the output sample
+        //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]);
+        adapt_coeffs_lpdsp32_single_leaky(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]);
+        
+        break;
+    default: // MUTED: every other mode outputs zeros
+        for (uint32_t i=0; i<BLOCK_LEN; i++){
+            out_32[i] = 0;
+        }
+        break;
+    }
+    // TODO: Add a couple of biquads after ANC
+    // Convert the int results back to 16-bit output samples
+    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_flatten_loop
+    {
+        #ifdef LPDSP16
+        out_16[i] = (int16_t) out_32[i];
+        #else
+        // Shift right by BITSHIFT_16_TO_32 in the accumulator, then round/saturate
+        out_16[i] = rnd_saturate(to_accum(out_32[i]) >> BITSHIFT_16_TO_32); // original note: 12 cycles for blocksize 4 //TODO: use rnd_saturate(out_32[i] >> input_nbit_bitshift)
+        #endif 
+        
+        
+    }
+    //out_16[0] = cSensor[0];
+}
+