From 76ec091e34eee407a178455e8bb5371f91431084 Mon Sep 17 00:00:00 2001 From: Patrick Hangl Date: Wed, 7 Jan 2026 16:36:49 +0100 Subject: [PATCH] =?UTF-8?q?DSP=20Code=20=C3=BCbernommen,=20angefangen=20zu?= =?UTF-8?q?=20kommentieren?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/c_cpp_properties.json | 22 + .vscode/settings.json | 3 + dsp_code/main.c | 154 ++++ .../signalProcessing/include/signal_path.h | 141 ++++ dsp_code/signalProcessing/signal_path.c | 785 ++++++++++++++++++ 5 files changed, 1105 insertions(+) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 .vscode/settings.json create mode 100644 dsp_code/main.c create mode 100644 dsp_code/signalProcessing/include/signal_path.h create mode 100644 dsp_code/signalProcessing/signal_path.c diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..bc1a018 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,22 @@ +{ + "configurations": [ + { + "name": "Win32", + "includePath": [ + "${workspaceFolder}/**", + "C:\\Users\\phangl\\00_Repos\\04_Python_Simulation\\dsp_code\\signalProcessing\\include" + ], + "defines": [ + "_DEBUG", + "UNICODE", + "_UNICODE" + ], + "windowsSdkVersion": "8.1", + "compilerPath": "C:/Strawberry/c/bin/gcc.exe", + "cStandard": "c17", + "cppStandard": "c++17", + "intelliSenseMode": "windows-msvc-x86" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1f3f191 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "C_Cpp.default.compilerPath": "C:/Strawberry/c/bin/gcc.exe" +} \ No newline at end of file diff --git a/dsp_code/main.c b/dsp_code/main.c new file mode 100644 index 0000000..d3ca685 --- /dev/null +++ b/dsp_code/main.c @@ -0,0 +1,154 @@ +//#define SIMULATE +#ifdef SIMULATE + #include +#endif + +#define BLOCK_LEN 1 // define block length for processing - currently only 1 is supported + +#include +#include "signalProcessing/include/signal_path.h" + +// Register und Bitmasken für Interrupts zwischen ARM und LPDSP Prozessor +#define CSS_CMD 0xC00004 +#define CSS_CMD_0 (1<<0) +#define CSS_CMD_1 (1<<1) + +// Shared Memory von ARM und DSP definieren +#define INPUT_PORT0_ADD 0x800000 // Feste Adressen für Eingangsdaten im Shared Memory +//#define INPUT_PORT1_ADD INPUT_PORT0_ADD + 2 //DMB - warum auskommentiert? +#define OUTPUT_PORT_ADD (INPUT_PORT0_ADD + 16) // Feste Adressen für Ausgangsdatensdaten im Shared Memory, 16 Byte von Eingangsadresse Weg (PS: 2* for 2 channels) + +//Chess Compiler spezifisch: Interrupt-Register festlegen um ARM zu kontaktieren nach fertiger Berechnung (PS: Define the interrupt register to notify the ARM of a completed operation) +volatile static unsigned char chess_storage(DMIO:CSS_CMD) CssCmdGen; + +// Interrupt-Flag, welche von ARM gesetzt wird, wenn eine Berechnung gewünscht ist +static volatile int actionRequired; + +// Structs anlegen für die Signalpfade - hier werden Konfigurationen abgelegt(signal_path.h) +static SingleSignalPath cSensorSignal; +static SingleSignalPath accSensorSignal; + +// Umschaltung zwischen sampleweiser und blockweiser Verarbeitung +// Sampleweise Verarbeitung: Adresse aus Shared Memory wird direkt verwendet +// Blockweise Verarbeitung: Blöcke kopiert und verarbeitet? Offensichtlicch nicht genutzt bisher +#if BLOCK_LEN == 1 +static volatile int16_t chess_storage(DMB:INPUT_PORT0_ADD) intputPort[4]; //TODO: if BLOCK_LEN >1 is used, the data is interleaved: ch0ch1, ch0ch1 .... chess_storage(DMA % alignof(int)) ? +//static volatile int16_t chess_storage(DMB:INPUT_PORT1_ADD) intputPort1[BLOCK_LEN]; +static volatile int16_t chess_storage(DMB:OUTPUT_PORT_ADD) outputPort[4]; +static volatile int16_t chess_storage(DMB) *inPtr0; +static volatile int16_t chess_storage(DMB) *inPtr1; +static volatile int16_t chess_storage(DMB) *outPtr; +static volatile int16_t chess_storage(DMB) sample; +static volatile int16_t chess_storage(DMB) *sample_ptr; +#else +// Int-Array für Blockverarbeitung im Shared Memory DMA anlegen (Eingabe) +static int16_t chess_storage(DMA) intputPort[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) TODO: volatile? chess_storage(DMA % alignof(int)) +//static int16_t chess_storage(DMA) intputPort1[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) +// Int-Array für Blockverarbeitung im Shared Memory DMA anlegen (Ausgabe) +static int16_t chess_storage(DMB) outputPort[BLOCK_LEN]; // chess_storage(DMB:OUTPUT_PORT_ADD) TODO: determine output port add +#endif + +//void isr0() ist eine Interrupt Service Routine Funktion, welche als C Funktion deklariert wird +// property (isr) ist Chess Compiler spezifisch und kennzeichnet eine Funktion als Interrupt Service Routine +//wird Interrupt getriggert, wird actionRequired auf 1 gesetzt - etwas muss dannpassieren +extern "C" void isr0() property (isr) { + actionRequired = 1; + } + +#ifdef __chess__ +extern "C" +#endif + +int main(void) { + // Enum, welcher den Ausgabemodus definiert - wird in calc()-Funktion verwendet + static OutputMode mode = OUTPUT_MODE_FIR_LMS; + + // Initialize the signal path + // Initialize the csensor signal subpath + // Instanciate the signal path state structs + + // Deactivate preemphasis filter by initializing with coefficients {1., 0., 0., 0., 0.} + // biquad filter coefficients - off + double b0[5]={0.75, 0., 0., 0., 0.}; + double b1[5]={0.75, 0., 0., 0., 0.}; + int N_lms_fir_coeffs = MAX_FIR_COEFFS; // always test with max coeffs + + //init-Funktion aufrufen + init( + &cSensorSignal, &accSensorSignal, + //&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, + b0, + b1, + 2, // sample delay + 2, + 0.9, // weight + 0.9, + 0.01, // lms learning rate + N_lms_fir_coeffs // Numer of lms fir coefficients + ); + + if (mode == OUTPUT_MODE_FIR){ //FIR filter mit fixen coeffizienten wenn nicht adaptiv + for (int i=0; i +#include + +#define MAX_DELAY_SAMPS 16 +#if BLOCK_LEN > MAX_FIR_COEFFS + #error "BLOCK_LEN must be smaller than MAX_FIR_COEFFS" +#endif +#define BITSHIFT_16_TO_32 16 + +static const int block_len=BLOCK_LEN; // TODO: save this an an cm3 accessible location + +#ifdef PLATFORM_GENERIC + typedef long int accum_t; + // empty Macros definitions + #define chess_storage(mem) + #define DMA + #define DMB + #define DMIO + #define chess_loop_range(a,b) + #define isr0(a) + #define chess_flatten_loop +#endif + +typedef struct BufferPtr{ // used as a pointer and length storage container for cirular buffers + int buffer_len; + int *ptr_start; + int *ptr_current; +} BufferPtr; + +typedef struct BufferPtrDMB{ + int buffer_len; + int chess_storage(DMB) *ptr_start; + int chess_storage(DMB) *ptr_current; +} BufferPtrDMB; + +/*Stuct for storage of internal state and configuration for single signal path with a biquad element, a scaling element and a delay*/ +typedef struct SingleSignalPath{ + int input_scale; // The scaling bitshift bits for the input signal + int x_nbit_bitshift; // The number of bits to scale the input signal + int preemph_activated; //Deactivate by initializing with coefficients {1., 0., 0., 0., 0.} + int b_preemph[5]; // Preemphasis filter coefficients + int _preemph_scale_nbits; // The number of bits used to scale the pre emphasis filter + int _xd[2]; //preemphasis biquad filter buffers + int _yd[2]; + int _delay_buffer[MAX_DELAY_SAMPS]; // The delay buffer for the given signal path // chess_storage(DMA) + BufferPtr delay_buffer; // The pointers to the delay buffer and actual used length + int n_delay_samps; // The delay for the given signal path in samples + int weight_actived; //Deactivate by initializing with weight 1.0 + int weight; // The weight for the given signal path + int _weight_scale_nbits; // The number of bits used to scale the weight +} SingleSignalPath; + + +/*Stuct for storage of internal state and configuration for an adaptive fir-lms filter*/ +// typedef struct LmsFilter{ +// int lms_mu; // The learning rate for the lms algorithm +// int lms_num_fir_coeffs; // Number of coefficients for the adaptive filter +// #if BLOCK_LEN == 1 +// //int _delay_line[MAX_FIR_COEFFS]; // The delay line for the adaptive filter // +// BufferDMB delay_line; // The pointer to the delay line +// //int chess_storage(DMB) *ptr_delay_line_current; // The pointer to the current position in the delay line +// #else +// //int chess_storage(%(sizeof(long long))) _delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter +// BufferPtr delay_line; // The pointer to the delay line +// //int chess_storage(DMA) *ptr_delay_line_current; // The pointer to the current position in the delay line +// //int chess_storage(%(sizeof(long long))) fir_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter +// #endif +// } LmsFilter; +// #if BLOCK_LEN == 1 +// int fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter // +// #else +// int chess_storage(%(sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter +// #endif + +#if BLOCK_LEN == 1 +BufferPtr extern ptr_fir_lms_coeffs; +BufferPtrDMB extern chess_storage(DMB) ptr_fir_lms_delay_line; +int extern chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS]; + +#else +int extern chess_storage(DMA%(sizeof(long long))) fir_lms_delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter +BufferPtr extern ptr_fir_lms_delay_line; +BufferPtr extern ptr_fir_lms_coeffs; +#endif + +//int extern chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter + +// typedef struct SignalPath{ +// SingleSignalPath cSensorSignal; +// SingleSignalPath accSensorSignal; +// LmsFilter lms; +// volatile int chess_storage(DMIO:INPUT_PORT_ADD) input_port; +// int chess_storage(DMIO:OUTPUT_PORT_ADD) output_port; +// } SignalPath; + +typedef enum OutputMode{ + OUTPUT_MODE_C_SENSOR, + OUTPUT_MODE_ACC_SENSOR, + OUTPUT_MODE_FIR_LMS, + OUTPUT_MODE_FIR, + OUTPUT_MODE_FIR_LMS_LEAKY, +}OutputMode; + +// void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits); +// int sig_init_delay(SingleSignalPath *signal, int delay_samps); +// void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits); +// void sig_init_lms(LmsFilter *signal, double lms_mu, int lms_fir_num_coeffs, int scale_bits); +// int inline sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x); +// int inline sig_calc_biquad(SingleSignalPath *signal, int x); //TODO: inline ? +// int inline sig_calc_weight(SingleSignalPath *signal, int x); //TODO: inline ? +// int inline sig_calc_fir_lms_single(LmsFilter *signal, int d, int x); //TODO: inline ? + +//void adapt_coeffs_lpdsp32_single(LmsFilter chess_storage(DMB) *filter, int *fir_lms_coeffs, int out); +//sig_calc_fir_lpdsp32_single(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs) + + + +// top level init and calc functions +void init( + SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal, + //BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, + double *b_c, double *b_acc, int delay_c, int delay_acc, double weight_c, double weight_acc, double lms_mu, int lms_fir_num_coeffs); +void calc( + SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal, + //BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, + OutputMode output_mode, + #if BLOCK_LEN != 1 + int16_t *cSensor, + int16_t *accSensor, + #else + int16_t volatile chess_storage(DMB) *cSensor, + int16_t volatile chess_storage(DMB) *accSensor, + #endif + int16_t volatile chess_storage(DMB) *out_16 + ); + +#endif //SIGNAL_PATH_H + diff --git a/dsp_code/signalProcessing/signal_path.c b/dsp_code/signalProcessing/signal_path.c new file mode 100644 index 0000000..d0cb702 --- /dev/null +++ b/dsp_code/signalProcessing/signal_path.c @@ -0,0 +1,785 @@ +#include "include/signal_path.h" + +/* Global variables decleration*/ +static int counter=0; +static int mu; + +#ifdef LPDSP16 +//static int leak=24576; //0.75 +//static int leak=29491; //0.9 +//static int leak=31129; //0.95 // no effect +static int leak=32735; //0.999 // (1 ? µ?) +//static int leak=32766; //0.99999 +#else +//static int leak=2145336164; //0.999 // (1 ? µ?) +static int leak=2147462173; //0.999 // (1 ? µ?) +#endif + + + +#if BLOCK_LEN == 1 +int chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS]; +BufferPtrDMB chess_storage(DMB) ptr_fir_lms_delay_line; +BufferPtr ptr_fir_lms_coeffs; + +#else +int chess_storage(DMA%(sizeof(long long))) fir_lms_delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter +BufferPtr ptr_fir_lms_delay_line; +BufferPtr ptr_fir_lms_coeffs; +#endif + +int chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter + + +#ifdef PLATFORM_GENERIC + // lpdsp32 functionallity moddeling functions + accum_t fract_mult(int a, int b){ + long int a_long = a; + long int b_long = b; + return (b_long * a_long); + } + accum_t to_accum(int a){ + long int a_long = (long int) a; + return a_long << 31; + } + int rnd_saturate(accum_t a){ + return a >> 31; + } + int extract_high(accum_t a){ + return a >> 31; + } + void lldecompose(unsigned long long l, int* int1, int* int2){ + *int2 = (int)(l >> 32); + *int1 = (int)(l); + } + uint64_t llcompose(int a, int b) { + uint64_t result = (uint64_t)b; // Assign b to the higher 32 bits of the result + result <<= 32; // Shift the higher 32 bits to the left + result |= (uint32_t)a; // Bitwise OR operation with the lower 32 bits of a + return result; + } + // unsigned long long llcompose(int a, int b){ + // unsigned long long l; + // l = a << 32; + // l |= b; + // return l; + //} + int* cyclic_add(int *ptr, int i_pp, int *ptr_start, int buffer_len){ + int *p_ptr=ptr; + for (int i=0; i < abs(i_pp); i+=1){ // end of buffer wraparound + if (i_pp > 0){ + p_ptr ++; + if (p_ptr >= ptr_start + buffer_len){ + p_ptr=ptr_start; + } + } + else{ // start of buffer wraparound + p_ptr--; + if (p_ptr < ptr_start){ + p_ptr=ptr_start + (buffer_len -1); + } + } + } + return p_ptr; + } +#endif + + +/*Round saturate with 16 bits return value */ +int static inline rnd_saturate16(accum_t acc){ //maybe int16_fast type? + acc = to_accum( // saturate + rnd_saturate(acc << 32) + ); + return rnd_saturate(acc >> 16); //round +} + + +int sig_init_buffer(BufferPtr *buffer, int *buffer_start_add, int length, int max_buffer_len) { + buffer->buffer_len = length; + buffer->ptr_start = buffer_start_add; + buffer->ptr_current = buffer_start_add; + // initialize delay line with 0 + for (int i = 0; i < length; i++) { + buffer_start_add[i] = 0; + } + if (lengthbuffer_len = length; + buffer->ptr_start = buffer_start_add; + buffer->ptr_current = buffer_start_add; + // initialize delay line with 0 + for (int i = 0; i < length; i++) { + buffer_start_add[i] = 0; + } + if (lengthptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len); +} + +void sig_cirular_buffer_ptr_increment_DMB(BufferPtrDMB *buffer, int i_incr){ + buffer->ptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len); +} + +void sig_cirular_buffer_ptr_put_sample(BufferPtr *buffer, int sample){ + *buffer->ptr_current = sample; + buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len); +} + +void sig_cirular_buffer_ptr_put_sample_DMB(BufferPtrDMB chess_storage(DMB) *buffer, int sample){ + *buffer->ptr_current = sample; + buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len); +} + +void static inline sig_circular_buffer_ptr_put_block(BufferPtr *buffer, int* block){ + // increment pointer to oldest block + //buffer->ptr_current = cyclic_add(buffer->ptr_current, BLOCK_LEN, buffer->ptr_start, buffer->buffer_len); + // load the next block + for (int i=0; iptr_current[0] = block[i]; // TODO: use llcompose + buffer->ptr_current[1] = block[i+1]; + buffer->ptr_current = cyclic_add(buffer->ptr_current, 2, buffer->ptr_start, buffer->buffer_len); + } +} + +void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits) { + // Check first if filter is actually activated + if (b0 == 1. && b1 == 0. && b2 == 0. && a1 == 0. && a2 == 0.) { + signal->preemph_activated = 0; + } + else{ + signal->preemph_activated = 1; + signal->_preemph_scale_nbits = scale_bits; + int scale = pow(2, scale_bits) - 1; + signal->b_preemph[0] = b0 * scale; + signal->b_preemph[1] = b1 * scale; + signal->b_preemph[2] = b2 * scale; + signal->b_preemph[3] = a1 * scale; + signal->b_preemph[4] = a2 * scale; + } +} + +/*Initialization functions - make sure all of them were called to ensure functionality*/ +int sig_init_delay(SingleSignalPath *signal, int n_delay) { + return sig_init_buffer(&signal->delay_buffer, signal->_delay_buffer, n_delay, MAX_DELAY_SAMPS); +} + +void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits) { + if (weight == 1.) { + signal->weight_actived = 0; + } + else{ + signal->weight_actived = 1; + int scale = pow(2, scale_nbits) - 1; + signal->weight = weight * scale; + signal->_weight_scale_nbits = scale_nbits; + } +} + +/*Calculator functions for the given signal path*/ +/*Calculate one biquad filter element*/ +int sig_calc_biquad(SingleSignalPath *signal, int x) { + if (signal->preemph_activated == 0) { + return x; + } + accum_t sum = + fract_mult(x, signal->b_preemph[0]) + fract_mult(signal->_xd[0], signal->b_preemph[1]) + + fract_mult(signal->_xd[1], signal->b_preemph[2]) + fract_mult(signal->_yd[0], signal->b_preemph[3]) + + fract_mult(signal->_yd[1],signal->b_preemph[4]); + + #ifdef LPDSP16 + int y = rnd_saturate16(sum << 1); + #else + int y = rnd_saturate(sum << 1); + #endif + + signal->_xd[1] = signal->_xd[0]; + signal->_xd[0] = x; + signal->_yd[1] = signal->_yd[0]; + signal->_yd[0] = y; + return y; +} +int inline sig_get_delayed_sample(SingleSignalPath *signal) { + return *signal->delay_buffer.ptr_current; +} + +int sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x) { + if (signal->delay_buffer.buffer_len == 0) { + return x; + } + int out = *signal->delay_buffer.ptr_current; + *signal->delay_buffer.ptr_current = x; + sig_cirular_buffer_ptr_increment(&signal->delay_buffer, 1); + return out; +} + +int sig_calc_weight(SingleSignalPath *signal, int x) { + if (signal->weight_actived == 0) { + return x; + } + accum_t acc = fract_mult(x, signal->weight); + + return rnd_saturate(acc); +} + +#if BLOCK_LEN!=1 // Block processing +/*lpdsp32 fir filter example adapted from user guide +#define NS 256 //No. of samples +#define N 64 //No. of filter coefficients or No. of tap weights +int chess_storage(DMB) y[NS]; //Output Signal +int chess_storage(DMA %(sizeof(long long))) x[NS+N-1]; //Input Signal +//Filter coefficients or tap weights +int chess_storage(DMA %(sizeof(long long))) h[N]; + */ +void sig_calc_fir_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int chess_storage(DMB) *out){ +//void fir(int *y, int *x, int *h) + static int chess_storage(DMA) *p_x; // pointer to the start of the last added block + static int chess_storage(DMA) *p_h; // pointer to the start of the filter coefficients + static int chess_storage(DMB) *p_y; // pointer to the output port + + p_y = out; + + int *px_start = ptr_fir_lms_delay_line->ptr_start; + int *ph_start = ptr_fir_lms_coeffs->ptr_current; + int delay_line_len = ptr_fir_lms_delay_line->buffer_len; + int n_coeff = ptr_fir_lms_coeffs->buffer_len; + + int coef1, coef2; + int dat1, dat2; + + for(unsigned int n=0; nptr_current; + p_h = ptr_fir_lms_coeffs->ptr_current; + p_y = out; + + for(int n=0; nptr_current, n, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even + accum_t sum = to_accum(0); + for(int k=0; k < ptr_fir_lms_coeffs->buffer_len; k+=2) chess_loop_range(1,) + { + sum += fract_mult(p_x[0] , p_h[k]); + sum += fract_mult(p_x[1] , p_h[k+1]); + + sum = to_accum(rnd_saturate(sum)); + p_x = cyclic_add(p_x, -2, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even + } + *p_y++ = extract_high(sum); + } +} +/* "out" is actually an input to the function and is the output of the fir_lms filter system*/ +void adapt_coeffs_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ // only works for even delay line sample pointers!! + + int *p_x = ptr_fir_lms_delay_line->ptr_current; // pointer to the start of the last added block - TODO: doublecheck this - might be wrong because the pointer actually points to the end of the block! + int *p_x_start = ptr_fir_lms_delay_line->ptr_start; + int *p_h = ptr_fir_lms_coeffs->ptr_current; // pointer to the start of the filter coefficients + int delay_line_len = ptr_fir_lms_delay_line->buffer_len; + int n_coeff = ptr_fir_lms_coeffs->buffer_len; + int prod0, x0, x1, h0, h1; + + // Calculate the first term of the coefficient adaption + accum_t acc_C = fract_mult(mu, out); + prod0 = rnd_saturate(acc_C); + //acc_D = fract_mult(mu, out1); + //prod1 = rnd_saturate(acc_C); + for (int i=0; iptr_current; // chess_storage(DMB) + int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; + int *p_h = ptr_fir_lms_coeffs->ptr_current; + int delay_line_len = ptr_fir_lms_delay_line->buffer_len; + int n_coeff = ptr_fir_lms_coeffs->buffer_len; + + int d0,d1,h0,h1; + accum_t acc1_A = to_accum(0); + accum_t acc1_B = to_accum(0); + accum_t acc1_C; + + // iterate over the coefficients to calculate the filter on x - the canceller + /* Abschaetzung cycles per 2coefficient: + dual - load : 1 + dual mac and dual load: 1 + -> 48/2 * 2 = 48 cycles for 48 coefficents + */ + for (int i=0; i < n_coeff; i+=2) chess_loop_range(1,){ + // Use dual load and dual pointer update + d0 = *p_x0; + h0 = *p_h; + p_h++; + p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); + + d1 = *p_x0; + h1 = *p_h; + p_h++; + p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); + + acc1_A+=fract_mult(d0, h0); + acc1_B+=fract_mult(d1, h1); + #ifndef LPDSP16 + acc1_A = to_accum(rnd_saturate(acc1_A)); + acc1_B = to_accum(rnd_saturate(acc1_B)); + #endif + + } + // Calculate the output sample + acc1_C = acc1_A + acc1_B; + //out32 = rnd_saturate(acc1_A); + #ifdef LPDSP16 + return rnd_saturate16(acc1_C); + #else + return rnd_saturate(acc1_C); + #endif +} + +void static inline adapt_coeffs_lpdsp32_single_v1(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ + + int chess_storage(DMA) *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer + //int chess_storage(DMA) *p_h1 = ptr_fir_lms_coeffs->ptr_start; //coeff store pointer + int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) + int chess_storage(DMB) *p_x1 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) + + p_x1 = cyclic_add(p_x1, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); + + int prod, x0, x1, h0, h1; + int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; + int delay_line_len = ptr_fir_lms_delay_line->buffer_len; + int n_coeff = ptr_fir_lms_coeffs->buffer_len; + + accum_t acc_A, acc_B; + + // Calculate the first term of the coefficient adaption + accum_t acc_C = fract_mult(mu, out); + #ifdef LPDSP16 + prod = rnd_saturate16(acc_C); + #else + prod = rnd_saturate(acc_C); + #endif + /* Abschätzung cycles per 2 coefficient: + dual load coeffs: 1 + single load tab value: 2 + dual mac: 1 + dual rnd_sat - store: 1 + load/store hazard nop: 1 + */ + for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){ + // Calculate the coefficient wise adaption + #ifdef PLATFORM_GENERIC + lldecompose(*((long long *)p_h0), &h0, &h1); + #else + lldecompose(*((long long *)p_h0), h0, h1); + #endif + + acc_A = to_accum(h0); + acc_B = to_accum(h1); + + #ifdef LPDSP16 + acc_A += fract_mult(prod, *p_x0) << 16; // TODO: This could be further optimized by using all 4 available accums? + acc_B += fract_mult(prod, *p_x1) << 16; + #else + acc_A += fract_mult(prod, *p_x0); // TODO: This could be further optimized by using all 4 available accums? + acc_B += fract_mult(prod, *p_x1); + #endif + + p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len); + p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len); + + // update the current filter coefficients - dual sat; dual store + *((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed + p_h0+=2; + } +} + +void static inline adapt_coeffs_lpdsp32_single_leaky(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ + + int chess_storage(DMA) *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer + //int chess_storage(DMA) *p_h1 = ptr_fir_lms_coeffs->ptr_start; //coeff store pointer + int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) + int chess_storage(DMB) *p_x1 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) + + p_x1 = cyclic_add(p_x1, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); + + int prod, x0, x1, h0, h1; + int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; + int delay_line_len = ptr_fir_lms_delay_line->buffer_len; + int n_coeff = ptr_fir_lms_coeffs->buffer_len; + + accum_t acc_A, acc_B; + + // Calculate the first term of the coefficient adaption + accum_t acc_C = fract_mult(mu, out); + #ifdef LPDSP16 + prod = rnd_saturate16(acc_C); + #else + prod = rnd_saturate(acc_C); + #endif + + for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){ + // Calculate the coefficient wise adaption + #ifdef PLATFORM_GENERIC + lldecompose(*((long long *)p_h0), &h0, &h1); + #else + lldecompose(*((long long *)p_h0), h0, h1); + #endif + + acc_A = fract_mult(h0, leak); // leaky + acc_B = fract_mult(h1, leak); + + acc_A += fract_mult(prod, *p_x0); // TODO: This could be further optimized by using all 4 available accums? + acc_B += fract_mult(prod, *p_x1); + + p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len); + p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len); + + // update the current filter coefficients - dual sat; dual store + #ifdef LPDSP16 + *((long long *)p_h0) = llcompose(rnd_saturate16(acc_A), rnd_saturate16(acc_B));//load/store hazard ! - 1 nop is needed + #else + *((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed + #endif + p_h0+=2; + } +} + +void adapt_coeffs_generic_single(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ + int *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer + int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) + + int prod; + + accum_t acc_A, acc_B; + + // Calculate the first term of the coefficient adaption + accum_t acc_C = fract_mult(mu, out); + prod = rnd_saturate(acc_C); + for (int i=0; i< ptr_fir_lms_delay_line->buffer_len; i++){ + // Calculate the coefficient wise adaption + acc_A = to_accum(p_h0[i]); + acc_A += fract_mult(prod, *p_x0); + p_x0 = cyclic_add(p_x0, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); + p_h0[i]=rnd_saturate(acc_A); + } +} +#endif + +void init( + SingleSignalPath *cSensorSignal, + SingleSignalPath *accSensorSignal, + //BufferPtrDMB *ptr_fir_lms_delay_line, + //BufferPtr *ptr_fir_lms_coeffs, + double *b_c, + double *b_acc, + int delay_c, + int delay_acc, + double weight_c, + double weight_acc, + double lms_mu, + int lms_fir_num_coeffs + ){ + #ifdef LPDSP16 + int scale_bits=15; + #else + int scale_bits=31; + #endif + + sig_init_preemph_coef(cSensorSignal, b_c[0], b_c[1], b_c[2], b_c[3], b_c[4], scale_bits); + sig_init_delay(cSensorSignal, delay_c); + sig_init_weight(cSensorSignal, weight_c, scale_bits); + + // // Initialize the accSensor signal subpath + sig_init_preemph_coef(accSensorSignal, b_acc[0], b_acc[1], b_acc[2], b_acc[3], b_acc[4], scale_bits); + sig_init_delay(accSensorSignal, delay_acc); + sig_init_weight(accSensorSignal, weight_acc, 31); + + // initialize the lms filter parameters + int scale = pow(2, scale_bits) - 1; + mu = lms_mu * scale; + // initialize the fir_lms buffers + #if BLOCK_LEN == 1 + sig_init_buffer_DMB(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs, MAX_FIR_COEFFS); + sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); + #else + sig_init_buffer(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs + BLOCK_LEN, BLOCK_LEN + MAX_FIR_COEFFS); + sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); + + #endif + for (int i = 0; i < lms_fir_num_coeffs; i++) { + ptr_fir_lms_delay_line.ptr_start[i] = 0; + ptr_fir_lms_coeffs.ptr_start[i] = 0; + } +} + +// Data d(cSensor) is signal + noise +// x (accSensor) is reference noise signal +void calc( + SingleSignalPath *cSensorSignal, + SingleSignalPath *accSensorSignal, + // BufferPtrDMB *ptr_fir_lms_delay_line, + // BufferPtr *ptr_fir_lms_coeffs, + OutputMode output_mode, + #if BLOCK_LEN != 1 + int16_t *cSensor, + int16_t *accSensor, + #else + int16_t volatile chess_storage(DMB) *cSensor, + int16_t volatile chess_storage(DMB) *accSensor, + #endif + + int16_t volatile chess_storage(DMB) *out_16 + + ){ + static int chess_storage(DMA) c_block_pre[BLOCK_LEN]; + static int chess_storage(DMA) acc_block_pre[BLOCK_LEN]; + static int chess_storage(DMA) cSensor_32[BLOCK_LEN]; + static int chess_storage(DMA) accSensor_32[BLOCK_LEN]; + + static int chess_storage(DMB) acc_block_filt[BLOCK_LEN]; + static int chess_storage(DMB) out_32[BLOCK_LEN]; + + static int chess_storage(DMA) *p_c_block_pre =c_block_pre; + static int chess_storage(DMA) *p_acc_block_filt =acc_block_pre; + static int chess_storage(DMB) *p_out_32=out_32; + + + #ifdef LPDSP16 + for (uint32_t i=0; idelay_line, BLOCK_LEN); + #endif + break; + case OUTPUT_MODE_FIR_LMS: // apply lms filter on cSensor signal + #if BLOCK_LEN == 1 + // Increment the buffer pointer and set the current sample to the delay line + sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); + //*ptr_fir_lms_delay_line.ptr_current = acc_block_pre[0]; + //ptr_fir_lms_delay_line.ptr_current = cyclic_add(ptr_fir_lms_delay_line.ptr_current, 1, ptr_fir_lms_delay_line.ptr_start, ptr_fir_lms_delay_line.buffer_len); + + // Calculate the fir filter output on acc to get the canceller + acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); + // Calculate the ouptut signal by c_block_pre - acc_block_filt + out_32[0] = c_block_pre[0] - acc_block_filt[0]; + //if (counter >= 0){ //TODO: implement this and make it configurable + // Calculate the coefficient adaptation + //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); + adapt_coeffs_lpdsp32_single_v1(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); + //counter=0; + // } + // else{ + // counter++; + // } + #else // Block processing + // Put the next block to the buffer + sig_circular_buffer_ptr_put_block(&ptr_fir_lms_delay_line, acc_block_pre); + // Calculate the fir filter output on acc to get the canceller + sig_calc_fir_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, acc_block_filt); + + // Calculate the ouptut signal by c_block_pre - acc_block_filt + for (int i=0; idelay_line, acc_block_pre[i]); + //acc_block_filt[i]= sig_calc_fir_lpdsp32_single(lms); + out_32[i] = c_block_pre[i] - acc_block_filt[i]; // 15 cycles with 4 samples/block + // adapt the coefficients with respect to the last sample in the block + } + //adapt_coeffs_lpdsp32_single(lms, out_32[1]); + adapt_coeffs_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); + // Increment the buffer pointer to get ready for the next block + //sig_cirular_buffer_ptr_increment(&lms->delay_line, BLOCK_LEN); + #endif + break; + case OUTPUT_MODE_FIR_LMS_LEAKY: // apply lms filter on cSensor signal + // Increment the buffer pointer and set the current sample to the delay line + sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); + + // Calculate the fir filter output on acc to get the canceller + acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); + // Calculate the ouptut signal by c_block_pre - acc_block_filt + out_32[0] = c_block_pre[0] - acc_block_filt[0]; + //if (counter >= 0){ //TODO: implement this and make it configurable + // Calculate the coefficient adaptation + //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); + adapt_coeffs_lpdsp32_single_leaky(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); + + break; + default: // MUTED + for (uint32_t i=0; i> BITSHIFT_16_TO_32); // 12 cycles for blocksize 4 //TODO: use rnd_saturate(out_32[i] >> input_nbit_bitshift) + #endif + + + } + //out_16[0] = cSensor[0]; +} +