diff --git a/dsp_code/main.c b/dsp_code/main.c deleted file mode 100644 index d3ca685..0000000 --- a/dsp_code/main.c +++ /dev/null @@ -1,154 +0,0 @@ -//#define SIMULATE -#ifdef SIMULATE - #include -#endif - -#define BLOCK_LEN 1 // define block length for processing - currently only 1 is supported - -#include -#include "signalProcessing/include/signal_path.h" - -// Register und Bitmasken für Interrupts zwischen ARM und LPDSP Prozessor -#define CSS_CMD 0xC00004 -#define CSS_CMD_0 (1<<0) -#define CSS_CMD_1 (1<<1) - -// Shared Memory von ARM und DSP definieren -#define INPUT_PORT0_ADD 0x800000 // Feste Adressen für Eingangsdaten im Shared Memory -//#define INPUT_PORT1_ADD INPUT_PORT0_ADD + 2 //DMB - warum auskommentiert? -#define OUTPUT_PORT_ADD (INPUT_PORT0_ADD + 16) // Feste Adressen für Ausgangsdatensdaten im Shared Memory, 16 Byte von Eingangsadresse Weg (PS: 2* for 2 channels) - -//Chess Compiler spezifisch: Interrupt-Register festlegen um ARM zu kontaktieren nach fertiger Berechnung (PS: Define the interrupt register to notify the ARM of a completed operation) -volatile static unsigned char chess_storage(DMIO:CSS_CMD) CssCmdGen; - -// Interrupt-Flag, welche von ARM gesetzt wird, wenn eine Berechnung gewünscht ist -static volatile int actionRequired; - -// Structs anlegen für die Signalpfade - hier werden Konfigurationen abgelegt(signal_path.h) -static SingleSignalPath cSensorSignal; -static SingleSignalPath accSensorSignal; - -// Umschaltung zwischen sampleweiser und blockweiser Verarbeitung -// Sampleweise Verarbeitung: Adresse aus Shared Memory wird direkt verwendet -// Blockweise Verarbeitung: Blöcke kopiert und verarbeitet? Offensichtlicch nicht genutzt bisher -#if BLOCK_LEN == 1 -static volatile int16_t chess_storage(DMB:INPUT_PORT0_ADD) intputPort[4]; //TODO: if BLOCK_LEN >1 is used, the data is interleaved: ch0ch1, ch0ch1 .... chess_storage(DMA % alignof(int)) ? -//static volatile int16_t chess_storage(DMB:INPUT_PORT1_ADD) intputPort1[BLOCK_LEN]; -static volatile int16_t chess_storage(DMB:OUTPUT_PORT_ADD) outputPort[4]; -static volatile int16_t chess_storage(DMB) *inPtr0; -static volatile int16_t chess_storage(DMB) *inPtr1; -static volatile int16_t chess_storage(DMB) *outPtr; -static volatile int16_t chess_storage(DMB) sample; -static volatile int16_t chess_storage(DMB) *sample_ptr; -#else -// Int-Array für Blockverarbeitung im Shared Memory DMA anlegen (Eingabe) -static int16_t chess_storage(DMA) intputPort[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) TODO: volatile? chess_storage(DMA % alignof(int)) -//static int16_t chess_storage(DMA) intputPort1[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) -// Int-Array für Blockverarbeitung im Shared Memory DMA anlegen (Ausgabe) -static int16_t chess_storage(DMB) outputPort[BLOCK_LEN]; // chess_storage(DMB:OUTPUT_PORT_ADD) TODO: determine output port add -#endif - -//void isr0() ist eine Interrupt Service Routine Funktion, welche als C Funktion deklariert wird -// property (isr) ist Chess Compiler spezifisch und kennzeichnet eine Funktion als Interrupt Service Routine -//wird Interrupt getriggert, wird actionRequired auf 1 gesetzt - etwas muss dannpassieren -extern "C" void isr0() property (isr) { - actionRequired = 1; - } - -#ifdef __chess__ -extern "C" -#endif - -int main(void) { - // Enum, welcher den Ausgabemodus definiert - wird in calc()-Funktion verwendet - static OutputMode mode = OUTPUT_MODE_FIR_LMS; - - // Initialize the signal path - // Initialize the csensor signal subpath - // Instanciate the signal path state structs - - // Deactivate preemphasis filter by initializing with coefficients {1., 0., 0., 0., 0.} - // biquad filter coefficients - off - double b0[5]={0.75, 0., 0., 0., 0.}; - double b1[5]={0.75, 0., 0., 0., 0.}; - int N_lms_fir_coeffs = MAX_FIR_COEFFS; // always test with max coeffs - - //init-Funktion aufrufen - init( - &cSensorSignal, &accSensorSignal, - //&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, - b0, - b1, - 2, // sample delay - 2, - 0.9, // weight - 0.9, - 0.01, // lms learning rate - N_lms_fir_coeffs // Numer of lms fir coefficients - ); - - if (mode == OUTPUT_MODE_FIR){ //FIR filter mit fixen coeffizienten wenn nicht adaptiv - for (int i=0; i -#include - -#define MAX_DELAY_SAMPS 16 -#if BLOCK_LEN > MAX_FIR_COEFFS - #error "BLOCK_LEN must be smaller than MAX_FIR_COEFFS" -#endif -#define BITSHIFT_16_TO_32 16 - -static const int block_len=BLOCK_LEN; // TODO: save this an an cm3 accessible location - -#ifdef PLATFORM_GENERIC - typedef long int accum_t; - // empty Macros definitions - #define chess_storage(mem) - #define DMA - #define DMB - #define DMIO - #define chess_loop_range(a,b) - #define isr0(a) - #define chess_flatten_loop -#endif - -typedef struct BufferPtr{ // used as a pointer and length storage container for cirular buffers - int buffer_len; - int *ptr_start; - int *ptr_current; -} BufferPtr; - -typedef struct BufferPtrDMB{ - int buffer_len; - int chess_storage(DMB) *ptr_start; - int chess_storage(DMB) *ptr_current; -} BufferPtrDMB; - -/*Stuct for storage of internal state and configuration for single signal path with a biquad element, a scaling element and a delay*/ -typedef struct SingleSignalPath{ - int input_scale; // The scaling bitshift bits for the input signal - int x_nbit_bitshift; // The number of bits to scale the input signal - int preemph_activated; //Deactivate by initializing with coefficients {1., 0., 0., 0., 0.} - int b_preemph[5]; // Preemphasis filter coefficients - int _preemph_scale_nbits; // The number of bits used to scale the pre emphasis filter - int _xd[2]; //preemphasis biquad filter buffers - int _yd[2]; - int _delay_buffer[MAX_DELAY_SAMPS]; // The delay buffer for the given signal path // chess_storage(DMA) - BufferPtr delay_buffer; // The pointers to the delay buffer and actual used length - int n_delay_samps; // The delay for the given signal path in samples - int weight_actived; //Deactivate by initializing with weight 1.0 - int weight; // The weight for the given signal path - int _weight_scale_nbits; // The number of bits used to scale the weight -} SingleSignalPath; - - -/*Stuct for storage of internal state and configuration for an adaptive fir-lms filter*/ -// typedef struct LmsFilter{ -// int lms_mu; // The learning rate for the lms algorithm -// int lms_num_fir_coeffs; // Number of coefficients for the adaptive filter -// #if BLOCK_LEN == 1 -// //int _delay_line[MAX_FIR_COEFFS]; // The delay line for the adaptive filter // -// BufferDMB delay_line; // The pointer to the delay line -// //int chess_storage(DMB) *ptr_delay_line_current; // The pointer to the current position in the delay line -// #else -// //int chess_storage(%(sizeof(long long))) _delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter -// BufferPtr delay_line; // The pointer to the delay line -// //int chess_storage(DMA) *ptr_delay_line_current; // The pointer to the current position in the delay line -// //int chess_storage(%(sizeof(long long))) fir_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter -// #endif -// } LmsFilter; -// #if BLOCK_LEN == 1 -// int fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter // -// #else -// int chess_storage(%(sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter -// #endif - -#if BLOCK_LEN == 1 -BufferPtr extern ptr_fir_lms_coeffs; -BufferPtrDMB extern chess_storage(DMB) ptr_fir_lms_delay_line; -int extern chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS]; - -#else -int extern chess_storage(DMA%(sizeof(long long))) fir_lms_delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter -BufferPtr extern ptr_fir_lms_delay_line; -BufferPtr extern ptr_fir_lms_coeffs; -#endif - -//int extern chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter - -// typedef struct SignalPath{ -// SingleSignalPath cSensorSignal; -// SingleSignalPath accSensorSignal; -// LmsFilter lms; -// volatile int chess_storage(DMIO:INPUT_PORT_ADD) input_port; -// int chess_storage(DMIO:OUTPUT_PORT_ADD) output_port; -// } SignalPath; - -typedef enum OutputMode{ - OUTPUT_MODE_C_SENSOR, - OUTPUT_MODE_ACC_SENSOR, - OUTPUT_MODE_FIR_LMS, - OUTPUT_MODE_FIR, - OUTPUT_MODE_FIR_LMS_LEAKY, -}OutputMode; - -// void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits); -// int sig_init_delay(SingleSignalPath *signal, int delay_samps); -// void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits); -// void sig_init_lms(LmsFilter *signal, double lms_mu, int lms_fir_num_coeffs, int scale_bits); -// int inline sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x); -// int inline sig_calc_biquad(SingleSignalPath *signal, int x); //TODO: inline ? -// int inline sig_calc_weight(SingleSignalPath *signal, int x); //TODO: inline ? -// int inline sig_calc_fir_lms_single(LmsFilter *signal, int d, int x); //TODO: inline ? - -//void adapt_coeffs_lpdsp32_single(LmsFilter chess_storage(DMB) *filter, int *fir_lms_coeffs, int out); -//sig_calc_fir_lpdsp32_single(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs) - - - -// top level init and calc functions -void init( - SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal, - //BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, - double *b_c, double *b_acc, int delay_c, int delay_acc, double weight_c, double weight_acc, double lms_mu, int lms_fir_num_coeffs); -void calc( - SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal, - //BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, - OutputMode output_mode, - #if BLOCK_LEN != 1 - int16_t *cSensor, - int16_t *accSensor, - #else - int16_t volatile chess_storage(DMB) *cSensor, - int16_t volatile chess_storage(DMB) *accSensor, - #endif - int16_t volatile chess_storage(DMB) *out_16 - ); - -#endif //SIGNAL_PATH_H - diff --git a/dsp_code/signalProcessing/signal_path.c b/dsp_code/signalProcessing/signal_path.c deleted file mode 100644 index d0cb702..0000000 --- a/dsp_code/signalProcessing/signal_path.c +++ /dev/null @@ -1,785 +0,0 @@ -#include "include/signal_path.h" - -/* Global variables decleration*/ -static int counter=0; -static int mu; - -#ifdef LPDSP16 -//static int leak=24576; //0.75 -//static int leak=29491; //0.9 -//static int leak=31129; //0.95 // no effect -static int leak=32735; //0.999 // (1 ? µ?) -//static int leak=32766; //0.99999 -#else -//static int leak=2145336164; //0.999 // (1 ? µ?) -static int leak=2147462173; //0.999 // (1 ? µ?) -#endif - - - -#if BLOCK_LEN == 1 -int chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS]; -BufferPtrDMB chess_storage(DMB) ptr_fir_lms_delay_line; -BufferPtr ptr_fir_lms_coeffs; - -#else -int chess_storage(DMA%(sizeof(long long))) fir_lms_delay_line[BLOCK_LEN + MAX_FIR_COEFFS]; // The delay line for the adaptive filter -BufferPtr ptr_fir_lms_delay_line; -BufferPtr ptr_fir_lms_coeffs; -#endif - -int chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; // The coefficients for the adaptive filter - - -#ifdef PLATFORM_GENERIC - // lpdsp32 functionallity moddeling functions - accum_t fract_mult(int a, int b){ - long int a_long = a; - long int b_long = b; - return (b_long * a_long); - } - accum_t to_accum(int a){ - long int a_long = (long int) a; - return a_long << 31; - } - int rnd_saturate(accum_t a){ - return a >> 31; - } - int extract_high(accum_t a){ - return a >> 31; - } - void lldecompose(unsigned long long l, int* int1, int* int2){ - *int2 = (int)(l >> 32); - *int1 = (int)(l); - } - uint64_t llcompose(int a, int b) { - uint64_t result = (uint64_t)b; // Assign b to the higher 32 bits of the result - result <<= 32; // Shift the higher 32 bits to the left - result |= (uint32_t)a; // Bitwise OR operation with the lower 32 bits of a - return result; - } - // unsigned long long llcompose(int a, int b){ - // unsigned long long l; - // l = a << 32; - // l |= b; - // return l; - //} - int* cyclic_add(int *ptr, int i_pp, int *ptr_start, int buffer_len){ - int *p_ptr=ptr; - for (int i=0; i < abs(i_pp); i+=1){ // end of buffer wraparound - if (i_pp > 0){ - p_ptr ++; - if (p_ptr >= ptr_start + buffer_len){ - p_ptr=ptr_start; - } - } - else{ // start of buffer wraparound - p_ptr--; - if (p_ptr < ptr_start){ - p_ptr=ptr_start + (buffer_len -1); - } - } - } - return p_ptr; - } -#endif - - -/*Round saturate with 16 bits return value */ -int static inline rnd_saturate16(accum_t acc){ //maybe int16_fast type? - acc = to_accum( // saturate - rnd_saturate(acc << 32) - ); - return rnd_saturate(acc >> 16); //round -} - - -int sig_init_buffer(BufferPtr *buffer, int *buffer_start_add, int length, int max_buffer_len) { - buffer->buffer_len = length; - buffer->ptr_start = buffer_start_add; - buffer->ptr_current = buffer_start_add; - // initialize delay line with 0 - for (int i = 0; i < length; i++) { - buffer_start_add[i] = 0; - } - if (lengthbuffer_len = length; - buffer->ptr_start = buffer_start_add; - buffer->ptr_current = buffer_start_add; - // initialize delay line with 0 - for (int i = 0; i < length; i++) { - buffer_start_add[i] = 0; - } - if (lengthptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len); -} - -void sig_cirular_buffer_ptr_increment_DMB(BufferPtrDMB *buffer, int i_incr){ - buffer->ptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len); -} - -void sig_cirular_buffer_ptr_put_sample(BufferPtr *buffer, int sample){ - *buffer->ptr_current = sample; - buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len); -} - -void sig_cirular_buffer_ptr_put_sample_DMB(BufferPtrDMB chess_storage(DMB) *buffer, int sample){ - *buffer->ptr_current = sample; - buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len); -} - -void static inline sig_circular_buffer_ptr_put_block(BufferPtr *buffer, int* block){ - // increment pointer to oldest block - //buffer->ptr_current = cyclic_add(buffer->ptr_current, BLOCK_LEN, buffer->ptr_start, buffer->buffer_len); - // load the next block - for (int i=0; iptr_current[0] = block[i]; // TODO: use llcompose - buffer->ptr_current[1] = block[i+1]; - buffer->ptr_current = cyclic_add(buffer->ptr_current, 2, buffer->ptr_start, buffer->buffer_len); - } -} - -void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits) { - // Check first if filter is actually activated - if (b0 == 1. && b1 == 0. && b2 == 0. && a1 == 0. && a2 == 0.) { - signal->preemph_activated = 0; - } - else{ - signal->preemph_activated = 1; - signal->_preemph_scale_nbits = scale_bits; - int scale = pow(2, scale_bits) - 1; - signal->b_preemph[0] = b0 * scale; - signal->b_preemph[1] = b1 * scale; - signal->b_preemph[2] = b2 * scale; - signal->b_preemph[3] = a1 * scale; - signal->b_preemph[4] = a2 * scale; - } -} - -/*Initialization functions - make sure all of them were called to ensure functionality*/ -int sig_init_delay(SingleSignalPath *signal, int n_delay) { - return sig_init_buffer(&signal->delay_buffer, signal->_delay_buffer, n_delay, MAX_DELAY_SAMPS); -} - -void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits) { - if (weight == 1.) { - signal->weight_actived = 0; - } - else{ - signal->weight_actived = 1; - int scale = pow(2, scale_nbits) - 1; - signal->weight = weight * scale; - signal->_weight_scale_nbits = scale_nbits; - } -} - -/*Calculator functions for the given signal path*/ -/*Calculate one biquad filter element*/ -int sig_calc_biquad(SingleSignalPath *signal, int x) { - if (signal->preemph_activated == 0) { - return x; - } - accum_t sum = - fract_mult(x, signal->b_preemph[0]) + fract_mult(signal->_xd[0], signal->b_preemph[1]) + - fract_mult(signal->_xd[1], signal->b_preemph[2]) + fract_mult(signal->_yd[0], signal->b_preemph[3]) + - fract_mult(signal->_yd[1],signal->b_preemph[4]); - - #ifdef LPDSP16 - int y = rnd_saturate16(sum << 1); - #else - int y = rnd_saturate(sum << 1); - #endif - - signal->_xd[1] = signal->_xd[0]; - signal->_xd[0] = x; - signal->_yd[1] = signal->_yd[0]; - signal->_yd[0] = y; - return y; -} -int inline sig_get_delayed_sample(SingleSignalPath *signal) { - return *signal->delay_buffer.ptr_current; -} - -int sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x) { - if (signal->delay_buffer.buffer_len == 0) { - return x; - } - int out = *signal->delay_buffer.ptr_current; - *signal->delay_buffer.ptr_current = x; - sig_cirular_buffer_ptr_increment(&signal->delay_buffer, 1); - return out; -} - -int sig_calc_weight(SingleSignalPath *signal, int x) { - if (signal->weight_actived == 0) { - return x; - } - accum_t acc = fract_mult(x, signal->weight); - - return rnd_saturate(acc); -} - -#if BLOCK_LEN!=1 // Block processing -/*lpdsp32 fir filter example adapted from user guide -#define NS 256 //No. of samples -#define N 64 //No. of filter coefficients or No. of tap weights -int chess_storage(DMB) y[NS]; //Output Signal -int chess_storage(DMA %(sizeof(long long))) x[NS+N-1]; //Input Signal -//Filter coefficients or tap weights -int chess_storage(DMA %(sizeof(long long))) h[N]; - */ -void sig_calc_fir_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int chess_storage(DMB) *out){ -//void fir(int *y, int *x, int *h) - static int chess_storage(DMA) *p_x; // pointer to the start of the last added block - static int chess_storage(DMA) *p_h; // pointer to the start of the filter coefficients - static int chess_storage(DMB) *p_y; // pointer to the output port - - p_y = out; - - int *px_start = ptr_fir_lms_delay_line->ptr_start; - int *ph_start = ptr_fir_lms_coeffs->ptr_current; - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - - int coef1, coef2; - int dat1, dat2; - - for(unsigned int n=0; nptr_current; - p_h = ptr_fir_lms_coeffs->ptr_current; - p_y = out; - - for(int n=0; nptr_current, n, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even - accum_t sum = to_accum(0); - for(int k=0; k < ptr_fir_lms_coeffs->buffer_len; k+=2) chess_loop_range(1,) - { - sum += fract_mult(p_x[0] , p_h[k]); - sum += fract_mult(p_x[1] , p_h[k+1]); - - sum = to_accum(rnd_saturate(sum)); - p_x = cyclic_add(p_x, -2, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even - } - *p_y++ = extract_high(sum); - } -} -/* "out" is actually an input to the function and is the output of the fir_lms filter system*/ -void adapt_coeffs_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ // only works for even delay line sample pointers!! - - int *p_x = ptr_fir_lms_delay_line->ptr_current; // pointer to the start of the last added block - TODO: doublecheck this - might be wrong because the pointer actually points to the end of the block! - int *p_x_start = ptr_fir_lms_delay_line->ptr_start; - int *p_h = ptr_fir_lms_coeffs->ptr_current; // pointer to the start of the filter coefficients - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - int prod0, x0, x1, h0, h1; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - prod0 = rnd_saturate(acc_C); - //acc_D = fract_mult(mu, out1); - //prod1 = rnd_saturate(acc_C); - for (int i=0; iptr_current; // chess_storage(DMB) - int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; - int *p_h = ptr_fir_lms_coeffs->ptr_current; - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - - int d0,d1,h0,h1; - accum_t acc1_A = to_accum(0); - accum_t acc1_B = to_accum(0); - accum_t acc1_C; - - // iterate over the coefficients to calculate the filter on x - the canceller - /* Abschaetzung cycles per 2coefficient: - dual - load : 1 - dual mac and dual load: 1 - -> 48/2 * 2 = 48 cycles for 48 coefficents - */ - for (int i=0; i < n_coeff; i+=2) chess_loop_range(1,){ - // Use dual load and dual pointer update - d0 = *p_x0; - h0 = *p_h; - p_h++; - p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); - - d1 = *p_x0; - h1 = *p_h; - p_h++; - p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); - - acc1_A+=fract_mult(d0, h0); - acc1_B+=fract_mult(d1, h1); - #ifndef LPDSP16 - acc1_A = to_accum(rnd_saturate(acc1_A)); - acc1_B = to_accum(rnd_saturate(acc1_B)); - #endif - - } - // Calculate the output sample - acc1_C = acc1_A + acc1_B; - //out32 = rnd_saturate(acc1_A); - #ifdef LPDSP16 - return rnd_saturate16(acc1_C); - #else - return rnd_saturate(acc1_C); - #endif -} - -void static inline adapt_coeffs_lpdsp32_single_v1(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ - - int chess_storage(DMA) *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer - //int chess_storage(DMA) *p_h1 = ptr_fir_lms_coeffs->ptr_start; //coeff store pointer - int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - int chess_storage(DMB) *p_x1 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - - p_x1 = cyclic_add(p_x1, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); - - int prod, x0, x1, h0, h1; - int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - - accum_t acc_A, acc_B; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - #ifdef LPDSP16 - prod = rnd_saturate16(acc_C); - #else - prod = rnd_saturate(acc_C); - #endif - /* Abschätzung cycles per 2 coefficient: - dual load coeffs: 1 - single load tab value: 2 - dual mac: 1 - dual rnd_sat - store: 1 - load/store hazard nop: 1 - */ - for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){ - // Calculate the coefficient wise adaption - #ifdef PLATFORM_GENERIC - lldecompose(*((long long *)p_h0), &h0, &h1); - #else - lldecompose(*((long long *)p_h0), h0, h1); - #endif - - acc_A = to_accum(h0); - acc_B = to_accum(h1); - - #ifdef LPDSP16 - acc_A += fract_mult(prod, *p_x0) << 16; // TODO: This could be further optimized by using all 4 available accums? - acc_B += fract_mult(prod, *p_x1) << 16; - #else - acc_A += fract_mult(prod, *p_x0); // TODO: This could be further optimized by using all 4 available accums? - acc_B += fract_mult(prod, *p_x1); - #endif - - p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len); - p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len); - - // update the current filter coefficients - dual sat; dual store - *((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed - p_h0+=2; - } -} - -void static inline adapt_coeffs_lpdsp32_single_leaky(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ - - int chess_storage(DMA) *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer - //int chess_storage(DMA) *p_h1 = ptr_fir_lms_coeffs->ptr_start; //coeff store pointer - int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - int chess_storage(DMB) *p_x1 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - - p_x1 = cyclic_add(p_x1, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); - - int prod, x0, x1, h0, h1; - int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - - accum_t acc_A, acc_B; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - #ifdef LPDSP16 - prod = rnd_saturate16(acc_C); - #else - prod = rnd_saturate(acc_C); - #endif - - for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){ - // Calculate the coefficient wise adaption - #ifdef PLATFORM_GENERIC - lldecompose(*((long long *)p_h0), &h0, &h1); - #else - lldecompose(*((long long *)p_h0), h0, h1); - #endif - - acc_A = fract_mult(h0, leak); // leaky - acc_B = fract_mult(h1, leak); - - acc_A += fract_mult(prod, *p_x0); // TODO: This could be further optimized by using all 4 available accums? - acc_B += fract_mult(prod, *p_x1); - - p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len); - p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len); - - // update the current filter coefficients - dual sat; dual store - #ifdef LPDSP16 - *((long long *)p_h0) = llcompose(rnd_saturate16(acc_A), rnd_saturate16(acc_B));//load/store hazard ! - 1 nop is needed - #else - *((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed - #endif - p_h0+=2; - } -} - -void adapt_coeffs_generic_single(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ - int *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer - int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - - int prod; - - accum_t acc_A, acc_B; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - prod = rnd_saturate(acc_C); - for (int i=0; i< ptr_fir_lms_delay_line->buffer_len; i++){ - // Calculate the coefficient wise adaption - acc_A = to_accum(p_h0[i]); - acc_A += fract_mult(prod, *p_x0); - p_x0 = cyclic_add(p_x0, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); - p_h0[i]=rnd_saturate(acc_A); - } -} -#endif - -void init( - SingleSignalPath *cSensorSignal, - SingleSignalPath *accSensorSignal, - //BufferPtrDMB *ptr_fir_lms_delay_line, - //BufferPtr *ptr_fir_lms_coeffs, - double *b_c, - double *b_acc, - int delay_c, - int delay_acc, - double weight_c, - double weight_acc, - double lms_mu, - int lms_fir_num_coeffs - ){ - #ifdef LPDSP16 - int scale_bits=15; - #else - int scale_bits=31; - #endif - - sig_init_preemph_coef(cSensorSignal, b_c[0], b_c[1], b_c[2], b_c[3], b_c[4], scale_bits); - sig_init_delay(cSensorSignal, delay_c); - sig_init_weight(cSensorSignal, weight_c, scale_bits); - - // // Initialize the accSensor signal subpath - sig_init_preemph_coef(accSensorSignal, b_acc[0], b_acc[1], b_acc[2], b_acc[3], b_acc[4], scale_bits); - sig_init_delay(accSensorSignal, delay_acc); - sig_init_weight(accSensorSignal, weight_acc, 31); - - // initialize the lms filter parameters - int scale = pow(2, scale_bits) - 1; - mu = lms_mu * scale; - // initialize the fir_lms buffers - #if BLOCK_LEN == 1 - sig_init_buffer_DMB(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs, MAX_FIR_COEFFS); - sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); - #else - sig_init_buffer(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs + BLOCK_LEN, BLOCK_LEN + MAX_FIR_COEFFS); - sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); - - #endif - for (int i = 0; i < lms_fir_num_coeffs; i++) { - ptr_fir_lms_delay_line.ptr_start[i] = 0; - ptr_fir_lms_coeffs.ptr_start[i] = 0; - } -} - -// Data d(cSensor) is signal + noise -// x (accSensor) is reference noise signal -void calc( - SingleSignalPath *cSensorSignal, - SingleSignalPath *accSensorSignal, - // BufferPtrDMB *ptr_fir_lms_delay_line, - // BufferPtr *ptr_fir_lms_coeffs, - OutputMode output_mode, - #if BLOCK_LEN != 1 - int16_t *cSensor, - int16_t *accSensor, - #else - int16_t volatile chess_storage(DMB) *cSensor, - int16_t volatile chess_storage(DMB) *accSensor, - #endif - - int16_t volatile chess_storage(DMB) *out_16 - - ){ - static int chess_storage(DMA) c_block_pre[BLOCK_LEN]; - static int chess_storage(DMA) acc_block_pre[BLOCK_LEN]; - static int chess_storage(DMA) cSensor_32[BLOCK_LEN]; - static int chess_storage(DMA) accSensor_32[BLOCK_LEN]; - - static int chess_storage(DMB) acc_block_filt[BLOCK_LEN]; - static int chess_storage(DMB) out_32[BLOCK_LEN]; - - static int chess_storage(DMA) *p_c_block_pre =c_block_pre; - static int chess_storage(DMA) *p_acc_block_filt =acc_block_pre; - static int chess_storage(DMB) *p_out_32=out_32; - - - #ifdef LPDSP16 - for (uint32_t i=0; idelay_line, BLOCK_LEN); - #endif - break; - case OUTPUT_MODE_FIR_LMS: // apply lms filter on cSensor signal - #if BLOCK_LEN == 1 - // Increment the buffer pointer and set the current sample to the delay line - sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); - //*ptr_fir_lms_delay_line.ptr_current = acc_block_pre[0]; - //ptr_fir_lms_delay_line.ptr_current = cyclic_add(ptr_fir_lms_delay_line.ptr_current, 1, ptr_fir_lms_delay_line.ptr_start, ptr_fir_lms_delay_line.buffer_len); - - // Calculate the fir filter output on acc to get the canceller - acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); - // Calculate the ouptut signal by c_block_pre - acc_block_filt - out_32[0] = c_block_pre[0] - acc_block_filt[0]; - //if (counter >= 0){ //TODO: implement this and make it configurable - // Calculate the coefficient adaptation - //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - adapt_coeffs_lpdsp32_single_v1(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - //counter=0; - // } - // else{ - // counter++; - // } - #else // Block processing - // Put the next block to the buffer - sig_circular_buffer_ptr_put_block(&ptr_fir_lms_delay_line, acc_block_pre); - // Calculate the fir filter output on acc to get the canceller - sig_calc_fir_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, acc_block_filt); - - // Calculate the ouptut signal by c_block_pre - acc_block_filt - for (int i=0; idelay_line, acc_block_pre[i]); - //acc_block_filt[i]= sig_calc_fir_lpdsp32_single(lms); - out_32[i] = c_block_pre[i] - acc_block_filt[i]; // 15 cycles with 4 samples/block - // adapt the coefficients with respect to the last sample in the block - } - //adapt_coeffs_lpdsp32_single(lms, out_32[1]); - adapt_coeffs_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - // Increment the buffer pointer to get ready for the next block - //sig_cirular_buffer_ptr_increment(&lms->delay_line, BLOCK_LEN); - #endif - break; - case OUTPUT_MODE_FIR_LMS_LEAKY: // apply lms filter on cSensor signal - // Increment the buffer pointer and set the current sample to the delay line - sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); - - // Calculate the fir filter output on acc to get the canceller - acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); - // Calculate the ouptut signal by c_block_pre - acc_block_filt - out_32[0] = c_block_pre[0] - acc_block_filt[0]; - //if (counter >= 0){ //TODO: implement this and make it configurable - // Calculate the coefficient adaptation - //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - adapt_coeffs_lpdsp32_single_leaky(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - - break; - default: // MUTED - for (uint32_t i=0; i> BITSHIFT_16_TO_32); // 12 cycles for blocksize 4 //TODO: use rnd_saturate(out_32[i] >> input_nbit_bitshift) - #endif - - - } - //out_16[0] = cSensor[0]; -} -