From bd94b0e79f14aa71f3a3b5d478677b1696dd791f Mon Sep 17 00:00:00 2001 From: Patrick Hangl Date: Wed, 14 Jan 2026 15:57:21 +0100 Subject: [PATCH] =?UTF-8?q?Code=20gek=C3=BCrzt=20-=20kompilliert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.c | 102 +++----- signalProcessing/signal_path.c | 427 ++------------------------------- 2 files changed, 54 insertions(+), 475 deletions(-) diff --git a/main.c b/main.c index f1d1a60..25b63ce 100644 --- a/main.c +++ b/main.c @@ -1,10 +1,11 @@ +// BLOCK LEN 1 und MAX_FIR_COEFFS 64 werden vom Compiler mitgegeben + //#define SIMULATE + #ifdef SIMULATE #include #endif -#define BLOCK_LEN 1 // define block length for processing - currently only 1 is supported - #include #include "signalProcessing/include/signal_path.h" @@ -15,48 +16,32 @@ // Shared Memory von ARM und DSP definieren #define INPUT_PORT0_ADD 0x800000 // Feste Adressen für Eingangsdaten im Shared Memory -//#define INPUT_PORT1_ADD INPUT_PORT0_ADD + 2 //DMB - warum auskommentiert? -#define OUTPUT_PORT_ADD (INPUT_PORT0_ADD + 16) // Feste Adressen für Ausgangsdatensdaten im Shared Memory, 16 Byte von Eingangsadresse Weg (PS: 2* for 2 channels) +#define OUTPUT_PORT_ADD (INPUT_PORT0_ADD + 16) // Feste Adressen für Ausgangsdatensdaten im Shared Memory, 16 Byte von Eingangsadresse Weg -//Chess Compiler spezifisch: Interrupt-Register festlegen um ARM zu kontaktieren nach fertiger Berechnung (PS: Define the interrupt register to notify the ARM of a completed operation) -volatile static unsigned char chess_storage(DMIO:CSS_CMD) CssCmdGen; +//Chess Compiler spezifisch: Interrupt-Register festlegen um ARM zu kontaktieren nach fertiger Berechnung +volatile static unsigned char chess_storage(DMIO:CSS_CMD) css_cmd_flag; // Interrupt-Flag, welche von ARM gesetzt wird, wenn eine Berechnung gewünscht ist -static volatile int actionRequired; +static volatile int action_required; // Structs anlegen für die Signalpfade - hier werden Konfigurationen abgelegt(signal_path.h) -static SingleSignalPath cSensorSignal; -static SingleSignalPath accSensorSignal; +static SingleSignalPath corrupted_signal; +static SingleSignalPath reference_noise_signal; -// Umschaltung zwischen sampleweiser und blockweiser Verarbeitung -// Sampleweise Verarbeitung: Adresse aus Shared Memory wird direkt verwendet -// Blockweise Verarbeitung: Blöcke kopiert und verarbeitet? Offensichtlicch nicht genutzt bisher -#if BLOCK_LEN == 1 -static volatile int16_t chess_storage(DMB:INPUT_PORT0_ADD) intputPort[4]; //TODO: if BLOCK_LEN >1 is used, the data is interleaved: ch0ch1, ch0ch1 .... chess_storage(DMA % alignof(int)) ? -//static volatile int16_t chess_storage(DMB:INPUT_PORT1_ADD) intputPort1[BLOCK_LEN]; -static volatile int16_t chess_storage(DMB:OUTPUT_PORT_ADD) outputPort[4]; -static volatile int16_t chess_storage(DMB) *inPtr0; -static volatile int16_t chess_storage(DMB) *inPtr1; -static volatile int16_t chess_storage(DMB) *outPtr; -static volatile int16_t chess_storage(DMB) sample; -static volatile int16_t chess_storage(DMB) *sample_ptr; -#else -// Int-Array für Blockverarbeitung im Shared Memory DMA anlegen (Eingabe) -static int16_t chess_storage(DMA) intputPort[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) TODO: volatile? chess_storage(DMA % alignof(int)) -//static int16_t chess_storage(DMA) intputPort1[BLOCK_LEN]; //chess_storage(DMA:INPUT_PORT_ADD) -// Int-Array für Blockverarbeitung im Shared Memory DMA anlegen (Ausgabe) -static int16_t chess_storage(DMB) outputPort[BLOCK_LEN]; // chess_storage(DMB:OUTPUT_PORT_ADD) TODO: determine output port add -#endif +static volatile int16_t chess_storage(DMB:INPUT_PORT0_ADD) input_port[4]; //Array mit 4x16 Bit Einträgen auf 2x32 Bit Registern - nur die ersten 2 werden genutzt +static volatile int16_t chess_storage(DMB:OUTPUT_PORT_ADD) output_port[4]; //Array mit 4x16 Bit Einträgen auf 2x32 Bit Registern - alle werden genutzt +static volatile int16_t chess_storage(DMB) *input_pointer_0; +static volatile int16_t chess_storage(DMB) *input_pointer_1; +static volatile int16_t chess_storage(DMB) *output_pointer; +static volatile int16_t chess_storage(DMB) *sample_pointer; +static volatile int16_t chess_storage(DMB) sample; //Speicherplatz für Ergebnis der calc()-Funktion //void isr0() ist eine Interrupt Service Routine Funktion, welche als C Funktion deklariert wird // property (isr) ist Chess Compiler spezifisch und kennzeichnet eine Funktion als Interrupt Service Routine -//wird Interrupt getriggert, wird actionRequired auf 1 gesetzt - etwas muss dannpassieren +//wird Interrupt getriggert, wird action_required auf 1 gesetzt - etwas muss dannpassieren extern "C" void isr0() property (isr) { - actionRequired = 1; + action_required = 1; } -#ifdef __chess__ -extern "C" -#endif int main(void) { // Enum, welcher den Ausgabemodus definiert - wird in calc()-Funktion verwendet @@ -65,11 +50,11 @@ int main(void) { // Alle 0 bis auf b[0] -> einfacher Gain auf 0,75 double b0[5]={0.75, 0., 0., 0., 0.}; double b1[5]={0.75, 0., 0., 0., 0.}; - int N_lms_fir_coeffs = MAX_FIR_COEFFS; // 64 Koeffizienten für ANR + int coefficients = MAX_FIR_COEFFS; // 64 Koeffizienten für ANR // Signale initialisieren, oben angelegte Structs mit Parametern füllen init( - &cSensorSignal, &accSensorSignal, //Signal-Structs + &corrupted_signal, &reference_noise_signal, //Signal-Structs b0, // Biqquad Koeffizienten C-Sensor b1, // Biqquad Koeffizienten Acc-Sensor 2, // Sample Delay C-Sensor @@ -77,20 +62,9 @@ int main(void) { 0.9, //Gewichtung C-Sensor 0.9, //Gewichtung Acc-Sensor 0.01, // Mu - N_lms_fir_coeffs // Anzahl Filterkoeffizienten + coefficients // Anzahl Filterkoeffizienten ); - // Fixer Filter wenn nicht adaptiv - if (mode == OUTPUT_MODE_FIR){ - for (int i=0; i> 16); //round -} - - int sig_init_buffer(BufferPtr *buffer, int *buffer_start_add, int length, int max_buffer_len) { buffer->buffer_len = length; buffer->ptr_start = buffer_start_add; @@ -155,6 +129,7 @@ void static inline sig_circular_buffer_ptr_put_block(BufferPtr *buffer, int* blo } } +//Initialisierungsfunktion für Biquad Filter Koeffizienten void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits) { // Wenn b0=1 und Rest 0 -> kein Filter weil effektiv 1*Xn if (b0 == 1. && b1 == 0. && b2 == 0. && a1 == 0. && a2 == 0.) { @@ -178,6 +153,7 @@ int sig_init_delay(SingleSignalPath *signal, int n_delay) { return sig_init_buffer(&signal->delay_buffer, signal->_delay_buffer, n_delay, MAX_DELAY_SAMPS); } +//Initialisierungsfunktion für Gewichtung void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits) { // Wenn Gewichtung 1 -> kein Effekt if (weight == 1.) { @@ -201,13 +177,9 @@ int sig_calc_biquad(SingleSignalPath *signal, int x) { accum_t sum = fract_mult(x, signal->b_preemph[0]) + fract_mult(signal->_xd[0], signal->b_preemph[1]) + fract_mult(signal->_xd[1], signal->b_preemph[2]) + fract_mult(signal->_yd[0], signal->b_preemph[3]) + - fract_mult(signal->_yd[1],signal->b_preemph[4]); - - #ifdef LPDSP16 - int y = rnd_saturate16(sum << 1); - #else + fract_mult(signal->_yd[1],signal->b_preemph[4]); int y = rnd_saturate(sum << 1); - #endif + signal->_xd[1] = signal->_xd[0]; signal->_xd[0] = x; @@ -238,161 +210,6 @@ int sig_calc_weight(SingleSignalPath *signal, int x) { return rnd_saturate(acc); } -#if BLOCK_LEN!=1 // Block processing -/*lpdsp32 fir filter example adapted from user guide -#define NS 256 //No. of samples -#define N 64 //No. of filter coefficients or No. of tap weights -int chess_storage(DMB) y[NS]; //Output Signal -int chess_storage(DMA %(sizeof(long long))) x[NS+N-1]; //Input Signal -//Filter coefficients or tap weights -int chess_storage(DMA %(sizeof(long long))) h[N]; - */ -void sig_calc_fir_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int chess_storage(DMB) *out){ -//void fir(int *y, int *x, int *h) - static int chess_storage(DMA) *p_x; // pointer to the start of the last added block - static int chess_storage(DMA) *p_h; // pointer to the start of the filter coefficients - static int chess_storage(DMB) *p_y; // pointer to the output port - - p_y = out; - - int *px_start = ptr_fir_lms_delay_line->ptr_start; - int *ph_start = ptr_fir_lms_coeffs->ptr_current; - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - - int coef1, coef2; - int dat1, dat2; - - for(unsigned int n=0; nptr_current; - p_h = ptr_fir_lms_coeffs->ptr_current; - p_y = out; - - for(int n=0; nptr_current, n, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even - accum_t sum = to_accum(0); - for(int k=0; k < ptr_fir_lms_coeffs->buffer_len; k+=2) chess_loop_range(1,) - { - sum += fract_mult(p_x[0] , p_h[k]); - sum += fract_mult(p_x[1] , p_h[k+1]); - - sum = to_accum(rnd_saturate(sum)); - p_x = cyclic_add(p_x, -2, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); // can be done in increments of two, assuming the buffer pointer increment is even - } - *p_y++ = extract_high(sum); - } -} -/* "out" is actually an input to the function and is the output of the fir_lms filter system*/ -void adapt_coeffs_lpdsp32_block(BufferPtr *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ // only works for even delay line sample pointers!! - - int *p_x = ptr_fir_lms_delay_line->ptr_current; // pointer to the start of the last added block - TODO: doublecheck this - might be wrong because the pointer actually points to the end of the block! - int *p_x_start = ptr_fir_lms_delay_line->ptr_start; - int *p_h = ptr_fir_lms_coeffs->ptr_current; // pointer to the start of the filter coefficients - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - int prod0, x0, x1, h0, h1; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - prod0 = rnd_saturate(acc_C); - //acc_D = fract_mult(mu, out1); - //prod1 = rnd_saturate(acc_C); - for (int i=0; iptr_start; //coeff load pointer - //int chess_storage(DMA) *p_h1 = ptr_fir_lms_coeffs->ptr_start; //coeff store pointer - int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - int chess_storage(DMB) *p_x1 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - - p_x1 = cyclic_add(p_x1, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); - - int prod, x0, x1, h0, h1; - int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; - int delay_line_len = ptr_fir_lms_delay_line->buffer_len; - int n_coeff = ptr_fir_lms_coeffs->buffer_len; - - accum_t acc_A, acc_B; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - #ifdef LPDSP16 - prod = rnd_saturate16(acc_C); - #else - prod = rnd_saturate(acc_C); - #endif - - for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){ - // Calculate the coefficient wise adaption - #ifdef PLATFORM_GENERIC - lldecompose(*((long long *)p_h0), &h0, &h1); - #else - lldecompose(*((long long *)p_h0), h0, h1); - #endif - - acc_A = fract_mult(h0, leak); // leaky - acc_B = fract_mult(h1, leak); - - acc_A += fract_mult(prod, *p_x0); // TODO: This could be further optimized by using all 4 available accums? - acc_B += fract_mult(prod, *p_x1); - - p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len); - p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len); - - // update the current filter coefficients - dual sat; dual store - #ifdef LPDSP16 - *((long long *)p_h0) = llcompose(rnd_saturate16(acc_A), rnd_saturate16(acc_B));//load/store hazard ! - 1 nop is needed - #else - *((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed - #endif - p_h0+=2; - } -} - -void adapt_coeffs_generic_single(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){ - int *p_h0 = ptr_fir_lms_coeffs->ptr_start; //coeff load pointer - int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; // chess_storage(DMB) - - int prod; - - accum_t acc_A, acc_B; - - // Calculate the first term of the coefficient adaption - accum_t acc_C = fract_mult(mu, out); - prod = rnd_saturate(acc_C); - for (int i=0; i< ptr_fir_lms_delay_line->buffer_len; i++){ - // Calculate the coefficient wise adaption - acc_A = to_accum(p_h0[i]); - acc_A += fract_mult(prod, *p_x0); - p_x0 = cyclic_add(p_x0, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); - p_h0[i]=rnd_saturate(acc_A); - } -} -#endif - void init( SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal, @@ -585,11 +318,7 @@ void init( double lms_mu, int lms_fir_num_coeffs ){ - #ifdef LPDSP16 - int scale_bits=15; - #else int scale_bits=31; - #endif // C-Sensor Initialisierung: Biquad, Delay, Weight skalieren und in Struct schreiben sig_init_preemph_coef(cSensorSignal, b_c[0], b_c[1], b_c[2], b_c[3], b_c[4], scale_bits); @@ -605,14 +334,9 @@ void init( int scale = pow(2, scale_bits) - 1; mu = lms_mu * scale; // Buffer Initialisierung (Delay Line und Koeffizienten) und anschließend alle Werte auf 0 setzen - #if BLOCK_LEN == 1 - sig_init_buffer_DMB(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs, MAX_FIR_COEFFS); - sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); - #else - sig_init_buffer(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs + BLOCK_LEN, BLOCK_LEN + MAX_FIR_COEFFS); - sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); + sig_init_buffer_DMB(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs, MAX_FIR_COEFFS); + sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS); - #endif for (int i = 0; i < lms_fir_num_coeffs; i++) { ptr_fir_lms_delay_line.ptr_start[i] = 0; ptr_fir_lms_coeffs.ptr_start[i] = 0; @@ -624,17 +348,9 @@ void init( void calc( SingleSignalPath *cSensorSignal, SingleSignalPath *accSensorSignal, - // BufferPtrDMB *ptr_fir_lms_delay_line, - // BufferPtr *ptr_fir_lms_coeffs, OutputMode output_mode, - #if BLOCK_LEN != 1 - int16_t *cSensor, - int16_t *accSensor, - #else int16_t volatile chess_storage(DMB) *cSensor, int16_t volatile chess_storage(DMB) *accSensor, - #endif - int16_t volatile chess_storage(DMB) *out_16 ){ @@ -651,137 +367,32 @@ void calc( static int chess_storage(DMB) *p_out_32=out_32; - #ifdef LPDSP16 - for (uint32_t i=0; idelay_line, BLOCK_LEN); - #endif - break; - case OUTPUT_MODE_FIR_LMS: // apply lms filter on cSensor signal - #if BLOCK_LEN == 1 - // Increment the buffer pointer and set the current sample to the delay line - sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); - //*ptr_fir_lms_delay_line.ptr_current = acc_block_pre[0]; - //ptr_fir_lms_delay_line.ptr_current = cyclic_add(ptr_fir_lms_delay_line.ptr_current, 1, ptr_fir_lms_delay_line.ptr_start, ptr_fir_lms_delay_line.buffer_len); + // apply lms filter on cSensor signal + // Increment the buffer pointer and set the current sample to the delay line + sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); + // Calculate the fir filter output on acc to get the canceller + acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); + // Calculate the ouptut signal by c_block_pre - acc_block_filt + out_32[0] = c_block_pre[0] - acc_block_filt[0]; + // Calculate the coefficient adaptation + adapt_coeffs_lpdsp32_single_v1(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - // Calculate the fir filter output on acc to get the canceller - acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); - // Calculate the ouptut signal by c_block_pre - acc_block_filt - out_32[0] = c_block_pre[0] - acc_block_filt[0]; - //if (counter >= 0){ //TODO: implement this and make it configurable - // Calculate the coefficient adaptation - //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - adapt_coeffs_lpdsp32_single_v1(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - //counter=0; - // } - // else{ - // counter++; - // } - #else // Block processing - // Put the next block to the buffer - sig_circular_buffer_ptr_put_block(&ptr_fir_lms_delay_line, acc_block_pre); - // Calculate the fir filter output on acc to get the canceller - sig_calc_fir_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, acc_block_filt); - - // Calculate the ouptut signal by c_block_pre - acc_block_filt - for (int i=0; idelay_line, acc_block_pre[i]); - //acc_block_filt[i]= sig_calc_fir_lpdsp32_single(lms); - out_32[i] = c_block_pre[i] - acc_block_filt[i]; // 15 cycles with 4 samples/block - // adapt the coefficients with respect to the last sample in the block - } - //adapt_coeffs_lpdsp32_single(lms, out_32[1]); - adapt_coeffs_lpdsp32_block(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - // Increment the buffer pointer to get ready for the next block - //sig_cirular_buffer_ptr_increment(&lms->delay_line, BLOCK_LEN); - #endif - break; - case OUTPUT_MODE_FIR_LMS_LEAKY: // apply lms filter on cSensor signal - // Increment the buffer pointer and set the current sample to the delay line - sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]); - - // Calculate the fir filter output on acc to get the canceller - acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs); - // Calculate the ouptut signal by c_block_pre - acc_block_filt - out_32[0] = c_block_pre[0] - acc_block_filt[0]; - //if (counter >= 0){ //TODO: implement this and make it configurable - // Calculate the coefficient adaptation - //adapt_coeffs_generic_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - adapt_coeffs_lpdsp32_single_leaky(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]); - - break; - default: // MUTED - for (uint32_t i=0; i> BITSHIFT_16_TO_32); // 12 cycles for blocksize 4 //TODO: use rnd_saturate(out_32[i] >> input_nbit_bitshift) - #endif - - } - //out_16[0] = cSensor[0]; + }