Files
DSP_Simulation/simulation/signal_processing/signal_path.c
2026-01-27 11:17:03 +01:00

398 lines
16 KiB
C
Raw Blame History

#include "include/signal_path.h"
#define BLOCK_LEN 1
/* Global variables decleration*/
static int counter=0;
static int mu;
static int leak=2147462173; //0.999 // (1 ? <20>?)
int chess_storage(DMB) fir_lms_delay_line[MAX_FIR_COEFFS]; //Int-Array für Acc-Sensors Samples (Delay Line) anlegen
int chess_storage(DMA % (sizeof(long long))) fir_lms_coeffs[MAX_FIR_COEFFS]; //Int-Array für Filterkoeffizienten anlegen
BufferPtrDMB chess_storage(DMB) ptr_fir_lms_delay_line;
BufferPtr ptr_fir_lms_coeffs;
#ifdef PLATFORM_GENERIC
// lpdsp32 functionallity moddeling functions
accum_t fract_mult(int a, int b){
long int a_long = a;
long int b_long = b;
return (b_long * a_long);
}
accum_t to_accum(int a){
long int a_long = (long int) a;
return a_long << 31;
}
int rnd_saturate(accum_t a){
return a >> 31;
}
int extract_high(accum_t a){
return a >> 31;
}
void lldecompose(unsigned long long l, int* int1, int* int2){
*int2 = (int)(l >> 32);
*int1 = (int)(l);
}
uint64_t llcompose(int a, int b) {
uint64_t result = (uint64_t)b; // Assign b to the higher 32 bits of the result
result <<= 32; // Shift the higher 32 bits to the left
result |= (uint32_t)a; // Bitwise OR operation with the lower 32 bits of a
return result;
}
// unsigned long long llcompose(int a, int b){
// unsigned long long l;
// l = a << 32;
// l |= b;
// return l;
//}
int* cyclic_add(int *ptr, int i_pp, int *ptr_start, int buffer_len){
int *p_ptr=ptr;
for (int i=0; i < abs(i_pp); i+=1){ // end of buffer wraparound
if (i_pp > 0){
p_ptr ++;
if (p_ptr >= ptr_start + buffer_len){
p_ptr=ptr_start;
}
}
else{ // start of buffer wraparound
p_ptr--;
if (p_ptr < ptr_start){
p_ptr=ptr_start + (buffer_len -1);
}
}
}
return p_ptr;
}
#endif
int sig_init_buffer(BufferPtr *buffer, int *buffer_start_add, int length, int max_buffer_len) {
buffer->buffer_len = length;
buffer->ptr_start = buffer_start_add;
buffer->ptr_current = buffer_start_add;
// initialize delay line with 0
for (int i = 0; i < length; i++) {
buffer_start_add[i] = 0;
}
if (length<max_buffer_len){
return 0;
}
else{
return 1;
}
}
int sig_init_buffer_DMB(BufferPtrDMB chess_storage(DMB) *buffer, int chess_storage(DMB) *buffer_start_add, int length, int max_buffer_len){
buffer->buffer_len = length;
buffer->ptr_start = buffer_start_add;
buffer->ptr_current = buffer_start_add;
// initialize delay line with 0
for (int i = 0; i < length; i++) {
buffer_start_add[i] = 0;
}
if (length<max_buffer_len){
return 0;
}
else{
return 1;
}
}
void sig_cirular_buffer_ptr_increment(BufferPtr *buffer, int i_incr){
buffer->ptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len);
}
void sig_cirular_buffer_ptr_increment_DMB(BufferPtrDMB *buffer, int i_incr){
buffer->ptr_current = cyclic_add(buffer->ptr_current, i_incr, buffer->ptr_start, buffer->buffer_len);
}
void sig_cirular_buffer_ptr_put_sample(BufferPtr *buffer, int sample){
*buffer->ptr_current = sample;
buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len);
}
void sig_cirular_buffer_ptr_put_sample_DMB(BufferPtrDMB chess_storage(DMB) *buffer, int sample){
*buffer->ptr_current = sample; //Sample des Acc-Sensors wird in Adresse geschrieben, auf die der Pointer zeigt
buffer->ptr_current = cyclic_add(buffer->ptr_current, 1, buffer->ptr_start, buffer->buffer_len); //Pointer wird inkrementiert
}
void static inline sig_circular_buffer_ptr_put_block(BufferPtr *buffer, int* block){
// increment pointer to oldest block
//buffer->ptr_current = cyclic_add(buffer->ptr_current, BLOCK_LEN, buffer->ptr_start, buffer->buffer_len);
// load the next block
for (int i=0; i<BLOCK_LEN; i+=2){
buffer->ptr_current[0] = block[i]; // TODO: use llcompose
buffer->ptr_current[1] = block[i+1];
buffer->ptr_current = cyclic_add(buffer->ptr_current, 2, buffer->ptr_start, buffer->buffer_len);
}
}
//Initialisierungsfunktion f<>r Biquad Filter Koeffizienten
void sig_init_preemph_coef(SingleSignalPath *signal, double b0, double b1, double b2, double a1, double a2, int scale_bits) {
// Wenn b0=1 und Rest 0 -> kein Filter weil effektiv 1*Xn
if (b0 == 1. && b1 == 0. && b2 == 0. && a1 == 0. && a2 == 0.) {
signal->preemph_activated = 0;
}
else{
signal->preemph_activated = 1; // Schreibe Eintrag in Struct
signal->_preemph_scale_nbits = scale_bits; // Schreibe Eintrag in Struct - wieviel Bits wird skaliert
int scale = pow(2, scale_bits) - 1; //2^n -1 Skalierung
// Skaliere Koeffizienten zu Interger und schreibe Eintrag in Struct
signal->b_preemph[0] = b0 * scale;
signal->b_preemph[1] = b1 * scale;
signal->b_preemph[2] = b2 * scale;
signal->b_preemph[3] = a1 * scale;
signal->b_preemph[4] = a2 * scale;
}
}
/*Initialization functions - make sure all of them were called to ensure functionality*/
int sig_init_delay(SingleSignalPath *signal, int n_delay) {
return sig_init_buffer(&signal->delay_buffer, signal->_delay_buffer, n_delay, MAX_DELAY_SAMPS);
}
//Initialisierungsfunktion f<>r Gewichtung
void sig_init_weight(SingleSignalPath *signal, double weight, int scale_nbits) {
// Wenn Gewichtung 1 -> kein Effekt
if (weight == 1.) {
signal->weight_actived = 0;
}
// Wenn Gewichtung != 1 -> Zu Integer skalieren und Eintrag in Struct schreiben
else{
signal->weight_actived = 1;
int scale = pow(2, scale_nbits) - 1;
signal->weight = weight * scale;
signal->_weight_scale_nbits = scale_nbits;
}
}
/*Calculator functions for the given signal path*/
/*Calculate one biquad filter element*/
int sig_calc_biquad(SingleSignalPath *signal, int x) {
if (signal->preemph_activated == 0) {
return x;
}
accum_t sum =
fract_mult(x, signal->b_preemph[0]) + fract_mult(signal->_xd[0], signal->b_preemph[1]) +
fract_mult(signal->_xd[1], signal->b_preemph[2]) + fract_mult(signal->_yd[0], signal->b_preemph[3]) +
fract_mult(signal->_yd[1],signal->b_preemph[4]);
int y = rnd_saturate(sum << 1);
signal->_xd[1] = signal->_xd[0];
signal->_xd[0] = x;
signal->_yd[1] = signal->_yd[0];
signal->_yd[0] = y;
return y;
}
int inline sig_get_delayed_sample(SingleSignalPath *signal) {
return *signal->delay_buffer.ptr_current;
}
int sig_delay_buffer_load_and_get(SingleSignalPath *signal, int x) {
if (signal->delay_buffer.buffer_len == 0) {
return x;
}
int out = *signal->delay_buffer.ptr_current;
*signal->delay_buffer.ptr_current = x;
sig_cirular_buffer_ptr_increment(&signal->delay_buffer, 1);
return out;
}
int sig_calc_weight(SingleSignalPath *signal, int x) {
if (signal->weight_actived == 0) {
return x;
}
accum_t acc = fract_mult(x, signal->weight);
return rnd_saturate(acc);
}
int inline sig_calc_fir_lpdsp32_single(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs){
// Filterkoeffizienten mit Acc-Sensor Samples multiplizieren und aufsummieren um Akkumulator Output des adaptiven Filters zu erhalten
//Pointer für Koeffizienten und Delay Line Samples anlegen
int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current;
int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start;
int *p_h = ptr_fir_lms_coeffs->ptr_current;
int delay_line_len = ptr_fir_lms_delay_line->buffer_len;
int n_coeff = ptr_fir_lms_coeffs->buffer_len;
//Variablen und Akkumulatoren (72-Bit) anlegen
int d0,d1,h0,h1;
accum_t acc1_A = to_accum(0);
accum_t acc1_B = to_accum(0);
accum_t acc1_C;
// iterate over the coefficients to calculate the filter on x - the canceller
/* Abschaetzung cycles per 2coefficient:
dual - load : 1
dual mac and dual load: 1
-> 48/2 * 2 = 48 cycles for 48 coefficents
*/
// In 2er Schritten durch die Koeffizienten iterieren, immer 2 Samples und 2 Koeffizienten pro Schleifendurchlauf -> DUAL LOAD und DUAL MAC
for (int i=0; i < n_coeff; i+=2) chess_loop_range(1,){
d0 = *p_x0; //Sample 1 aus Delay Line
h0 = *p_h; //Koeffizient 1 aus Koeffizienten Array
p_h++; //Koeffizienten-Pointer inkrementieren
p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); //Delay-Line-Pointer dekrementieren (rueckwaerts durch Delay Line)
d1 = *p_x0; //Sample 2 aus Delay Line
h1 = *p_h; //Koeffizient 2 aus Koeffizienten Array
p_h++; //Koeffizienten-Pointer inkrementieren
p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); //Delay-Line-Pointer dekrementieren (rueckwaerts durch Delay Line)
acc1_A+=fract_mult(d0, h0); //Akkumulator 1 mit Sample 1 * Koeffizient 1 addieren
acc1_B+=fract_mult(d1, h1); //Akkumulator 2 mit Sample 2 * Koeffizient 2 addieren
}
// Akkumulatoren addieren um das Filterergebnis zu erhalten
acc1_C = acc1_A + acc1_B;
return rnd_saturate(acc1_C);
}
void static inline adapt_coeffs_lpdsp32_single_v1(BufferPtrDMB chess_storage(DMB) *ptr_fir_lms_delay_line, BufferPtr *ptr_fir_lms_coeffs, int out){
int chess_storage(DMA) *p_h0 = ptr_fir_lms_coeffs->ptr_start; //Pointer auf Filterkoeffizienten-Array
int chess_storage(DMB) *p_x0 = ptr_fir_lms_delay_line->ptr_current; //Current-Pointer 1 auf Delay-Line Array
int chess_storage(DMB) *p_x1 = ptr_fir_lms_delay_line->ptr_current; //Current-Pointer 2 auf Delay-Line Array
int chess_storage(DMB) *px_start = ptr_fir_lms_delay_line->ptr_start; //Start-Pointer auf Delay-Line Array
int delay_line_len = ptr_fir_lms_delay_line->buffer_len; // Länge des Delay-Line Arrays
int n_coeff = ptr_fir_lms_coeffs->buffer_len; // Anzahl der Filterkoeffizienten
int prod, x0, x1, h0, h1;
p_x1 = cyclic_add(p_x1, -1, ptr_fir_lms_delay_line->ptr_start, ptr_fir_lms_delay_line->buffer_len); //Current-Pointer 2 dekrementieren um 1
accum_t acc_A, acc_B;
accum_t acc_C = fract_mult(mu, out); //Korrektursignal * mu um Filterkoeffizienten anzupassen
prod = rnd_saturate(acc_C);
/* Abschätzung cycles per 2 coefficient:
dual load coeffs: 1
single load tab value: 2
dual mac: 1
dual rnd_sat - store: 1
load/store hazard nop: 1
*/
for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){
// Calculate the coefficient wise adaption
#ifdef PLATFORM_GENERIC
lldecompose(*((long long *)p_h0), &h0, &h1);
#else
lldecompose(*((long long *)p_h0), h0, h1);
#endif
acc_A = to_accum(h0);
acc_B = to_accum(h1);
acc_A += fract_mult(prod, *p_x0);
acc_B += fract_mult(prod, *p_x1);
p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len);
p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len);
// Filterkoeffizienten updaten - dual sat; dual store
*((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed
p_h0+=2;
}
}
void init(
SingleSignalPath *cSensorSignal,
SingleSignalPath *accSensorSignal,
double *b_c,
double *b_acc,
int delay_c,
int delay_acc,
double weight_c,
double weight_acc,
double lms_mu,
int lms_fir_num_coeffs
){
int scale_bits=31;
// C-Sensor Initialisierung: Biquad, Delay, Weight skalieren und in Struct schreiben
sig_init_preemph_coef(cSensorSignal, b_c[0], b_c[1], b_c[2], b_c[3], b_c[4], scale_bits);
sig_init_delay(cSensorSignal, delay_c);
sig_init_weight(cSensorSignal, weight_c, scale_bits);
// Acc-Sensor Initialisierung: Biquad, Delay, Weight skalieren und in Struct schreiben
sig_init_preemph_coef(accSensorSignal, b_acc[0], b_acc[1], b_acc[2], b_acc[3], b_acc[4], scale_bits);
sig_init_delay(accSensorSignal, delay_acc);
sig_init_weight(accSensorSignal, weight_acc, 31);
//Mu Skalierung und in globale Variable schreiben
int scale = pow(2, scale_bits) - 1;
mu = lms_mu * scale;
// Buffer Initialisierung (Delay Line und Koeffizienten)
sig_init_buffer_DMB(&ptr_fir_lms_delay_line, fir_lms_delay_line, lms_fir_num_coeffs, MAX_FIR_COEFFS);
sig_init_buffer(&ptr_fir_lms_coeffs, fir_lms_coeffs, lms_fir_num_coeffs, MAX_FIR_COEFFS);
// Einträge in Delay Line und Koeffizienten-Array auf 0 setzen
for (int i = 0; i < lms_fir_num_coeffs; i++) {
ptr_fir_lms_delay_line.ptr_start[i] = 0;
ptr_fir_lms_coeffs.ptr_start[i] = 0;
}
}
// Data d(cSensor) is signal + noise
// x (accSensor) is reference noise signal
void calc(
SingleSignalPath *cSensorSignal,
SingleSignalPath *accSensorSignal,
OutputMode output_mode,
int16_t volatile chess_storage(DMB) *cSensor, //Pointer auf Input-Port im Shared Memory
int16_t volatile chess_storage(DMB) *accSensor, //Pointer auf Input-Port im Shared Memory
int16_t volatile chess_storage(DMB) *out_16 //Pointer auf Output-Port im Shared Memory
){
//Speicherbereiche anlegen -> bei blockweiser Verarbeitung hat jedes Array nur den Eintrag [0]
static int chess_storage(DMA) c_block_pre[BLOCK_LEN]; //Speicherbereich für C-Sensor Preemphasis Input
static int chess_storage(DMA) acc_block_pre[BLOCK_LEN]; //Speicherbereich für Acc-Sensor Preemphasis Input
static int chess_storage(DMA) cSensor_32[BLOCK_LEN]; //Speicherbereich für 32-Bit C-Sensor Input
static int chess_storage(DMA) accSensor_32[BLOCK_LEN]; //Speicherbereich für 32-Bit Acc-Sensor Input
static int chess_storage(DMB) acc_block_filt[BLOCK_LEN]; //Speicherbereich für Akkumulator Output des adaptiven Filters
static int chess_storage(DMB) out_32[BLOCK_LEN]; //Speicherbereich für 32-Bit Output Signal
// Pointer auf die Arrays anlegen
static int chess_storage(DMA) *p_c_block_pre =c_block_pre;
static int chess_storage(DMA) *p_acc_block_filt =acc_block_pre;
static int chess_storage(DMB) *p_out_32=out_32;
// 16-Bit Eingangssignale auf 32-Bit konvertieren mit Bitshift, in neuem Speicherbereich ablegen
for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
cSensor_32[i] = ((int) cSensor[i]) << BITSHIFT_16_TO_32;
accSensor_32[i] = ((int) accSensor[i]) << BITSHIFT_16_TO_32;
}
// Preemphasis Filter anwenden - wird hier aber nicht genutzt (nur Durchreichen), in neuen Speicherbereich ablegen
for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
c_block_pre[i] = cSensor_32[i];
acc_block_pre[i] = accSensor_32[i];
}
// Adaptiven Filter auf C-Sensor Signal anwenden
//Aktuelles Sample des Acc-Sensors wird in aktuelle Speicheradresse des Pointers der Delay Line geschrieben, dann wird der Pointer inkrementiert -> Delay Line hat Länge der Filterkoeffizienten
sig_cirular_buffer_ptr_put_sample_DMB(&ptr_fir_lms_delay_line, acc_block_pre[0]);
// Filter auf Acc-Sensor Signal anwenden und Korrektursignal berechnen
// Sample des Acc-Sensors in der Delay-Line werden mit den Filterkoeffizienten multipliziert und aufsummiert -> Akkumulator Output des adaptiven Filters
acc_block_filt[0]= sig_calc_fir_lpdsp32_single(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs);
// Output-Signal berechnen -> C-Sensor Sample - Akkumulator Output des adaptiven Filters
out_32[0] = c_block_pre[0] - acc_block_filt[0];
// Filterkoeffizienten adaptieren
adapt_coeffs_lpdsp32_single_v1(&ptr_fir_lms_delay_line, &ptr_fir_lms_coeffs, out_32[0]);
// Bitshift zurück auf 16-Bit und in Ausgangsarray schreiben
for (uint32_t i=0; i<BLOCK_LEN; i++) chess_flatten_loop
{
out_16[i] = rnd_saturate(to_accum(out_32[i]) >> BITSHIFT_16_TO_32); // 12 cycles for blocksize 4 //TODO: use rnd_saturate(out_32[i] >> input_nbit_bitshift)
}
}