Calc-Funktion weitergecodet - kompiliert

2026-01-28 15:35:03 +01:00
parent 6f52b7ace4
commit fa787bec48
72 changed files with 7469 additions and 354979 deletions
--- a/simulation/signal_processing/signal_path.c
+++ b/simulation/signal_processing/signal_path.c
@@ -222,78 +222,69 @@ int inline apply_fir_filter(BufferPtrDMB chess_storage(DMB) *pointer_delay_line,

    //Pointer für Koeffizienten und Delay Line Samples anlegen
    int chess_storage(DMB) *p_x0 = pointer_delay_line->ptr_current; 
-    int chess_storage(DMB) *px_start = pointer_delay_line->ptr_start;
-    int *p_h = pointer_filter_coefficients->ptr_current;
+    int chess_storage(DMB) *p_xstart = pointer_delay_line->ptr_start;
+    int *p_w = pointer_filter_coefficients->ptr_current;
    int delay_line_len = pointer_delay_line->buffer_len;
    int n_coeff = pointer_filter_coefficients->buffer_len;

    //Variablen und Akkumulatoren (72-Bit) anlegen
-    int d0,d1,h0,h1;
-    accum_t acc1_A = to_accum(0);
-    accum_t acc1_B = to_accum(0);
-    accum_t acc1_C;
+    int x0, x1, w0, w1;
+    accum_t acc_fir_1 = to_accum(0);
+    accum_t acc_fir_2 = to_accum(0);
+    accum_t acc_fir;

    // In 2er Schritten durch die Koeffizienten iterieren, immer 2 Samples und 2 Koeffizienten pro Schleifendurchlauf -> DUAL LOAD und DUAL MAC
    for (int i=0; i < n_coeff; i+=2) chess_loop_range(1,){
-        d0 = *p_x0; //Sample 1 aus Delay Line
-        h0 = *p_h; //Koeffizient 1 aus Koeffizienten Array
-        p_h++;      //Koeffizienten-Pointer inkrementieren
-        p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); //Delay-Line-Pointer dekrementieren (rueckwaerts durch Delay Line)
+        x0 = *p_x0; //Sample 1 aus Delay Line
+        w0 = *p_w; //Koeffizient 1 aus Koeffizienten Array
+        p_w++;      //Koeffizienten-Pointer inkrementieren
+        p_x0 = cyclic_add(p_x0, -1, p_xstart, delay_line_len); //Delay-Line-Pointer dekrementieren (rueckwaerts durch Delay Line)

-        d1 = *p_x0; //Sample 2 aus Delay Line
-        h1 = *p_h; //Koeffizient 2 aus Koeffizienten Array
-        p_h++;    //Koeffizienten-Pointer inkrementieren
-        p_x0 = cyclic_add(p_x0, -1, px_start, delay_line_len); //Delay-Line-Pointer dekrementieren (rueckwaerts durch Delay Line)
+        x1 = *p_x0; //Sample 2 aus Delay Line
+        w1 = *p_w; //Koeffizient 2 aus Koeffizienten Array
+        p_w++;    //Koeffizienten-Pointer inkrementieren
+        p_x0 = cyclic_add(p_x0, -1, p_xstart, delay_line_len); //Delay-Line-Pointer dekrementieren (rueckwaerts durch Delay Line)

-        acc1_A+=fract_mult(d0, h0); //Akkumulator 1 mit Sample 1 * Koeffizient 1 addieren
-        acc1_B+=fract_mult(d1, h1); //Akkumulator 2 mit Sample 2 * Koeffizient 2 addieren        
+        acc_fir_1+=fract_mult(x0, w0); //Akkumulator 1 mit Sample 1 * Koeffizient 1 addieren
+        acc_fir_2+=fract_mult(x1, w1); //Akkumulator 2 mit Sample 2 * Koeffizient 2 addieren        
    }
    // Akkumulatoren addieren um das Filterergebnis zu erhalten
-    acc1_C = acc1_A + acc1_B;
-    return rnd_saturate(acc1_C);
+    acc_fir = acc_fir_1 + acc_fir_2;
+    return rnd_saturate(acc_fir);
 }

-void static inline update_filter_coefficients(BufferPtrDMB chess_storage(DMB) *pointer_delay_line, BufferPtr *pointer_filter_coefficients, int out){
+void static inline update_filter_coefficients(BufferPtrDMB chess_storage(DMB) *pointer_delay_line, BufferPtr *pointer_filter_coefficients, int output){

-    int chess_storage(DMA) *p_h0 = pointer_filter_coefficients->ptr_start; //Pointer auf Filterkoeffizienten-Array
+    int chess_storage(DMA) *p_w0 = pointer_filter_coefficients->ptr_start; //Pointer auf Filterkoeffizienten-Array
    int chess_storage(DMB) *p_x0 = pointer_delay_line->ptr_current; //Current-Pointer 1 auf Delay-Line Array
    int chess_storage(DMB) *p_x1 = pointer_delay_line->ptr_current; //Current-Pointer 2 auf Delay-Line Array
-    int chess_storage(DMB) *px_start = pointer_delay_line->ptr_start; //Start-Pointer auf Delay-Line Array
+    int chess_storage(DMB) *p_xstart = pointer_delay_line->ptr_start; //Start-Pointer auf Delay-Line Array
    
    int delay_line_len = pointer_delay_line->buffer_len;    // Länge des Delay-Line Arrays
-    int n_coeff = pointer_filter_coefficients->buffer_len;               // Anzahl der Filterkoeffizienten
-    int prod, x0, x1, h0, h1;
+    int n_coeff = pointer_filter_coefficients->buffer_len;  // Anzahl der Filterkoeffizienten
+    int correction, x0, x1, w0, w1;
+
+    accum_t acc_w0, acc_w1, product;

    p_x1 = cyclic_add(p_x1, -1, pointer_delay_line->ptr_start, pointer_delay_line->buffer_len); //Current-Pointer 2 dekrementieren um 1
+    product = fract_mult(mu, output);    //FIR-Output mit mu multiplizieren -> Korrektursignal. aktuell noch im accum-Format
+    correction = rnd_saturate(product);  //Korrektursignal wieder ins 32-Bit Format

-    accum_t acc_A, acc_B;
-
-    accum_t acc_C = fract_mult(mu, out);    //Korrektursignal * mu um Filterkoeffizienten anzupassen
-    prod = rnd_saturate(acc_C);
-
-    /* Abschätzung cycles per 2 coefficient:
-    dual load coeffs: 1
-    single load tab value: 2
-    dual mac: 1
-    dual rnd_sat - store: 1
-    load/store hazard nop: 1
-    */
    for (int i=0; i< n_coeff; i+=2) chess_loop_range(1,){
-        // Calculate the coefficient wise adaption
-        lldecompose(*((long long *)p_h0), h0, h1);
-
-        acc_A = to_accum(h0);
-        acc_B = to_accum(h1);
-        
-        acc_A += fract_mult(prod, *p_x0); 
-        acc_B += fract_mult(prod, *p_x1);
-         
-        p_x0 = cyclic_add(p_x0, -2, px_start, delay_line_len);
-        p_x1 = cyclic_add(p_x1, -2, px_start, delay_line_len);
-
-        // Filterkoeffizienten updaten - dual sat; dual store
-        *((long long *)p_h0) = llcompose(rnd_saturate(acc_A), rnd_saturate(acc_B));//load/store hazard ! - 1 nop is needed
-        p_h0+=2;
+        // 64-Bit-Wort an der Stelle, auf die p_w0 zeigt, in zwei 32-Bit-Filterkoeffizienten zerlegen - 1 Cycle
+        lldecompose(*((long long *)p_w0), w0, w1);
+        // Filter Koeffizienten in Accum-Format bringen (oberste 32 Bit, Rest Nullen) - 2 Cycle?
+        acc_w0 = to_accum(w0);    
+        acc_w1 = to_accum(w1);
+        // Filterkoeffizienten mit Korrekturterm*Acc-Sensor-Sample updaten  - 1 Cycle
+        acc_w0 += fract_mult(correction, *p_x0); 
+        acc_w1 += fract_mult(correction, *p_x1);
+         //Beide Pointer in der Delay-Line um 2 dekrementieren
+        p_x0 = cyclic_add(p_x0, -2, p_xstart, delay_line_len);
+        p_x1 = cyclic_add(p_x1, -2, p_xstart, delay_line_len);
+        // Filterkoeffizienten in ein 64-Bit-Wort schreiben - wird dann mit einem Store-Vorgang an dem Ort abgelegt, auf den p_w0 zeigt - 1 Cycle
+        *((long long *)p_w0) = llcompose(rnd_saturate(acc_w0), rnd_saturate(acc_w1));//LOAD/STORE-Hazard - +1 NOP benötigt - 1 Cycle
+        p_w0+=2; //Koeffizienten-Pointer um 2 inkrementieren
    }
 }

@@ -359,12 +350,11 @@ void calc(
    // static int chess_storage(DMA) *pointer_filter_accumulator =acc_sensor_pre; 
    // static int chess_storage(DMB) *pointer_output_32=output_32; 

-    // 16-Bit Eingangssignale auf 32-Bit konvertieren mit Bitshift, in neuem Speicherbereich ablegen
+    // 16-Bit Eingangssignale auf 32-Bit konvertieren mit Bitshift, Kopie der Samples in funktionseigenem neuen Speicherbereich ablegen (Kein Pointer mehr!)
    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
        c_sensor_32[i] =  ((int) c_sensor_input[i]) << BITSHIFT_16_TO_32;
        acc_sensor_32[i] = ((int) acc_sensor_input[i]) << BITSHIFT_16_TO_32;
    }    
-
    // Preemphasis Filter anwenden - wird hier aber nicht genutzt (nur Durchreichen), in neuen Speicherbereich ablegen
    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_loop_range(1,){
         c_sensor_pre[i] = c_sensor_32[i];
@@ -382,7 +372,6 @@ void calc(
    output_32[0] = c_sensor_pre[0] - filter_accumulator[0];
    // Filterkoeffizienten adaptieren
    update_filter_coefficients(&pointer_delay_line, &pointer_filter_coefficients, output_32[0]);
-
    // Bitshift zurück auf 16-Bit und in Ausgangsarray schreiben
    for (uint32_t i=0; i<BLOCK_LEN; i++) chess_flatten_loop
    {