Add pipeline computation of lattice

This commit is contained in:
Thomas Preud'homme 2011-12-06 18:24:19 +01:00 committed by Thomas Preud'homme
parent c7eef474b5
commit a9793430f9
2 changed files with 454 additions and 0 deletions

1
pipepar/.gitignore vendored
View File

@ -3,3 +3,4 @@ fmr_omp-str_base.raw
fmr_omp-str_base.txt
fmr_omp-str_base.S
output*.dat
lattice

453
pipepar/lattice.c Normal file
View File

@ -0,0 +1,453 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <sys/mman.h>
#include <asm-generic/mman.h>
#define STRTOL_OVERFLOW(n) (((n == LONG_MIN) || (n == LONG_MAX)) && \
(errno == ERANGE))
#define STRIFY(var) STRIFY_VAL(val)
#define STRIFY_VAL(val) #val
/* Number of cores to be used for the pipeline */
#ifndef NB_CORES
#define NB_CORES 8 /* Bossa */
//#define NB_CORES 12; /* Quad Hexa */
#endif
/* CRC protect length (in bytes) for single channel element */
#define AAC_SCE_PROT_BIT 192
#define AAC_SCE_PROT_PER_CORE_BIT (AAC_SCE_PROT_BIT / NB_CORES)
/* CRC CCITT-16 size */
#define CRC_BIT 16
#define DATA_BITS_PER_CORE ((AAC_SCE_PROT_BIT - 1) / NB_CORES)
long seq_len;
long nb_packets = 0;
struct stage_state
{
double bit_metrics[AAC_SCE_PROT_BIT/* / NB_CORES*/]; // Metric of each bit in a packet (~ 1.5 KB if 1 core)
double lattice_metrics[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB if 1 core)
};
// Heuristic to compute ln(exp(x) + exp(y)) with x = ln(a1/b1) and y = ln(a2/b2)
double jacolog(double x, double y)
{
double r;
double diff;
if (x > y)
{
r = x;
diff = x - y;
}
else
{
r = y;
diff = y - x;
}
if (diff > 3.7)
r += 0.00;
else if (diff > 2.25)
r += 0.05;
else if (diff > 1.5 )
r += 0.15;
else if (diff > 1.05)
r += 0.25;
else if (diff > 0.7 )
r += 0.35;
else if (diff > 0.43)
r += 0.45;
else if (diff > 0.2 )
r += 0.55;
else
r += 0.65;
return r;
}
#if 0
//TODO: rename fct and vars
mat Metric_Unknown_1(const int& nb_state_crc, const ivec& P_vec, const vec& Metric_c, const vec& seq_dem)
{
mat Metric1_u(nb_state_crc, AAC_SCE_PROT_BIT + 1);
Metric1_u.zeros();
Metric1_u.set_col(AAC_SCE_PROT_BIT, Metric_c);
// Backward method
for(int i = AAC_SCE_PROT_BIT; i > 0; i--)
{
double n = 0; // Normalisation factor
for (int j = 0; j < nb_state_crc; j++)
{
int q = j ^ P_vec(i-1);
Metric1_u(j, i - 1) = jacolog(Metric1_u(j, i) + seq_dem(i - 1), Metric1_u(q, i) - seq_dem(i - 1));
n = jacolog(Metric1_u(j, i - 1), n);
}
// Normalization
for (int k = 0; k < nb_state_crc; k++)
Metric1_u(k, i - 1) -= n;
}
return Metric1_u;
}
//TODO: rename fct and vars
bvec Decode_U1(const int& nb_state_crc, const ivec& P_vec, mat& Metric1_u, const vec& seq_dem, const int& row_initial)
{
mat Metric2_u(nb_state_crc, AAC_SCE_PROT_BIT + 1);
Metric2_u.ones();
Metric2_u = Metric2_u * (-1e10); // Because 0 in the logarithmatic form is minus infini
Metric2_u(row_initial, 0) = 1; // Initialisation of the lattice
// Forward method
for (int i = 0; i < AAC_SCE_PROT_BIT; i++)
{
for(int j = 0; j < nb_state_crc; j++)
{
int q = j ^ P_vec(i);
Metric2_u(j, i + 1) = jacolog(Metric2_u(j, i) + seq_dem(i), Metric2_u(j, i + 1));
Metric2_u(q, i + 1) = jacolog(Metric2_u(j, i) - seq_dem(i), Metric2_u(q, i + 1));
}
// Normalization
double n = 0; // Normalisation factor
for(int j = 0; j < nb_state_crc; j++)
n = jacolog(Metric2_u(j, i + 1), n);
for (int k = 0; k < nb_state_crc; k++)
Metric2_u(k, i + 1) -= n;
}
// Lattice with total metric by backward method and forward method
mat Metric3_u = Metric1_u + Metric2_u;
// Decoding
bvec seq_decode(AAC_SCE_PROT_BIT);
int indice_max = row_initial;
for (int i = 0; i < AAC_SCE_PROT_BIT; i++)
{
int q = indice_max ^ P_vec(i);
if(Metric3_u(indice_max, i + 1) > Metric3_u(q, i + 1))
{
// Zero is more likely
seq_decode(i) = 0;
}
else
{
// One is more likely
seq_decode(i) = 1;
indice_max = q;
}
}
return seq_decode;
}
#endif
inline void compute_cumulative_metrics_column(
uint_fast16_t p_matrix,
double bit_metrics,
double *lattice_metrics_in,
double lattice_metrics_out[])
{
uint_fast32_t j;
double n = 0; // Normalisation factor
for(j = 0; j < (1 << CRC_BIT); j++)
{
uint_fast16_t q = j ^ p_matrix;
lattice_metrics_out[j] = jacolog(lattice_metrics_in[j] + bit_metrics, lattice_metrics_out[j]);
lattice_metrics_out[q] = jacolog(lattice_metrics_in[j] - bit_metrics, lattice_metrics_out[q]);
n = jacolog(lattice_metrics_out[j], n);
}
// Normalization
for (j = 0; j < (1 << CRC_BIT); j++)
lattice_metrics_out[j] -= n;
}
/* Forward method */
void compute_cumulative_metrics(uint_fast16_t p_matrix[],
double lattice_metrics[][1 << CRC_BIT], struct stage_state *state_in,
struct stage_state *state_out)
{
int i, j;
// Initialize lattice_metrics (ln(0) is minus infinite)
for (i = 0; i < AAC_SCE_PROT_PER_CORE_BIT - 1; i++)
for (j = 1; j < (1 << CRC_BIT); j++)
lattice_metrics[i][j] = -1e10;
for (j = 1; j < (1 << CRC_BIT); j++)
state_out->lattice_metrics[j] = -1e10;
compute_cumulative_metrics_column(p_matrix[0], state_in->bit_metrics[0],
state_in->lattice_metrics, lattice_metrics[0]);
for (i = 0; i < AAC_SCE_PROT_PER_CORE_BIT - 2; i++)
compute_cumulative_metrics_column(p_matrix[i + 1],
state_in->bit_metrics[i + 1], lattice_metrics[i],
lattice_metrics[i + 1]);
compute_cumulative_metrics_column(p_matrix[i + 1],
state_in->bit_metrics[i + 1],
lattice_metrics[i], state_out->lattice_metrics);
for (i = 0; i < AAC_SCE_PROT_BIT - AAC_SCE_PROT_PER_CORE_BIT; i++)
state_out->bit_metrics[i] = state_in->bit_metrics[i + AAC_SCE_PROT_PER_CORE_BIT];
for (i = AAC_SCE_PROT_BIT - AAC_SCE_PROT_PER_CORE_BIT; i < AAC_SCE_PROT_BIT; i++)
state_out->bit_metrics[i] = 0;
}
void compute_last_cumulative_metrics(
uint_fast16_t p_matrix[],
double lattice_metrics[][1 << CRC_BIT],
struct stage_state *state_in)
{
unsigned int i;
struct stage_state state_out;
compute_cumulative_metrics(p_matrix, lattice_metrics, state_in, &state_out);
for (i = 0; i < (1 << CRC_BIT); i++)
printf("%f\n", state_out.lattice_metrics[i]);
}
int analyse_options(int argc, char *argv[])
{
char **arg_p;
argc--;
arg_p = argv;
while(arg_p++, argc--)
{
if ((*arg_p)[0] == '-')
{
if ((!(*arg_p)[1]) || (*arg_p)[2])
{
printf("Unsupported option: %s\n", *arg_p);
return -1;
}
switch((*arg_p)[1])
{
char *endptr;
case 'p':
argc--, arg_p++;
nb_packets =
strtol(*arg_p, &endptr, 10);
if ((endptr == *arg_p) ||
(STRTOL_OVERFLOW(nb_packets)))
{
fprintf(stderr, "Invalid number of packets: %s\n", *arg_p);
return -1;
}
break;
default:
fprintf(stderr, "Unsupported option: %s\n", *arg_p);
return -1;
}
}
}
if (AAC_SCE_PROT_BIT % NB_CORES)
{
fprintf(stderr, "NB_CORES (%d) - 1 should divide AAC_SCE_PROT_BIT (%d)\n", NB_CORES, AAC_SCE_PROT_BIT);
return -1;
}
return 0;
}
int compute_metrics(void)
{
int i, j;
unsigned int seed = 42;
uint_fast16_t p_matrix[NB_CORES][AAC_SCE_PROT_BIT / NB_CORES]; // P matrix (~ 3 KB)
double *lattice_metrics[NB_CORES];
struct stage_state state0;
#if NB_CORES > 1
struct stage_state state1; // ~3.2 MB / NB_CORES
#if NB_CORES > 2
struct stage_state state2; // ~3.2 MB / NB_CORES
#if NB_CORES > 3
struct stage_state state3; // ~3.2 MB / NB_CORES
#if NB_CORES > 4
struct stage_state state4; // ~3.2 MB / NB_CORES
#if NB_CORES > 5
struct stage_state state5; // ~3.2 MB / NB_CORES
#if NB_CORES > 6
struct stage_state state6; // ~3.2 MB / NB_CORES
#if NB_CORES > 7
struct stage_state state7; // ~3.2 MB / NB_CORES
#if NB_CORES > 8
struct stage_state state8; // ~3.2 MB / NB_CORES
#if NB_CORES > 9
struct stage_state state9; // ~3.2 MB / NB_CORES
#if NB_CORES > 10
struct stage_state state10; // ~3.2 MB / NB_CORES
#if NB_CORES > 11
struct stage_state state11; // ~3.2 MB / NB_CORES
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
{
int len;
len = AAC_SCE_PROT_PER_CORE_BIT * (1 << CRC_BIT)
* sizeof(**lattice_metrics);
for (i = 0; i < NB_CORES; i++)
{
lattice_metrics[i] = (double *) mmap(NULL, len,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS /*| MAP_HUGETLB*/,
-1, 0);
if ((lattice_metrics[i] == MAP_FAILED) && errno)
{
fprintf(stderr, "Mmap failed: %s\n", strerror(errno));
for (i--; i >= 0; i--)
munmap(lattice_metrics[i], len);
return -1;
}
}
}
// Copy P matrix in fixed size array
for (j = 0; j < AAC_SCE_PROT_BIT; j++)
{
uint_fast16_t line = (uint16_t) rand_r(&seed);
p_matrix[j / AAC_SCE_PROT_PER_CORE_BIT][j % AAC_SCE_PROT_PER_CORE_BIT] = line;
}
#pragma omp parallel default (none) \
shared (nb_packets, lattice_metrics, p_matrix, seed) \
private (state0, state1, state2, state3, i)
{
#pragma omp single
{
int j;
for (i = 0; i < nb_packets; i++)
{
// Copy bit metrics in fixed size array
for (j = 0; j < AAC_SCE_PROT_BIT; j++)
state0.bit_metrics[j] = (double) rand_r(&seed);
state0.lattice_metrics[0] = 1;
for (j = 1; j < (1 << CRC_BIT); j++)
state0.lattice_metrics[j] = -1e10;
#if NB_CORES > 1
#pragma omp task firstprivate (state0) output (state1) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[0], (double (*)[1 << CRC_BIT]) lattice_metrics[0], &state0, &state1);
#if NB_CORES > 2
#pragma omp task input (state1) output (state2) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[1], (double (*)[1 << CRC_BIT]) lattice_metrics[1], &state1, &state2);
#if NB_CORES > 3
#pragma omp task input (state2) output (state3) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[2], (double (*)[1 << CRC_BIT]) lattice_metrics[2], &state2, &state3);
#if NB_CORES > 4
#pragma omp task input (state3) output (state4) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[3], (double (*)[1 << CRC_BIT]) lattice_metrics[3], &state3, &state4);
#if NB_CORES > 5
#pragma omp task input (state4) output (state5) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[4], (double (*)[1 << CRC_BIT]) lattice_metrics[4], &state4, &state5);
#if NB_CORES > 6
#pragma omp task input (state5) output (state6) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[5], (double (*)[1 << CRC_BIT]) lattice_metrics[5], &state5, &state6);
#if NB_CORES > 7
#pragma omp task input (state6) output (state7) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[6], (double (*)[1 << CRC_BIT]) lattice_metrics[6], &state6, &state7);
#if NB_CORES > 8
#pragma omp task input (state7) output (state8) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[7], (double (*)[1 << CRC_BIT]) lattice_metrics[7], &state7, &state8);
#if NB_CORES > 9
#pragma omp task input (state8) output (state9) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[8], (double (*)[1 << CRC_BIT]) lattice_metrics[8], &state8, &state9);
#if NB_CORES > 10
#pragma omp task input (state9) output (state10) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[9], (double (*)[1 << CRC_BIT]) lattice_metrics[9], &state9, &state10);
#if NB_CORES > 11
#pragma omp task input (state10) output (state11) shared (p_matrix, lattice_metrics)
compute_cumulative_metrics(p_matrix[10], (double (*)[1 << CRC_BIT]) lattice_metrics[10], &state10, &state11);
#endif /* NB_CORES > 1 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 11 */
#if NB_CORES > 11
#pragma omp task input (state11) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[11], &state11);
#elif NB_CORES > 10
#pragma omp task input (state10) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[10], &state10);
#elif NB_CORES > 9
#pragma omp task input (state9) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[9], &state9);
#elif NB_CORES > 8
#pragma omp task input (state8) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[8], &state8);
#elif NB_CORES > 7
#pragma omp task input (state7) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[7], &state7);
#elif NB_CORES > 6
#pragma omp task input (state6) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[6], &state6);
#elif NB_CORES > 5
#pragma omp task input (state5) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[5], &state5);
#elif NB_CORES > 4
#pragma omp task input (state4) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[4], &state4);
#elif NB_CORES > 3
#pragma omp task input (state3) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[3], &state3);
#elif NB_CORES > 2
#pragma omp task input (state2) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[2], &state2);
#elif NB_CORES > 1
#pragma omp task input (state1) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[1], &state1);
#else
#pragma omp task firstprivate (state0) shared(p_matrix, lattice_metrics)
compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[0], &state0);
#endif
}
}
}
return 0;
}
int main(int argc, char *argv[])
{
if (analyse_options(argc, argv))
exit(EXIT_FAILURE);
if (compute_metrics())
exit(EXIT_FAILURE);
exit(EXIT_SUCCESS);
}