#include #include #include #include #include #include #include #include #define STRTOL_OVERFLOW(n) (((n == LONG_MIN) || (n == LONG_MAX)) && \ (errno == ERANGE)) #define STRIFY(var) STRIFY_VAL(val) #define STRIFY_VAL(val) #val /* Number of cores to be used for the pipeline */ #ifndef NB_CORES #define NB_CORES 8 /* Bossa */ //#define NB_CORES 12; /* Quad Hexa */ #endif /* CRC protect length (in bytes) for single channel element */ #define AAC_SCE_PROT_BIT 192 #define AAC_SCE_PROT_PER_CORE_BIT (AAC_SCE_PROT_BIT / NB_CORES) /* CRC CCITT-8 size? */ #define CRC_BIT 8 #define DATA_BITS_PER_CORE ((AAC_SCE_PROT_BIT - 1) / NB_CORES) long seq_len; long nb_packets = 0; struct stage_state { double bit_metrics[AAC_SCE_PROT_BIT/* / NB_CORES*/]; // Metric of each bit in a packet (~ 1.5 KB if 1 core) double lattice_metrics[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB if 1 core) }; uint_fast16_t p_matrix[NB_CORES][AAC_SCE_PROT_BIT / NB_CORES]; // P matrix (~ 3 KB) double *lattice_metrics[NB_CORES]; struct stage_state state0; #if NB_CORES > 1 struct stage_state state1; // ~3.2 MB / NB_CORES #if NB_CORES > 2 struct stage_state state2; // ~3.2 MB / NB_CORES #if NB_CORES > 3 struct stage_state state3; // ~3.2 MB / NB_CORES #if NB_CORES > 4 struct stage_state state4; // ~3.2 MB / NB_CORES #if NB_CORES > 5 struct stage_state state5; // ~3.2 MB / NB_CORES #if NB_CORES > 6 struct stage_state state6; // ~3.2 MB / NB_CORES #if NB_CORES > 7 struct stage_state state7; // ~3.2 MB / NB_CORES #if NB_CORES > 8 struct stage_state state8; // ~3.2 MB / NB_CORES #if NB_CORES > 9 struct stage_state state9; // ~3.2 MB / NB_CORES #if NB_CORES > 10 struct stage_state state10; // ~3.2 MB / NB_CORES #if NB_CORES > 11 struct stage_state state11; // ~3.2 MB / NB_CORES #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif #endif // Heuristic to compute ln(exp(x) + exp(y)) with x = ln(a1/b1) and y = ln(a2/b2) double jacolog(double x, double y) { double r; double diff; if (x > y) { r = x; diff = x - y; } else { r = y; diff = y - x; } if (diff > 3.7) r += 0.00; else if (diff > 2.25) r += 0.05; else if (diff > 1.5 ) r += 0.15; else if (diff > 1.05) r += 0.25; else if (diff > 0.7 ) r += 0.35; else if (diff > 0.43) r += 0.45; else if (diff > 0.2 ) r += 0.55; else r += 0.65; return r; } #if 0 //TODO: rename fct and vars mat Metric_Unknown_1(const int& nb_state_crc, const ivec& P_vec, const vec& Metric_c, const vec& seq_dem) { mat Metric1_u(nb_state_crc, AAC_SCE_PROT_BIT + 1); Metric1_u.zeros(); Metric1_u.set_col(AAC_SCE_PROT_BIT, Metric_c); // Backward method for(int i = AAC_SCE_PROT_BIT; i > 0; i--) { double n = 0; // Normalisation factor for (int j = 0; j < nb_state_crc; j++) { int q = j ^ P_vec(i-1); Metric1_u(j, i - 1) = jacolog(Metric1_u(j, i) + seq_dem(i - 1), Metric1_u(q, i) - seq_dem(i - 1)); n = jacolog(Metric1_u(j, i - 1), n); } // Normalization for (int k = 0; k < nb_state_crc; k++) Metric1_u(k, i - 1) -= n; } return Metric1_u; } //TODO: rename fct and vars bvec Decode_U1(const int& nb_state_crc, const ivec& P_vec, mat& Metric1_u, const vec& seq_dem, const int& row_initial) { mat Metric2_u(nb_state_crc, AAC_SCE_PROT_BIT + 1); Metric2_u.ones(); Metric2_u = Metric2_u * (-1e10); // Because 0 in the logarithmatic form is minus infini Metric2_u(row_initial, 0) = 1; // Initialisation of the lattice // Forward method for (int i = 0; i < AAC_SCE_PROT_BIT; i++) { for(int j = 0; j < nb_state_crc; j++) { int q = j ^ P_vec(i); Metric2_u(j, i + 1) = jacolog(Metric2_u(j, i) + seq_dem(i), Metric2_u(j, i + 1)); Metric2_u(q, i + 1) = jacolog(Metric2_u(j, i) - seq_dem(i), Metric2_u(q, i + 1)); } // Normalization double n = 0; // Normalisation factor for(int j = 0; j < nb_state_crc; j++) n = jacolog(Metric2_u(j, i + 1), n); for (int k = 0; k < nb_state_crc; k++) Metric2_u(k, i + 1) -= n; } // Lattice with total metric by backward method and forward method mat Metric3_u = Metric1_u + Metric2_u; // Decoding bvec seq_decode(AAC_SCE_PROT_BIT); int indice_max = row_initial; for (int i = 0; i < AAC_SCE_PROT_BIT; i++) { int q = indice_max ^ P_vec(i); if(Metric3_u(indice_max, i + 1) > Metric3_u(q, i + 1)) { // Zero is more likely seq_decode(i) = 0; } else { // One is more likely seq_decode(i) = 1; indice_max = q; } } return seq_decode; } #endif inline void compute_cumulative_metrics_column( uint_fast16_t p_matrix, double bit_metrics, double *lattice_metrics_in, double lattice_metrics_out[]) { uint_fast32_t jf; double n = 0; // Normalisation factor for(jf = 0; jf < (1 << CRC_BIT); jf++) { uint_fast16_t j = (uint_fast16_t) jf; uint_fast16_t q = (j ^ p_matrix) & ((1 << CRC_BIT) - 1); lattice_metrics_out[j] = jacolog(lattice_metrics_in[j] + bit_metrics, lattice_metrics_out[j]); lattice_metrics_out[q] = jacolog(lattice_metrics_in[j] - bit_metrics, lattice_metrics_out[q]); n = jacolog(lattice_metrics_out[j], n); } // Normalization for (jf = 0; jf < (1 << CRC_BIT); jf++) lattice_metrics_out[jf] -= n; } /* Forward method */ void compute_cumulative_metrics(uint_fast16_t p_matrix[], double lattice_metrics[][1 << CRC_BIT], struct stage_state *state_in, struct stage_state *state_out) { int i, j; // Initialize lattice_metrics (ln(0) is minus infinite) for (i = 0; i < AAC_SCE_PROT_PER_CORE_BIT - 1; i++) for (j = 1; j < (1 << CRC_BIT); j++) lattice_metrics[i][j] = -1e10; for (j = 1; j < (1 << CRC_BIT); j++) state_out->lattice_metrics[j] = -1e10; compute_cumulative_metrics_column(p_matrix[0], state_in->bit_metrics[0], state_in->lattice_metrics, lattice_metrics[0]); for (i = 0; i < AAC_SCE_PROT_PER_CORE_BIT - 2; i++) compute_cumulative_metrics_column(p_matrix[i + 1], state_in->bit_metrics[i + 1], lattice_metrics[i], lattice_metrics[i + 1]); compute_cumulative_metrics_column(p_matrix[i + 1], state_in->bit_metrics[i + 1], lattice_metrics[i], state_out->lattice_metrics); for (i = 0; i < AAC_SCE_PROT_BIT - AAC_SCE_PROT_PER_CORE_BIT; i++) state_out->bit_metrics[i] = state_in->bit_metrics[i + AAC_SCE_PROT_PER_CORE_BIT]; for (i = AAC_SCE_PROT_BIT - AAC_SCE_PROT_PER_CORE_BIT; i < AAC_SCE_PROT_BIT; i++) state_out->bit_metrics[i] = 0; } void compute_last_cumulative_metrics( uint_fast16_t p_matrix[], double lattice_metrics[][1 << CRC_BIT], struct stage_state *state_in) { unsigned int i; struct stage_state state_out; static long nb_processed = 0; compute_cumulative_metrics(p_matrix, lattice_metrics, state_in, &state_out); if (++nb_processed == nb_packets) { for (i = 0; i < (1 << CRC_BIT); i += (1 << CRC_BIT)/10) printf("%f\n", state_out.lattice_metrics[i]); } } int analyse_options(int argc, char *argv[]) { char **arg_p; argc--; arg_p = argv; while(arg_p++, argc--) { if ((*arg_p)[0] == '-') { if ((!(*arg_p)[1]) || (*arg_p)[2]) { printf("Unsupported option: %s\n", *arg_p); return -1; } switch((*arg_p)[1]) { char *endptr; case 'p': argc--, arg_p++; nb_packets = strtol(*arg_p, &endptr, 10); if ((endptr == *arg_p) || (STRTOL_OVERFLOW(nb_packets))) { fprintf(stderr, "Invalid number of packets: %s\n", *arg_p); return -1; } break; default: fprintf(stderr, "Unsupported option: %s\n", *arg_p); return -1; } } } if (AAC_SCE_PROT_BIT % NB_CORES) { fprintf(stderr, "NB_CORES (%d) - 1 should divide AAC_SCE_PROT_BIT (%d)\n", NB_CORES, AAC_SCE_PROT_BIT); return -1; } return 0; } int compute_metrics(void) { int i, j; unsigned int seed = 42; { int len; len = AAC_SCE_PROT_PER_CORE_BIT * (1 << CRC_BIT) * sizeof(**lattice_metrics); for (i = 0; i < NB_CORES; i++) { lattice_metrics[i] = (double *) mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS /*| MAP_HUGETLB*/, -1, 0); if ((lattice_metrics[i] == MAP_FAILED) && errno) { fprintf(stderr, "Mmap failed: %s\n", strerror(errno)); for (i--; i >= 0; i--) munmap(lattice_metrics[i], len); return -1; } } } // Copy P matrix in fixed size array for (j = 0; j < AAC_SCE_PROT_BIT; j++) { uint_fast16_t line = (uint16_t) rand_r(&seed); p_matrix[j / AAC_SCE_PROT_PER_CORE_BIT][j % AAC_SCE_PROT_PER_CORE_BIT] = line; } #if NB_CORES > 1 #if NB_CORES == 2 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, i) #elif NB_CORES == 3 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, i) #elif NB_CORES == 4 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, i) #elif NB_CORES == 5 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, i) #elif NB_CORES == 6 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, i) #elif NB_CORES == 7 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, state6, i) #elif NB_CORES == 8 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, state6, state7, i) #elif NB_CORES == 9 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, state6, state7, state8, i) #elif NB_CORES == 10 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, state6, state7, state8, state9, i) #elif NB_CORES == 11 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, i) #elif NB_CORES == 12 #pragma omp parallel default (none) \ shared (nb_packets, lattice_metrics, p_matrix, seed) \ private (state0, state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, state11, i) #else #error Only supporting NB_CORES <= 12 #endif #endif { #pragma omp single { int j; for (i = 0; i < nb_packets; i++) { // Copy bit metrics in fixed size array for (j = 0; j < AAC_SCE_PROT_BIT; j++) state0.bit_metrics[j] = (double) rand_r(&seed); state0.lattice_metrics[0] = 1; for (j = 1; j < (1 << CRC_BIT); j++) state0.lattice_metrics[j] = -1e10; #if NB_CORES > 1 #pragma omp task firstprivate (state0) output (state1) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[0], (double (*)[1 << CRC_BIT]) lattice_metrics[0], &state0, &state1); #if NB_CORES > 2 #pragma omp task input (state1) output (state2) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[1], (double (*)[1 << CRC_BIT]) lattice_metrics[1], &state1, &state2); #if NB_CORES > 3 #pragma omp task input (state2) output (state3) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[2], (double (*)[1 << CRC_BIT]) lattice_metrics[2], &state2, &state3); #if NB_CORES > 4 #pragma omp task input (state3) output (state4) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[3], (double (*)[1 << CRC_BIT]) lattice_metrics[3], &state3, &state4); #if NB_CORES > 5 #pragma omp task input (state4) output (state5) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[4], (double (*)[1 << CRC_BIT]) lattice_metrics[4], &state4, &state5); #if NB_CORES > 6 #pragma omp task input (state5) output (state6) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[5], (double (*)[1 << CRC_BIT]) lattice_metrics[5], &state5, &state6); #if NB_CORES > 7 #pragma omp task input (state6) output (state7) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[6], (double (*)[1 << CRC_BIT]) lattice_metrics[6], &state6, &state7); #if NB_CORES > 8 #pragma omp task input (state7) output (state8) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[7], (double (*)[1 << CRC_BIT]) lattice_metrics[7], &state7, &state8); #if NB_CORES > 9 #pragma omp task input (state8) output (state9) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[8], (double (*)[1 << CRC_BIT]) lattice_metrics[8], &state8, &state9); #if NB_CORES > 10 #pragma omp task input (state9) output (state10) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[9], (double (*)[1 << CRC_BIT]) lattice_metrics[9], &state9, &state10); #if NB_CORES > 11 #pragma omp task input (state10) output (state11) shared (p_matrix, lattice_metrics) compute_cumulative_metrics(p_matrix[10], (double (*)[1 << CRC_BIT]) lattice_metrics[10], &state10, &state11); #endif /* NB_CORES > 1 */ #endif /* NB_CORES > 2 */ #endif /* NB_CORES > 3 */ #endif /* NB_CORES > 4 */ #endif /* NB_CORES > 5 */ #endif /* NB_CORES > 6 */ #endif /* NB_CORES > 7 */ #endif /* NB_CORES > 8 */ #endif /* NB_CORES > 9 */ #endif /* NB_CORES > 10 */ #endif /* NB_CORES > 11 */ #if NB_CORES > 11 #pragma omp task input (state11) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[11], &state11); #elif NB_CORES > 10 #pragma omp task input (state10) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[10], &state10); #elif NB_CORES > 9 #pragma omp task input (state9) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[9], &state9); #elif NB_CORES > 8 #pragma omp task input (state8) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[8], &state8); #elif NB_CORES > 7 #pragma omp task input (state7) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[7], &state7); #elif NB_CORES > 6 #pragma omp task input (state6) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[6], &state6); #elif NB_CORES > 5 #pragma omp task input (state5) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[5], &state5); #elif NB_CORES > 4 #pragma omp task input (state4) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[4], &state4); #elif NB_CORES > 3 #pragma omp task input (state3) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[3], &state3); #elif NB_CORES > 2 #pragma omp task input (state2) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[2], &state2); #elif NB_CORES > 1 #pragma omp task input (state1) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[1], &state1); #else #pragma omp task firstprivate (state0) shared(p_matrix, lattice_metrics) compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[0], &state0); #endif } } } return 0; } int main(int argc, char *argv[]) { if (analyse_options(argc, argv)) exit(EXIT_FAILURE); printf("-------- Beginning of computation --------\n"); if (compute_metrics()) exit(EXIT_FAILURE); printf("-------- End of computation --------\n"); exit(EXIT_SUCCESS); }