rt_benchs/pipepar/lattice-neverrun.cpp

#include <iostream>
#include <cstdlib>
#include <limits.h>
#include <errno.h>
#include <sys/mman.h>
#include <asm-generic/mman.h>
#include <itpp/itbase.h>
#include <itpp/comm/crc.h>
#include <itpp/comm/modulator.h>
#include <itpp/itcomm.h>


using namespace std;
using namespace itpp;


#define STRTOL_OVERFLOW(n) (((n == LONG_MIN) || (n == LONG_MAX)) && \
			    (errno == ERANGE))

#define STRIFY(var) STRIFY_VAL(val)
#define STRIFY_VAL(val) #val

/* Number of cores to be used for the pipeline */
#ifndef NB_CORES
#define NB_CORES 8 /* Bossa */
//#define NB_CORES 12; /* Quad Hexa */
#endif

/* CRC protect length (in bytes) for single channel element */
#define AAC_SCE_PROT_BIT 192
#define AAC_SCE_PROT_PER_CORE_BIT (AAC_SCE_PROT_BIT / NB_CORES)

/* CRC CCITT-16 size */
#define CRC_BIT 16

#define DATA_BITS_PER_CORE ((AAC_SCE_PROT_BIT - 1) / NB_CORES)

#ifdef DEBUG
int debug = 0;
#endif
long seq_len;
long nb_packets = 0;


// Heuristic to compute ln(exp(x) + exp(y)) with x = ln(a1/b1) and y = ln(a2/b2)
double jacolog(double x, double y)
{
	double r;
	double diff;

	if (x > y)
	{
		r = x;
		diff = x - y;
	}
	else
	{
		r = y;
		diff = y - x;
	}

	if (diff > 3.7)
		r += 0.00;
	else if (diff > 2.25)
		r += 0.05;
	else if (diff > 1.5 )
		r += 0.15;
	else if (diff > 1.05)
		r += 0.25;
	else if (diff > 0.7 )
		r += 0.35;
	else if (diff > 0.43)
		r += 0.45;
	else if (diff > 0.2 )
		r += 0.55;
	else
		r += 0.65;

	return r;
}

#if 0
//TODO: rename fct and vars
mat Metric_Unknown_1(const int& nb_state_crc, const ivec& P_vec, const vec& Metric_c, const vec& seq_dem)
{
	mat Metric1_u(nb_state_crc, AAC_SCE_PROT_BIT + 1);
	Metric1_u.zeros();
	Metric1_u.set_col(AAC_SCE_PROT_BIT, Metric_c);

	// Backward method
	for(int i = AAC_SCE_PROT_BIT; i > 0; i--)
	{
		double n = 0; // Normalisation factor
		for (int j = 0; j < nb_state_crc; j++)
		{
			int q = j ^ P_vec(i-1);
			Metric1_u(j, i - 1) = jacolog(Metric1_u(j, i) + seq_dem(i - 1), Metric1_u(q, i) - seq_dem(i - 1));
			n = jacolog(Metric1_u(j, i - 1), n);
		}

		// Normalization
		for (int k = 0; k < nb_state_crc; k++)
			Metric1_u(k, i - 1) -= n;
	}

	return Metric1_u;
}

//TODO: rename fct and vars
bvec Decode_U1(const int& nb_state_crc, const ivec& P_vec, mat& Metric1_u, const vec& seq_dem, const int& row_initial)
{
	mat Metric2_u(nb_state_crc, AAC_SCE_PROT_BIT + 1);
	Metric2_u.ones();
	Metric2_u = Metric2_u * (-1e10); // Because 0 in the logarithmatic form is minus infini
	Metric2_u(row_initial, 0) = 1; // Initialisation of the lattice

	// Forward method
	for (int i = 0; i < AAC_SCE_PROT_BIT; i++)
	{
		for(int j = 0; j < nb_state_crc; j++)
		{
			int q = j ^ P_vec(i);
			Metric2_u(j, i + 1) = jacolog(Metric2_u(j, i) + seq_dem(i), Metric2_u(j, i + 1));
			Metric2_u(q, i + 1) = jacolog(Metric2_u(j, i) - seq_dem(i), Metric2_u(q, i + 1));
		}

		// Normalization
		double n = 0; // Normalisation factor
		for(int j = 0; j < nb_state_crc; j++)
			n = jacolog(Metric2_u(j, i + 1), n);
		for (int k = 0; k < nb_state_crc; k++)
			Metric2_u(k, i + 1) -= n;
	}

	// Lattice with total metric by backward method and forward method
	mat Metric3_u = Metric1_u + Metric2_u;

	// Decoding
	bvec seq_decode(AAC_SCE_PROT_BIT);
	int indice_max = row_initial;
	for (int i = 0; i < AAC_SCE_PROT_BIT; i++)
	{
		int q = indice_max ^ P_vec(i);
		if(Metric3_u(indice_max, i + 1) > Metric3_u(q, i + 1))
		{
			// Zero is more likely
			seq_decode(i) = 0;
		}
		else
		{
			// One is more likely
			seq_decode(i) = 1;
			indice_max = q;
		}
	}

	return seq_decode;
}
#endif

/* All uses of unsigned short replace use of uint_fast16_t and are not portable */
inline void compute_cumulative_metrics_column(
	unsigned short p_matrix,
	double bit_metrics,
	double lattice_metrics_in[],
	double lattice_metrics_out[])
{
	double n = 0; // Normalisation factor
	for(unsigned int j = 0; j < (1 << CRC_BIT); j++)
	{
		unsigned short q = j ^ p_matrix;
		lattice_metrics_out[j] = jacolog(lattice_metrics_in[j] + bit_metrics, lattice_metrics_out[j]);
		lattice_metrics_out[q] = jacolog(lattice_metrics_in[j] - bit_metrics, lattice_metrics_out[q]);
		n = jacolog(lattice_metrics_out[j], n);
	}

	// Normalization
	for (unsigned int j = 0; j < (1 << CRC_BIT); j++)
		lattice_metrics_out[j] -= n;
}

/* Forward method */
/* All uses of unsigned short replace use of uint_fast16_t and are not portable */
void compute_cumulative_metrics(
	unsigned short p_matrix[],
	double bit_metrics[],
	double lattice_metrics[][1 << CRC_BIT],
	double lattice_metrics_in[],
	double lattice_metrics_out[])
{
	int i;

	// Initialize lattice_metrics (ln(0) is minus infinite)
	for (i = 0; i < AAC_SCE_PROT_PER_CORE_BIT - 1; i++)
		for (int j = 1; j < (1 << CRC_BIT); j++)
			lattice_metrics[i][j] = -1e10;
	for (int j = 1; j < (1 << CRC_BIT); j++)
		lattice_metrics_out[j] = -1e10;

	compute_cumulative_metrics_column(p_matrix[0], bit_metrics[0],
		lattice_metrics_in, lattice_metrics[0]);
	for (i = 0; i < AAC_SCE_PROT_PER_CORE_BIT - 2; i++)
		compute_cumulative_metrics_column(p_matrix[i], bit_metrics[i],
			lattice_metrics[i], lattice_metrics[i + 1]);
	compute_cumulative_metrics_column(p_matrix[i], bit_metrics[i],
		lattice_metrics[i], lattice_metrics_out);
}

/* All uses of unsigned short replace use of uint_fast16_t and are not portable */
void compute_last_cumulative_metrics(
	unsigned short p_matrix[],
	double bit_metrics[],
	double lattice_metrics[][1 << CRC_BIT],
	double lattice_metrics_in[])
{
	double lattice_metrics_out[1 << CRC_BIT];

	compute_cumulative_metrics(p_matrix, bit_metrics, lattice_metrics, lattice_metrics_in, lattice_metrics_out);
}

/*
 * Compute the parity matrix to obtain the P matrix included in it. The parity
 * matrix is composed of the identity matrix and the P matrix.
 * The two params are a reference to the CRC used and the length in bits of the
 * data whose CRC is computed
 */
bmat compute_p_matrix(const CRC_Code &crc, int data_length)
{
	// Temporary vector of bits
	bvec tmp(data_length);
	tmp.zeros();
	tmp(0)=1;

	// Size of the generator matrix
	bvec enc_vec = crc.encode(tmp);
	bmat G(data_length, enc_vec.length());

	int crc_size = enc_vec.length() - data_length;

	// Construction of the generator matrix
	for (int i = 0; i < data_length; i++)
	{
		enc_vec = crc.encode(tmp);
		G.set_row(i, enc_vec); // Filling the generator matrix
		tmp.shift_right(0, 1);
	}

	// Extraction of the parity-check matrix (aka P matrix)
	return G(0, data_length - 1, data_length, data_length + crc_size - 1);
}

int analyse_options(int argc, char *argv[])
{
	char **arg_p;

	argc--;
	arg_p = argv;
	while(arg_p++, argc--)
	{
		if ((*arg_p)[0] == '-')
		{
			if ((!(*arg_p)[1]) || (*arg_p)[2])
			{
				cout << "Unsupported option: " << *arg_p << endl;
				return -1;
			}
			switch((*arg_p)[1])
			{
				char *endptr;

				case 'p':
					argc--, arg_p++;
					nb_packets =
						strtol(*arg_p, &endptr, 10);
					if ((endptr == *arg_p) ||
					    (STRTOL_OVERFLOW(nb_packets)))
					{
						cerr << "Invalid number of packets: " << *arg_p << endl;
						return -1;
					}
					break;

				default:
					cerr << "Unsupported option: " << *arg_p << endl;
					return -1;
			}
		}
	}
	if (AAC_SCE_PROT_BIT % NB_CORES)
	{
		cerr << "NB_CORES (" << NB_CORES
		     << ") - 1 should divide AAC_SCE_PROT_BIT ("
		     << AAC_SCE_PROT_BIT << ")" << endl;
		return -1;
	}

	return 0;
}

/* All uses of unsigned short replace use of uint_fast16_t and are not portable */
int compute_metrics(void)
{
	int i;
	unsigned short p_matrix[NB_CORES][AAC_SCE_PROT_BIT / NB_CORES]; // P matrix (~ 3 KB)
	double bit_metrics[NB_CORES][AAC_SCE_PROT_BIT / NB_CORES]; // Metric of each bit in a packet (~ 1.5 KB)
	double *lattice_metrics[NB_CORES];
	double lattice_metrics0[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 1
	double lattice_metrics1[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 2
	double lattice_metrics2[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 3
	double lattice_metrics3[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 4
	double lattice_metrics4[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 5
	double lattice_metrics5[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 6
	double lattice_metrics6[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 7
	double lattice_metrics7[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 8
	double lattice_metrics8[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 9
	double lattice_metrics9[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 10
	double lattice_metrics10[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#if NB_CORES > 11
	double lattice_metrics11[1 << CRC_BIT]; // cumulative metrics (~ 3.2 MB)
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif

	{
		int len;

		len = AAC_SCE_PROT_PER_CORE_BIT * (1 << CRC_BIT)
			* sizeof(**lattice_metrics);
		for (i = 0; i < NB_CORES; i++)
		{
			lattice_metrics[i] = (double *) mmap(NULL, len,
				PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS /*| MAP_HUGETLB*/,
				-1, 0);
			if ((lattice_metrics[i] == MAP_FAILED) && errno)
			{
				cerr << "Mmap failed: " << strerror(errno) << endl;
				for (i--; i >= 0; i--)
					munmap(lattice_metrics[i], len);
				return -1;
			}
		}
	}

#pragma omp parallel default (none) \
	shared (nb_packets, p_matrix, bit_metrics, lattice_metrics) \
	private (lattice_metrics0, lattice_metrics1, i)
	//private (lattice_metrics0, lattice_metrics1, lattice_metrics2, lattice_metrics3, lattice_metrics4, lattice_metrics5, lattice_metrics6, lattice_metrics7, i)
	//private (p_matrix, bit_metrics, lattice_metrics, lattice_metrics0, lattice_metrics1, lattice_metrics2, lattice_metrics3, lattice_metrics4, lattice_metrics5, lattice_metrics6, lattice_metrics7, lattice_metrics8, lattice_metrics9, lattice_metrics10, lattice_metrics11, i)
	{
#pragma omp single
		{
			vec EBN0db, EBN0;
			double N0, Eb;
			bmat p_mat;
	 		BPSK bpsk;
			CRC_Code crc("CCITT-16");

			// Initialize the noise
			EBN0db = linspace(4, 10, 7); // Generate 12 values SNR between -1 et 12 in dB
			EBN0 = inv_dB(EBN0db); // In decimal
			Eb = 1.0; // We suppose the power of the sigal is 1
			N0 = Eb / EBN0(1); // Noise power
			AWGN_Channel awgn_channel(N0);

			/* Compute P matrix */
			p_mat = compute_p_matrix(crc, AAC_SCE_PROT_BIT);


			// Copy P matrix in fixed size array
			for (int j = 0; j < AAC_SCE_PROT_BIT; j++)
			{
				unsigned short line = 0;

				for (int k = 0; k < CRC_BIT; k++)
				{
					line <<= 1;
					line |= p_mat.get(j,k);
				}
				p_matrix[j / AAC_SCE_PROT_PER_CORE_BIT][j % AAC_SCE_PROT_PER_CORE_BIT] = line;
			}

			for (i = 0; i < nb_packets; i++)
			{
				bvec sequence, coded_sequence;
				vec symbol, seq_rec, seq_dem_soft;

				/* Generate randow packet */
				sequence = itpp::randb(AAC_SCE_PROT_BIT);

				/* Append CRC */
				coded_sequence = crc.encode(sequence);

 				/* Modulation (switch to analogic) and channel simulation (add noise) */
				symbol = bpsk.modulate_bits(coded_sequence);
				seq_rec = awgn_channel(symbol);

				// Demodulated symbols by the soft way => Return ln(Proba(bit == 0)/Proba(bit == 1))
				// Don't forget to remove the crc
				seq_dem_soft = bpsk.demodulate_soft_bits(seq_rec, 2 * N0, LOGMAP);

				// Copy bit metrics in fixed size array
				for (int j = 0; j < AAC_SCE_PROT_BIT; j++)
					bit_metrics[j / AAC_SCE_PROT_PER_CORE_BIT][j % AAC_SCE_PROT_PER_CORE_BIT] = seq_dem_soft.get(j);

				lattice_metrics0[0] = 1;
				for (int j = 1; j < (1 << CRC_BIT); j++)
					lattice_metrics0[j] = -1e10;

//TODO: test after this line

#if NB_CORES > 1
#pragma omp task firstprivate (lattice_metrics0) output (lattice_metrics1) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[0], bit_metrics[0], (double (*)[1 << CRC_BIT]) lattice_metrics[0], lattice_metrics0, lattice_metrics1);
#if NB_CORES > 2
#pragma omp task input (lattice_metrics1) output (lattice_metrics2) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[1], bit_metrics[1], (double (*)[1 << CRC_BIT]) lattice_metrics[1], lattice_metrics1, lattice_metrics2);
#if NB_CORES > 3
#pragma omp task input (lattice_metrics2) output (lattice_metrics3) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[2], bit_metrics[2], (double (*)[1 << CRC_BIT]) lattice_metrics[2], lattice_metrics2, lattice_metrics3);
#if NB_CORES > 4
#pragma omp task input (lattice_metrics3) output (lattice_metrics4) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[3], bit_metrics[3], (double (*)[1 << CRC_BIT]) lattice_metrics[3], lattice_metrics3, lattice_metrics4);
#if NB_CORES > 5
#pragma omp task input (lattice_metrics4) output (lattice_metrics5) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[4], bit_metrics[4], (double (*)[1 << CRC_BIT]) lattice_metrics[4], lattice_metrics4, lattice_metrics5);
#if NB_CORES > 6
#pragma omp task input (lattice_metrics5) output (lattice_metrics6) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[5], bit_metrics[5], (double (*)[1 << CRC_BIT]) lattice_metrics[5], lattice_metrics5, lattice_metrics6);
#if NB_CORES > 7
#pragma omp task input (lattice_metrics6) output (lattice_metrics7) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[6], bit_metrics[6], (double (*)[1 << CRC_BIT]) lattice_metrics[6], lattice_metrics6, lattice_metrics7);
#if NB_CORES > 8
#pragma omp task input (lattice_metrics7) output (lattice_metrics8) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[7], bit_metrics[7], (double (*)[1 << CRC_BIT]) lattice_metrics[7], lattice_metrics7, lattice_metrics8);
#if NB_CORES > 9
#pragma omp task input (lattice_metrics8) output (lattice_metrics9) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[8], bit_metrics[8], (double (*)[1 << CRC_BIT]) lattice_metrics[8], lattice_metrics8, lattice_metrics9);
#if NB_CORES > 10
#pragma omp task input (lattice_metrics9) output (lattice_metrics10) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[9], bit_metrics[9], (double (*)[1 << CRC_BIT]) lattice_metrics[9], lattice_metrics9, lattice_metrics10);
#if NB_CORES > 11
#pragma omp task input (lattice_metrics10) output (lattice_metrics11) shared (p_matrix, bit_metrics, lattice_metrics)
				compute_cumulative_metrics(p_matrix[10], bit_metrics[10], (double (*)[1 << CRC_BIT]) lattice_metrics[10], lattice_metrics10, lattice_metrics11);
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif

#if NB_CORES > 11
#pragma omp task input (lattice_metrics11) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[11], lattice_metrics11);
#elif NB_CORES > 10
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[10], lattice_metrics10);
#elif NB_CORES > 9
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[9], lattice_metrics9);
#elif NB_CORES > 8
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[8], lattice_metrics8);
#elif NB_CORES > 7
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[7], lattice_metrics7);
#elif NB_CORES > 6
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[6], lattice_metrics6);
#elif NB_CORES > 5
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[5], lattice_metrics5);
#elif NB_CORES > 4
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[4], lattice_metrics4);
#elif NB_CORES > 3
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[3], lattice_metrics3);
#elif NB_CORES > 2
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[2], lattice_metrics2);
#elif NB_CORES > 1
#pragma omp task input (lattice_metrics1) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[1], lattice_metrics1);
#else
#pragma omp task firstprivate (lattice_metrics0) shared(p_matrix, bit_metrics, lattice_metrics)
				compute_last_cumulative_metrics(p_matrix[NB_CORES - 1], bit_metrics[NB_CORES - 1], (double (*)[1 << CRC_BIT]) lattice_metrics[0], lattice_metrics0);
#endif
			}
		}
	}

	return 0;
}

int main(int argc, char *argv[])
{
	if (analyse_options(argc, argv))
		exit(EXIT_FAILURE);

	if (compute_metrics())
		exit(EXIT_FAILURE);

	exit(EXIT_SUCCESS);
}