280 lines
6.0 KiB
C
280 lines
6.0 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include <stdint.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
|
|
|
|
/*
 * True when a strtol() result `n` reports an out-of-range conversion, i.e.
 * the returned value is LONG_MIN or LONG_MAX and errno is ERANGE.
 * `n` is evaluated twice, so pass an expression without side effects.
 */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
                            (errno == ERANGE))
|
|
|
|
/*
 * STRIFY_VAL(val) turns its argument into a string literal verbatim.
 * STRIFY(var) macro-expands its argument first and then stringifies the
 * result, so STRIFY(SOME_MACRO) yields the macro's value as text.
 */
#define STRIFY_VAL(val) #val
#define STRIFY(var) STRIFY_VAL(var)
|
|
|
|
/*
 * Number of cores to be used for the pipeline.
 * It selects how many pipeline stages are built and divides both the
 * sequence length and the time budget. The build may predefine NB_CORES;
 * otherwise it defaults to 8 (the "Bossa" setting). The value 12 was noted
 * for the "Quad Hexa" setting.
 */
#if !defined(NB_CORES)
#define NB_CORES 8 /* Bossa */
#endif
|
|
|
|
/* Total sequence length; each pipeline stage works on seq_len / NB_CORES
 * elements. Overridden by the -s command-line option. */
long seq_len = 27720;
|
|
/* Number of packets pushed through the pipeline (iterations of the main
 * loop in compute_metrics). Overridden by the -p command-line option. */
long nb_packets = 100000;
|
|
/* Time, in microseconds, that each stage sleeps per packet; computed by
 * analyse_options() from the total time, nb_packets and NB_CORES. */
useconds_t stage_time = 0;
|
|
|
|
/* Type of the values accumulated by, and passed between, pipeline stages. */
typedef unsigned long data_t;
|
|
|
|
|
|
/* $Stage_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$ */
|
|
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
|
|
{
|
|
long i;
|
|
static __thread data_t *prev_data = NULL;
|
|
data_t prev_process_step;
|
|
|
|
if (prev_data == NULL)
|
|
{
|
|
prev_data = malloc(sizeof(*prev_data) * seq_len / NB_CORES);
|
|
memset(prev_data, 0, sizeof(*prev_data) * seq_len / NB_CORES);
|
|
}
|
|
usleep(stage_time); // stage_time is in usecs
|
|
prev_process_step = *prev_pipeline_stage;
|
|
for (i = 0; i < seq_len / NB_CORES; i++)
|
|
{
|
|
prev_data[i] = prev_data[i] + prev_process_step;
|
|
prev_process_step = prev_data[i];
|
|
}
|
|
*next_pipeline_stage = prev_data[i - 1];
|
|
}
|
|
|
|
/*
 * Final pipeline stage: runs process_stage() on the value handed over by
 * the previous stage and prints the resulting value, followed by a newline,
 * on standard output.
 *
 * prev_stage: value produced by the previous stage.
 */
void last_process_stage(data_t *prev_stage)
{
    data_t result;

    process_stage(prev_stage, &result);
    fprintf(stdout, "%lu\n", result);
}
|
|
|
|
int analyse_options(int argc, char *argv[])
|
|
{
|
|
char **arg_p;
|
|
long total_time = 10000000;
|
|
|
|
argc--;
|
|
arg_p = argv;
|
|
while(arg_p++, argc--)
|
|
{
|
|
if ((*arg_p)[0] == '-')
|
|
{
|
|
if ((!(*arg_p)[1]) || (*arg_p)[2])
|
|
{
|
|
printf("Unsupported option: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
switch((*arg_p)[1])
|
|
{
|
|
char *endptr;
|
|
|
|
case 'p': // Number of packets
|
|
argc--, arg_p++;
|
|
nb_packets =
|
|
strtol(*arg_p, &endptr, 10);
|
|
if ((endptr == *arg_p) ||
|
|
(STRTOL_OVERFLOW(nb_packets)))
|
|
{
|
|
fprintf(stderr, "Invalid number of packets: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
case 's': // Sequence length
|
|
argc--, arg_p++;
|
|
seq_len =
|
|
strtol(*arg_p, &endptr, 10);
|
|
if ((endptr == *arg_p) ||
|
|
(STRTOL_OVERFLOW(seq_len)))
|
|
{
|
|
fprintf(stderr, "Invalid sequence length: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
case 't': // total time in usec
|
|
argc--, arg_p++;
|
|
total_time =
|
|
strtol(*arg_p, &endptr, 10);
|
|
if ((endptr == *arg_p) ||
|
|
(STRTOL_OVERFLOW(total_time)))
|
|
{
|
|
fprintf(stderr, "Invalid total time: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
fprintf(stderr, "Unsupported option: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
stage_time = total_time / (nb_packets * NB_CORES);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int compute_metrics(void)
|
|
{
|
|
unsigned int seed = 42;
|
|
int i;
|
|
#if NB_CORES > 1
|
|
data_t state1;
|
|
#if NB_CORES > 2
|
|
data_t state2;
|
|
#if NB_CORES > 3
|
|
data_t state3;
|
|
#if NB_CORES > 4
|
|
data_t state4;
|
|
#if NB_CORES > 5
|
|
data_t state5;
|
|
#if NB_CORES > 6
|
|
data_t state6;
|
|
#if NB_CORES > 7
|
|
data_t state7;
|
|
#if NB_CORES > 8
|
|
data_t state8;
|
|
#if NB_CORES > 9
|
|
data_t state9;
|
|
#if NB_CORES > 10
|
|
data_t state10;
|
|
#if NB_CORES > 11
|
|
data_t state11;
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, i)
|
|
{
|
|
#pragma omp single
|
|
{
|
|
//unsigned int seed = 42;
|
|
|
|
for (i = 0; i < nb_packets; i++)
|
|
{
|
|
#if NB_CORES > 1
|
|
#pragma omp task output (state1)
|
|
{
|
|
data_t state0 = rand_r(&seed);
|
|
process_stage(&state0, &state1);
|
|
}
|
|
#if NB_CORES > 2
|
|
#pragma omp task input (state1) output (state2)
|
|
process_stage(&state1, &state2);
|
|
#if NB_CORES > 3
|
|
#pragma omp task input (state2) output (state3)
|
|
process_stage(&state2, &state3);
|
|
#if NB_CORES > 4
|
|
#pragma omp task input (state3) output (state4)
|
|
process_stage(&state3, &state4);
|
|
#if NB_CORES > 5
|
|
#pragma omp task input (state4) output (state5)
|
|
process_stage(&state4, &state5);
|
|
#if NB_CORES > 6
|
|
#pragma omp task input (state5) output (state6)
|
|
process_stage(&state5, &state6);
|
|
#if NB_CORES > 7
|
|
#pragma omp task input (state6) output (state7)
|
|
process_stage(&state6, &state7);
|
|
#if NB_CORES > 8
|
|
#pragma omp task input (state7) output (state8)
|
|
process_stage(&state7, &state8);
|
|
#if NB_CORES > 9
|
|
#pragma omp task input (state8) output (state9)
|
|
process_stage(&state8, &state9);
|
|
#if NB_CORES > 10
|
|
#pragma omp task input (state9) output (state10)
|
|
process_stage(&state9, &state10);
|
|
#if NB_CORES > 11
|
|
#pragma omp task input (state10) output (state11)
|
|
process_stage(&state10, &state11);
|
|
#endif /* NB_CORES > 1 */
|
|
#endif /* NB_CORES > 2 */
|
|
#endif /* NB_CORES > 3 */
|
|
#endif /* NB_CORES > 4 */
|
|
#endif /* NB_CORES > 5 */
|
|
#endif /* NB_CORES > 6 */
|
|
#endif /* NB_CORES > 7 */
|
|
#endif /* NB_CORES > 8 */
|
|
#endif /* NB_CORES > 9 */
|
|
#endif /* NB_CORES > 10 */
|
|
#endif /* NB_CORES > 11 */
|
|
|
|
#if NB_CORES > 11
|
|
#pragma omp task input (state11)
|
|
last_process_stage(&state11);
|
|
#elif NB_CORES > 10
|
|
#pragma omp task input (state10)
|
|
last_process_stage(&state10);
|
|
#elif NB_CORES > 9
|
|
#pragma omp task input (state9)
|
|
last_process_stage(&state9);
|
|
#elif NB_CORES > 8
|
|
#pragma omp task input (state8)
|
|
last_process_stage(&state8);
|
|
#elif NB_CORES > 7
|
|
#pragma omp task input (state7)
|
|
last_process_stage(&state7);
|
|
#elif NB_CORES > 6
|
|
#pragma omp task input (state6)
|
|
last_process_stage(&state6);
|
|
#elif NB_CORES > 5
|
|
#pragma omp task input (state5)
|
|
last_process_stage(&state5);
|
|
#elif NB_CORES > 4
|
|
#pragma omp task input (state4)
|
|
last_process_stage(&state4);
|
|
#elif NB_CORES > 3
|
|
#pragma omp task input (state3)
|
|
{
|
|
//printf("3.5) state1: %lu\n", state3);
|
|
last_process_stage(&state3);
|
|
}
|
|
#elif NB_CORES > 2
|
|
#pragma omp task input (state2)
|
|
last_process_stage(&state2);
|
|
#elif NB_CORES > 1
|
|
#pragma omp task input (state1)
|
|
last_process_stage(&state1);
|
|
#else
|
|
data_t state0 = rand_r(&seed);
|
|
last_process_stage(&state0);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Program entry point: parses the command line, then runs the pipeline.
 * Exits with EXIT_FAILURE as soon as either step reports an error (the
 * pipeline is not run if option parsing fails), EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    int status = EXIT_SUCCESS;

    /* Short-circuit evaluation keeps the original ordering: the pipeline
     * only runs once the options were accepted. */
    if (analyse_options(argc, argv) != 0 || compute_metrics() != 0)
        status = EXIT_FAILURE;

    exit(status);
}
|