Add a template of pipeline-parallelism-friendly code
pipeline_template.c is an example of pipeline-parallelism-friendly code, in the sense that it cannot be parallelized by any other known parallelization technique.
This commit is contained in:
parent
a9793430f9
commit
e07d4d39ab
|
@ -4,3 +4,4 @@ fmr_omp-str_base.txt
|
|||
fmr_omp-str_base.S
|
||||
output*.dat
|
||||
lattice
|
||||
pipeline_template
|
||||
|
|
|
@ -0,0 +1,279 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
/*
 * True when `n`, a value just returned by strtol(), denotes an out-of-range
 * conversion: the value is LONG_MIN or LONG_MAX and errno is ERANGE.
 *
 * The caller must set errno to 0 before calling strtol(); otherwise an ERANGE
 * left over from an earlier call can produce a false positive.
 * `n` is evaluated twice, so it must not have side effects.
 */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
                            (errno == ERANGE))
|
||||
|
||||
/*
 * STRIFY_VAL(x) turns its argument into a string literal verbatim.
 * STRIFY(x) first macro-expands `x` and then stringizes the result, so a
 * macro defined as 8 yields "8" rather than the macro's name.  The extra
 * level of indirection is required because the `#` operator suppresses
 * expansion of its operand.
 */
#define STRIFY_VAL(val) #val
#define STRIFY(var) STRIFY_VAL(var)
|
||||
|
||||
/*
 * Number of cores to be used for the pipeline, which is also the number of
 * pipeline stages instantiated by compute_metrics().  It can be overridden at
 * build time (e.g. -DNB_CORES=4); the stage chain in compute_metrics() is
 * written out for values from 1 to 12.
 * NOTE(review): "Bossa" and "Quad Hexa" presumably name the 8-core and
 * 12-core test machines -- confirm.
 */
|
||||
#ifndef NB_CORES
|
||||
#define NB_CORES 8 /* Bossa */
|
||||
//#define NB_CORES 12 /* Quad Hexa */
|
||||
#endif
|
||||
|
||||
/*
 * Total sequence length; each stage works on seq_len / NB_CORES elements
 * (see process_stage()).  Set with the -s option.
 * NOTE(review): 27720 = lcm(1..12), so the default presumably was chosen to
 * divide evenly by every supported NB_CORES -- confirm.
 */
long seq_len = 27720;
|
||||
/* Number of packets pushed through the pipeline.  Set with the -p option. */
long nb_packets = 100000;
|
||||
/*
 * Time each stage sleeps per packet, in microseconds.  Computed by
 * analyse_options() as total_time / (nb_packets * NB_CORES).
 */
useconds_t stage_time = 0;
|
||||
|
||||
/* Type of the values handed from one pipeline stage to the next. */
typedef unsigned long data_t;
|
||||
|
||||
|
||||
/* $Stage_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$ */
|
||||
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
|
||||
{
|
||||
long i;
|
||||
static __thread data_t *prev_data = NULL;
|
||||
data_t prev_process_step;
|
||||
|
||||
if (prev_data == NULL)
|
||||
{
|
||||
prev_data = malloc(sizeof(*prev_data) * seq_len / NB_CORES);
|
||||
memset(prev_data, 0, sizeof(*prev_data) * seq_len / NB_CORES);
|
||||
}
|
||||
usleep(stage_time); // stage_time is in usecs
|
||||
prev_process_step = *prev_pipeline_stage;
|
||||
for (i = 0; i < seq_len / NB_CORES; i++)
|
||||
{
|
||||
prev_data[i] = prev_data[i] + prev_process_step;
|
||||
prev_process_step = prev_data[i];
|
||||
}
|
||||
*next_pipeline_stage = prev_data[i - 1];
|
||||
}
|
||||
|
||||
/*
 * Final pipeline stage: runs the regular stage computation on *prev_stage
 * and, instead of forwarding the result, prints it on its own line on stdout.
 */
void last_process_stage(data_t *prev_stage)
{
    data_t result;

    process_stage(prev_stage, &result);
    printf("%lu\n", result);
}
|
||||
|
||||
int analyse_options(int argc, char *argv[])
|
||||
{
|
||||
char **arg_p;
|
||||
long total_time = 10000000;
|
||||
|
||||
argc--;
|
||||
arg_p = argv;
|
||||
while(arg_p++, argc--)
|
||||
{
|
||||
if ((*arg_p)[0] == '-')
|
||||
{
|
||||
if ((!(*arg_p)[1]) || (*arg_p)[2])
|
||||
{
|
||||
printf("Unsupported option: %s\n", *arg_p);
|
||||
return -1;
|
||||
}
|
||||
switch((*arg_p)[1])
|
||||
{
|
||||
char *endptr;
|
||||
|
||||
case 'p': // Number of packets
|
||||
argc--, arg_p++;
|
||||
nb_packets =
|
||||
strtol(*arg_p, &endptr, 10);
|
||||
if ((endptr == *arg_p) ||
|
||||
(STRTOL_OVERFLOW(nb_packets)))
|
||||
{
|
||||
fprintf(stderr, "Invalid number of packets: %s\n", *arg_p);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
|
||||
case 's': // Sequence length
|
||||
argc--, arg_p++;
|
||||
seq_len =
|
||||
strtol(*arg_p, &endptr, 10);
|
||||
if ((endptr == *arg_p) ||
|
||||
(STRTOL_OVERFLOW(seq_len)))
|
||||
{
|
||||
fprintf(stderr, "Invalid sequence length: %s\n", *arg_p);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
|
||||
case 't': // total time in usec
|
||||
argc--, arg_p++;
|
||||
total_time =
|
||||
strtol(*arg_p, &endptr, 10);
|
||||
if ((endptr == *arg_p) ||
|
||||
(STRTOL_OVERFLOW(total_time)))
|
||||
{
|
||||
fprintf(stderr, "Invalid total time: %s\n", *arg_p);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "Unsupported option: %s\n", *arg_p);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
stage_time = total_time / (nb_packets * NB_CORES);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int compute_metrics(void)
|
||||
{
|
||||
unsigned int seed = 42;
|
||||
int i;
|
||||
#if NB_CORES > 1
|
||||
data_t state1;
|
||||
#if NB_CORES > 2
|
||||
data_t state2;
|
||||
#if NB_CORES > 3
|
||||
data_t state3;
|
||||
#if NB_CORES > 4
|
||||
data_t state4;
|
||||
#if NB_CORES > 5
|
||||
data_t state5;
|
||||
#if NB_CORES > 6
|
||||
data_t state6;
|
||||
#if NB_CORES > 7
|
||||
data_t state7;
|
||||
#if NB_CORES > 8
|
||||
data_t state8;
|
||||
#if NB_CORES > 9
|
||||
data_t state9;
|
||||
#if NB_CORES > 10
|
||||
data_t state10;
|
||||
#if NB_CORES > 11
|
||||
data_t state11;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#pragma omp parallel default (none) \
|
||||
shared (nb_packets, seed) \
|
||||
private (state1, state2, state3, i)
|
||||
{
|
||||
#pragma omp single
|
||||
{
|
||||
//unsigned int seed = 42;
|
||||
|
||||
for (i = 0; i < nb_packets; i++)
|
||||
{
|
||||
#if NB_CORES > 1
|
||||
#pragma omp task output (state1)
|
||||
{
|
||||
data_t state0 = rand_r(&seed);
|
||||
process_stage(&state0, &state1);
|
||||
}
|
||||
#if NB_CORES > 2
|
||||
#pragma omp task input (state1) output (state2)
|
||||
process_stage(&state1, &state2);
|
||||
#if NB_CORES > 3
|
||||
#pragma omp task input (state2) output (state3)
|
||||
process_stage(&state2, &state3);
|
||||
#if NB_CORES > 4
|
||||
#pragma omp task input (state3) output (state4)
|
||||
process_stage(&state3, &state4);
|
||||
#if NB_CORES > 5
|
||||
#pragma omp task input (state4) output (state5)
|
||||
process_stage(&state4, &state5);
|
||||
#if NB_CORES > 6
|
||||
#pragma omp task input (state5) output (state6)
|
||||
process_stage(&state5, &state6);
|
||||
#if NB_CORES > 7
|
||||
#pragma omp task input (state6) output (state7)
|
||||
process_stage(&state6, &state7);
|
||||
#if NB_CORES > 8
|
||||
#pragma omp task input (state7) output (state8)
|
||||
process_stage(&state7, &state8);
|
||||
#if NB_CORES > 9
|
||||
#pragma omp task input (state8) output (state9)
|
||||
process_stage(&state8, &state9);
|
||||
#if NB_CORES > 10
|
||||
#pragma omp task input (state9) output (state10)
|
||||
process_stage(&state9, &state10);
|
||||
#if NB_CORES > 11
|
||||
#pragma omp task input (state10) output (state11)
|
||||
process_stage(&state10, &state11);
|
||||
#endif /* NB_CORES > 1 */
|
||||
#endif /* NB_CORES > 2 */
|
||||
#endif /* NB_CORES > 3 */
|
||||
#endif /* NB_CORES > 4 */
|
||||
#endif /* NB_CORES > 5 */
|
||||
#endif /* NB_CORES > 6 */
|
||||
#endif /* NB_CORES > 7 */
|
||||
#endif /* NB_CORES > 8 */
|
||||
#endif /* NB_CORES > 9 */
|
||||
#endif /* NB_CORES > 10 */
|
||||
#endif /* NB_CORES > 11 */
|
||||
|
||||
#if NB_CORES > 11
|
||||
#pragma omp task input (state11)
|
||||
last_process_stage(&state11);
|
||||
#elif NB_CORES > 10
|
||||
#pragma omp task input (state10)
|
||||
last_process_stage(&state10);
|
||||
#elif NB_CORES > 9
|
||||
#pragma omp task input (state9)
|
||||
last_process_stage(&state9);
|
||||
#elif NB_CORES > 8
|
||||
#pragma omp task input (state8)
|
||||
last_process_stage(&state8);
|
||||
#elif NB_CORES > 7
|
||||
#pragma omp task input (state7)
|
||||
last_process_stage(&state7);
|
||||
#elif NB_CORES > 6
|
||||
#pragma omp task input (state6)
|
||||
last_process_stage(&state6);
|
||||
#elif NB_CORES > 5
|
||||
#pragma omp task input (state5)
|
||||
last_process_stage(&state5);
|
||||
#elif NB_CORES > 4
|
||||
#pragma omp task input (state4)
|
||||
last_process_stage(&state4);
|
||||
#elif NB_CORES > 3
|
||||
#pragma omp task input (state3)
|
||||
{
|
||||
//printf("3.5) state1: %lu\n", state3);
|
||||
last_process_stage(&state3);
|
||||
}
|
||||
#elif NB_CORES > 2
|
||||
#pragma omp task input (state2)
|
||||
last_process_stage(&state2);
|
||||
#elif NB_CORES > 1
|
||||
#pragma omp task input (state1)
|
||||
last_process_stage(&state1);
|
||||
#else
|
||||
data_t state0 = rand_r(&seed);
|
||||
last_process_stage(&state0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Entry point: parse the command line, then run the pipeline.  The process
 * exits with EXIT_FAILURE as soon as either step returns non-zero (the
 * pipeline is not run when option parsing fails), EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    int status = EXIT_SUCCESS;

    if ((analyse_options(argc, argv) != 0) || (compute_metrics() != 0))
        status = EXIT_FAILURE;

    exit(status);
}
|
Loading…
Reference in New Issue