Add template of pipeline parallelism friendly code

pipeline_template.c is an example of pipeline-parallelism-friendly code, in the
sense that it cannot be parallelized by any other known parallelization technique.
This commit is contained in:
Thomas Preud'homme 2012-01-08 20:07:41 +01:00 committed by Thomas Preud'homme
parent a9793430f9
commit e07d4d39ab
2 changed files with 280 additions and 0 deletions

1
pipepar/.gitignore vendored
View File

@ -4,3 +4,4 @@ fmr_omp-str_base.txt
fmr_omp-str_base.S
output*.dat
lattice
pipeline_template

279
pipepar/pipeline_template.c Normal file
View File

@ -0,0 +1,279 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <unistd.h>
#include <time.h>
/*
 * True when n, a value returned by strtol(), signals an out-of-range
 * conversion: the result is LONG_MIN or LONG_MAX and errno is ERANGE.
 * n is evaluated twice, so pass an expression without side effects.
 */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
                            (errno == ERANGE))
/* Macro-expand var first, then turn the result into a string literal. */
#define STRIFY(var) STRIFY_VAL(var)
/* Turn val into a string literal as written, without macro-expanding it. */
#define STRIFY_VAL(val) #val
/* Number of cores to be used for the pipeline */
/* Can be overridden at build time; it also selects how many pipeline stages
 * compute_metrics() instantiates and how seq_len is split between stages. */
#ifndef NB_CORES
#define NB_CORES 8 /* Bossa */
//#define NB_CORES 12 /* Quad Hexa */
#endif
/* Total sequence length (option -s); each stage call works on
 * seq_len / NB_CORES elements. */
long seq_len = 27720;
/* Number of packets pushed through the pipeline (option -p). */
long nb_packets = 100000;
/* Time each stage call sleeps, in microseconds; computed by
 * analyse_options() from the total time (option -t). */
useconds_t stage_time = 0;
/* Type of the value handed from one pipeline stage to the next. */
typedef unsigned long data_t;
/*
 * Run one pipeline stage.
 *
 * $Stage_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$
 *
 * The stage keeps a per-thread array of seq_len / NB_CORES accumulators that
 * persists across calls. Starting from the value produced by the previous
 * stage, each accumulator is increased by the value carried from the
 * accumulator before it (a running prefix sum), and the last carried value is
 * handed to the next stage.
 *
 * prev_pipeline_stage: value produced by the previous stage (read only).
 * next_pipeline_stage: where the value for the next stage is stored.
 *
 * The per-thread array is allocated on the first call of each thread and is
 * never freed. The process exits with EXIT_FAILURE if that allocation fails.
 */
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
{
    /* Thread-local: the state belongs to the calling thread, not to a stage. */
    static __thread data_t *prev_data = NULL;
    long chunk_len = seq_len / NB_CORES;
    data_t prev_process_step;
    long i;

    /* A sequence shorter than NB_CORES (or negative) leaves nothing to do. */
    if (chunk_len < 0)
        chunk_len = 0;

    if (prev_data == NULL && chunk_len > 0)
    {
        /* calloc zero-fills and checks the size multiplication for overflow */
        prev_data = calloc((size_t)chunk_len, sizeof(*prev_data));
        if (prev_data == NULL)
        {
            fprintf(stderr, "Cannot allocate stage buffer\n");
            exit(EXIT_FAILURE);
        }
    }

    usleep(stage_time); // stage_time is in usecs

    prev_process_step = *prev_pipeline_stage;
    for (i = 0; i < chunk_len; i++)
    {
        prev_data[i] += prev_process_step;
        prev_process_step = prev_data[i];
    }

    /*
     * After the loop the carried value equals the last accumulator; with an
     * empty chunk it is simply the input, so no out-of-bounds read happens.
     */
    *next_pipeline_stage = prev_process_step;
}
/*
 * Final pipeline stage: run process_stage() on the value coming from the
 * previous stage and print the resulting value on its own line on stdout.
 *
 * prev_stage: value produced by the previous stage.
 */
void last_process_stage(data_t *prev_stage)
{
    data_t result;

    process_stage(prev_stage, &result);
    fprintf(stdout, "%lu\n", result);
}
int analyse_options(int argc, char *argv[])
{
char **arg_p;
long total_time = 10000000;
argc--;
arg_p = argv;
while(arg_p++, argc--)
{
if ((*arg_p)[0] == '-')
{
if ((!(*arg_p)[1]) || (*arg_p)[2])
{
printf("Unsupported option: %s\n", *arg_p);
return -1;
}
switch((*arg_p)[1])
{
char *endptr;
case 'p': // Number of packets
argc--, arg_p++;
nb_packets =
strtol(*arg_p, &endptr, 10);
if ((endptr == *arg_p) ||
(STRTOL_OVERFLOW(nb_packets)))
{
fprintf(stderr, "Invalid number of packets: %s\n", *arg_p);
return -1;
}
break;
case 's': // Sequence length
argc--, arg_p++;
seq_len =
strtol(*arg_p, &endptr, 10);
if ((endptr == *arg_p) ||
(STRTOL_OVERFLOW(seq_len)))
{
fprintf(stderr, "Invalid sequence length: %s\n", *arg_p);
return -1;
}
break;
case 't': // total time in usec
argc--, arg_p++;
total_time =
strtol(*arg_p, &endptr, 10);
if ((endptr == *arg_p) ||
(STRTOL_OVERFLOW(total_time)))
{
fprintf(stderr, "Invalid total time: %s\n", *arg_p);
return -1;
}
break;
default:
fprintf(stderr, "Unsupported option: %s\n", *arg_p);
return -1;
}
}
}
stage_time = total_time / (nb_packets * NB_CORES);
return 0;
}
/*
 * Push nb_packets packets through a pipeline of NB_CORES stages.
 *
 * For every packet, the first stage draws a pseudo-random value with
 * rand_r() (fixed seed 42) and each following stage calls process_stage() on
 * the value produced by the stage before it; the final stage calls
 * last_process_stage(), which prints its result. Every stage is an OpenMP
 * task and the stateN variables carry the value from stage N to stage N + 1.
 * With NB_CORES == 1 the single stage runs directly in the loop body.
 *
 * NOTE(review): the input/output task clauses are not part of standard
 * OpenMP; presumably this needs a compiler with a streaming extension --
 * confirm which one is expected.
 * NOTE(review): the private clause always names state1..state3, whatever
 * NB_CORES is, while default(none) is in effect -- confirm this is what the
 * target compiler expects.
 *
 * Always returns 0.
 */
int compute_metrics(void)
{
    unsigned int seed = 42;
    long i; /* same type as nb_packets, so large packet counts cannot overflow it */
#if NB_CORES > 1
    data_t state1;
#if NB_CORES > 2
    data_t state2;
#if NB_CORES > 3
    data_t state3;
#if NB_CORES > 4
    data_t state4;
#if NB_CORES > 5
    data_t state5;
#if NB_CORES > 6
    data_t state6;
#if NB_CORES > 7
    data_t state7;
#if NB_CORES > 8
    data_t state8;
#if NB_CORES > 9
    data_t state9;
#if NB_CORES > 10
    data_t state10;
#if NB_CORES > 11
    data_t state11;
#endif /* NB_CORES > 11 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 1 */

#pragma omp parallel default (none) \
shared (nb_packets, seed) \
private (state1, state2, state3, i)
    {
#pragma omp single
        {
            for (i = 0; i < nb_packets; i++)
            {
                /* Intermediate stages: stage k reads state(k-1), writes state(k). */
#if NB_CORES > 1
#pragma omp task output (state1)
                {
                    data_t state0 = rand_r(&seed);
                    process_stage(&state0, &state1);
                }
#if NB_CORES > 2
#pragma omp task input (state1) output (state2)
                process_stage(&state1, &state2);
#if NB_CORES > 3
#pragma omp task input (state2) output (state3)
                process_stage(&state2, &state3);
#if NB_CORES > 4
#pragma omp task input (state3) output (state4)
                process_stage(&state3, &state4);
#if NB_CORES > 5
#pragma omp task input (state4) output (state5)
                process_stage(&state4, &state5);
#if NB_CORES > 6
#pragma omp task input (state5) output (state6)
                process_stage(&state5, &state6);
#if NB_CORES > 7
#pragma omp task input (state6) output (state7)
                process_stage(&state6, &state7);
#if NB_CORES > 8
#pragma omp task input (state7) output (state8)
                process_stage(&state7, &state8);
#if NB_CORES > 9
#pragma omp task input (state8) output (state9)
                process_stage(&state8, &state9);
#if NB_CORES > 10
#pragma omp task input (state9) output (state10)
                process_stage(&state9, &state10);
#if NB_CORES > 11
#pragma omp task input (state10) output (state11)
                process_stage(&state10, &state11);
#endif /* NB_CORES > 11 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 1 */

                /* Final stage: consumes the last state produced above. */
#if NB_CORES > 11
#pragma omp task input (state11)
                last_process_stage(&state11);
#elif NB_CORES > 10
#pragma omp task input (state10)
                last_process_stage(&state10);
#elif NB_CORES > 9
#pragma omp task input (state9)
                last_process_stage(&state9);
#elif NB_CORES > 8
#pragma omp task input (state8)
                last_process_stage(&state8);
#elif NB_CORES > 7
#pragma omp task input (state7)
                last_process_stage(&state7);
#elif NB_CORES > 6
#pragma omp task input (state6)
                last_process_stage(&state6);
#elif NB_CORES > 5
#pragma omp task input (state5)
                last_process_stage(&state5);
#elif NB_CORES > 4
#pragma omp task input (state4)
                last_process_stage(&state4);
#elif NB_CORES > 3
#pragma omp task input (state3)
                {
                    last_process_stage(&state3);
                }
#elif NB_CORES > 2
#pragma omp task input (state2)
                last_process_stage(&state2);
#elif NB_CORES > 1
#pragma omp task input (state1)
                last_process_stage(&state1);
#else
                data_t state0 = rand_r(&seed);
                last_process_stage(&state0);
#endif
            }
        }
    }
    return 0;
}
/*
 * Program entry point: parse the command line, then run the pipeline.
 * Exits with EXIT_FAILURE if either step reports an error (the pipeline is
 * not run when option parsing fails), with EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    int status = EXIT_SUCCESS;

    /* || short-circuits: compute_metrics() only runs after a clean parse */
    if (analyse_options(argc, argv) || compute_metrics())
        status = EXIT_FAILURE;

    exit(status);
}