diff --git a/pipepar/.gitignore b/pipepar/.gitignore
index 7028c91..1670a3e 100644
--- a/pipepar/.gitignore
+++ b/pipepar/.gitignore
@@ -4,3 +4,4 @@ fmr_omp-str_base.txt
 fmr_omp-str_base.S
 output*.dat
 lattice
+pipeline_template
diff --git a/pipepar/pipeline_template.c b/pipepar/pipeline_template.c
new file mode 100644
index 0000000..8d0ae33
--- /dev/null
+++ b/pipepar/pipeline_template.c
@@ -0,0 +1,347 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+
+/* True when a strtol() result `n` reports an out-of-range conversion.
+   errno must be cleared before calling strtol() for this to be reliable. */
+#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
+                            (errno == ERANGE))
+
+/* Expand `var` first, then turn the expansion into a string literal. */
+#define STRIFY(var) STRIFY_VAL(var)
+#define STRIFY_VAL(val) #val
+
+/* Number of cores to be used for the pipeline */
+#ifndef NB_CORES
+#define NB_CORES 8 /* Bossa */
+//#define NB_CORES 12 /* Quad Hexa */
+#endif
+
+/* NB_CORES is used as a divisor, so it must be strictly positive.
+   compute_metrics() unrolls at most 12 stages. */
+#if NB_CORES < 1
+#error "NB_CORES must be at least 1"
+#endif
+
+long seq_len = 27720;      /* -s: sequence length, divided among the stages */
+long nb_packets = 100000;  /* -p: number of packets sent down the pipeline */
+useconds_t stage_time = 0; /* sleep per stage call in usec, derived from -t */
+
+typedef unsigned long data_t;
+
+
+/* $Stage_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$
+
+   Run one pipeline stage: sleep for stage_time usec, then add the value
+   read from PREV_PIPELINE_STAGE into a running prefix sum over a buffer of
+   seq_len / NB_CORES elements and store the last element through
+   NEXT_PIPELINE_STAGE.
+
+   The buffer is thread-local, zero-filled on the calling thread's first use
+   and never freed, so each call builds on what earlier calls on the same
+   thread left in it.  When the buffer would be empty the input value is
+   forwarded unchanged; when it cannot be allocated the program exits.  */
+void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
+{
+  static __thread data_t *prev_data = NULL;
+  const long nb_elems = seq_len / NB_CORES;
+  data_t prev_process_step;
+  long i;
+
+  if ((prev_data == NULL) && (nb_elems > 0))
+  {
+    prev_data = calloc((size_t) nb_elems, sizeof(*prev_data));
+    if (prev_data == NULL)
+    {
+      fprintf(stderr, "Cannot allocate %ld stage elements\n", nb_elems);
+      exit(EXIT_FAILURE);
+    }
+  }
+  usleep(stage_time); // stage_time is in usecs
+  prev_process_step = *prev_pipeline_stage;
+  for (i = 0; i < nb_elems; i++)
+  {
+    prev_data[i] += prev_process_step;
+    prev_process_step = prev_data[i];
+  }
+  /* Holds the last buffer element here, or the untouched input when the
+     buffer is empty, which avoids indexing prev_data[-1].  */
+  *next_pipeline_stage = prev_process_step;
+}
+
+/* Final pipeline stage: run process_stage() on PREV_STAGE and print the
+   resulting value on its own line on stdout.  */
+void last_process_stage(data_t *prev_stage)
+{
+  data_t next_stage = 0;
+
+  process_stage(prev_stage, &next_stage);
+  printf("%lu\n", next_stage);
+}
+
+/* Parse TEXT as a base-10 long that is at least MIN_VALUE and store it in
+   *VALUE.  Returns 0 on success, or -1 (leaving *VALUE untouched) when TEXT
+   is not entirely a number, does not fit in a long, or is below MIN_VALUE.  */
+static int parse_long_arg(const char *text, long min_value, long *value)
+{
+  char *endptr;
+  long parsed;
+
+  errno = 0; /* strtol() never clears errno itself */
+  parsed = strtol(text, &endptr, 10);
+  if ((endptr == text) || (*endptr != '\0') ||
+      STRTOL_OVERFLOW(parsed) || (parsed < min_value))
+    return -1;
+
+  *value = parsed;
+  return 0;
+}
+
+/* Read the command line and set the global parameters:
+     -p N   number of packets (nb_packets), N >= 1
+     -s N   sequence length (seq_len), N >= NB_CORES
+     -t N   total sleep time in usec, N >= 0; it is divided evenly among
+            all stage calls to give stage_time
+   Arguments that do not start with '-' are ignored.  Returns 0 on success;
+   on a malformed command line, reports the problem on stderr and returns
+   -1.  */
+int analyse_options(int argc, char *argv[])
+{
+  long total_time = 10000000;
+  int arg_idx;
+
+  for (arg_idx = 1; arg_idx < argc; arg_idx++)
+  {
+    const char *option = argv[arg_idx];
+    const char *what;
+    long *target;
+    long min_value;
+
+    if (option[0] != '-')
+      continue;
+
+    if ((!option[1]) || option[2])
+    {
+      fprintf(stderr, "Unsupported option: %s\n", option);
+      return -1;
+    }
+
+    switch (option[1])
+    {
+      case 'p': // Number of packets
+        what = "number of packets";
+        target = &nb_packets;
+        min_value = 1; /* also a divisor of total_time below */
+        break;
+
+      case 's': // Sequence length
+        what = "sequence length";
+        target = &seq_len;
+        min_value = NB_CORES; /* at least one element per stage */
+        break;
+
+      case 't': // total time in usec
+        what = "total time";
+        target = &total_time;
+        min_value = 0;
+        break;
+
+      default:
+        fprintf(stderr, "Unsupported option: %s\n", option);
+        return -1;
+    }
+
+    /* Every supported option takes its value from the next argument. */
+    if (++arg_idx >= argc)
+    {
+      fprintf(stderr, "Missing value for option %s\n", option);
+      return -1;
+    }
+    if (parse_long_arg(argv[arg_idx], min_value, target))
+    {
+      fprintf(stderr, "Invalid %s: %s\n", what, argv[arg_idx]);
+      return -1;
+    }
+  }
+
+  /* Keep nb_packets * NB_CORES within the range of long. */
+  if (nb_packets > LONG_MAX / NB_CORES)
+  {
+    fprintf(stderr, "Invalid number of packets: %ld\n", nb_packets);
+    return -1;
+  }
+  /* NOTE(review): usleep() is allowed to reject delays of 1000000 usec or
+     more, and the quotient is narrowed to useconds_t -- confirm that large
+     -t / small -p combinations are not needed.  */
+  stage_time = total_time / (nb_packets * NB_CORES);
+
+  return 0;
+}
+
+/* Push nb_packets pseudo-random values through the pipeline.  Inside an
+   OpenMP parallel region, a single thread creates one task per stage for
+   every packet; the stateN variables named in the tasks' input/output
+   clauses carry the value from one stage to the next and the last stage
+   prints it.  Always returns 0.
+
+   NOTE(review): the input/output task clauses are not standard OpenMP --
+   presumably a streaming extension of the compiler is expected; confirm
+   the toolchain.
+   NOTE(review): the private clause names state1..state3 whatever NB_CORES
+   is, although state3 only exists when NB_CORES > 3, and state4 and up are
+   never listed despite default (none) -- check that this is intended.  */
+int compute_metrics(void)
+{
+  unsigned int seed = 42;
+  long i; /* same type as nb_packets, which may exceed INT_MAX */
+#if NB_CORES > 1
+  data_t state1;
+#endif
+#if NB_CORES > 2
+  data_t state2;
+#endif
+#if NB_CORES > 3
+  data_t state3;
+#endif
+#if NB_CORES > 4
+  data_t state4;
+#endif
+#if NB_CORES > 5
+  data_t state5;
+#endif
+#if NB_CORES > 6
+  data_t state6;
+#endif
+#if NB_CORES > 7
+  data_t state7;
+#endif
+#if NB_CORES > 8
+  data_t state8;
+#endif
+#if NB_CORES > 9
+  data_t state9;
+#endif
+#if NB_CORES > 10
+  data_t state10;
+#endif
+#if NB_CORES > 11
+  data_t state11;
+#endif
+
+#pragma omp parallel default (none) \
+  shared (nb_packets, seed) \
+  private (state1, state2, state3, i)
+  {
+#pragma omp single
+    {
+      for (i = 0; i < nb_packets; i++)
+      {
+#if NB_CORES > 1
+#pragma omp task output (state1)
+        {
+          data_t state0 = rand_r(&seed);
+          process_stage(&state0, &state1);
+        }
+#endif
+#if NB_CORES > 2
+#pragma omp task input (state1) output (state2)
+        process_stage(&state1, &state2);
+#endif
+#if NB_CORES > 3
+#pragma omp task input (state2) output (state3)
+        process_stage(&state2, &state3);
+#endif
+#if NB_CORES > 4
+#pragma omp task input (state3) output (state4)
+        process_stage(&state3, &state4);
+#endif
+#if NB_CORES > 5
+#pragma omp task input (state4) output (state5)
+        process_stage(&state4, &state5);
+#endif
+#if NB_CORES > 6
+#pragma omp task input (state5) output (state6)
+        process_stage(&state5, &state6);
+#endif
+#if NB_CORES > 7
+#pragma omp task input (state6) output (state7)
+        process_stage(&state6, &state7);
+#endif
+#if NB_CORES > 8
+#pragma omp task input (state7) output (state8)
+        process_stage(&state7, &state8);
+#endif
+#if NB_CORES > 9
+#pragma omp task input (state8) output (state9)
+        process_stage(&state8, &state9);
+#endif
+#if NB_CORES > 10
+#pragma omp task input (state9) output (state10)
+        process_stage(&state9, &state10);
+#endif
+#if NB_CORES > 11
+#pragma omp task input (state10) output (state11)
+        process_stage(&state10, &state11);
+#endif
+
+#if NB_CORES > 11
+#pragma omp task input (state11)
+        last_process_stage(&state11);
+#elif NB_CORES > 10
+#pragma omp task input (state10)
+        last_process_stage(&state10);
+#elif NB_CORES > 9
+#pragma omp task input (state9)
+        last_process_stage(&state9);
+#elif NB_CORES > 8
+#pragma omp task input (state8)
+        last_process_stage(&state8);
+#elif NB_CORES > 7
+#pragma omp task input (state7)
+        last_process_stage(&state7);
+#elif NB_CORES > 6
+#pragma omp task input (state6)
+        last_process_stage(&state6);
+#elif NB_CORES > 5
+#pragma omp task input (state5)
+        last_process_stage(&state5);
+#elif NB_CORES > 4
+#pragma omp task input (state4)
+        last_process_stage(&state4);
+#elif NB_CORES > 3
+#pragma omp task input (state3)
+        last_process_stage(&state3);
+#elif NB_CORES > 2
+#pragma omp task input (state2)
+        last_process_stage(&state2);
+#elif NB_CORES > 1
+#pragma omp task input (state1)
+        last_process_stage(&state1);
+#else
+        data_t state0 = rand_r(&seed);
+        last_process_stage(&state0);
+#endif
+      }
+    }
+  }
+
+  return 0;
+}
+
+/* Parse the command line, then run the pipeline; the exit status reports
+   whether both steps succeeded.  */
+int main(int argc, char *argv[])
+{
+  if (analyse_options(argc, argv))
+    exit(EXIT_FAILURE);
+
+  if (compute_metrics())
+    exit(EXIT_FAILURE);
+
+  exit(EXIT_SUCCESS);
+}