/*
 * Pipeline benchmark: every packet flows through NB_CORES stages, each stage
 * being expressed as a task whose input/output clauses chain it to its
 * neighbours.
 *
 * Command line options:
 *   -p N   number of packets pushed through the pipeline
 *   -s N   sequence length (split evenly across the NB_CORES stages)
 *   -t N   total time in microseconds, spread over all packets and stages
 */

/* On glibc, strict -std=cXX modes hide usleep(), useconds_t and rand_r()
 * unless a feature-test macro such as this one is defined before the first
 * system header is included. */
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

/* NOTE(review): the original file listed eight #include directives whose
 * header names were lost. The six below are the ones the code in this file
 * requires; confirm whether two further headers are needed. */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* True when strtol() reported an out-of-range conversion for result n.
 * errno must be cleared before the strtol() call for this to be reliable. */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
                            (errno == ERANGE))

/* Two-level stringification so that macro arguments are expanded first. */
#define STRIFY(var) STRIFY_VAL(var)
#define STRIFY_VAL(val) #val

/* Number of cores to be used for the pipeline */
#ifndef NB_CORES
#define NB_CORES 8 /* Bossa; the "Quad Hexa" configuration used 12 */
#endif

long seq_len = 27720;       /* total sequence length, set by -s */
long nb_packets = 100000;   /* number of packets, set by -p */
useconds_t stage_time = 0;  /* artificial delay per stage call, in usecs */

typedef unsigned long data_t;

/*
 * Parse a base-10 long from text into *out.
 *
 * Returns 0 on success, -1 when no digits could be converted or the value is
 * out of range. *out is only written on success. Trailing characters after
 * the number are tolerated.
 */
static int parse_long_value(const char *text, long *out)
{
    char *end;
    long value;

    errno = 0; /* a stale ERANGE would otherwise fake an overflow */
    value = strtol(text, &end, 10);
    if ((end == text) || STRTOL_OVERFLOW(value))
        return -1;

    *out = value;
    return 0;
}

/* $Stage_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$ */
/*
 * Run one pipeline stage.
 *
 * Each thread keeps its own zero-initialised array of seq_len / NB_CORES
 * elements that persists across calls and is never freed. The value read
 * from prev_pipeline_stage is used as the initial carry; every element is
 * incremented by the carry and then becomes the new carry. The final carry
 * is stored in *next_pipeline_stage.
 *
 * The call sleeps for stage_time microseconds before doing the work. If the
 * per-thread array cannot be allocated, the process exits with
 * EXIT_FAILURE.
 */
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
{
    static __thread data_t *prev_data = NULL;
    long nb_elems = seq_len / NB_CORES;
    data_t carry;
    long i;

    if ((prev_data == NULL) && (nb_elems > 0)) {
        prev_data = calloc((size_t)nb_elems, sizeof(*prev_data));
        if (prev_data == NULL) {
            fprintf(stderr, "Out of memory allocating %ld stage elements\n",
                    nb_elems);
            exit(EXIT_FAILURE);
        }
    }

    usleep(stage_time); // stage_time is in usecs

    carry = *prev_pipeline_stage;
    for (i = 0; i < nb_elems; i++) {
        prev_data[i] += carry;
        carry = prev_data[i];
    }

    /* Equal to the last array element whenever the loop ran at least once;
     * with an empty array the input is forwarded instead of reading
     * prev_data[-1]. */
    *next_pipeline_stage = carry;
}

/*
 * Final pipeline stage: runs process_stage() on *prev_stage and prints the
 * resulting value on its own line on stdout.
 */
void last_process_stage(data_t *prev_stage)
{
    data_t next_stage;

    process_stage(prev_stage, &next_stage);
    printf("%lu\n", next_stage);
}

/*
 * Parse the command line and set nb_packets, seq_len and stage_time.
 *
 * Recognised options are -p, -s and -t, each followed by its value as a
 * separate argument. Arguments that do not start with '-' are ignored.
 *
 * Returns 0 on success. Returns -1, after printing a diagnostic on stderr,
 * for an unsupported option, a missing or unparsable value, or a negative
 * total time.
 */
int analyse_options(int argc, char *argv[])
{
    long total_time = 10000000;
    int idx;

    for (idx = 1; idx < argc; idx++) {
        const char *opt = argv[idx];
        const char *what;
        long *target;

        if (opt[0] != '-')
            continue;

        /* Only single-letter options of the form "-x" are accepted. */
        if ((opt[1] == '\0') || (opt[2] != '\0')) {
            fprintf(stderr, "Unsupported option: %s\n", opt);
            return -1;
        }

        switch (opt[1]) {
        case 'p': // Number of packets
            target = &nb_packets;
            what = "number of packets";
            break;
        case 's': // Sequence length
            target = &seq_len;
            what = "sequence length";
            break;
        case 't': // total time in usec
            target = &total_time;
            what = "total time";
            break;
        default:
            fprintf(stderr, "Unsupported option: %s\n", opt);
            return -1;
        }

        /* The value is the next argument; it must actually exist. */
        idx++;
        if (idx >= argc) {
            fprintf(stderr, "Missing value for option: %s\n", opt);
            return -1;
        }

        if (parse_long_value(argv[idx], target)) {
            fprintf(stderr, "Invalid %s: %s\n", what, argv[idx]);
            return -1;
        }
    }

    if (total_time < 0) {
        /* Would otherwise wrap into a huge unsigned sleep duration. */
        fprintf(stderr, "Invalid total time: %ld\n", total_time);
        return -1;
    }

    /* Dividing in two steps yields the same quotient as dividing by
     * (nb_packets * NB_CORES) without risking overflow in the product.
     * With no packets no stage ever runs, so the delay is irrelevant and
     * the division (possibly by zero) is skipped. */
    if (nb_packets > 0)
        stage_time = (useconds_t)(total_time / nb_packets / NB_CORES);
    else
        stage_time = 0;

    return 0;
}

/*
 * Push nb_packets packets through the pipeline.
 *
 * For each packet a pseudo-random value (rand_r with a seed starting at 42)
 * feeds the first stage; stage k reads state<k-1> and writes state<k>; the
 * last stage prints its result. Always returns 0.
 *
 * NOTE(review): the input/output task clauses are not standard OpenMP and
 * presumably target a streaming extension; the pragmas are left exactly as
 * they were. The private clause names only state1..state3 although more
 * state variables exist for NB_CORES > 4, and it names state1..state3 even
 * when NB_CORES < 4 leaves them undeclared; confirm against the intended
 * compiler.
 */
int compute_metrics(void)
{
    unsigned int seed = 42;
    long i; /* same type as nb_packets, so the loop bound cannot be exceeded */

#if NB_CORES > 1
    data_t state1;
#if NB_CORES > 2
    data_t state2;
#if NB_CORES > 3
    data_t state3;
#if NB_CORES > 4
    data_t state4;
#if NB_CORES > 5
    data_t state5;
#if NB_CORES > 6
    data_t state6;
#if NB_CORES > 7
    data_t state7;
#if NB_CORES > 8
    data_t state8;
#if NB_CORES > 9
    data_t state9;
#if NB_CORES > 10
    data_t state10;
#if NB_CORES > 11
    data_t state11;
#endif /* NB_CORES > 11 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 1 */

#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, i)
    {
#pragma omp single
        {
            for (i = 0; i < nb_packets; i++) {
                /* Intermediate stages: each one consumes the previous
                 * state and produces the next. */
#if NB_CORES > 1
#pragma omp task output (state1)
                {
                    data_t state0 = rand_r(&seed);
                    process_stage(&state0, &state1);
                }
#if NB_CORES > 2
#pragma omp task input (state1) output (state2)
                process_stage(&state1, &state2);
#if NB_CORES > 3
#pragma omp task input (state2) output (state3)
                process_stage(&state2, &state3);
#if NB_CORES > 4
#pragma omp task input (state3) output (state4)
                process_stage(&state3, &state4);
#if NB_CORES > 5
#pragma omp task input (state4) output (state5)
                process_stage(&state4, &state5);
#if NB_CORES > 6
#pragma omp task input (state5) output (state6)
                process_stage(&state5, &state6);
#if NB_CORES > 7
#pragma omp task input (state6) output (state7)
                process_stage(&state6, &state7);
#if NB_CORES > 8
#pragma omp task input (state7) output (state8)
                process_stage(&state7, &state8);
#if NB_CORES > 9
#pragma omp task input (state8) output (state9)
                process_stage(&state8, &state9);
#if NB_CORES > 10
#pragma omp task input (state9) output (state10)
                process_stage(&state9, &state10);
#if NB_CORES > 11
#pragma omp task input (state10) output (state11)
                process_stage(&state10, &state11);
#endif /* NB_CORES > 11 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 1 */

                /* Final stage: consumes the last produced state and prints
                 * the result. */
#if NB_CORES > 11
#pragma omp task input (state11)
                last_process_stage(&state11);
#elif NB_CORES > 10
#pragma omp task input (state10)
                last_process_stage(&state10);
#elif NB_CORES > 9
#pragma omp task input (state9)
                last_process_stage(&state9);
#elif NB_CORES > 8
#pragma omp task input (state8)
                last_process_stage(&state8);
#elif NB_CORES > 7
#pragma omp task input (state7)
                last_process_stage(&state7);
#elif NB_CORES > 6
#pragma omp task input (state6)
                last_process_stage(&state6);
#elif NB_CORES > 5
#pragma omp task input (state5)
                last_process_stage(&state5);
#elif NB_CORES > 4
#pragma omp task input (state4)
                last_process_stage(&state4);
#elif NB_CORES > 3
#pragma omp task input (state3)
                {
                    last_process_stage(&state3);
                }
#elif NB_CORES > 2
#pragma omp task input (state2)
                last_process_stage(&state2);
#elif NB_CORES > 1
#pragma omp task input (state1)
                last_process_stage(&state1);
#else
                data_t state0 = rand_r(&seed);
                last_process_stage(&state0);
#endif
            }
        }
    }

    return 0;
}

/*
 * Entry point: parse the options, run the pipeline, and report failure of
 * either step through the process exit status.
 */
int main(int argc, char *argv[])
{
    if (analyse_options(argc, argv))
        exit(EXIT_FAILURE);

    if (compute_metrics())
        exit(EXIT_FAILURE);

    exit(EXIT_SUCCESS);
}