/*
 * Copyright (C) 2012-2013 Thomas Preud'homme
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* rand_r() and clock_gettime() are POSIX, not ISO C: request them explicitly
 * so the file also builds in strict -std=c11 mode. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

/* NOTE(review): the header names of the include directives were missing from
 * the reviewed text; the list below is what the code in this file requires.
 * Confirm against the upstream file whether further headers were included. */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* True when a strtol() result `n` denotes an out-of-range conversion.
 * errno must have been cleared before the strtol() call. */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
			    (errno == ERANGE))

/* Two-level stringification so that macro arguments are expanded first. */
#define STRIFY(var) STRIFY_VAL(var)
#define STRIFY_VAL(val) #val

/* Number of cores (i.e. pipeline stages) to be used for the pipeline.
 * Default is 8 ("Bossa"); build with -DNB_CORES=12 for "Quad Hexa". */
#ifndef NB_CORES
#define NB_CORES 8
#endif

#if NB_CORES < 1
#error NB_CORES must be at least 1
#endif

/* When defined, stages do not busy-wait until their time slot has elapsed. */
#define NOSLEEP

#define NSEC_PER_SEC 1000000000L

typedef unsigned long data_t;

long seq_len = 24/*27720*/;	/* Total sequence length, split across stages */
long nb_packets = 100000;	/* Number of packets pushed through the pipeline */
struct timespec stage_time;	/* Time budget of one stage for one packet */
data_t result = 0;		/* Output of the last stage for the last packet */

/*
 * Run one pipeline stage.
 *
 * $E_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$
 *
 * Each calling thread owns a zero-initialised array of seq_len / NB_CORES
 * accumulators that persists across calls. The input value is added to the
 * first accumulator, each accumulator is then added to the following one, and
 * the value of the last accumulator is the stage output.
 *
 * prev_pipeline_stage: value produced by the previous stage (read only).
 * next_pipeline_stage: receives the value handed to the next stage.
 *
 * If seq_len / NB_CORES is not positive there is nothing to accumulate and the
 * input is forwarded unchanged. The process exits with EXIT_FAILURE if the
 * accumulator array cannot be allocated.
 */
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
{
	static __thread data_t *prev_data = NULL;
	const long nb_elems = seq_len / NB_CORES;
	data_t carry = *prev_pipeline_stage;
	long i;
#ifndef NOSLEEP
	struct timespec beginning, now, end;

	/* Compute the instant before which this stage must not return. */
	clock_gettime(CLOCK_REALTIME, &beginning);
	end.tv_sec = beginning.tv_sec + stage_time.tv_sec;
	end.tv_nsec = beginning.tv_nsec + stage_time.tv_nsec;
	if (end.tv_nsec >= NSEC_PER_SEC) {
		end.tv_nsec -= NSEC_PER_SEC;
		end.tv_sec++;
	}
#endif

	if (nb_elems > 0) {
		if (prev_data == NULL) {
			/* First call on this thread: create its accumulators. */
			prev_data = calloc((size_t) nb_elems, sizeof(*prev_data));
			if (prev_data == NULL) {
				perror("calloc");
				exit(EXIT_FAILURE);
			}
		}
		for (i = 0; i < nb_elems; i++) {
			prev_data[i] += carry;
			carry = prev_data[i];
		}
	}
	*next_pipeline_stage = carry;

#ifndef NOSLEEP
	/* Busy-wait until the deadline. Nanoseconds only matter once the
	 * seconds are equal. */
	do {
		clock_gettime(CLOCK_REALTIME, &now);
	} while ((now.tv_sec < end.tv_sec) ||
		 ((now.tv_sec == end.tv_sec) && (now.tv_nsec < end.tv_nsec)));
#endif
}

/*
 * Run the final pipeline stage and publish its output in the global `result`.
 *
 * prev_stage: value produced by the previous stage.
 */
void last_process_stage(data_t *prev_stage)
{
	data_t next_stage;

	process_stage(prev_stage, &next_stage);
	result = next_stage;
}

/*
 * Convert `text` to a long in base 10.
 *
 * Returns 0 and stores the value in *value on success; returns -1 and leaves
 * *value untouched when `text` is NULL, is not entirely a number, or is out of
 * range for a long.
 */
static int parse_long(const char *text, long *value)
{
	char *endptr;
	long parsed;

	if (text == NULL)
		return -1;
	errno = 0;	/* Otherwise a stale ERANGE would look like an overflow */
	parsed = strtol(text, &endptr, 10);
	if ((endptr == text) || (*endptr != '\0') || STRTOL_OVERFLOW(parsed))
		return -1;
	*value = parsed;
	return 0;
}

/*
 * Parse the command line and derive the per-stage time budget.
 *
 * Recognised options, each followed by a decimal value:
 *   -p  number of packets (stored in nb_packets)
 *   -s  sequence length (stored in seq_len)
 *   -t  total time, divided by nb_packets * NB_CORES to get stage_time
 * Arguments that do not start with '-' are ignored.
 *
 * NOTE(review): the original comment gave the unit of -t as usec, but the
 * value is split into tv_sec/tv_nsec by dividing by 10^9, i.e. it is treated
 * as nanoseconds (the default 10^10 then means 10 s) - confirm intended unit.
 * NOTE(review): the default does not fit a 32-bit long.
 *
 * Returns 0 on success, -1 (after printing a message on stderr) on error.
 */
int analyse_options(int argc, char *argv[])
{
	long total_time = 10000000000;
	long stg_time;
	int idx;

	for (idx = 1; idx < argc; idx++) {
		const char *opt = argv[idx];
		const char *what;
		long *target;

		if (opt[0] != '-')
			continue;
		/* Only single-letter options are supported. */
		if ((opt[1] == '\0') || (opt[2] != '\0')) {
			fprintf(stderr, "Unsupported option: %s\n", opt);
			return -1;
		}
		switch (opt[1]) {
		case 'p':
			target = &nb_packets;
			what = "number of packets";
			break;
		case 's':
			target = &seq_len;
			what = "sequence length";
			break;
		case 't':
			target = &total_time;
			what = "total time";
			break;
		default:
			fprintf(stderr, "Unsupported option: %s\n", opt);
			return -1;
		}
		/* The value is the next argument; it must exist. */
		if (++idx >= argc) {
			fprintf(stderr, "Missing value for option: %s\n", opt);
			return -1;
		}
		if (parse_long(argv[idx], target)) {
			fprintf(stderr, "Invalid %s: %s\n", what, argv[idx]);
			return -1;
		}
	}

	/* nb_packets is a divisor below and is multiplied by NB_CORES. */
	if ((nb_packets <= 0) || (nb_packets > LONG_MAX / NB_CORES)) {
		fprintf(stderr, "Invalid number of packets: %ld\n", nb_packets);
		return -1;
	}
	if (seq_len <= 0) {
		fprintf(stderr, "Invalid sequence length: %ld\n", seq_len);
		return -1;
	}
	if (total_time < 0) {
		fprintf(stderr, "Invalid total time: %ld\n", total_time);
		return -1;
	}

	stg_time = total_time / (nb_packets * NB_CORES);
	stage_time.tv_sec = stg_time / NSEC_PER_SEC;
	stage_time.tv_nsec = stg_time % NSEC_PER_SEC;
	return 0;
}

/*
 * Push nb_packets pseudo-random values (rand_r() seeded with 42) through a
 * pipeline of NB_CORES stages and print the content of `result` afterwards.
 *
 * Stage k reads stateK-1 and writes stateK; the last stage goes through
 * last_process_stage(). With NB_CORES == 1 the single stage is called
 * directly, without any pragma.
 *
 * NOTE(review): the input()/output() clauses on the task pragmas are not
 * standard OpenMP task clauses; presumably this file targets a compiler with
 * a dataflow/streaming task extension - confirm. A compiler that ignores the
 * pragmas runs the stages sequentially in program order.
 *
 * Always returns 0.
 */
int compute_metrics(void)
{
	unsigned int seed = 42;
	long i;	/* Same type as nb_packets so the loop bound cannot overflow */
#if NB_CORES > 1
	data_t state1;
#endif
#if NB_CORES > 2
	data_t state2;
#endif
#if NB_CORES > 3
	data_t state3;
#endif
#if NB_CORES > 4
	data_t state4;
#endif
#if NB_CORES > 5
	data_t state5;
#endif
#if NB_CORES > 6
	data_t state6;
#endif
#if NB_CORES > 7
	data_t state7;
#endif
#if NB_CORES > 8
	data_t state8;
#endif
#if NB_CORES > 9
	data_t state9;
#endif
#if NB_CORES > 10
	data_t state10;
#endif
#if NB_CORES > 11
	data_t state11;
#endif

#if NB_CORES > 1
#if NB_CORES == 2
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, i)
#elif NB_CORES == 3
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, i)
#elif NB_CORES == 4
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, i)
#elif NB_CORES == 5
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, i)
#elif NB_CORES == 6
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, i)
#elif NB_CORES == 7
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, state6, i)
#elif NB_CORES == 8
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, state6, state7, i)
#elif NB_CORES == 9
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, state6, state7, state8, i)
#elif NB_CORES == 10
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, state6, state7, state8, state9, i)
#elif NB_CORES == 11
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, i)
#elif NB_CORES == 12
#pragma omp parallel default (none) \
	shared (nb_packets, seed) \
	private (state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, state11, i)
#else
#error Only supporting NB_CORES <= 12
#endif
#endif
	{
#pragma omp single
		{
			for (i = 0; i < nb_packets; i++) {
				/* First stage: generates the packet itself. */
#if NB_CORES > 1
#pragma omp task output (state1)
				{
					data_t state0 = rand_r(&seed);

					process_stage(&state0, &state1);
				}
#endif
				/* Intermediate stages. */
#if NB_CORES > 2
#pragma omp task input (state1) output (state2)
				process_stage(&state1, &state2);
#endif
#if NB_CORES > 3
#pragma omp task input (state2) output (state3)
				process_stage(&state2, &state3);
#endif
#if NB_CORES > 4
#pragma omp task input (state3) output (state4)
				process_stage(&state3, &state4);
#endif
#if NB_CORES > 5
#pragma omp task input (state4) output (state5)
				process_stage(&state4, &state5);
#endif
#if NB_CORES > 6
#pragma omp task input (state5) output (state6)
				process_stage(&state5, &state6);
#endif
#if NB_CORES > 7
#pragma omp task input (state6) output (state7)
				process_stage(&state6, &state7);
#endif
#if NB_CORES > 8
#pragma omp task input (state7) output (state8)
				process_stage(&state7, &state8);
#endif
#if NB_CORES > 9
#pragma omp task input (state8) output (state9)
				process_stage(&state8, &state9);
#endif
#if NB_CORES > 10
#pragma omp task input (state9) output (state10)
				process_stage(&state9, &state10);
#endif
#if NB_CORES > 11
#pragma omp task input (state10) output (state11)
				process_stage(&state10, &state11);
#endif

				/* Last stage: consumes the highest-numbered state. */
#if NB_CORES > 11
#pragma omp task input (state11)
				last_process_stage(&state11);
#elif NB_CORES > 10
#pragma omp task input (state10)
				last_process_stage(&state10);
#elif NB_CORES > 9
#pragma omp task input (state9)
				last_process_stage(&state9);
#elif NB_CORES > 8
#pragma omp task input (state8)
				last_process_stage(&state8);
#elif NB_CORES > 7
#pragma omp task input (state7)
				last_process_stage(&state7);
#elif NB_CORES > 6
#pragma omp task input (state6)
				last_process_stage(&state6);
#elif NB_CORES > 5
#pragma omp task input (state5)
				last_process_stage(&state5);
#elif NB_CORES > 4
#pragma omp task input (state4)
				last_process_stage(&state4);
#elif NB_CORES > 3
#pragma omp task input (state3)
				last_process_stage(&state3);
#elif NB_CORES > 2
#pragma omp task input (state2)
				last_process_stage(&state2);
#elif NB_CORES > 1
#pragma omp task input (state1)
				last_process_stage(&state1);
#else
				data_t state0 = rand_r(&seed);

				last_process_stage(&state0);
#endif
			}
		}
	}
	printf("Final result: %lu\n", result);
	return 0;
}

/*
 * Entry point: parse the options, run the pipeline, and report success or
 * failure through the process exit status.
 */
int main(int argc, char *argv[])
{
	if (analyse_options(argc, argv))
		return EXIT_FAILURE;
	if (compute_metrics())
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}