368 lines
9.3 KiB
C
368 lines
9.3 KiB
C
/*
|
|
* Copyright (C) 2012-2013 Thomas Preud'homme <thomas.preud-homme@lip6.fr>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include <stdint.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
|
|
|
|
/*
 * True when a strtol() result `n` signals a range error: strtol() clamps the
 * value to LONG_MIN or LONG_MAX and sets errno to ERANGE. Callers should set
 * errno to 0 before calling strtol() so a stale ERANGE is not misread.
 * The argument is evaluated twice; do not pass expressions with side effects.
 */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
                            (errno == ERANGE))
|
|
|
|
/*
 * STRIFY(var) turns its argument into a string literal after macro-expanding
 * it; STRIFY_VAL(val) stringifies its argument as written. The two-level
 * form is needed because `#` suppresses expansion of its operand.
 */
#define STRIFY(var) STRIFY_VAL(var)
#define STRIFY_VAL(val) #val
|
|
|
|
/*
 * Number of cores to be used for the pipeline. This is also the number of
 * pipeline stages chained by compute_metrics(), which supports values from
 * 1 to 12. The default below applies only when NB_CORES has not already been
 * defined (e.g. on the compiler command line).
 */
#ifndef NB_CORES
#define NB_CORES 8 /* Bossa */
/* Alternative value: 12 (Quad Hexa). Presumably machine names -- confirm. */
#endif
|
|
|
|
/*
 * When defined, the clock_gettime() deadline computation and busy-wait in
 * process_stage() are compiled out, so stages run as fast as possible.
 */
#define NOSLEEP

/* Type of the values handed from one pipeline stage to the next. */
typedef unsigned long data_t;

/* Sequence length; each stage works on seq_len / NB_CORES elements (-s). */
long seq_len = 24/*27720*/;
/* Number of packets pushed through the pipeline by compute_metrics() (-p). */
long nb_packets = 100000;
/* Per-stage duration computed by analyse_options(); read by process_stage()
 * only when NOSLEEP is not defined. */
struct timespec stage_time;
/* Output of the last stage; written by last_process_stage() and printed by
 * compute_metrics(). */
data_t result = 0;
|
|
|
|
|
|
/* $E_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$ */
|
|
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
|
|
{
|
|
long i;
|
|
static __thread data_t *prev_data = NULL;
|
|
data_t prev_process_step;
|
|
#ifndef NOSLEEP
|
|
struct timespec beginning, now, end;
|
|
|
|
clock_gettime(CLOCK_REALTIME, &beginning);
|
|
end.tv_sec = beginning.tv_sec + stage_time.tv_sec;
|
|
if (beginning.tv_nsec + stage_time.tv_nsec < 1000000000)
|
|
end.tv_nsec = beginning.tv_nsec + stage_time.tv_nsec;
|
|
else
|
|
{
|
|
end.tv_nsec = beginning.tv_nsec + stage_time.tv_nsec - 1000000000;
|
|
end.tv_sec++;
|
|
}
|
|
#endif
|
|
if (prev_data == NULL)
|
|
{
|
|
prev_data = malloc(sizeof(*prev_data) * seq_len / NB_CORES);
|
|
memset(prev_data, 0, sizeof(*prev_data) * seq_len / NB_CORES);
|
|
}
|
|
prev_process_step = *prev_pipeline_stage;
|
|
for (i = 0; i < seq_len / NB_CORES; i++)
|
|
{
|
|
prev_data[i] = prev_data[i] + prev_process_step;
|
|
prev_process_step = prev_data[i];
|
|
}
|
|
*next_pipeline_stage = prev_data[i - 1];
|
|
#ifndef NOSLEEP
|
|
do
|
|
{
|
|
clock_gettime(CLOCK_REALTIME, &now);
|
|
} while ((now.tv_sec < end.tv_sec) || (now.tv_nsec < end.tv_nsec));
|
|
#endif
|
|
}
|
|
|
|
/*
 * Final pipeline stage: runs process_stage() on the value coming from the
 * previous stage and publishes its output in the global `result`.
 */
void last_process_stage(data_t *prev_stage)
{
    data_t final_value;

    process_stage(prev_stage, &final_value);
    result = final_value;
}
|
|
|
|
int analyse_options(int argc, char *argv[])
|
|
{
|
|
char **arg_p;
|
|
long stg_time, total_time = 10000000000;
|
|
|
|
argc--;
|
|
arg_p = argv;
|
|
while(arg_p++, argc--)
|
|
{
|
|
if ((*arg_p)[0] == '-')
|
|
{
|
|
if ((!(*arg_p)[1]) || (*arg_p)[2])
|
|
{
|
|
printf("Unsupported option: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
switch((*arg_p)[1])
|
|
{
|
|
char *endptr;
|
|
|
|
case 'p': // Number of packets
|
|
argc--, arg_p++;
|
|
nb_packets =
|
|
strtol(*arg_p, &endptr, 10);
|
|
if ((endptr == *arg_p) ||
|
|
(STRTOL_OVERFLOW(nb_packets)))
|
|
{
|
|
fprintf(stderr, "Invalid number of packets: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
case 's': // Sequence length
|
|
argc--, arg_p++;
|
|
seq_len =
|
|
strtol(*arg_p, &endptr, 10);
|
|
if ((endptr == *arg_p) ||
|
|
(STRTOL_OVERFLOW(seq_len)))
|
|
{
|
|
fprintf(stderr, "Invalid sequence length: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
case 't': // total time in usec
|
|
argc--, arg_p++;
|
|
total_time =
|
|
strtol(*arg_p, &endptr, 10);
|
|
if ((endptr == *arg_p) ||
|
|
(STRTOL_OVERFLOW(total_time)))
|
|
{
|
|
fprintf(stderr, "Invalid total time: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
fprintf(stderr, "Unsupported option: %s\n", *arg_p);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
stg_time = total_time / (nb_packets * NB_CORES);
|
|
stage_time.tv_sec = stg_time / 1000000000;
|
|
stage_time.tv_nsec = stg_time % 1000000000;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int compute_metrics(void)
|
|
{
|
|
unsigned int seed = 42;
|
|
int i;
|
|
#if NB_CORES > 1
|
|
data_t state1;
|
|
#if NB_CORES > 2
|
|
data_t state2;
|
|
#if NB_CORES > 3
|
|
data_t state3;
|
|
#if NB_CORES > 4
|
|
data_t state4;
|
|
#if NB_CORES > 5
|
|
data_t state5;
|
|
#if NB_CORES > 6
|
|
data_t state6;
|
|
#if NB_CORES > 7
|
|
data_t state7;
|
|
#if NB_CORES > 8
|
|
data_t state8;
|
|
#if NB_CORES > 9
|
|
data_t state9;
|
|
#if NB_CORES > 10
|
|
data_t state10;
|
|
#if NB_CORES > 11
|
|
data_t state11;
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
#endif
|
|
|
|
#if NB_CORES > 1
|
|
#if NB_CORES == 2
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, i)
|
|
#elif NB_CORES == 3
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, i)
|
|
#elif NB_CORES == 4
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, i)
|
|
#elif NB_CORES == 5
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, i)
|
|
#elif NB_CORES == 6
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, i)
|
|
#elif NB_CORES == 7
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, state6, i)
|
|
#elif NB_CORES == 8
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, state6, state7, i)
|
|
#elif NB_CORES == 9
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, state6, state7, state8, i)
|
|
#elif NB_CORES == 10
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, state6, state7, state8, state9, i)
|
|
#elif NB_CORES == 11
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, i)
|
|
#elif NB_CORES == 12
|
|
#pragma omp parallel default (none) \
|
|
shared (nb_packets, seed) \
|
|
private (state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, state11, i)
|
|
#else
|
|
#error Only supporting NB_CORES <= 12
|
|
#endif
|
|
#endif
|
|
{
|
|
#pragma omp single
|
|
{
|
|
//unsigned int seed = 42;
|
|
|
|
for (i = 0; i < nb_packets; i++)
|
|
{
|
|
#if NB_CORES > 1
|
|
#pragma omp task output (state1)
|
|
{
|
|
data_t state0 = rand_r(&seed);
|
|
process_stage(&state0, &state1);
|
|
}
|
|
#if NB_CORES > 2
|
|
#pragma omp task input (state1) output (state2)
|
|
process_stage(&state1, &state2);
|
|
#if NB_CORES > 3
|
|
#pragma omp task input (state2) output (state3)
|
|
process_stage(&state2, &state3);
|
|
#if NB_CORES > 4
|
|
#pragma omp task input (state3) output (state4)
|
|
process_stage(&state3, &state4);
|
|
#if NB_CORES > 5
|
|
#pragma omp task input (state4) output (state5)
|
|
process_stage(&state4, &state5);
|
|
#if NB_CORES > 6
|
|
#pragma omp task input (state5) output (state6)
|
|
process_stage(&state5, &state6);
|
|
#if NB_CORES > 7
|
|
#pragma omp task input (state6) output (state7)
|
|
process_stage(&state6, &state7);
|
|
#if NB_CORES > 8
|
|
#pragma omp task input (state7) output (state8)
|
|
process_stage(&state7, &state8);
|
|
#if NB_CORES > 9
|
|
#pragma omp task input (state8) output (state9)
|
|
process_stage(&state8, &state9);
|
|
#if NB_CORES > 10
|
|
#pragma omp task input (state9) output (state10)
|
|
process_stage(&state9, &state10);
|
|
#if NB_CORES > 11
|
|
#pragma omp task input (state10) output (state11)
|
|
process_stage(&state10, &state11);
|
|
#endif /* NB_CORES > 1 */
|
|
#endif /* NB_CORES > 2 */
|
|
#endif /* NB_CORES > 3 */
|
|
#endif /* NB_CORES > 4 */
|
|
#endif /* NB_CORES > 5 */
|
|
#endif /* NB_CORES > 6 */
|
|
#endif /* NB_CORES > 7 */
|
|
#endif /* NB_CORES > 8 */
|
|
#endif /* NB_CORES > 9 */
|
|
#endif /* NB_CORES > 10 */
|
|
#endif /* NB_CORES > 11 */
|
|
|
|
#if NB_CORES > 11
|
|
#pragma omp task input (state11)
|
|
last_process_stage(&state11);
|
|
#elif NB_CORES > 10
|
|
#pragma omp task input (state10)
|
|
last_process_stage(&state10);
|
|
#elif NB_CORES > 9
|
|
#pragma omp task input (state9)
|
|
last_process_stage(&state9);
|
|
#elif NB_CORES > 8
|
|
#pragma omp task input (state8)
|
|
last_process_stage(&state8);
|
|
#elif NB_CORES > 7
|
|
#pragma omp task input (state7)
|
|
last_process_stage(&state7);
|
|
#elif NB_CORES > 6
|
|
#pragma omp task input (state6)
|
|
last_process_stage(&state6);
|
|
#elif NB_CORES > 5
|
|
#pragma omp task input (state5)
|
|
last_process_stage(&state5);
|
|
#elif NB_CORES > 4
|
|
#pragma omp task input (state4)
|
|
last_process_stage(&state4);
|
|
#elif NB_CORES > 3
|
|
#pragma omp task input (state3)
|
|
last_process_stage(&state3);
|
|
#elif NB_CORES > 2
|
|
#pragma omp task input (state2)
|
|
last_process_stage(&state2);
|
|
#elif NB_CORES > 1
|
|
#pragma omp task input (state1)
|
|
last_process_stage(&state1);
|
|
#else
|
|
data_t state0 = rand_r(&seed);
|
|
last_process_stage(&state0);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
printf("Final result: %lu\n", result);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Program entry point: parse the command line, then run the pipeline.
 * Exits with EXIT_FAILURE if either step reports an error, EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, char *argv[])
{
    /* compute_metrics() is only reached when option parsing succeeded. */
    if ((analyse_options(argc, argv) != 0) || (compute_metrics() != 0))
        return EXIT_FAILURE;

    return EXIT_SUCCESS;
}
|