rt_benchs/pipepar/pipeline_template.c

368 lines
9.3 KiB
C

/*
* Copyright (C) 2012-2013 Thomas Preud'homme <thomas.preud-homme@lip6.fr>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <unistd.h>
#include <time.h>
/*
 * True when `n`, a value just returned by strtol(), is one of the two
 * saturation values (LONG_MIN / LONG_MAX) AND errno reports ERANGE.
 * errno should be cleared before the strtol() call so that a stale ERANGE is
 * not misread. Note that `n` is evaluated twice.
 */
#define STRTOL_OVERFLOW(n) ((((n) == LONG_MIN) || ((n) == LONG_MAX)) && \
                            (errno == ERANGE))
/*
 * STRIFY(x) turns x into a string literal after macro expansion of x;
 * STRIFY_VAL(x) stringifies its argument as written. The two-level form is
 * what allows STRIFY(SOME_MACRO) to yield the macro's value rather than its
 * name.
 */
#define STRIFY(var) STRIFY_VAL(var)
#define STRIFY_VAL(val) #val
/*
 * Number of cores to be used for the pipeline (one stage per core).
 * May be supplied by the build; otherwise defaults to 8 ("Bossa").
 * 12 is the value noted for the "Quad Hexa" configuration.
 */
#ifndef NB_CORES
#define NB_CORES 8
#endif

/* When defined, process_stage() is compiled without its timed busy-wait. */
#define NOSLEEP

/* Type of the values handed from one pipeline stage to the next. */
typedef unsigned long data_t;

/* Value stored by last_process_stage(); printed by compute_metrics(). */
data_t result = 0;

/* Duration of one stage, filled in by analyse_options(). */
struct timespec stage_time;

/* Number of packets pushed through the pipeline (option -p). */
long nb_packets = 100000;

/*
 * Total sequence length (option -s); each stage works on seq_len / NB_CORES
 * elements. 27720 is an alternative value left by the original author.
 */
long seq_len = 24;
/*
 * Run one pipeline stage.
 *
 * $E_{i+1} (data_{j+1}) = E_i (data_{j+1}) + E_{i+1} (data_j)$
 *
 * In code terms: each calling thread owns a private, zero-initialised buffer
 * of seq_len / NB_CORES elements that persists across calls. The incoming
 * value is added to element 0, the updated element 0 is added to element 1,
 * and so on; the updated last element is the stage's output.
 *
 * The buffer is sized from seq_len on the first call made by each thread and
 * is never freed or resized afterwards.
 *
 * prev_pipeline_stage: value produced by the previous stage (read only).
 * next_pipeline_stage: receives this stage's output.
 *
 * When the slice is empty (seq_len < NB_CORES) the input is forwarded
 * unchanged. Aborts the process if the per-thread buffer cannot be allocated.
 *
 * Unless NOSLEEP is defined, the call additionally spins until stage_time has
 * elapsed since entry.
 */
void process_stage(data_t *prev_pipeline_stage, data_t *next_pipeline_stage)
{
    static __thread data_t *prev_data = NULL;
    const long slice_len = seq_len / NB_CORES;
    data_t prev_process_step;
    long i;
#ifndef NOSLEEP
    struct timespec beginning, now, end;

    /* Compute the deadline, keeping tv_nsec normalised below one second. */
    clock_gettime(CLOCK_REALTIME, &beginning);
    end.tv_sec = beginning.tv_sec + stage_time.tv_sec;
    end.tv_nsec = beginning.tv_nsec + stage_time.tv_nsec;
    if (end.tv_nsec >= 1000000000)
    {
        end.tv_nsec -= 1000000000;
        end.tv_sec++;
    }
#endif

    prev_process_step = *prev_pipeline_stage;

    if (slice_len > 0)
    {
        if (prev_data == NULL)
        {
            /* calloc zero-fills and checks the count * size product. */
            prev_data = calloc((size_t)slice_len, sizeof(*prev_data));
            if (prev_data == NULL)
            {
                fprintf(stderr, "process_stage: out of memory\n");
                abort();
            }
        }

        for (i = 0; i < slice_len; i++)
        {
            prev_data[i] = prev_data[i] + prev_process_step;
            prev_process_step = prev_data[i];
        }
    }

    /*
     * prev_process_step now holds the updated last element, or the untouched
     * input when the slice is empty (no out-of-bounds prev_data[-1] read).
     */
    *next_pipeline_stage = prev_process_step;

#ifndef NOSLEEP
    /* Spin until the deadline; nanoseconds only matter within the same second. */
    do
    {
        clock_gettime(CLOCK_REALTIME, &now);
    } while ((now.tv_sec < end.tv_sec) ||
             ((now.tv_sec == end.tv_sec) && (now.tv_nsec < end.tv_nsec)));
#endif
}
/*
 * Final pipeline stage: runs process_stage() on the value coming from the
 * previous stage and publishes the outcome in the global `result`.
 *
 * prev_stage: value produced by the previous stage.
 */
void last_process_stage(data_t *prev_stage)
{
    data_t final_value;

    process_stage(prev_stage, &final_value);
    result = final_value;
}
/*
 * Parse the value of a numeric command-line option.
 *
 * opt:  the option as typed (used in the "missing value" message).
 * text: the argument following the option, or NULL if there is none.
 * what: human-readable name of the quantity, used in the error message.
 * min:  smallest accepted value.
 * out:  receives the parsed value; left untouched on failure.
 *
 * Returns 0 on success, -1 (after printing to stderr) when the value is
 * missing, not a complete base-10 integer, out of range for long, or < min.
 */
static int parse_long_option(const char *opt, const char *text,
                             const char *what, long min, long *out)
{
    char *endptr;
    long value;

    if (text == NULL)
    {
        fprintf(stderr, "Missing value for option: %s\n", opt);
        return -1;
    }

    errno = 0; /* strtol only sets errno on failure; clear stale values */
    value = strtol(text, &endptr, 10);
    if ((endptr == text) || (*endptr != '\0') || (errno == ERANGE) ||
        (value < min))
    {
        fprintf(stderr, "Invalid %s: %s\n", what, text);
        return -1;
    }

    *out = value;
    return 0;
}

/*
 * Parse the command line and derive the per-stage duration.
 *
 * Recognised options, each followed by a base-10 integer:
 *   -p N   number of packets (stored in nb_packets, must be >= 1)
 *   -s N   sequence length   (stored in seq_len, must be >= 1)
 *   -t N   total time in nanoseconds (must be >= 0; default 10000000000,
 *          which needs a long wider than 32 bits)
 * Arguments that do not start with '-' are ignored.
 *
 * On success stage_time is set to total_time / (nb_packets * NB_CORES),
 * split into seconds and nanoseconds, and 0 is returned. On any error a
 * message is printed to stderr and -1 is returned.
 */
int analyse_options(int argc, char *argv[])
{
    long stg_time, total_time = 10000000000;
    int i;

    for (i = 1; i < argc; i++)
    {
        const char *opt = argv[i];
        const char *value;
        int rc;

        if (opt[0] != '-')
        {
            continue;
        }

        /* Only single-letter options of the form "-x" are supported. */
        if ((!opt[1]) || opt[2])
        {
            fprintf(stderr, "Unsupported option: %s\n", opt);
            return -1;
        }

        /* NULL when the option is the last argument on the command line. */
        value = (i + 1 < argc) ? argv[i + 1] : NULL;

        switch (opt[1])
        {
        case 'p': /* Number of packets */
            rc = parse_long_option(opt, value, "number of packets", 1,
                                   &nb_packets);
            break;
        case 's': /* Sequence length */
            rc = parse_long_option(opt, value, "sequence length", 1,
                                   &seq_len);
            break;
        case 't': /* Total time in nanoseconds */
            rc = parse_long_option(opt, value, "total time", 0,
                                   &total_time);
            break;
        default:
            fprintf(stderr, "Unsupported option: %s\n", opt);
            return -1;
        }

        if (rc != 0)
        {
            return -1;
        }
        i++; /* skip the value that was just consumed */
    }

    /*
     * Same quotient as total_time / (nb_packets * NB_CORES) for these
     * non-negative operands, but without forming a product that can overflow.
     */
    stg_time = total_time / nb_packets / NB_CORES;
    stage_time.tv_sec = stg_time / 1000000000;
    stage_time.tv_nsec = stg_time % 1000000000;
    return 0;
}
/*
 * Push nb_packets values through an NB_CORES-stage pipeline and print the
 * value left in the global `result`.
 *
 * For every packet a value is drawn with rand_r() (fixed seed 42), handed to
 * process_stage() NB_CORES - 1 times through the intermediate variables
 * state1 .. state<NB_CORES-1>, and finally to last_process_stage(). With
 * NB_CORES == 1 the random value goes straight to last_process_stage().
 *
 * Each stage is wrapped in an `omp task` pragma carrying input/output
 * clauses. NOTE(review): these clauses are not standard OpenMP `depend`
 * syntax; presumably they are OmpSs-style dependency annotations -- confirm
 * against the toolchain used to build this file.
 *
 * Only NB_CORES values from 1 to 12 are supported (enforced by #error).
 *
 * Returns 0; there is no failure path.
 */
int compute_metrics(void)
{
    unsigned int seed = 42;
    long i; /* same type as nb_packets, so large -p values cannot overflow it */

    /* One intermediate variable per link between two consecutive stages. */
#if NB_CORES > 1
    data_t state1;
#if NB_CORES > 2
    data_t state2;
#if NB_CORES > 3
    data_t state3;
#if NB_CORES > 4
    data_t state4;
#if NB_CORES > 5
    data_t state5;
#if NB_CORES > 6
    data_t state6;
#if NB_CORES > 7
    data_t state7;
#if NB_CORES > 8
    data_t state8;
#if NB_CORES > 9
    data_t state9;
#if NB_CORES > 10
    data_t state10;
#if NB_CORES > 11
    data_t state11;
#endif /* NB_CORES > 11 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 1 */

    /*
     * The parallel pragma must list exactly the state variables that exist
     * for the configured NB_CORES, hence one variant per supported value.
     */
#if NB_CORES > 1
#if NB_CORES == 2
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, i)
#elif NB_CORES == 3
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, i)
#elif NB_CORES == 4
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, i)
#elif NB_CORES == 5
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, i)
#elif NB_CORES == 6
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, i)
#elif NB_CORES == 7
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, state6, i)
#elif NB_CORES == 8
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, state6, state7, i)
#elif NB_CORES == 9
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, state6, state7, state8, i)
#elif NB_CORES == 10
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, state6, state7, state8, state9, i)
#elif NB_CORES == 11
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, i)
#elif NB_CORES == 12
#pragma omp parallel default (none) \
    shared (nb_packets, seed) \
    private (state1, state2, state3, state4, state5, state6, state7, state8, state9, state10, state11, i)
#else
#error Only supporting NB_CORES <= 12
#endif
#endif
    {
#pragma omp single
        {
            for (i = 0; i < nb_packets; i++)
            {
                /* First stage draws the packet's value, then stages 2..N-1. */
#if NB_CORES > 1
#pragma omp task output (state1)
                {
                    data_t state0 = rand_r(&seed);
                    process_stage(&state0, &state1);
                }
#if NB_CORES > 2
#pragma omp task input (state1) output (state2)
                process_stage(&state1, &state2);
#if NB_CORES > 3
#pragma omp task input (state2) output (state3)
                process_stage(&state2, &state3);
#if NB_CORES > 4
#pragma omp task input (state3) output (state4)
                process_stage(&state3, &state4);
#if NB_CORES > 5
#pragma omp task input (state4) output (state5)
                process_stage(&state4, &state5);
#if NB_CORES > 6
#pragma omp task input (state5) output (state6)
                process_stage(&state5, &state6);
#if NB_CORES > 7
#pragma omp task input (state6) output (state7)
                process_stage(&state6, &state7);
#if NB_CORES > 8
#pragma omp task input (state7) output (state8)
                process_stage(&state7, &state8);
#if NB_CORES > 9
#pragma omp task input (state8) output (state9)
                process_stage(&state8, &state9);
#if NB_CORES > 10
#pragma omp task input (state9) output (state10)
                process_stage(&state9, &state10);
#if NB_CORES > 11
#pragma omp task input (state10) output (state11)
                process_stage(&state10, &state11);
#endif /* NB_CORES > 11 */
#endif /* NB_CORES > 10 */
#endif /* NB_CORES > 9 */
#endif /* NB_CORES > 8 */
#endif /* NB_CORES > 7 */
#endif /* NB_CORES > 6 */
#endif /* NB_CORES > 5 */
#endif /* NB_CORES > 4 */
#endif /* NB_CORES > 3 */
#endif /* NB_CORES > 2 */
#endif /* NB_CORES > 1 */

                /* Last stage: consumes the highest-numbered state variable. */
#if NB_CORES > 11
#pragma omp task input (state11)
                last_process_stage(&state11);
#elif NB_CORES > 10
#pragma omp task input (state10)
                last_process_stage(&state10);
#elif NB_CORES > 9
#pragma omp task input (state9)
                last_process_stage(&state9);
#elif NB_CORES > 8
#pragma omp task input (state8)
                last_process_stage(&state8);
#elif NB_CORES > 7
#pragma omp task input (state7)
                last_process_stage(&state7);
#elif NB_CORES > 6
#pragma omp task input (state6)
                last_process_stage(&state6);
#elif NB_CORES > 5
#pragma omp task input (state5)
                last_process_stage(&state5);
#elif NB_CORES > 4
#pragma omp task input (state4)
                last_process_stage(&state4);
#elif NB_CORES > 3
#pragma omp task input (state3)
                last_process_stage(&state3);
#elif NB_CORES > 2
#pragma omp task input (state2)
                last_process_stage(&state2);
#elif NB_CORES > 1
#pragma omp task input (state1)
                last_process_stage(&state1);
#else
                data_t state0 = rand_r(&seed);
                last_process_stage(&state0);
#endif
            }
        }
    }
    printf("Final result: %lu\n", result);
    return 0;
}
/*
 * Program entry point: parse the options, then run the pipeline.
 * Exits with EXIT_FAILURE as soon as either step reports a non-zero status,
 * and with EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    /* || short-circuits: the pipeline only runs if option parsing succeeded. */
    int failed = analyse_options(argc, argv) || compute_metrics();

    exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
}