[commtech] Provide 64 cache lines version of algos
* Provide, for BatchQueue, CSQ, FastForward, MCRingBuffer and GOMP stream, a version using 64 cache lines in total across all buffers. * Rename the common headers from *_common_comm.h to *_common.h so that they are not mistaken for communication techniques in their own right
This commit is contained in:
parent
c37c100355
commit
a80decaef4
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (512 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (64 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (8 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (1024 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (128 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (1 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (16 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (2048 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (2 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (256 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (32 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define BUF_SIZE (4 * CACHE_LINE_SIZE)
|
||||
#include <batch_queue_common_comm.h>
|
||||
#include <batch_queue_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3,6 +3,6 @@
|
|||
|
||||
#define SLOTS 2 /* Value used in the article, section V.A */
|
||||
#define BUF_SIZE (2 * CACHE_LINE_SIZE)
|
||||
#include <csq_common_comm.h>
|
||||
#include <csq_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define SLOTS 2 /* Value used in the article, section V.A */
|
||||
#include <csq_common_comm.h>
|
||||
#include <csq_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3,6 +3,6 @@
|
|||
|
||||
#define SLOTS 64 /* Value used in the article, section V.A */
|
||||
#define BUF_SIZE CACHE_LINE_SIZE
|
||||
#include <csq_common_comm.h>
|
||||
#include <csq_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define SLOTS 64 /* Value used in the article, section V.A */
|
||||
#include <csq_common_comm.h>
|
||||
#include <csq_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define SHARED_SPACE_SIZE (64 * BUF_SIZE)
|
||||
#include <fast_forward_common.h>
|
||||
|
||||
#endif
|
|
@ -1,52 +1,6 @@
|
|||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
/* Non standard include */
|
||||
#include <commtech.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define SHARED_SPACE_SIZE (16 * BUF_SIZE)
|
||||
#define SHARED_SPACE_VOIDPTR (SHARED_SPACE_SIZE / sizeof(void *))
|
||||
|
||||
#ifndef BUF_SIZE
|
||||
#define BUF_SIZE CACHE_LINE_SIZE
|
||||
#endif
|
||||
|
||||
#define DANGER (2 * BUF_SIZE / sizeof(void *))
|
||||
#define GOOD (6 * BUF_SIZE / sizeof(void *))
|
||||
#define ADJUST_FREQ 64
|
||||
|
||||
struct channel
|
||||
{
|
||||
void * volatile *shared_space;
|
||||
unsigned int head __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
unsigned int tail __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
};
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
extern int adjust_slip(struct channel *channel);
|
||||
|
||||
static inline void send(struct channel *channel, void **addr)
|
||||
{
|
||||
static __thread unsigned int nb_iter = 0;
|
||||
|
||||
assert(addr != NULL);
|
||||
if (nb_iter == ADJUST_FREQ)
|
||||
{
|
||||
adjust_slip(channel);
|
||||
nb_iter = 0;
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
if (channel->shared_space[channel->head] != NULL)
|
||||
continue;
|
||||
channel->shared_space[channel->head] = addr;
|
||||
channel->head = (channel->head + 1) % SHARED_SPACE_VOIDPTR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__END_DECLS
|
||||
#include <fast_forward_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
#ifndef _FAST_FORWARD_COMMON_H_
|
||||
#define _FAST_FORWARD_COMMON_H_ 1
|
||||
|
||||
/* Non standard include */
|
||||
#include <commtech.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifndef SHARED_SPACE_SIZE
|
||||
#define SHARED_SPACE_SIZE (16 * BUF_SIZE)
|
||||
#endif
|
||||
#define SHARED_SPACE_VOIDPTR (SHARED_SPACE_SIZE / sizeof(void *))
|
||||
|
||||
#ifndef BUF_SIZE
|
||||
#define BUF_SIZE CACHE_LINE_SIZE
|
||||
#endif
|
||||
|
||||
#define DANGER (2 * BUF_SIZE / sizeof(void *))
|
||||
#define GOOD (6 * BUF_SIZE / sizeof(void *))
|
||||
#define ADJUST_FREQ 64
|
||||
|
||||
struct channel
|
||||
{
|
||||
void * volatile *shared_space;
|
||||
unsigned int head __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
unsigned int tail __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
};
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
extern int adjust_slip(struct channel *channel);
|
||||
|
||||
static inline void send(struct channel *channel, void **addr)
|
||||
{
|
||||
static __thread unsigned int nb_iter = 0;
|
||||
|
||||
assert(addr != NULL);
|
||||
if (nb_iter == ADJUST_FREQ)
|
||||
{
|
||||
adjust_slip(channel);
|
||||
nb_iter = 0;
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
if (channel->shared_space[channel->head] != NULL)
|
||||
continue;
|
||||
channel->shared_space[channel->head] = addr;
|
||||
channel->head = (channel->head + 1) % SHARED_SPACE_VOIDPTR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif
|
|
@ -0,0 +1,8 @@
|
|||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
/* BUF_SIZE is multiplied by 2 to create the buffer */
|
||||
#define BUF_SIZE (32 * CACHE_LINE_SIZE)
|
||||
#include <gomp_stream_common.h>
|
||||
|
||||
#endif
|
|
@ -1,329 +1,6 @@
|
|||
/* Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Antoniu Pop <antoniu.pop@gmail.com>.
|
||||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
This file is part of the GNU OpenMP Library (libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with libgomp; see the file COPYING.LIB. If not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/* As a special exception, if you link this library with other files, some
|
||||
of which are compiled with GCC, to produce an executable, this library
|
||||
does not by itself cause the resulting executable to be covered by the
|
||||
GNU General Public License. This exception does not however invalidate
|
||||
any other reasons why the executable file might be covered by the GNU
|
||||
General Public License. */
|
||||
|
||||
/* This implements the stream communication layer for libGOMP. */
|
||||
|
||||
#ifndef GOMP_STREAM_H
|
||||
#define GOMP_STREAM_H 1
|
||||
#define _CSQ_COMMON_COMM_H_ 1
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
/* Define the type and maximum value for the indices used within the
|
||||
stream. The indices are strictly monotonically increasing
|
||||
integers, so we need a type that does not wrap around too fast. An
|
||||
implementation of the wrap-around is necessary for systems where
|
||||
this poses a problem. */
|
||||
/* typedef unsigned long long int gomp_stream_index_t; */
|
||||
#define GOMP_STREAM_MAX_INDEX ULLONG_MAX
|
||||
|
||||
/*#define LAZY_SYNCH
|
||||
#define GUARDED_WAKE*/
|
||||
|
||||
/*
|
||||
typedef enum {
|
||||
GOMP_STREAM_INITIALIZED = 1,
|
||||
GOMP_STREAM_ALLOCATED = 2,
|
||||
GOMP_STREAM_PRODUCED = 4,
|
||||
GOMP_STREAM_CONSUMED = 8,
|
||||
GOMP_STREAM_ZOMBIE = 16,
|
||||
GOMP_STREAM_STALL = 32
|
||||
} gomp_stream_state;
|
||||
*/
|
||||
|
||||
// Things added by RoboTux
|
||||
#include <commtech.h>
|
||||
|
||||
#ifndef BUF_SIZE
|
||||
#define BUF_SIZE (64 * CACHE_LINE_SIZE)
|
||||
#endif
|
||||
#define BURST_SIZE 128
|
||||
typedef enum
|
||||
{
|
||||
false,
|
||||
true
|
||||
} bool;
|
||||
|
||||
typedef pthread_mutex_t gomp_mutex_t;
|
||||
|
||||
struct channel
|
||||
{
|
||||
void *stream;
|
||||
void *task;
|
||||
void *rview;
|
||||
void *wview;
|
||||
void **read_ptr __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
void **read_end_ptr;
|
||||
unsigned long long read_act;
|
||||
void **write_ptr __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
void **write_end_ptr;
|
||||
unsigned long long write_act;
|
||||
};
|
||||
|
||||
void *
|
||||
GOMP_stream_stall (void *, const unsigned long long,
|
||||
const unsigned long long);
|
||||
|
||||
void
|
||||
GOMP_stream_commit (void *, const unsigned long long);
|
||||
|
||||
static inline void send(struct channel *channel, void **addr)
|
||||
{
|
||||
if (channel->write_ptr == channel->write_end_ptr)
|
||||
{
|
||||
channel->write_ptr = (void **) GOMP_stream_stall(channel->wview,
|
||||
channel->write_act, channel->write_act + 1);
|
||||
channel->write_act++;
|
||||
channel->write_end_ptr = channel->write_ptr + BURST_SIZE / sizeof(void *);
|
||||
}
|
||||
*channel->write_ptr++ = addr;
|
||||
if (channel->write_ptr == channel->write_end_ptr)
|
||||
GOMP_stream_commit(channel->wview, channel->write_act);
|
||||
}
|
||||
// End of things added by RoboTux
|
||||
|
||||
typedef enum
|
||||
{
|
||||
READ_VIEW,
|
||||
WRITE_VIEW
|
||||
} gomp_stream_view_type_t;
|
||||
|
||||
struct gomp_stream;
|
||||
struct gomp_stream_task;
|
||||
|
||||
|
||||
/* GOMP_STREAM_VIEW data structure. Producer and consumer tasks
|
||||
connect to a GOMP_STREAM using views. */
|
||||
|
||||
typedef struct gomp_stream_view
|
||||
{
|
||||
/* The stream accessed through this view. WARNING: this field needs
|
||||
to be first. */
|
||||
struct gomp_stream *stream;
|
||||
|
||||
/* Size in bytes of the burst associated to this view. Later this
|
||||
may become a stream. */
|
||||
size_t view_size;
|
||||
size_t burst_size;
|
||||
size_t pxxk_size;
|
||||
|
||||
unsigned long long min_update;
|
||||
unsigned long long max_update;
|
||||
unsigned long long nb_updates;
|
||||
unsigned long long sum_updates;
|
||||
unsigned long long min_commit;
|
||||
unsigned long long max_commit;
|
||||
unsigned long long nb_commits;
|
||||
unsigned long long sum_commits;
|
||||
/* The alignment directives are needed to ensure these
|
||||
high-potential false-sharing fields are on their own cache
|
||||
lines. */
|
||||
/* Lower and upper bounds accessible in the stream through this
|
||||
view. */
|
||||
unsigned long long lower_index __attribute__((aligned (64)));
|
||||
unsigned long long upper_index __attribute__((aligned (64)));
|
||||
|
||||
/* The task using this view. */
|
||||
struct gomp_stream_task *task __attribute__((aligned (64)));
|
||||
|
||||
/* Setting this flag means the process accessing the stream through
|
||||
this view relinquishes his access to the stream (generally when
|
||||
terminating). */
|
||||
bool end_p;
|
||||
|
||||
/* Type of this view (read or write). */
|
||||
gomp_stream_view_type_t type;
|
||||
|
||||
/* In order to avoid excessive accesses to the global minimum index
|
||||
(released or consumed), which strongly impacts cache traffic, use
|
||||
this duplicated field for an additional level of caching and only
|
||||
update when needed. */
|
||||
unsigned long long local_min_value;
|
||||
|
||||
} gomp_stream_view_t, *gomp_stream_view_p;
|
||||
|
||||
/* List of GOMP_STREAM_VIEWs. As this list is only modified in the
|
||||
initialization phase and we never remove items, we'll use an
|
||||
array. */
|
||||
|
||||
typedef struct gomp_stream_view_list
|
||||
{
|
||||
gomp_stream_view_p *views;
|
||||
int nr_views;
|
||||
int size;
|
||||
|
||||
/* Enforce atomic connection of the views in this list. */
|
||||
gomp_mutex_t connect_view_mutex;
|
||||
|
||||
} gomp_stream_view_list_t, *gomp_stream_view_list_p;
|
||||
|
||||
|
||||
/* GOMP_STREAM_VIEW_HANDLE data structure. This allows the tasks
|
||||
interacting through the GOMP_STREAM to which this handle is
|
||||
attached to keep updated information global to all similar views on
|
||||
this stream. */
|
||||
|
||||
typedef struct gomp_stream_view_handle
|
||||
{
|
||||
/* The alignment directives are needed to ensure these
|
||||
high-potential false-sharing fields are on their own cache
|
||||
lines. */
|
||||
/* Latest computed value of the min released index and max acquired
|
||||
index values across views. */
|
||||
volatile unsigned long long current_min __attribute__((aligned (64)));
|
||||
volatile unsigned long long current_max __attribute__((aligned (64)));
|
||||
|
||||
/* Bookkeeping for view connections. */
|
||||
gomp_stream_view_list_t view_list __attribute__((aligned (64)));
|
||||
int nr_expected_views;
|
||||
int nr_registered_views;
|
||||
int nr_unregistered_views;
|
||||
|
||||
} gomp_stream_view_handle_t, *gomp_stream_view_handle_p;
|
||||
|
||||
|
||||
/* GOMP_STREAM data structure. */
|
||||
|
||||
typedef struct gomp_stream
|
||||
{
|
||||
/* WARNING: the first two fields (BUFFER and BUFFER_MASK) need to
|
||||
remain in their respective positions. */
|
||||
|
||||
/* The pointer to the buffer, as well as the subsequent buffer
|
||||
description, should be on a single mostly read cache line. The
|
||||
EOS_P flag is only set at the very end of the use of this
|
||||
stream. */
|
||||
/* Circular buffer containing the data communicated through this
|
||||
stream. */
|
||||
char *buffer __attribute__((aligned (64)));
|
||||
|
||||
/* Size of the buffer and the bitmask used for modulo computation
|
||||
for the wrap-around. The size is expressed in basic elements for
|
||||
this stream. The size in bytes of the buffer is
|
||||
BUFFER_SIZE * ELEMENT_SIZE. */
|
||||
unsigned long long buffer_size;
|
||||
unsigned long long buffer_mask;
|
||||
unsigned long long pre_shift;
|
||||
|
||||
/* True once all the tasks that should be expected to connect to
|
||||
this stream have been declared. */
|
||||
bool expected_ready_p;
|
||||
|
||||
/* True once all the tasks expected to connect to this stream have
|
||||
done so. */
|
||||
bool connected_p;
|
||||
|
||||
/* End of stream: true when all producers have finished committing
|
||||
all the data and are terminating. */
|
||||
bool eos_p;
|
||||
|
||||
/* Handles for read and write views on this stream. */
|
||||
gomp_stream_view_handle_t read_views;
|
||||
gomp_stream_view_handle_t write_views;
|
||||
|
||||
/* Barrier used both for waiting for all views to connect to the
|
||||
stream and to find the last view disconnecting (and who therefore
|
||||
frees this stream). */
|
||||
/*gomp_barrier_t view_handling_barrier;*/
|
||||
|
||||
/* Counter of the number of total unregistered views, both read and
|
||||
write, used to determine the last task disconnecting its view.
|
||||
The last task will also free the data structures. */
|
||||
int unregistered_views;
|
||||
|
||||
#ifndef HAVE_SYNC_BUILTINS
|
||||
/* We may need a lock for atomicity if no atomic operations are
|
||||
available. */
|
||||
gomp_mutex_t stream_mutex;
|
||||
#endif
|
||||
} gomp_stream_t, *gomp_stream_p;
|
||||
|
||||
|
||||
/* GOMP_STREAM_TASK data structure. Runtime node in the task
|
||||
graph. */
|
||||
|
||||
typedef struct gomp_stream_task
|
||||
{
|
||||
/* Lists of views on streams used by this task. */
|
||||
gomp_stream_view_list_t read_view_list;
|
||||
gomp_stream_view_list_t write_view_list;
|
||||
|
||||
/* The following are used directly in the generated code and should
|
||||
only be read here. A memory fence is guaranteed before the
|
||||
termination flag is set to true. */
|
||||
|
||||
/* Number of activations allowed for the task. */
|
||||
volatile unsigned long long activation_counter;
|
||||
volatile unsigned long long first_unassigned_activation_counter;
|
||||
|
||||
int num_instances;
|
||||
|
||||
/* True only when the activation counter has reached the maximum
|
||||
number of activations allowed for this task. */
|
||||
volatile bool termination_flag;
|
||||
|
||||
} gomp_stream_task_t, *gomp_stream_task_p;
|
||||
|
||||
|
||||
#if 0
|
||||
/* GOMP_STREAM_CONTROL_STREAM data structure. Implements a simple
|
||||
if-conversion analog that allows a non data-driven task to conform
|
||||
to its original control dependences. */
|
||||
|
||||
typedef struct gomp_stream_control_stream
|
||||
{
|
||||
/* In all cases where the streams bypass control (i.e., for
|
||||
sequential control flow only), an activation counter is
|
||||
sufficient to carry the control flow. */
|
||||
|
||||
/* Local counter of the number of times a task has been activated. */
|
||||
unsigned long long activation_counter __attribute__((aligned (64)));
|
||||
unsigned long long local_enabled_activations;
|
||||
|
||||
/* Number of times this task is allowed to activate. */
|
||||
unsigned long long enabled_activations __attribute__((aligned (64)));
|
||||
|
||||
/* When a task's inputs or outputs cross a parallel control flow
|
||||
boundary (i.e., worksharing construct), their activation pattern
|
||||
can be sparse wrt. the actual stream of data that they share with
|
||||
their sibling tasks. We use activation ranges as an optimization
|
||||
to streaming the activation indexes themselves, but the two
|
||||
options are equivalent. These streams are inherently 1-to-1, so
|
||||
a simpler implementation of the synchronization should be used in
|
||||
this case.*/
|
||||
/* gomp_stream_p activation_range_stream; */
|
||||
|
||||
/* End of stream: true when all producers have finished committing
|
||||
all the data and are terminating. */
|
||||
bool eos_p;
|
||||
|
||||
} gomp_stream_control_stream_t, *gomp_stream_control_stream_p;
|
||||
#include <gomp_stream_common.h>
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* GOMP_STREAM_H */
|
||||
|
|
|
@ -0,0 +1,329 @@
|
|||
/* Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Antoniu Pop <antoniu.pop@gmail.com>.
|
||||
|
||||
This file is part of the GNU OpenMP Library (libgomp).
|
||||
|
||||
Libgomp is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with libgomp; see the file COPYING.LIB. If not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/* As a special exception, if you link this library with other files, some
|
||||
of which are compiled with GCC, to produce an executable, this library
|
||||
does not by itself cause the resulting executable to be covered by the
|
||||
GNU General Public License. This exception does not however invalidate
|
||||
any other reasons why the executable file might be covered by the GNU
|
||||
General Public License. */
|
||||
|
||||
/* This implements the stream communication layer for libGOMP. */
|
||||
|
||||
#ifndef GOMP_STREAM_H
|
||||
#define GOMP_STREAM_H 1
|
||||
#define GOMP_STREAM_COMMON_H_ 1
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
/* Define the type and maximum value for the indices used within the
|
||||
stream. The indices are strictly monotonically increasing
|
||||
integers, so we need a type that does not wrap around too fast. An
|
||||
implementation of the wrap-around is necessary for systems where
|
||||
this poses a problem. */
|
||||
/* typedef unsigned long long int gomp_stream_index_t; */
|
||||
#define GOMP_STREAM_MAX_INDEX ULLONG_MAX
|
||||
|
||||
/*#define LAZY_SYNCH
|
||||
#define GUARDED_WAKE*/
|
||||
|
||||
/*
|
||||
typedef enum {
|
||||
GOMP_STREAM_INITIALIZED = 1,
|
||||
GOMP_STREAM_ALLOCATED = 2,
|
||||
GOMP_STREAM_PRODUCED = 4,
|
||||
GOMP_STREAM_CONSUMED = 8,
|
||||
GOMP_STREAM_ZOMBIE = 16,
|
||||
GOMP_STREAM_STALL = 32
|
||||
} gomp_stream_state;
|
||||
*/
|
||||
|
||||
// Things added by RoboTux
|
||||
#include <commtech.h>
|
||||
|
||||
#ifndef BUF_SIZE
|
||||
#define BUF_SIZE (64 * CACHE_LINE_SIZE)
|
||||
#endif
|
||||
#define BURST_SIZE 128
|
||||
typedef enum
|
||||
{
|
||||
false,
|
||||
true
|
||||
} bool;
|
||||
|
||||
typedef pthread_mutex_t gomp_mutex_t;
|
||||
|
||||
struct channel
|
||||
{
|
||||
void *stream;
|
||||
void *task;
|
||||
void *rview;
|
||||
void *wview;
|
||||
void **read_ptr __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
void **read_end_ptr;
|
||||
unsigned long long read_act;
|
||||
void **write_ptr __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
void **write_end_ptr;
|
||||
unsigned long long write_act;
|
||||
};
|
||||
|
||||
void *
|
||||
GOMP_stream_stall (void *, const unsigned long long,
|
||||
const unsigned long long);
|
||||
|
||||
void
|
||||
GOMP_stream_commit (void *, const unsigned long long);
|
||||
|
||||
static inline void send(struct channel *channel, void **addr)
|
||||
{
|
||||
if (channel->write_ptr == channel->write_end_ptr)
|
||||
{
|
||||
channel->write_ptr = (void **) GOMP_stream_stall(channel->wview,
|
||||
channel->write_act, channel->write_act + 1);
|
||||
channel->write_act++;
|
||||
channel->write_end_ptr = channel->write_ptr + BURST_SIZE / sizeof(void *);
|
||||
}
|
||||
*channel->write_ptr++ = addr;
|
||||
if (channel->write_ptr == channel->write_end_ptr)
|
||||
GOMP_stream_commit(channel->wview, channel->write_act);
|
||||
}
|
||||
// End of things added by RoboTux
|
||||
|
||||
typedef enum
|
||||
{
|
||||
READ_VIEW,
|
||||
WRITE_VIEW
|
||||
} gomp_stream_view_type_t;
|
||||
|
||||
struct gomp_stream;
|
||||
struct gomp_stream_task;
|
||||
|
||||
|
||||
/* GOMP_STREAM_VIEW data structure. Producer and consumer tasks
|
||||
connect to a GOMP_STREAM using views. */
|
||||
|
||||
typedef struct gomp_stream_view
|
||||
{
|
||||
/* The stream accessed through this view. WARNING: this field needs
|
||||
to be first. */
|
||||
struct gomp_stream *stream;
|
||||
|
||||
/* Size in bytes of the burst associated to this view. Later this
|
||||
may become a stream. */
|
||||
size_t view_size;
|
||||
size_t burst_size;
|
||||
size_t pxxk_size;
|
||||
|
||||
unsigned long long min_update;
|
||||
unsigned long long max_update;
|
||||
unsigned long long nb_updates;
|
||||
unsigned long long sum_updates;
|
||||
unsigned long long min_commit;
|
||||
unsigned long long max_commit;
|
||||
unsigned long long nb_commits;
|
||||
unsigned long long sum_commits;
|
||||
/* The alignment directives are needed to ensure these
|
||||
high-potential false-sharing fields are on their own cache
|
||||
lines. */
|
||||
/* Lower and upper bounds accessible in the stream through this
|
||||
view. */
|
||||
unsigned long long lower_index __attribute__((aligned (64)));
|
||||
unsigned long long upper_index __attribute__((aligned (64)));
|
||||
|
||||
/* The task using this view. */
|
||||
struct gomp_stream_task *task __attribute__((aligned (64)));
|
||||
|
||||
/* Setting this flag means the process accessing the stream through
|
||||
this view relinquishes his access to the stream (generally when
|
||||
terminating). */
|
||||
bool end_p;
|
||||
|
||||
/* Type of this view (read or write). */
|
||||
gomp_stream_view_type_t type;
|
||||
|
||||
/* In order to avoid excessive accesses to the global minimum index
|
||||
(released or consumed), which strongly impacts cache traffic, use
|
||||
this duplicated field for an additional level of caching and only
|
||||
update when needed. */
|
||||
unsigned long long local_min_value;
|
||||
|
||||
} gomp_stream_view_t, *gomp_stream_view_p;
|
||||
|
||||
/* List of GOMP_STREAM_VIEWs. As this list is only modified in the
|
||||
initialization phase and we never remove items, we'll use an
|
||||
array. */
|
||||
|
||||
typedef struct gomp_stream_view_list
|
||||
{
|
||||
gomp_stream_view_p *views;
|
||||
int nr_views;
|
||||
int size;
|
||||
|
||||
/* Enforce atomic connection of the views in this list. */
|
||||
gomp_mutex_t connect_view_mutex;
|
||||
|
||||
} gomp_stream_view_list_t, *gomp_stream_view_list_p;
|
||||
|
||||
|
||||
/* GOMP_STREAM_VIEW_HANDLE data structure. This allows the tasks
|
||||
interacting through the GOMP_STREAM to which this handle is
|
||||
attached to keep updated information global to all similar views on
|
||||
this stream. */
|
||||
|
||||
typedef struct gomp_stream_view_handle
|
||||
{
|
||||
/* The alignment directives are needed to ensure these
|
||||
high-potential false-sharing fields are on their own cache
|
||||
lines. */
|
||||
/* Latest computed value of the min released index and max acquired
|
||||
index values across views. */
|
||||
volatile unsigned long long current_min __attribute__((aligned (64)));
|
||||
volatile unsigned long long current_max __attribute__((aligned (64)));
|
||||
|
||||
/* Bookkeeping for view connections. */
|
||||
gomp_stream_view_list_t view_list __attribute__((aligned (64)));
|
||||
int nr_expected_views;
|
||||
int nr_registered_views;
|
||||
int nr_unregistered_views;
|
||||
|
||||
} gomp_stream_view_handle_t, *gomp_stream_view_handle_p;
|
||||
|
||||
|
||||
/* GOMP_STREAM data structure. */
|
||||
|
||||
typedef struct gomp_stream
|
||||
{
|
||||
/* WARNING: the first two fields (BUFFER and BUFFER_MASK) need to
|
||||
remain in their respective positions. */
|
||||
|
||||
/* The pointer to the buffer, as well as the subsequent buffer
|
||||
description, should be on a single mostly read cache line. The
|
||||
EOS_P flag is only set at the very end of the use of this
|
||||
stream. */
|
||||
/* Circular buffer containing the data communicated through this
|
||||
stream. */
|
||||
char *buffer __attribute__((aligned (64)));
|
||||
|
||||
/* Size of the buffer and the bitmask used for modulo computation
|
||||
for the wrap-around. The size is expressed in basic elements for
|
||||
this stream. The size in bytes of the buffer is
|
||||
BUFFER_SIZE * ELEMENT_SIZE. */
|
||||
unsigned long long buffer_size;
|
||||
unsigned long long buffer_mask;
|
||||
unsigned long long pre_shift;
|
||||
|
||||
/* True once all the tasks that should be expected to connect to
|
||||
this stream have been declared. */
|
||||
bool expected_ready_p;
|
||||
|
||||
/* True once all the tasks expected to connect to this stream have
|
||||
done so. */
|
||||
bool connected_p;
|
||||
|
||||
/* End of stream: true when all producers have finished committing
|
||||
all the data and are terminating. */
|
||||
bool eos_p;
|
||||
|
||||
/* Handles for read and write views on this stream. */
|
||||
gomp_stream_view_handle_t read_views;
|
||||
gomp_stream_view_handle_t write_views;
|
||||
|
||||
/* Barrier used both for waiting for all views to connect to the
|
||||
stream and to find the last view disconnecting (and who therefore
|
||||
frees this stream). */
|
||||
/*gomp_barrier_t view_handling_barrier;*/
|
||||
|
||||
/* Counter of the number of total unregistered views, both read and
|
||||
write, used to determine the last task disconnecting its view.
|
||||
The last task will also free the data structures. */
|
||||
int unregistered_views;
|
||||
|
||||
#ifndef HAVE_SYNC_BUILTINS
|
||||
/* We may need a lock for atomicity if no atomic operations are
|
||||
available. */
|
||||
gomp_mutex_t stream_mutex;
|
||||
#endif
|
||||
} gomp_stream_t, *gomp_stream_p;
|
||||
|
||||
|
||||
/* GOMP_STREAM_TASK data structure. Runtime node in the task
|
||||
graph. */
|
||||
|
||||
typedef struct gomp_stream_task
|
||||
{
|
||||
/* Lists of views on streams used by this task. */
|
||||
gomp_stream_view_list_t read_view_list;
|
||||
gomp_stream_view_list_t write_view_list;
|
||||
|
||||
/* The following are used directly in the generated code and should
|
||||
only be read here. A memory fence is guaranteed before the
|
||||
termination flag is set to true. */
|
||||
|
||||
/* Number of activations allowed for the task. */
|
||||
volatile unsigned long long activation_counter;
|
||||
volatile unsigned long long first_unassigned_activation_counter;
|
||||
|
||||
int num_instances;
|
||||
|
||||
/* True only when the activation counter has reached the maximum
|
||||
number of activations allowed for this task. */
|
||||
volatile bool termination_flag;
|
||||
|
||||
} gomp_stream_task_t, *gomp_stream_task_p;
|
||||
|
||||
|
||||
#if 0
|
||||
/* Control stream attached to a task (GOMP_STREAM_CONTROL_STREAM).  It
   acts as a simple analog of if-conversion: a task that is not data-driven
   uses it to keep honouring its original control dependences.  */

typedef struct gomp_stream_control_stream
{
  /* As long as the streams bypass control, i.e. the control flow is
     purely sequential, counting activations is enough to carry the
     control flow.  */

  /* How many times the task has been activated so far (local counter).
     Aligned on a 64-byte boundary.  */
  unsigned long long activation_counter __attribute__((aligned (64)));
  unsigned long long local_enabled_activations;

  /* How many activations the task is allowed to perform.  Aligned on a
     64-byte boundary, hence placed apart from the two counters above.  */
  unsigned long long enabled_activations __attribute__((aligned (64)));

  /* Inputs or outputs of a task may cross a parallel control flow
     boundary (a worksharing construct).  Their activation pattern can
     then be sparse with respect to the stream of data shared with the
     sibling tasks.  Activation ranges are used as an optimization over
     streaming the activation indexes themselves; both options are
     equivalent.  Such streams are inherently 1-to-1, so a simpler
     synchronization scheme should be used for them.  */
  /* gomp_stream_p activation_range_stream; */

  /* End of stream flag: set once every producer has committed all of
     its data and is terminating.  */
  bool eos_p;

} gomp_stream_control_stream_t, *gomp_stream_control_stream_p;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* GOMP_STREAM_H */
|
|
@ -0,0 +1,7 @@
|
|||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
#define SHARED_SPACE_SIZE (64 * CACHE_LINE_SIZE) // Check with batchSize
|
||||
#include <mcringbuffer_common.h>
|
||||
|
||||
#endif
|
|
@ -1,70 +1,6 @@
|
|||
#ifndef _SPECIFIC_COMM_H_
|
||||
#define _SPECIFIC_COMM_H_ 1
|
||||
|
||||
/* Non standard include */
|
||||
#include <commtech.h>
|
||||
|
||||
#define SHARED_SPACE_SIZE (250 * CACHE_LINE_SIZE) // Check with batchSize
|
||||
#define SHARED_SPACE_VOIDPTR (SHARED_SPACE_SIZE / sizeof(void *))
|
||||
|
||||
/* Pair of volatile-qualified indices stored in the `ctrl` member of
   struct channel.  */
struct control {
  /* send() polls this index while it equals the slot following its write
     position, and caches it in prod.localRead.  */
  volatile unsigned int read;
  /* send() stores prod.nextWrite here once batchSize entries have been
     written since the previous update.  */
  volatile unsigned int write;
};
|
||||
|
||||
/* Bookkeeping stored in the `cons` member of struct channel.  Its fields
   mirror those of struct prod; send() never touches them (presumably they
   belong to the receiving side -- TODO confirm against the receive code).  */
struct cons {
  unsigned int localWrite;
  unsigned int nextRead;
  unsigned int rBatch;
};
|
||||
|
||||
/* State used by send(), stored in the `prod` member of struct channel.  */
struct prod {
  /* Cached copy of ctrl.read; send() refreshes it only when the slot
     following nextWrite equals the cached value.  */
  unsigned int localRead;
  /* Index in shared_space where send() stores its next entry.  */
  unsigned int nextWrite;
  /* Entries written by send() since ctrl.write was last updated.  */
  unsigned int wBatch;
};
|
||||
|
||||
|
||||
struct channel
|
||||
{
|
||||
struct control ctrl __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
struct prod prod __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
struct cons cons __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
void * volatile *shared_space __attribute__ ((aligned (CACHE_LINE_SIZE))); // Align only to isolate cons on its cache line
|
||||
};
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
extern const unsigned int batchSize;
|
||||
|
||||
static inline void send(struct channel *channel, void **addr)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
unsigned int afterNextWrite;
|
||||
|
||||
afterNextWrite = (channel->prod.nextWrite + 1) % SHARED_SPACE_VOIDPTR;
|
||||
if (afterNextWrite == channel->prod.localRead)
|
||||
{
|
||||
if (afterNextWrite == channel->ctrl.read)
|
||||
continue;
|
||||
channel->prod.localRead = channel->ctrl.read;
|
||||
}
|
||||
channel->shared_space[channel->prod.nextWrite] = addr;
|
||||
channel->prod.nextWrite = afterNextWrite;
|
||||
channel->prod.wBatch++;
|
||||
if (channel->prod.wBatch >= batchSize)
|
||||
{
|
||||
channel->ctrl.write = channel->prod.nextWrite;
|
||||
channel->prod.wBatch = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__END_DECLS
|
||||
#include <mcringbuffer_common.h>
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
#ifndef _MCRINGBUFFER_COMMON_H_
|
||||
#define _MCRINGBUFFER_COMMON_H_ 1
|
||||
|
||||
/* Non standard include */
|
||||
#include <commtech.h>
|
||||
|
||||
#ifndef SHARED_SPACE_SIZE
|
||||
#define SHARED_SPACE_SIZE (250 * CACHE_LINE_SIZE) // Check with batchSize
|
||||
#endif
|
||||
#define SHARED_SPACE_VOIDPTR (SHARED_SPACE_SIZE / sizeof(void *))
|
||||
|
||||
/* Pair of volatile-qualified indices stored in the `ctrl` member of
   struct channel.  */
struct control {
  /* send() polls this index while it equals the slot following its write
     position, and caches it in prod.localRead.  */
  volatile unsigned int read;
  /* send() stores prod.nextWrite here once batchSize entries have been
     written since the previous update.  */
  volatile unsigned int write;
};
|
||||
|
||||
/* Bookkeeping stored in the `cons` member of struct channel.  Its fields
   mirror those of struct prod; send() never touches them (presumably they
   belong to the receiving side -- TODO confirm against the receive code).  */
struct cons {
  unsigned int localWrite;
  unsigned int nextRead;
  unsigned int rBatch;
};
|
||||
|
||||
/* State used by send(), stored in the `prod` member of struct channel.  */
struct prod {
  /* Cached copy of ctrl.read; send() refreshes it only when the slot
     following nextWrite equals the cached value.  */
  unsigned int localRead;
  /* Index in shared_space where send() stores its next entry.  */
  unsigned int nextWrite;
  /* Entries written by send() since ctrl.write was last updated.  */
  unsigned int wBatch;
};
|
||||
|
||||
|
||||
struct channel
|
||||
{
|
||||
struct control ctrl __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
struct prod prod __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
struct cons cons __attribute__ ((aligned (CACHE_LINE_SIZE)));
|
||||
void * volatile *shared_space __attribute__ ((aligned (CACHE_LINE_SIZE))); // Align only to isolate cons on its cache line
|
||||
};
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
extern const unsigned int batchSize;
|
||||
|
||||
static inline void send(struct channel *channel, void **addr)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
unsigned int afterNextWrite;
|
||||
|
||||
afterNextWrite = (channel->prod.nextWrite + 1) % SHARED_SPACE_VOIDPTR;
|
||||
if (afterNextWrite == channel->prod.localRead)
|
||||
{
|
||||
if (afterNextWrite == channel->ctrl.read)
|
||||
continue;
|
||||
channel->prod.localRead = channel->ctrl.read;
|
||||
}
|
||||
channel->shared_space[channel->prod.nextWrite] = addr;
|
||||
channel->prod.nextWrite = afterNextWrite;
|
||||
channel->prod.wBatch++;
|
||||
if (channel->prod.wBatch >= batchSize)
|
||||
{
|
||||
channel->ctrl.write = channel->prod.nextWrite;
|
||||
channel->prod.wBatch = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif
|
|
@ -0,0 +1 @@
|
|||
fast_forward.c
|
|
@ -0,0 +1 @@
|
|||
gomp_stream.c
|
|
@ -0,0 +1 @@
|
|||
mcringbuffer.c
|
Loading…
Reference in New Issue