Add native algo from OpenMP stream extension

Add native algorithm from OpenMP stream extension. This requires adding
one function to commtech.h: end_producer(). This function does nothing
for all communication algorithms except gomp_stream (the algorithm added by
this commit).
This commit is contained in:
Thomas Preud'homme 2012-01-30 19:40:50 +01:00
parent a30a5bfe06
commit c6786815cd
13 changed files with 1411 additions and 1 deletions

View File

@ -25,7 +25,7 @@ BINNAMES+=batch_queue_16_comm batch_queue_32_comm batch_queue_64_comm
BINNAMES+=batch_queue_128_comm batch_queue_256_comm batch_queue_512_comm
BINNAMES+=batch_queue_1024_comm batch_queue_2048_comm batch_queue_4096_comm
BINNAMES+=lamport_comm shared_mem_opt_comm none_comm csq_2_comm csq_64_comm
BINNAMES+=fast_forward_comm mcringbuffer_comm pipe_comm
BINNAMES+=fast_forward_comm mcringbuffer_comm pipe_comm gomp_stream_comm
#BINNAMES+=jikes_barrier_comm
CALCLIBSNAMES:=calc_mat calc_line calc_useless_loop
BINS:=$(patsubst %,$(BINDIR)/%,$(BINNAMES))

View File

@ -32,6 +32,16 @@ void *create_comm_channel(void);
*/
int destroy_comm_channel(void *);
/*
* @param channel The production channel
* @return 0 on success, -1 otherwise
*
* Notify the communication algorithm that the producer has finished producing.
* Algorithms that have nothing to do at the end of production may implement
* this as a no-op that returns 0.
*
* @comment Must be called by the producer when it has finished producing
*/
int end_producer(void *);
/*
* @param channel Channel from which to receive data

View File

@ -0,0 +1,329 @@
/* Copyright (C) 2010 Free Software Foundation, Inc.
Contributed by Antoniu Pop <antoniu.pop@gmail.com>.
This file is part of the GNU OpenMP Library (libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
more details.
You should have received a copy of the GNU Lesser General Public License
along with libgomp; see the file COPYING.LIB. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/* As a special exception, if you link this library with other files, some
of which are compiled with GCC, to produce an executable, this library
does not by itself cause the resulting executable to be covered by the
GNU General Public License. This exception does not however invalidate
any other reasons why the executable file might be covered by the GNU
General Public License. */
/* This implements the stream communication layer for libGOMP. */
#ifndef GOMP_STREAM_H
#define GOMP_STREAM_H 1
#define _CSQ_COMMON_COMM_H_ 1
#include <limits.h>
/* Define the type and maximum value for the indices used within the
stream. The indices are strictly monotonically increasing
integers, so we need a type that does not wrap around too fast. An
implementation of the wrap-around is necessary for systems where
this poses a problem. */
/* typedef unsigned long long int gomp_stream_index_t; */
#define GOMP_STREAM_MAX_INDEX ULLONG_MAX
/*#define LAZY_SYNCH
#define GUARDED_WAKE*/
/*
typedef enum {
GOMP_STREAM_INITIALIZED = 1,
GOMP_STREAM_ALLOCATED = 2,
GOMP_STREAM_PRODUCED = 4,
GOMP_STREAM_CONSUMED = 8,
GOMP_STREAM_ZOMBIE = 16,
GOMP_STREAM_STALL = 32
} gomp_stream_state;
*/
// Things added by RoboTux
#include <commtech.h>
/* Default BUF_SIZE: 64 cache lines, unless the build already defined it.
   CACHE_LINE_SIZE is expected to come from outside this header. */
#ifndef BUF_SIZE
#define BUF_SIZE (64 * CACHE_LINE_SIZE)
#endif
/* Size in bytes of one burst: send() reserves BURST_SIZE / sizeof(void *)
   pointer slots per stall and commits once they are all filled. */
#define BURST_SIZE 128
/* Local boolean type used by the stream structures below.
   NOTE(review): this reuses the names that <stdbool.h> provides as macros,
   so the two cannot be combined in one translation unit -- confirm that no
   includer pulls in <stdbool.h> before this header. */
typedef enum
{
false,
true
} bool;
/* The gomp_mutex_t name used by the stream structures below is an alias for
   a plain pthread mutex. */
typedef pthread_mutex_t gomp_mutex_t;
/* Per-channel state used by the commtech front end of the stream
   implementation.  The write-side fields are the ones send() operates on. */
struct channel
{
/* Opaque handles.  NOTE(review): presumably the gomp stream, its task and
   its read/write views -- confirm against the channel creation code. */
void *stream;
void *task;
void *rview;
void *wview;
/* Read-side cursor, aligned on a cache line.  NOTE(review): presumably
   mirrors the write-side fields for the receiver -- the reader code is not
   visible in this header. */
void **read_ptr __attribute__ ((aligned (CACHE_LINE_SIZE)));
void **read_end_ptr;
unsigned long long read_act;
/* Write-side cursor, aligned on a cache line: next slot to fill, one past
   the last slot of the current burst, and the activation index that send()
   passes to GOMP_stream_stall() and GOMP_stream_commit(). */
void **write_ptr __attribute__ ((aligned (CACHE_LINE_SIZE)));
void **write_end_ptr;
unsigned long long write_act;
};
/* Stream runtime entry points used by send(); they are defined outside this
   header.  NOTE(review): the descriptions below are inferred from the call
   sites in send() only -- confirm against the implementation. */
/* send() calls this with (view, act, act + 1) and uses the returned pointer
   as the start of the burst it then fills. */
void *
GOMP_stream_stall (void *, const unsigned long long,
const unsigned long long);
/* send() calls this with (view, act) once a burst has been completely
   filled. */
void
GOMP_stream_commit (void *, const unsigned long long);
/*
 * Append one pointer to the channel's current write burst.
 *
 * When no burst is open (the write cursor sits on the end marker), a new
 * burst of BURST_SIZE / sizeof(void *) slots is obtained through
 * GOMP_stream_stall() and the write activation index is advanced.  The
 * pointer is then stored, and the burst is handed over with
 * GOMP_stream_commit() as soon as its last slot has been filled.
 */
static inline void send(struct channel *channel, void **addr)
{
	if (channel->write_ptr == channel->write_end_ptr)
	{
		const unsigned long long act = channel->write_act;
		void **burst = (void **) GOMP_stream_stall(channel->wview,
				act, act + 1);

		channel->write_ptr = burst;
		channel->write_act = act + 1;
		channel->write_end_ptr = burst + BURST_SIZE / sizeof(void *);
	}

	*channel->write_ptr = addr;
	channel->write_ptr++;

	/* Burst still has free slots: nothing to publish yet. */
	if (channel->write_ptr != channel->write_end_ptr)
		return;

	GOMP_stream_commit(channel->wview, channel->write_act);
}
// End of things added by RoboTux
/* Kind of access (read or write) that a view gives to its stream. */
typedef enum
{
READ_VIEW,
WRITE_VIEW
} gomp_stream_view_type_t;
struct gomp_stream;
struct gomp_stream_task;
/* GOMP_STREAM_VIEW data structure. Producer and consumer tasks
connect to a GOMP_STREAM using views. */
typedef struct gomp_stream_view
{
/* The stream accessed through this view. WARNING: this field needs
to be first. */
struct gomp_stream *stream;
/* Size in bytes of the burst associated to this view. Later this
may become a stream. */
size_t view_size;
size_t burst_size;
/* NOTE(review): the purpose of this field is not visible in this
header -- confirm against the implementation. */
size_t pxxk_size;
/* NOTE(review): the eight counters below look like update/commit
statistics (min, max, count, sum); nothing in this header uses
them -- confirm against the implementation. */
unsigned long long min_update;
unsigned long long max_update;
unsigned long long nb_updates;
unsigned long long sum_updates;
unsigned long long min_commit;
unsigned long long max_commit;
unsigned long long nb_commits;
unsigned long long sum_commits;
/* The alignment directives are needed to ensure these
high-potential false-sharing fields are on their own cache
lines. */
/* Lower and upper bounds accessible in the stream through this
view. */
unsigned long long lower_index __attribute__((aligned (64)));
unsigned long long upper_index __attribute__((aligned (64)));
/* The task using this view. */
struct gomp_stream_task *task __attribute__((aligned (64)));
/* Setting this flag means the process accessing the stream through
this view relinquishes its access to the stream (generally when
terminating). */
bool end_p;
/* Type of this view (read or write). */
gomp_stream_view_type_t type;
/* In order to avoid excessive accesses to the global minimum index
(released or consumed), which strongly impacts cache traffic, use
this duplicated field for an additional level of caching and only
update when needed. */
unsigned long long local_min_value;
} gomp_stream_view_t, *gomp_stream_view_p;
/* List of GOMP_STREAM_VIEWs. As this list is only modified in the
initialization phase and we never remove items, we'll use an
array. */
typedef struct gomp_stream_view_list
{
/* Array of view pointers.  NOTE(review): presumably NR_VIEWS is the
number of entries in use and SIZE the allocated capacity -- confirm
against the code that fills the list. */
gomp_stream_view_p *views;
int nr_views;
int size;
/* Enforce atomic connection of the views in this list. */
gomp_mutex_t connect_view_mutex;
} gomp_stream_view_list_t, *gomp_stream_view_list_p;
/* GOMP_STREAM_VIEW_HANDLE data structure. This allows the tasks
interacting through the GOMP_STREAM to which this handle is
attached to keep updated information global to all similar views on
this stream. */
typedef struct gomp_stream_view_handle
{
/* The alignment directives are needed to ensure these
high-potential false-sharing fields are on their own cache
lines. */
/* Latest computed value of the min released index and max acquired
index values across views. */
volatile unsigned long long current_min __attribute__((aligned (64)));
volatile unsigned long long current_max __attribute__((aligned (64)));
/* Bookkeeping for view connections. */
gomp_stream_view_list_t view_list __attribute__((aligned (64)));
/* NOTE(review): presumably the number of views expected to connect,
the number that have connected and the number that have since
disconnected -- confirm against the connection code. */
int nr_expected_views;
int nr_registered_views;
int nr_unregistered_views;
} gomp_stream_view_handle_t, *gomp_stream_view_handle_p;
/* GOMP_STREAM data structure. */
typedef struct gomp_stream
{
/* WARNING: the first two fields (BUFFER and BUFFER_MASK) need to
remain in their respective positions.
NOTE(review): BUFFER_SIZE currently sits between BUFFER and
BUFFER_MASK, so this warning may be stale -- confirm which layout
the generated code relies on. */
/* The pointer to the buffer, as well as the subsequent buffer
description, should be on a single mostly read cache line. The
EOS_P flag is only set at the very end of the use of this
stream. */
/* Circular buffer containing the data communicated through this
stream. */
char *buffer __attribute__((aligned (64)));
/* Size of the buffer and the bitmask used for modulo computation
for the wrap-around. The size is expressed in basic elements for
this stream. The size in bytes of the buffer is
BUFFER_SIZE * ELEMENT_SIZE. */
unsigned long long buffer_size;
unsigned long long buffer_mask;
/* NOTE(review): the purpose of PRE_SHIFT is not visible in this
header -- confirm against the implementation. */
unsigned long long pre_shift;
/* True once all the tasks that should be expected to connect to
this stream have been declared. */
bool expected_ready_p;
/* True once all the tasks expected to connect to this stream have
done so. */
bool connected_p;
/* End of stream: true when all producers have finished committing
all the data and are terminating. */
bool eos_p;
/* Handles for read and write views on this stream. */
gomp_stream_view_handle_t read_views;
gomp_stream_view_handle_t write_views;
/* Barrier used both for waiting for all views to connect to the
stream and to find the last view disconnecting (and who therefore
frees this stream). */
/*gomp_barrier_t view_handling_barrier;*/
/* Counter of the number of total unregistered views, both read and
write, used to determine the last task disconnecting its view.
The last task will also free the data structures. */
int unregistered_views;
#ifndef HAVE_SYNC_BUILTINS
/* We may need a lock for atomicity if no atomic operations are
available. */
gomp_mutex_t stream_mutex;
#endif
} gomp_stream_t, *gomp_stream_p;
/* GOMP_STREAM_TASK data structure. Runtime node in the task
graph. */
typedef struct gomp_stream_task
{
/* Lists of views on streams used by this task. */
gomp_stream_view_list_t read_view_list;
gomp_stream_view_list_t write_view_list;
/* The following are used directly in the generated code and should
only be read here. A memory fence is guaranteed before the
termination flag is set to true. */
/* Number of activations allowed for the task. */
volatile unsigned long long activation_counter;
/* NOTE(review): the meaning of the next two fields is not
established by this header (presumably the next activation not yet
handed to an instance, and the number of instances of the task) --
confirm against the implementation. */
volatile unsigned long long first_unassigned_activation_counter;
int num_instances;
/* True only when the activation counter has reached the maximum
number of activations allowed for this task. */
volatile bool termination_flag;
} gomp_stream_task_t, *gomp_stream_task_p;
/* The control-stream structure below is compiled out by the #if 0 and is
kept for reference only. */
#if 0
/* GOMP_STREAM_CONTROL_STREAM data structure. Implements a simple
if-conversion analog that allows a non data-driven task to conform
to its original control dependences. */
typedef struct gomp_stream_control_stream
{
/* In all cases where the streams bypass control (i.e., for
sequential control flow only), an activation counter is
sufficient to carry the control flow. */
/* Local counter of the number of times a task has been activated. */
unsigned long long activation_counter __attribute__((aligned (64)));
unsigned long long local_enabled_activations;
/* Number of times this task is allowed to activate. */
unsigned long long enabled_activations __attribute__((aligned (64)));
/* When a task's inputs or outputs cross a parallel control flow
boundary (i.e., worksharing construct), their activation pattern
can be sparse wrt. the actual stream of data that they share with
their sibling tasks. We use activation ranges as an optimization
to streaming the activation indexes themselves, but the two
options are equivalent. These streams are inherently 1-to-1, so
a simpler implementation of the synchronization should be used in
this case.*/
/* gomp_stream_p activation_range_stream; */
/* End of stream: true when all producers have finished committing
all the data and are terminating. */
bool eos_p;
} gomp_stream_control_stream_t, *gomp_stream_control_stream_p;
#endif
#endif /* GOMP_STREAM_H */

View File

@ -21,6 +21,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free(channel);

View File

@ -26,6 +26,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free(channel);

View File

@ -30,6 +30,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free((void *) ((struct channel *) channel)->shared_space);

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free((void *) ((struct channel *) channel)->shared_space);

View File

@ -34,6 +34,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free((void *) ((struct channel *) channel)->shared_space);

View File

@ -16,6 +16,11 @@ void *create_comm_channel(void)
return (void *) &store_var;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *unused __attribute__ ((unused)))
{
return 0;

View File

@ -30,6 +30,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free(channel);

View File

@ -26,6 +26,11 @@ void *create_comm_channel(void)
return NULL;
}
/* End-of-production hook: this algorithm has nothing to do here, so the
   channel is ignored and success (0) is always reported. */
int end_producer(void *channel __attribute__ ((unused)))
{
	return 0;
}
int destroy_comm_channel(void *channel)
{
free((void *) ((struct channel *) channel)->shared_space);

View File

@ -337,6 +337,12 @@ static int initial_producer(node_param_t *node_param)
for(j = 0; j < WORDS_PER_LINE; j++)
send(node_param->next_comm_channel, do_calc());
}
if (end_producer(node_param->next_comm_channel))
{
fprintf(stderr, "Notification of end of production to the "
"communication algorithm failed\n");
return 1;
}
if (end_calc())
{
fprintf(stderr, "uninitialization of calculation has failed\n");