rt_gccstream/gcc/omp-low.c

9904 lines
294 KiB
C

/* Lowering pass for OpenMP directives. Converts OpenMP directives
into explicit calls to the runtime library (libgomp) and data
marshalling to implement data sharing and copying clauses.
Contributed by Diego Novillo <dnovillo@redhat.com>
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>.
Streamization expansion to Erbium:
This version contains the expansion for both task and single regions
It is not currently able to perform data-parallelism but only pipeline
parallelism.
Implementation:
During lowering pass variables that represent streams / views are
identified and registered for the purpose.
Also for the non SESE regions (such as single regions) PUSH / POP stub builtins
are created before and after any SESE streaming region.
At this stage a hashtable of streams is initialized and elements are
introduced to it.
In the expansion pass, we first perform a traversal through all the
regions, identifying every streamization task and creating a connectivity graph
between all of the tasks.
For each of the tasks, we identify the type of view (read / writer) and add the
view to the respective stream by inserting it in a hashtable within the stream
structure.
Once this recognition is performed, all the regions are traversed, starting with
the child tasks and moving up to the parent ones.
The expansion function for each specific type of region was patched to
add a streamization branch.
Supported types are currently:
- Single region
- Task region
*/
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "timevar.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "toplev.h"
#include "tree-pass.h"
#include "ggc.h"
#include "except.h"
#include "splay-tree.h"
#include "optabs.h"
#include "cfgloop.h"
/* Return the successor edge of BB that carries EDGE_TRUE_VALUE, i.e. the
   edge entering the "true" arm of the condition ending BB.  BB is
   required to have such an edge; if none is found we hit
   gcc_unreachable.  */
static edge
get_true_entry_from_cond (basic_block bb)
{
  edge_iterator it;
  edge succ;

  FOR_EACH_EDGE (succ, it, bb->succs)
    {
      if ((succ->flags & EDGE_TRUE_VALUE) != 0)
	return succ;
    }

  gcc_unreachable ();
  return NULL;
}
/* Return the edge through which the "true" arm of the condition ending
   BB reaches the join block, assuming the "false" arm is empty.

   The join block is taken to be the destination of the first successor
   edge of BB that is not flagged EDGE_TRUE_VALUE; the result is the first
   predecessor edge of that join block that does not originate in BB.
   If no such predecessor exists we hit gcc_unreachable.  */
static edge
get_true_exit_from_cond (basic_block bb)
{
  basic_block merge_bb;
  edge_iterator it;
  edge cur;

  /* Stop on the first non-true successor; CUR is left pointing at it.  */
  FOR_EACH_EDGE (cur, it, bb->succs)
    {
      if ((cur->flags & EDGE_TRUE_VALUE) == 0)
	break;
    }
  merge_bb = cur->dest;

  /* Any edge entering the join block from elsewhere than BB is the exit
     of the true arm.  */
  FOR_EACH_EDGE (cur, it, merge_bb->preds)
    {
      if (cur->src != bb)
	return cur;
    }

  gcc_unreachable ();
  return NULL;
}
/* Streamization tuning constants.  The expansions are parenthesized so
   that they behave as a single operand wherever they are used (e.g.
   HORIZON * 2 or HORIZON / AGGREGATION_FACTOR).  */
#define HORIZON (1 << 23)
#define AGGREGATION_FACTOR (32)
/* Pointer typedefs used to reduce verbosity, each followed by the
   declaration of the heap-allocated pointer vector type for it.  */
/* Pointer to an OpenMP region descriptor.  */
typedef struct omp_region *omp_region_p;
/* Pointer to a view (see struct view), plus its VEC type.  */
typedef struct view *view_p;
DEF_VEC_P(view_p);
DEF_VEC_ALLOC_P(view_p,heap);
/* Pointer to a stream (see struct stream), plus its VEC type.  */
typedef struct stream *stream_p;
DEF_VEC_P(stream_p);
DEF_VEC_ALLOC_P(stream_p,heap);
/* Pointer to a task, represented by its streamization_info, plus its
   VEC type.  */
typedef struct streamization_info *task_p;
DEF_VEC_P(task_p);
DEF_VEC_ALLOC_P(task_p,heap);
/* During OMP lowering, we have to add some firstprivate clauses to
   pass pointers to generated shared data between the enclosing
   context to the tasks.  This hashtab keeps the mapping between the
   OMP directive and the decls of the variables.  It is keyed by the
   directive statement pointer (see hash_omp_stmt / eq_omp_stmt) and is
   created on first use by lookup_omp_stmt.  */
htab_t htab_omp_stmt;
/* Per-directive record stored in HTAB_OMP_STMT.  */
typedef struct omp_stmt
{
/* The OMP directive stmt.  This is the hash key.  */
gimple stmt;
/* For a GIMPLE_OMP_TASK.  (Currently compiled out.)  */
#if 0 /*zzzz*/
tree activation_counter_p;
tree termination_flag_p;
#endif
/* For a GIMPLE_OMP_PARALLEL.  */
tree task_decl;
/* Pointer map for keeping track, at this directive's level, of the
   mapping between any decl and the local variable.  */
struct pointer_map_t *var_map;
/* Map queried by reverse_lookup_var; presumably the inverse of VAR_MAP
   (local variable back to original decl) -- confirm against the code
   that fills it.  */
struct pointer_map_t *reverse_var_map;
/* Map queried by lookup_fp_view_var, keyed by a view.  */
struct pointer_map_t *firstprivate_view_var_pmap;
} omp_stmt_t, *omp_stmt_p;
/* Compute a hash function for stream. */
static hashval_t
hash_omp_stmt (const void *elt)
{
return htab_hash_pointer (((const omp_stmt_t *) elt)->stmt);
}
/* Equality callback for HTAB_OMP_STMT: two omp_stmt_t entries E1 and E2
   are equal when they refer to the same directive statement.  */
static int
eq_omp_stmt (const void *e1, const void *e2)
{
  const omp_stmt_t *lhs = (const omp_stmt_t *) e1;
  const omp_stmt_t *rhs = (const omp_stmt_t *) e2;

  return lhs->stmt == rhs->stmt;
}
/* Return the omp_stmt record associated with the OMP directive STMT in
   HTAB_OMP_STMT, creating the table and/or the record on first use.  A
   freshly created record has a NULL task_decl and three empty pointer
   maps.  */
static omp_stmt_p
lookup_omp_stmt (gimple stmt)
{
  omp_stmt_t key;
  omp_stmt_p *slot;
  omp_stmt_p entry;

  if (htab_omp_stmt == NULL)
    htab_omp_stmt = htab_create_ggc (10, hash_omp_stmt,
				     eq_omp_stmt, NULL);

  /* Only the STMT field of the key is consulted by the callbacks.  */
  key.stmt = stmt;
  slot = (omp_stmt_p *) htab_find_slot (htab_omp_stmt, &key, INSERT);
  if (*slot != NULL)
    return *slot;

  entry = GGC_CNEW (omp_stmt_t);
  *slot = entry;
  entry->stmt = stmt;
  entry->task_decl = NULL_TREE;
  entry->var_map = pointer_map_create ();
  entry->reverse_var_map = pointer_map_create ();
  entry->firstprivate_view_var_pmap = pointer_map_create ();
  return entry;
}
/* Return the variable that VAR maps to in the VAR_MAP of the OMP
   directive STMT.  Unlike reverse_lookup_var and lookup_fp_view_var,
   which return NULL_TREE for a missing entry, this function requires
   the mapping to exist; a missing entry is reported through gcc_assert
   instead of silently dereferencing a NULL slot.  */
static inline tree
lookup_var (tree var, gimple stmt)
{
  omp_stmt_p omp_stmt = lookup_omp_stmt (stmt);
  tree *n;

  n = (tree *) pointer_map_contains (omp_stmt->var_map, var);
  /* pointer_map_contains yields NULL when VAR was never registered.  */
  gcc_assert (n);
  return *n;
}
/* Return the entry recorded for VAR in the REVERSE_VAR_MAP of the OMP
   directive STMT, or NULL_TREE when VAR has no entry there.  */
static inline tree
reverse_lookup_var (tree var, gimple stmt)
{
  omp_stmt_p record = lookup_omp_stmt (stmt);
  tree *slot = (tree *) pointer_map_contains (record->reverse_var_map, var);

  if (slot == NULL)
    return NULL_TREE;
  return *slot;
}
/* Return the entry recorded for VIEW in the FIRSTPRIVATE_VIEW_VAR_PMAP
   of the OMP directive STMT, or NULL_TREE when VIEW has no entry
   there.  */
static inline tree
lookup_fp_view_var (tree view, gimple stmt)
{
  omp_stmt_p record = lookup_omp_stmt (stmt);
  tree *slot
    = (tree *) pointer_map_contains (record->firstprivate_view_var_pmap,
				     view);

  if (slot == NULL)
    return NULL_TREE;
  return *slot;
}
/* Structure holding pointers to all relevant basic blocks for
   streamization within a omp_region.  One instance is attached to an
   omp_region through its streamization_info field (see
   build_streamization_info).  */
typedef struct streamization_info
{
/* The OMP region this record is attached to.  */
omp_region_p region;
/* R/W or input and output views linking this task to streams in
   which this task is either a producer or a consumer.  */
VEC (view_p,heap) *rviews;
VEC (view_p,heap) *wviews;
/* Firstprivate views of this task.  */
VEC (view_p,heap) *fpviews;
/* For an enclosing parallel region, streams and tasks being used in
   its taskgraph.  */
VEC (stream_p,heap) *streams;
VEC (stream_p,heap) *fp_streams;
VEC (task_p,heap) *tasks;
/* omp pragma statement; set by build_streamization_info to the last
   statement of the region's entry block.  */
gimple stmt;
/* Each task is controlled by an activation counter that determines
   how many times it should be executed.  The access to the activation
   counter needs to be atomic when the task appears in a concurrent
   environment (E.g., a parallel for section).  The termination flag
   should only be set to true after a full memory fence.  */
/* Entry and exit points to the ORIGINAL body of the region, not to
   be mistaken with the .->region->entry/exit blocks.  */
edge sese_body_entry;
edge sese_body_exit;
/* Initialization and finalization blocks.  These are used in all
   regions and are generally single-thread execution blocks at the
   beginning and end of the region.  */
basic_block pre_initialization_bb;
basic_block initialization_bb;
basic_block post_initialization_bb;
basic_block finalization_bb;
/* Local initialization and finalization blocks.  These are used in
   task regions to issue statements that initialize or finalize one
   iteration of the aggregated task.  These blocks are part of the
   task's body.  */
basic_block local_initialization_bb;
basic_block local_finalization_bb;
/* Number of activations allowed for the task.  The shared counter
   fields are currently compiled out; only the local ones remain.  */
#if 0 /*zzzz*/
tree activation_counter;
tree activation_counter_p;
#endif
tree local_activation_counter;
tree local_act_ctr_p;
tree local_activation_index;
tree local_activation_index_next;
/* True only when the activation counter has reached the maximum
   number of activations allowed for this task.  (Currently compiled
   out.)  */
#if 0 /*zzzz*/
tree termination_flag;
tree termination_flag_p;
#endif
/* VAR_DECL to be used to define the runtime task.  There should be a
   single one per region, which is why it is defined in this data
   structure.  */
tree task_decl;
/* NOTE(review): the fields below are not used in the visible part of
   the file; their meaning should be confirmed against the expansion
   code.  */
tree num_instances;
tree can_replicate;
tree avl_work, act_idx_start;
basic_block original_location_bb;
bool is_pre_task;
} streamization_info_t, *streamization_info_p;
/* Map all streamized variables in this region to their respective
   streams.  For now, only keep this up at the outermost enclosing
   parallel region.  The table is keyed by the variable pointer (see
   hash_stream / eq_stream) and is created on first use by
   lookup_stream.  */
htab_t htab_stream;
/* Attach a zero-initialized streamization_info to REGION and to each of
   its enclosing regions, walking outwards.  The walk stops at the first
   region that already has one (its ancestors are then left untouched) or
   when the outermost region has been processed.  Each new record points
   back at its region and remembers the last statement of the region's
   entry block.  */
static void
build_streamization_info (omp_region_p region)
{
  for (;;)
    {
      streamization_info_p fresh;

      if (region->streamization_info)
	return;

      fresh = GGC_CNEW (streamization_info_t);
      region->streamization_info = fresh;
      fresh->region = region;
      fresh->stmt = last_stmt (region->entry);
      fresh->task_decl = NULL_TREE;

      if (!region->outer)
	return;
      region = region->outer;
    }
}
/* Return the streamization info attached to the OpenMP region REGION.
   REGION must already have one (checked with gcc_assert).  */
static inline streamization_info_p
get_streamization_info (omp_region_p region)
{
  streamization_info_p attached
    = (streamization_info_p) region->streamization_info;

  gcc_assert (attached);
  return attached;
}
/* Return the streamization info attached to the region directly
   enclosing REGION.  REGION must have an enclosing region, and that
   region must carry streamization info (both are asserted).  */
static inline streamization_info_p
get_outer_streamization_info (omp_region_p region)
{
  omp_region_p parent = region->outer;

  gcc_assert (parent);
  return get_streamization_info (parent);
}
/* Return the streamization info attached to the outermost
   GIMPLE_OMP_PARALLEL region found on the chain REGION, REGION->outer,
   ...  Such a region must exist and must carry streamization info (both
   are asserted).  */
static inline streamization_info_p
get_outermost_parallel_streamization_info (omp_region_p region)
{
  omp_region_p outermost = NULL;
  omp_region_p walk;

  /* Keep overwriting so that the last (outermost) match wins.  */
  for (walk = region; walk != NULL; walk = walk->outer)
    {
      if (walk->type == GIMPLE_OMP_PARALLEL)
	outermost = walk;
    }

  gcc_assert (outermost);
  gcc_assert (outermost->streamization_info);
  return (streamization_info_p) outermost->streamization_info;
}
/* Return true if REGION has streamization info attached, which is how
   regions containing streaming tasks are marked.  */
static inline bool
is_streaming_region (omp_region_p region)
{
  if (region->streamization_info == NULL)
    return false;
  return true;
}
/* Enum to identify how the variable named in a clause is accessed
   through its stream.  */
enum stream_access_type {
STREAM_ACCESS_TYPE_DIRECT = 0, /* simple type variable */
STREAM_ACCESS_TYPE_REFERENCE, /* unsized variable, is passed as a pointer */
STREAM_ACCESS_TYPE_ARRAY /* sized array, will be copied to the buffer */
};
/* This structure maps a stream to the variable it privatizes.  Entries
   live in HTAB_STREAM and are created by lookup_stream.  */
typedef struct stream
{
/* The variable this stream privatizes.  Can be NULL if this is a
   control stream.  This is the hash key.  */
tree var;
/* The parallel region this stream depends on.
   NOTE(review): declared as a pointer to streamization_info_p, i.e. a
   pointer to a pointer -- confirm this double indirection is
   intended.  */
streamization_info_p *outermost_parallel_sinfo;
/* R/W or input and output views linked to this stream for producer
   and consumer tasks.  */
VEC (view_p,heap) *rviews;
VEC (view_p,heap) *wviews;
/* Firstprivate views linked to this stream.  */
VEC (view_p,heap) *fpviews;
/* The stream decl; lookup_stream creates it as a raw temporary of
   pointer type named "gomp_stream".  */
tree stream;
/* Buffer related tree nodes.  BUFFER_TYPE is set by lookup_stream to a
   pointer to ELEMENT_TYPE.  */
tree buffer_type;
/* Set to access var by ref */
enum stream_access_type access_type;
/* ELEMENT_TYPE is the type of VAR; BASE_TYPE is the array element type
   when VAR has array type and the type of VAR otherwise (see
   lookup_stream).  */
tree element_type;
tree base_type;
/* Used for traversals */
bool visit;
} stream_t;
/* Low-order host integer of the size in bytes (TYPE_SIZE_UNIT) of TYPE.
   Only meaningful when that size is an INTEGER_CST.  */
#define TYPE_SIZE_HAS_INT(TYPE) \
TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TYPE))
/* Compute a hash function for stream. */
static hashval_t
hash_stream (const void *elt)
{
return htab_hash_pointer (((const stream_t *) elt)->var);
}
/* Equality callback for HTAB_STREAM: two stream_t entries E1 and E2 are
   equal when they privatize the same variable.  */
static int
eq_stream (const void *e1, const void *e2)
{
  const stream_t *lhs = (const stream_t *) e1;
  const stream_t *rhs = (const stream_t *) e2;

  return lhs->var == rhs->var;
}
/* TODO: Remove prototype by moving code.  Forward declarations needed
   because struct omp_context is only defined further down in this
   file; use_pointer_for_field is defined outside the part of the file
   shown here.  */
struct omp_context;
static bool
use_pointer_for_field (tree, struct omp_context *);
/* Return the stream associated with VAR in the global HTAB_STREAM,
   creating the table and/or the stream on first use.

   A new stream gets a raw pointer-typed temporary named "gomp_stream"
   as its decl, the type of VAR as element type, the array element type
   of VAR (or the type of VAR itself when it is not an array) as base
   type, and a pointer to the element type as buffer type.  */
static stream_p
lookup_stream (tree var/*, omp_region_p region*/)
{
  stream_t key;
  stream_p *slot;
  stream_p created;
  tree var_type;

  if (htab_stream == NULL)
    htab_stream = htab_create_ggc (10, hash_stream,
				   eq_stream, NULL);

  /* Only the VAR field of the key is consulted by the callbacks.  */
  key.var = var;
  slot = (stream_p *) htab_find_slot (htab_stream, &key, INSERT);
  if (*slot != NULL)
    return *slot;

  created = GGC_CNEW (stream_t);
  *slot = created;
  created->var = var;
  created->stream = create_tmp_var_raw (ptr_type_node, "gomp_stream");

  var_type = TREE_TYPE (var);
  created->element_type = var_type;
  created->base_type = (TREE_CODE (var_type) == ARRAY_TYPE
			? TREE_TYPE (var_type)
			: var_type);
  created->buffer_type = build_pointer_type (created->element_type);
  return created;
}
/* Enum to identify the type of a view, i.e. how a task connects to a
   stream through it.  */
enum view_type {
/* The task reads from the stream.  */
READ_VIEW = 0,
/* The task writes to the stream.  */
WRITE_VIEW,
/* View related to a FIRSTPRIVATE clause.  */
FIRSTPRIVATE_VIEW
};
/* Structure representing a VIEW: the connection of one region to one
   stream.  Instances are created by build_view.  */
typedef struct view
{
/* Region using this view to connect to a stream.  */
streamization_info_p sinfo;
/* Stream to which this view connects to */
stream_p stream;
/* Type of this view.  */
enum view_type type;
/* VAR_DECL that this view is associated to (the stream's variable).  */
tree var_decl;
/* Clause that it represents; NULL_TREE for the enclosing context's
   view of a FIRSTPRIVATE clause.  */
tree clause;
/* VAR_DECLS created for code generation: VIEW is a pointer-typed
   temporary named "view", BUFFER_POINTER a temporary of the stream's
   buffer type named "buffer_access_pointer".  */
tree view;
tree buffer_pointer;
/* Decl that defines the view var.
 * In case old syntax view_var is equal to stream->var */
tree view_var;
/* Sizes as computed by build_view.  BURST_SIZE holds either a size
   tree or, when IS_PARAMETRIC_BURST_SIZE is set, the decl giving the
   burst length (see get_view_burst_size).  */
tree view_size;
tree burst_size;
tree element_size;
/* True when the burst size given in the clause is a decl rather than
   a constant.  */
bool is_parametric_burst_size;
/* True when the view was built without a clause.  */
bool is_firstprivate_view;
} view_t/*, *view_p*/;
/* Build a view on STREAM for CLAUSE.

   STREAM is the stream the view connects to.  CLAUSE is the clause the
   view is built from, or NULL_TREE for the enclosing context's view of
   a FIRSTPRIVATE clause.

   Returns a newly allocated, zero-initialized view with its temporaries
   and its view/burst/element sizes filled in.  The SINFO field is left
   unset, and TYPE is only set on the NULL_TREE clause path.  */
static view_p
build_view (stream_p stream, tree clause)
{
view_p view = GGC_CNEW (view_t);
tree type;
view->stream = stream;
/* Temporaries used later during code generation.  */
view->view = create_tmp_var (ptr_type_node, "view");
view->buffer_pointer = create_tmp_var (view->stream->buffer_type,
"buffer_access_pointer");
view->clause = clause;
view->var_decl = stream->var;
view->is_parametric_burst_size = false;
view->is_firstprivate_view = false;
if (clause == NULL_TREE)
{
/* If this view is not built from an INPUT/OUTPUT clause, then
   it is the enclosing context's view for a FIRSTPRIVATE clause,
   so the VIEW and the STREAM are the same (no view/burst
   syntax).  All three sizes are the size of the variable's type.  */
view->view_var = stream->var;
type = TREE_TYPE (view->view_var);
view->element_size = TYPE_SIZE_UNIT (type);
view->is_firstprivate_view = true;
/* Note that the view is typed WRITE_VIEW here, not
   FIRSTPRIVATE_VIEW.  */
view->type = WRITE_VIEW;
view->burst_size = view->element_size;
view->view_size = view->element_size;
return view;
}
/* The clause may name a dedicated view variable; otherwise the stream
   variable itself acts as the view.  */
view->view_var = OMP_CLAUSE_VIEW_ID (clause) ?
OMP_CLAUSE_VIEW_ID (clause) : stream->var;
view->view_size = OMP_CLAUSE_VIEW_SIZE (clause);
view->burst_size = OMP_CLAUSE_BURST_SIZE (clause);
gcc_assert (view->view_var);
type = TREE_TYPE (view->view_var);
/* Derive the element size from the view variable's type and, when the
   clause gave no view size, a default view size as well.  */
if (TREE_CODE (type) == ARRAY_TYPE)
{
view->element_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
if (view->view_size == NULL_TREE)
view->view_size = TYPE_SIZE_UNIT (type);
}
else if (TREE_CODE (type) == POINTER_TYPE)
{
/* NOTE(review): this strips two type levels, so it presumably expects
   a pointer to an array (or to another pointer) -- confirm what
   happens for a pointer to a scalar.  */
view->element_size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (type)));
if (view->view_size == NULL_TREE)
view->view_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
}
else
{
view->element_size = TYPE_SIZE_UNIT (type);
if (view->view_size == NULL_TREE)
view->view_size = TYPE_SIZE_UNIT (type);
}
if (view->burst_size != NULL_TREE)
{
if (DECL_P (view->burst_size))
{
/* We will have to issue the multiplication before view
   creation.  */
view->is_parametric_burst_size = true;
}
else
{
/* Constant burst length: convert it from elements to bytes now.
   Both operands are read with TREE_INT_CST_LOW, so they are assumed
   to be INTEGER_CSTs.  */
HOST_WIDE_INT burst_size_bytes =
TREE_INT_CST_LOW (view->element_size) *
TREE_INT_CST_LOW (view->burst_size);
view->burst_size = size_int (burst_size_bytes);
}
}
else
{
/* No burst given: a burst covers the whole view.  */
view->burst_size = view->view_size;
}
return view;
}
/* Return the burst size of VIEW.

   For a non-parametric view this is simply VIEW->burst_size.  For a
   parametric one, two statements are inserted after GSI: one converting
   the burst decl to size_type_node into a new "burst_size" temporary,
   and one multiplying that temporary by the element size.  The
   temporary is stored back into VIEW->burst_size and returned.

   NOTE(review): the parametric flag is left set, so a second call on
   the same view would multiply the stored temporary by the element size
   once more.  */
static tree
get_view_burst_size (view_p view, gimple_stmt_iterator *gsi)
{
  tree bytes;
  tree count;
  gimple assign;

  if (!view->is_parametric_burst_size)
    return view->burst_size;

  bytes = create_tmp_var (size_type_node, "burst_size");
  count = fold_convert (size_type_node, view->burst_size);

  /* bytes = (size_t) burst  */
  assign = gimple_build_assign (bytes, count);
  gsi_insert_after (gsi, assign, GSI_CONTINUE_LINKING);

  /* bytes = element_size * bytes  */
  assign = gimple_build_assign_with_ops (MULT_EXPR, bytes,
					 view->element_size, bytes);
  gsi_insert_after (gsi, assign, GSI_CONTINUE_LINKING);

  view->burst_size = bytes;
  return bytes;
}
/* Data handed to replace_var_in_stmt through walk_stmt_info.info.  */
struct replace_var_in_stmt_info
{
/* The view whose variable is being replaced.  Read by the callback.  */
view_p view;
/* The following three fields are not used in the visible part of the
   file.  */
tree array_idx;
tree idx;
tree elt;
tree access;
tree base_ptr;
/* Temporaries created by the callback while rewriting an ADDR_EXPR
   (REG_ADDR, ADDR) or an INDIRECT_REF (DEREF).  */
tree reg_addr;
tree deref;
tree addr;
/* True when the view variable has pointer type; the variable is then
   replaced by the buffer pointer itself instead of a dereference of
   it.  */
bool is_pointer_type;
};
/* walk_gimple_op callback that rewrites, in the operand *TP, references
   to the variable of a view so that they go through the view's buffer
   pointer instead.

   DATA is the walk_stmt_info of the walk; its INFO field points to a
   struct replace_var_in_stmt_info naming the view.  Helper statements
   are inserted before the statement designated by WI->gsi.  WI->changed
   is used to report to the enclosing (recursive) invocation whether a
   replacement happened in the sub-operand.  *WALK_SUBTREES is only
   passed on to the recursive calls and never modified here.

   Always returns NULL_TREE.  */
static tree
replace_var_in_stmt (tree *tp, int *walk_subtrees, void *data)
{
struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
struct replace_var_in_stmt_info *info =
(struct replace_var_in_stmt_info *) wi->info;
view_p view = info->view;
tree t = *tp;
gimple stmt;
tree base_type = view->stream->base_type;
tree base_ptr_type = build_pointer_type (base_type);
tree element_type = view->stream->element_type;
tree element_ptr_type = build_pointer_type (element_type);
/* Dispatch on the kind of reference; anything not listed below is left
   untouched.  */
switch (TREE_CODE (t))
{
/* (view): just replace the view by a cast and dereferenced
   pointer to the buffer.  The match is done on DECL_PT_UID.  */
case VAR_DECL:
if (DECL_PT_UID (t) == DECL_PT_UID (view->view_var))
{
wi->changed = true;
if (info->is_pointer_type)
*tp = view->buffer_pointer;
else
*tp = build_fold_indirect_ref (view->buffer_pointer);
}
break;
/* (*view): should only be possible if the stream carries
   pointers.  Other cases would not be possible (they would
   require temporaries).  Two cases: *view or *view[i].  */
case INDIRECT_REF:
{
/* Rewrite the operand on a local copy T first.  */
t = TREE_OPERAND (t, 0);
wi->changed = false;
replace_var_in_stmt (&t, walk_subtrees, data);
if (wi->changed && !info->is_pointer_type)
{
/* We require that t be dereferenceable.  */
gcc_assert (POINTER_TYPE_P (TREE_TYPE (t)));
/* Load the rewritten pointer into a temporary and dereference
   that.  */
info->deref = create_tmp_var (element_ptr_type, "deref");
stmt = gimple_build_assign (info->deref, t);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
*tp = build_fold_indirect_ref (info->deref);
}
/* For pointer-typed views the dereference is rebuilt on T whether or
   not the operand changed.  */
if (info->is_pointer_type)
*tp = build_fold_indirect_ref (t);
}
break;
/* Component access: rewrite the base object and, if it changed,
   install it as the new operand 0.  */
case COMPONENT_REF:
{
t = TREE_OPERAND (t, 0);
wi->changed = false;
replace_var_in_stmt (&t, walk_subtrees, data);
if (wi->changed)
TREE_OPERAND (*tp, 0) = t;
}
break;
/* (&view): for example (&view[i]) */
case ADDR_EXPR:
{
t = TREE_OPERAND (t, 0);
wi->changed = false;
replace_var_in_stmt (&t, walk_subtrees, data);
if (wi->changed)
{
/* &*view is pointless for us.  Given the transformations
   of the subtree this can happen from &view[i] too.  */
switch (TREE_CODE (t))
{
case INDIRECT_REF:
/* &*p simplifies to p.  */
*tp = TREE_OPERAND (t, 0);
break;
case ARRAY_REF:
/* This case can also be simplified, with caution on
   the array ref index.  The address is computed explicitly as
   (base type *) buffer_pointer + index * sizeof (base type).  */
{
tree base_size = build_int_cst (size_type_node,
TYPE_SIZE_HAS_INT (view->stream->base_type));
tree array_index = TREE_OPERAND (t, 1);
tree index;
tree cast = view->buffer_pointer;
tree tmp;
/* A special common case: &view[0].  Only the cast of the buffer
   pointer is needed.  */
if (TREE_CODE (array_index) == INTEGER_CST
&& TREE_INT_CST_LOW (array_index) == 0)
{
cast = create_tmp_var (base_ptr_type, NULL);
tmp = fold_convert (base_ptr_type, view->buffer_pointer);
stmt = gimple_build_assign (cast, tmp);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
*tp = cast;
return NULL_TREE;
}
/* A non-constant index is first converted to size_type_node in its
   own temporary.  */
if (!is_gimple_constant (array_index))
{
index = create_tmp_var (size_type_node, NULL);
tmp = fold_convert (size_type_node, array_index);
stmt = gimple_build_assign (index, tmp);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
array_index = index;
}
/* index = array_index * base_size  */
index = create_tmp_var (size_type_node, NULL);
tmp = fold_convert (size_type_node, array_index);
stmt = gimple_build_assign_with_ops (MULT_EXPR, index, tmp, base_size);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
/* cast = (base type *) buffer_pointer; cast = cast + index  */
cast = create_tmp_var (base_ptr_type, NULL);
stmt = gimple_build_assign (cast, fold_convert (base_ptr_type, view->buffer_pointer));
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, cast, cast, index);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
*tp = cast;
}
break;
case COMPONENT_REF:
/* Left as is.  */
break;
default:
/* Any other rewritten operand is copied through a register temporary
   into an addressable temporary, whose address is used instead.  */
info->reg_addr = create_tmp_var (base_type, "reg_addr");
info->addr = create_tmp_var (base_type, "addr");
TREE_ADDRESSABLE (info->addr) = 1;
stmt = gimple_build_assign (info->reg_addr, t);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
stmt = gimple_build_assign (info->addr, info->reg_addr);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
*tp = build_fold_addr_expr (info->addr);
}
}
}
break;
/* (view[i]): rewrite the array base.  */
case ARRAY_REF:
{
t = TREE_OPERAND (t, 0);
wi->changed = false;
replace_var_in_stmt (&t, walk_subtrees, data);
if (wi->changed)
{
tree base = TREE_OPERAND (*tp, 0);
/* Ensure that if we only have a pointer type available
   (e.g., if the view is parametric sized), we compute the
   pointer position rather than keep the array access.  */
if (TREE_TYPE (t) == TREE_TYPE (base))
TREE_OPERAND (*tp, 0) = t;
else
{
/* Types differ: cast the underlying pointer to a pointer to the
   original array type and index the dereference of that.  */
tree pointer_to_array_type = build_pointer_type (TREE_TYPE (TREE_OPERAND (*tp, 0)));
tree tmp = create_tmp_var (pointer_to_array_type, NULL);
gcc_assert (TREE_CODE (t) == INDIRECT_REF);
stmt = gimple_build_assign (tmp, fold_convert (pointer_to_array_type, TREE_OPERAND (t, 0)));
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
TREE_OPERAND (*tp, 0) = build_fold_indirect_ref (tmp);
/*
tree array_index = TREE_OPERAND (*tp, 1);
tree shift;
tree base_size = TYPE_SIZE_UNIT (TREE_TYPE (t));
tree base_pointer = TREE_OPERAND (t, 0);
gcc_assert (TREE_CODE (t) == INDIRECT_REF);
if (!is_gimple_constant (array_index))
{
tree index = create_tmp_var (size_type_node, NULL);
stmt = gimple_build_assign (index, fold_convert (size_type_node, array_index));
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
array_index = index;
}
shift = create_tmp_var (size_type_node, NULL);
stmt = gimple_build_assign_with_ops (MULT_EXPR, shift, array_index, base_size);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, base_pointer, base_pointer, shift);
gsi_insert_before (&wi->gsi, stmt, GSI_SAME_STMT);
*tp = build_fold_indirect_ref (base_pointer);
*/
}
}
}
break;
default:
break;
}
return NULL_TREE;
}
/* Connect the variable of VIEW to the view's stream buffer inside the
   region delimited by ENTRY_BB and EXIT_BB.

   If the view variable has aggregate or pointer type, every statement
   of the blocks gathered for the ENTRY_BB..EXIT_BB region is walked
   with replace_var_in_stmt so that uses of the variable go through
   VIEW->buffer_pointer.

   Otherwise the value is simply transferred once: for a READ_VIEW it is
   loaded from the buffer at the end of ENTRY_BB, for any other view
   type it is stored to the buffer at the end of EXIT_BB.  When the view
   variable is not a GIMPLE register, the transfer goes through a
   "reg_load" temporary.  */
static void
convert_view_to_var (view_p view, basic_block entry_bb, basic_block exit_bb)
{
  gimple_stmt_iterator gsi;
  gimple stmt;
  tree view_type = TREE_TYPE (view->view_var);

  /* If the type cannot fit a register, we won't duplicate, so we need
     to replace in the body of the task all occurrences.  Otherwise,
     just load to a register the value (there is no penalty in doing
     so) at the beginning of the body or store back at the end.  */
  if (AGGREGATE_TYPE_P (view_type) || POINTER_TYPE_P (view_type))
    {
      VEC (basic_block, heap) *bbs = NULL;
      struct walk_stmt_info wi;
      basic_block bb;
      int i;

      VEC_safe_push (basic_block, heap, bbs, entry_bb);
      gather_blocks_in_sese_region (entry_bb, exit_bb, &bbs);

      for (i = 0; VEC_iterate (basic_block, bbs, i, bb); ++i)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      struct replace_var_in_stmt_info info;

	      info.view = view;
	      info.is_pointer_type = POINTER_TYPE_P (view_type);

	      /* The callback inserts its helper statements before
		 WI.gsi, i.e. before the statement being walked.  */
	      memset (&wi, 0, sizeof (wi));
	      wi.gsi = gsi;
	      wi.info = &info;
	      walk_gimple_op (gsi_stmt (gsi), replace_var_in_stmt, &wi);
	    }
	}

      /* The block vector lives on the heap; release it once the walk
	 is finished.  */
      VEC_free (basic_block, heap, bbs);
    }
  else
    {
      tree var = (is_gimple_reg (view->view_var)) ? view->view_var :
	create_tmp_var (TREE_TYPE (view->view_var), "reg_load");

      if (view->type == READ_VIEW)
	{
	  /* var = *buffer_pointer; [view_var = var;]  */
	  gsi = gsi_last_bb (entry_bb);
	  stmt = gimple_build_assign (var,
				      build_fold_indirect_ref (view->buffer_pointer));
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	  if (!is_gimple_reg (view->view_var))
	    {
	      stmt = gimple_build_assign (view->view_var, var);
	      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      else
	{
	  /* [var = view_var;] *buffer_pointer = var;  */
	  gsi = gsi_last_bb (exit_bb);
	  if (!is_gimple_reg (view->view_var))
	    {
	      stmt = gimple_build_assign (var, view->view_var);
	      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	  stmt = gimple_build_assign (build_fold_indirect_ref (view->buffer_pointer),
				      var);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
}
/* Map a clause to the related VAR_DECL.  Entries live in
   HTAB_CLAUSE_TO_VAR.  */
typedef struct clause_to_var_struct
{
/* The input / output clause.  This is the hash key.  */
tree clause;
/* The variable recorded for CLAUSE; starts out NULL in a new entry and
   is filled in by callers through the pointer returned by
   lookup_var_for_clause.  */
tree var;
} *clause_to_var;
/* Map clauses to the respective variables.  Created on first use by
   lookup_var_for_clause.  */
htab_t htab_clause_to_var;
/* Hash callback for HTAB_CLAUSE_TO_VAR: hash a clause_to_var entry ELT
   on the address of its clause.  */
static hashval_t
hash_clause_to_var (const void *elt)
{
  const struct clause_to_var_struct *entry
    = (const struct clause_to_var_struct *) elt;

  return htab_hash_pointer (entry->clause);
}
/* Equality callback for HTAB_CLAUSE_TO_VAR: entries E1 and E2 are equal
   when they refer to the same clause.  */
static int
eq_clause_to_var (const void *e1, const void *e2)
{
  const struct clause_to_var_struct *lhs
    = (const struct clause_to_var_struct *) e1;
  const struct clause_to_var_struct *rhs
    = (const struct clause_to_var_struct *) e2;

  return lhs->clause == rhs->clause;
}
/* Return the address of the variable slot associated with the input /
   output CLAUSE in HTAB_CLAUSE_TO_VAR, creating the table and/or the
   entry on first use.  The slot of a new entry holds NULL until the
   caller stores into it.  */
static tree *
lookup_var_for_clause (tree clause)
{
  struct clause_to_var_struct key;
  clause_to_var *slot;
  clause_to_var entry;

  key.clause = clause;
  if (htab_clause_to_var == NULL)
    htab_clause_to_var = htab_create_ggc (10, hash_clause_to_var,
					  eq_clause_to_var, NULL);

  slot = (clause_to_var *) htab_find_slot (htab_clause_to_var, &key, INSERT);
  entry = *slot;
  if (entry == NULL)
    {
      entry = GGC_CNEW (struct clause_to_var_struct);
      *slot = entry;
      entry->clause = clause;
    }
  return &entry->var;
}
/* Location types for statement re-ordering when generating code for
   streamization during OMP expansion.  SL_NONE is the value reported
   for statements that have no location recorded (see get_stmt_loc).
   NOTE(review): the precise placement implied by each of the other
   values is decided by the expansion code, which is not shown here.  */
enum stmt_location
{
SL_NONE,
SL_INIT,
SL_COPY_IN,
SL_COPY_OUT,
SL_PUSH,
SL_HEAD,
SL_DEREF,
SL_POP,
SL_DONT_MOVE
};
/* This structure maps a STMT to the LOCation it needs to be moved
   during streamization.  Note that the typedef name stmt_loc denotes a
   POINTER to struct stmt_loc.  */
typedef struct stmt_loc
{
/* The statement.  This is the hash key.  */
gimple stmt;
/* The location.  */
enum stmt_location loc;
} *stmt_loc;
/* Map the statements generated for streamization to their respective
   locations.  Unlike the other tables above, this one is not created by
   its lookup functions; it must be created elsewhere before
   lookup_stmt_loc is used.  */
htab_t htab_stmt_loc;
/* Hash callback for HTAB_STMT_LOC: hash a struct stmt_loc entry ELT on
   the address of its statement.  */
static hashval_t
hash_stmt_loc (const void *elt)
{
  const struct stmt_loc *entry = (const struct stmt_loc *) elt;

  return htab_hash_pointer (entry->stmt);
}
/* Equality callback for HTAB_STMT_LOC: entries E1 and E2 are equal when
   they refer to the same statement.  */
static int
eq_stmt_loc (const void *e1, const void *e2)
{
  const struct stmt_loc *lhs = (const struct stmt_loc *) e1;
  const struct stmt_loc *rhs = (const struct stmt_loc *) e2;

  return lhs->stmt == rhs->stmt;
}
/* Returns the STMT_LOCATION corresponding to STMT.  If it has not yet
   been set, returns SL_NONE.  This is a pure query: no entry is created
   for STMT, and a table that has not been created yet is treated as
   empty.  */
static enum stmt_location
get_stmt_loc (gimple stmt)
{
  struct stmt_loc **slot, tmp;

  /* Nothing can have been recorded if the table does not exist.  */
  if (!htab_stmt_loc)
    return SL_NONE;

  tmp.stmt = stmt;
  slot = (stmt_loc *) htab_find_slot (htab_stmt_loc, &tmp, NO_INSERT);
  /* With NO_INSERT a missing entry is reported as a NULL slot.  */
  if (!slot || !*slot)
    return SL_NONE;
  return (*slot)->loc;
}
/* Return the STMT_LOC entry for STMT in HTAB_STMT_LOC, inserting a new
   one with location SL_NONE when STMT has none yet.  The table itself
   must already exist.  */
static stmt_loc
lookup_stmt_loc (gimple stmt)
{
  struct stmt_loc key;
  stmt_loc *slot;

  key.stmt = stmt;
  slot = (stmt_loc *) htab_find_slot (htab_stmt_loc, &key, INSERT);
  gcc_assert (slot);

  if (*slot == NULL)
    {
      stmt_loc fresh = GGC_CNEW (struct stmt_loc);

      *slot = fresh;
      fresh->stmt = stmt;
      fresh->loc = SL_NONE;
    }
  return *slot;
}
/* Record LOC as the STMT_LOCATION of STMT, overwriting any previous
   value.  An entry is created in the map when STMT has none.  */
static void
set_stmt_loc (gimple stmt, enum stmt_location loc)
{
  lookup_stmt_loc (stmt)->loc = loc;
}
/* Like set_stmt_loc, but leave the location of STMT alone when one has
   already been recorded (i.e. when it differs from SL_NONE).  An entry
   is created in the map when STMT has none.  */
static void
set_stmt_loc_if_none (gimple stmt, enum stmt_location loc)
{
  stmt_loc entry = lookup_stmt_loc (stmt);

  if (entry->loc != SL_NONE)
    return;
  entry->loc = loc;
}
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
phases. The first phase scans the function looking for OMP statements
and then for variables that must be replaced to satisfy data sharing
clauses. The second phase expands code for the constructs, as well as
re-gimplifying things when variables have been replaced with complex
expressions.
Final code generation is done by pass_expand_omp. The flowgraph is
scanned for parallel regions which are then moved to a new
function, to be invoked by the thread library. */
/* Context structure.  Used to store information about each parallel
   directive in the code.  Contexts form a tree through the OUTER
   link.  */
typedef struct omp_context
{
/* This field must be at the beginning, as we do "inheritance": Some
   callback functions for tree-inline.c (e.g., omp_copy_decl)
   receive a copy_body_data pointer that is up-casted to an
   omp_context pointer.  */
copy_body_data cb;
/* The tree of contexts corresponding to the encountered constructs.
   OUTER is the enclosing context; STMT is the directive statement this
   context was created for.  */
struct omp_context *outer;
gimple stmt;
/* Map variables to fields in a structure that allows communication
   between sending and receiving threads.  */
splay_tree field_map;
tree record_type;
tree sender_decl;
tree receiver_decl;
/* These are used just by task contexts, if task firstprivate fn is
   needed.  srecord_type is used to communicate from the thread
   that encountered the task construct to task firstprivate fn,
   record_type is allocated by GOMP_task, initialized by task firstprivate
   fn and passed to the task body fn.  */
splay_tree sfield_map;
tree srecord_type;
/* A chain of variables to add to the top-level block surrounding the
   construct.  In the case of a parallel, this is in the child function.  */
tree block_vars;
/* What to do with variables with implicitly determined sharing
   attributes.  */
enum omp_clause_default_kind default_kind;
/* Nesting depth of this context.  Used to beautify error messages re
   invalid gotos.  The outermost ctx is depth 1, with depth 0 being
   reserved for the main body of the function.  */
int depth;
/* True if this parallel directive is nested within another.  */
bool is_nested;
/* True only when the activation counter has reached the maximum
   number of activations allowed for this task.  (The field below is
   currently commented out.)  */
/*tree termination_flag_p;*/
} omp_context;
/* Description of one loop of an OMP for nest.  V is filled from
   gimple_omp_for_index by extract_omp_for_data.  N1, N2, STEP and
   COND_CODE are presumably the initial value, final value, increment
   and comparison code of the loop -- they are set outside the visible
   part of the file.  */
struct omp_for_data_loop
{
tree v, n1, n2, step;
enum tree_code cond_code;
};
/* A structure describing the main elements of a parallel loop.  It is
   filled in by extract_omp_for_data.  */
struct omp_for_data
{
/* Storage for the single-loop case; LOOPS points here when COLLAPSE is
   not greater than 1.  */
struct omp_for_data_loop loop;
/* Chunk expression of the schedule clause, or a default computed by
   extract_omp_for_data; may be NULL_TREE.  */
tree chunk_size;
/* The OMP for statement being described.  */
gimple for_stmt;
tree pre, iter_type;
/* Value of gimple_omp_for_collapse for FOR_STMT.  */
int collapse;
/* Whether the nowait / ordered clauses are present.  */
bool have_nowait, have_ordered;
/* Schedule kind; OMP_CLAUSE_SCHEDULE_STATIC when no schedule clause is
   given.  */
enum omp_clause_schedule_kind sched_kind;
/* The per-loop descriptions: &LOOP, or the caller-provided array when
   COLLAPSE is greater than 1.  */
struct omp_for_data_loop *loops;
};
/* File-scope state of the lowering pass, followed by forward
   declarations of the scanning routines.  NOTE(review): the roles below
   are inferred from the names only; the code using these variables is
   outside the visible part of the file.  */
/* Presumably the set of all omp_context structures created so far.  */
static splay_tree all_contexts;
/* Presumably the current nesting depth of parallel/task regions.  */
static int taskreg_nesting_level;
/* Presumably the root of the OMP region tree; not static, so visible to
   other translation units.  */
omp_region_p root_omp_region;
/* Presumably the set of variables shared with task regions.  */
static bitmap task_shared_vars;
static void scan_omp (gimple_seq, omp_context *);
static tree scan_omp_1_op (tree *, int *, void *);
/* Case labels meant to be pasted into the switch of a statement-walk
   callback: for the container statements listed, clear *HANDLED_OPS_P so
   that their sub-statements are walked.  The expansion site must have a
   variable named handled_ops_p in scope.  */
#define WALK_SUBSTMTS \
case GIMPLE_BIND: \
case GIMPLE_TRY: \
case GIMPLE_CATCH: \
case GIMPLE_EH_FILTER: \
/* The sub-statements for these should be walked. */ \
*handled_ops_p = false; \
break;
/* Run scan_omp_1_op over the tree operand *TP in context CTX.  A
   zeroed walk_stmt_info carrying CTX, with location tracking requested,
   is handed to the callback.  Returns whatever walk_tree returns.  */
static inline tree
scan_omp_op (tree *tp, omp_context *ctx)
{
  struct walk_stmt_info walk_info;

  memset (&walk_info, 0, sizeof (walk_info));
  walk_info.want_locations = true;
  walk_info.info = ctx;

  return walk_tree (tp, scan_omp_1_op, &walk_info, NULL);
}
/* Forward declarations of routines defined further down in this
   file.  */
static void lower_omp (gimple_seq, omp_context *);
static tree lookup_decl_in_outer_ctx (tree, omp_context *);
static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
/* Return the first clause with code KIND on the clause chain CLAUSES,
   or NULL_TREE when the chain contains no such clause.  */
tree
find_omp_clause (tree clauses, enum omp_clause_code kind)
{
  tree cur = clauses;

  while (cur)
    {
      if (OMP_CLAUSE_CODE (cur) == kind)
	return cur;
      cur = OMP_CLAUSE_CHAIN (cur);
    }

  return NULL_TREE;
}
/* Return true if the directive statement of CTX is a
   GIMPLE_OMP_PARALLEL.  */
static inline bool
is_parallel_ctx (omp_context *ctx)
{
  enum gimple_code code = gimple_code (ctx->stmt);

  return code == GIMPLE_OMP_PARALLEL;
}
/* Return true if the directive statement of CTX is a
   GIMPLE_OMP_TASK.  */
static inline bool
is_task_ctx (omp_context *ctx)
{
  enum gimple_code code = gimple_code (ctx->stmt);

  return code == GIMPLE_OMP_TASK;
}
/* Return true if the directive statement of CTX is either a
   GIMPLE_OMP_PARALLEL or a GIMPLE_OMP_TASK.  */
static inline bool
is_taskreg_ctx (omp_context *ctx)
{
  enum gimple_code code = gimple_code (ctx->stmt);

  if (code == GIMPLE_OMP_PARALLEL)
    return true;
  return code == GIMPLE_OMP_TASK;
}
/* Return the is_combined_parallel flag of REGION, which marks combined
   parallel+workshare regions.  */
static inline bool
is_combined_parallel (omp_region_p region)
{
  bool combined = region->is_combined_parallel;

  return combined;
}
/* Extract the header elements of parallel loop FOR_STMT and store
them into *FD.
LOOPS supplies storage for the per-loop descriptors when the loop nest
is collapsed (FD->collapse > 1); it becomes FD->loops in that case.
When LOOPS is NULL and the nest is collapsed, every loop is decoded
into a local scratch descriptor that is overwritten on the next
iteration, so only the aggregate results (iteration type, collapse
count, FD->loop) survive.
Besides filling *FD this may create the ".iter" and ".count"
temporaries and store them into the COLLAPSE clause of FOR_STMT. */
static void
extract_omp_for_data (gimple for_stmt, struct omp_for_data *fd,
struct omp_for_data_loop *loops)
{
tree t, var, *collapse_iter, *collapse_count;
tree count = NULL_TREE, iter_type = long_integer_type_node;
struct omp_for_data_loop *loop;
int i;
struct omp_for_data_loop dummy_loop;
location_t loc = gimple_location (for_stmt);
fd->for_stmt = for_stmt;
fd->pre = NULL;
fd->collapse = gimple_omp_for_collapse (for_stmt);
if (fd->collapse > 1)
fd->loops = loops;
else
fd->loops = &fd->loop;
/* Defaults used when the corresponding clause is absent: no nowait,
no ordered, static schedule without a chunk size. */
fd->have_nowait = fd->have_ordered = false;
fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
fd->chunk_size = NULL_TREE;
collapse_iter = NULL;
collapse_count = NULL;
/* Scan the clauses of FOR_STMT for the ones that affect expansion. */
for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
switch (OMP_CLAUSE_CODE (t))
{
case OMP_CLAUSE_NOWAIT:
fd->have_nowait = true;
break;
case OMP_CLAUSE_ORDERED:
fd->have_ordered = true;
break;
case OMP_CLAUSE_SCHEDULE:
fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
break;
case OMP_CLAUSE_COLLAPSE:
/* Remember where the clause keeps its iteration variable and
iteration count so they can be read or filled in below. */
if (fd->collapse > 1)
{
collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
}
/* FALLTHRU: there is no break here; control runs into the empty
default case. */
default:
break;
}
/* FIXME: for now map schedule(auto) to schedule(static).
There should be analysis to determine whether all iterations
are approximately the same amount of work (then schedule(static)
is best) or if it varies (then schedule(dynamic,N) is better). */
if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
{
fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
gcc_assert (fd->chunk_size == NULL);
}
/* A collapsed nest must have carried a COLLAPSE clause. */
gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
gcc_assert (fd->chunk_size == NULL);
else if (fd->chunk_size == NULL)
{
/* We only need to compute a default chunk size for ordered
static loops and dynamic loops. */
if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
|| fd->have_ordered
|| fd->collapse > 1)
fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
? integer_zero_node : integer_one_node;
}
/* Decode each loop of the nest in turn. */
for (i = 0; i < fd->collapse; i++)
{
/* Pick the descriptor to fill: FD->loop for a single loop, the I-th
slot of LOOPS when the caller supplied storage, otherwise the
scratch descriptor. */
if (fd->collapse == 1)
loop = &fd->loop;
else if (loops != NULL)
loop = loops + i;
else
loop = &dummy_loop;
loop->v = gimple_omp_for_index (for_stmt, i);
gcc_assert (SSA_VAR_P (loop->v));
gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
|| TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
loop->n1 = gimple_omp_for_initial (for_stmt, i);
loop->cond_code = gimple_omp_for_cond (for_stmt, i);
loop->n2 = gimple_omp_for_final (for_stmt, i);
/* Normalize the condition so that only LT_EXPR and GT_EXPR remain:
"<= N2" becomes "< N2 + 1" and ">= N2" becomes "> N2 - 1". */
switch (loop->cond_code)
{
case LT_EXPR:
case GT_EXPR:
break;
case LE_EXPR:
if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
loop->n2 = fold_build2_loc (loc,
POINTER_PLUS_EXPR, TREE_TYPE (loop->n2),
loop->n2, size_one_node);
else
loop->n2 = fold_build2_loc (loc,
PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
build_int_cst (TREE_TYPE (loop->n2), 1));
loop->cond_code = LT_EXPR;
break;
case GE_EXPR:
if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
loop->n2 = fold_build2_loc (loc,
POINTER_PLUS_EXPR, TREE_TYPE (loop->n2),
loop->n2, size_int (-1));
else
loop->n2 = fold_build2_loc (loc,
MINUS_EXPR, TREE_TYPE (loop->n2), loop->n2,
build_int_cst (TREE_TYPE (loop->n2), 1));
loop->cond_code = GT_EXPR;
break;
default:
gcc_unreachable ();
}
/* The increment must be VAR +/- STEP; a subtraction is recorded as
a negated step. */
t = gimple_omp_for_incr (for_stmt, i);
gcc_assert (TREE_OPERAND (t, 0) == var);
switch (TREE_CODE (t))
{
case PLUS_EXPR:
case POINTER_PLUS_EXPR:
loop->step = TREE_OPERAND (t, 1);
break;
case MINUS_EXPR:
loop->step = TREE_OPERAND (t, 1);
loop->step = fold_build1_loc (loc,
NEGATE_EXPR, TREE_TYPE (loop->step),
loop->step);
break;
default:
gcc_unreachable ();
}
/* Decide whether the default iteration type (long) still suffices.
Once ITER_TYPE has been widened to unsigned long long it is not
reconsidered for later loops. */
if (iter_type != long_long_unsigned_type_node)
{
/* Pointer induction variables always force unsigned long long. */
if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
iter_type = long_long_unsigned_type_node;
/* Unsigned index at least as wide as ITER_TYPE: stay with ITER_TYPE
only if the relevant bound is a constant that does not exceed
its maximum value. */
else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
&& TYPE_PRECISION (TREE_TYPE (loop->v))
>= TYPE_PRECISION (iter_type))
{
tree n;
if (loop->cond_code == LT_EXPR)
n = fold_build2_loc (loc,
PLUS_EXPR, TREE_TYPE (loop->v),
loop->n2, loop->step);
else
n = loop->n1;
if (TREE_CODE (n) != INTEGER_CST
|| tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
iter_type = long_long_unsigned_type_node;
}
/* Index strictly wider than ITER_TYPE: stay with ITER_TYPE only if
both ends of the range are constants lying strictly between its
minimum and maximum values. */
else if (TYPE_PRECISION (TREE_TYPE (loop->v))
> TYPE_PRECISION (iter_type))
{
tree n1, n2;
if (loop->cond_code == LT_EXPR)
{
n1 = loop->n1;
n2 = fold_build2_loc (loc,
PLUS_EXPR, TREE_TYPE (loop->v),
loop->n2, loop->step);
}
else
{
n1 = fold_build2_loc (loc,
MINUS_EXPR, TREE_TYPE (loop->v),
loop->n2, loop->step);
n2 = loop->n1;
}
if (TREE_CODE (n1) != INTEGER_CST
|| TREE_CODE (n2) != INTEGER_CST
|| !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
|| !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
iter_type = long_long_unsigned_type_node;
}
}
/* For a collapsed nest whose COLLAPSE clause has no count yet, try to
fold the total iteration count to a constant. COUNT accumulates
the product over the loops seen so far and is reset to NULL_TREE
as soon as one loop cannot be folded. */
if (collapse_count && *collapse_count == NULL)
{
if ((i == 0 || count != NULL_TREE)
&& TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
&& TREE_CONSTANT (loop->n1)
&& TREE_CONSTANT (loop->n2)
&& TREE_CODE (loop->step) == INTEGER_CST)
{
tree itype = TREE_TYPE (loop->v);
if (POINTER_TYPE_P (itype))
itype
= lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0);
/* Iterations of this loop: (STEP + adj + N2 - N1) / STEP, where
adj is -1 for a "<" loop and +1 for a ">" loop. */
t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
t = fold_build2_loc (loc,
PLUS_EXPR, itype,
fold_convert_loc (loc, itype, loop->step), t);
t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
fold_convert_loc (loc, itype, loop->n2));
t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
fold_convert_loc (loc, itype, loop->n1));
/* For an unsigned downward loop both numerator and step are
negated before dividing. */
if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
fold_build1_loc (loc, NEGATE_EXPR, itype, t),
fold_build1_loc (loc, NEGATE_EXPR, itype,
fold_convert_loc (loc, itype,
loop->step)));
else
t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
fold_convert_loc (loc, itype, loop->step));
t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
if (count != NULL_TREE)
count = fold_build2_loc (loc,
MULT_EXPR, long_long_unsigned_type_node,
count, t);
else
count = t;
if (TREE_CODE (count) != INTEGER_CST)
count = NULL_TREE;
}
else
count = NULL_TREE;
}
}
/* Settle the final iteration type: a known constant count decides
between long and unsigned long long; otherwise an iteration variable
already recorded in the COLLAPSE clause dictates it. */
if (count)
{
if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
iter_type = long_long_unsigned_type_node;
else
iter_type = long_integer_type_node;
}
else if (collapse_iter && *collapse_iter != NULL)
iter_type = TREE_TYPE (*collapse_iter);
fd->iter_type = iter_type;
/* Create the collapsed iteration variable and count if the clause does
not have them yet. */
if (collapse_iter && *collapse_iter == NULL)
*collapse_iter = create_tmp_var (iter_type, ".iter");
if (collapse_count && *collapse_count == NULL)
{
if (count)
*collapse_count = fold_convert_loc (loc, iter_type, count);
else
*collapse_count = create_tmp_var (iter_type, ".count");
}
/* For a collapsed nest FD->loop describes the single logical loop
"for (.iter = 0; .iter < .count; .iter += 1)". */
if (fd->collapse > 1)
{
fd->loop.v = *collapse_iter;
fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
fd->loop.n2 = *collapse_count;
fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
fd->loop.cond_code = LT_EXPR;
}
}
/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
is the immediate dominator of PAR_ENTRY_BB, return true if there
are no data dependencies that would prevent expanding the parallel
directive at PAR_ENTRY_BB as a combined parallel+workshare region.
When expanding a combined parallel+workshare region, the call to
the child function may need additional arguments in the case of
GIMPLE_OMP_FOR regions. In some cases, these arguments are
computed out of variables passed in from the parent to the child
via 'struct .omp_data_s'. For instance:
#pragma omp parallel for schedule (guided, i * 4)
for (j ...)
Is lowered into:
# BLOCK 2 (PAR_ENTRY_BB)
.omp_data_o.i = i;
#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
# BLOCK 3 (WS_ENTRY_BB)
.omp_data_i = &.omp_data_o;
D.1667 = .omp_data_i->i;
D.1598 = D.1667 * 4;
#pragma omp for schedule (guided, D.1598)
When we outline the parallel region, the call to the child function
'bar.omp_fn.0' will need the value D.1598 in its argument list, but
that value is computed *after* the call site. So, in principle we
cannot do the transformation.
To see whether the code in WS_ENTRY_BB blocks the combined
parallel+workshare call, we collect all the variables used in the
GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
statement in WS_ENTRY_BB. If so, then we cannot emit the combined
call.
FIXME. If we had the SSA form built at this point, we could merely
hoist the code in block 3 into block 2 and be done with it. But at
this point we don't have dataflow information and though we could
hack something up here, it is really not worth the aggravation. */
static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  gimple workshare = last_stmt (ws_entry_bb);
  struct omp_for_data for_data;

  /* A sections workshare is always accepted.  */
  if (gimple_code (workshare) == GIMPLE_OMP_SECTIONS)
    return true;

  /* Anything else has to be a loop; decode its header.  */
  gcc_assert (gimple_code (workshare) == GIMPLE_OMP_FOR);
  extract_omp_for_data (workshare, &for_data, NULL);

  /* A collapsed nest is only accepted with a constant total count.  */
  if (for_data.collapse > 1
      && TREE_CODE (for_data.loop.n2) != INTEGER_CST)
    return false;

  /* Only loops iterating in `long' are accepted.  */
  if (for_data.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (for_data.loop.n1))
    return false;

  if (!is_gimple_min_invariant (for_data.loop.n2))
    return false;

  if (!is_gimple_min_invariant (for_data.loop.step))
    return false;

  if (for_data.chunk_size
      && !is_gimple_min_invariant (for_data.chunk_size))
    return false;

  return true;
}
/* Build the TREE_LIST of extra arguments that a combined
   parallel+workshare call needs for the workshare directive WS_STMT.

   For a GIMPLE_OMP_FOR the list is, in order: lower bound, upper
   bound, step and, only when the loop has one, chunk size, each
   converted to `long'.  For a GIMPLE_OMP_SECTIONS it is a single
   unsigned constant.  Any other statement kind is a bug.  */
static tree
get_ws_args_for (gimple ws_stmt)
{
  const location_t loc = gimple_location (ws_stmt);

  switch (gimple_code (ws_stmt))
    {
    case GIMPLE_OMP_FOR:
      {
	struct omp_for_data for_data;
	tree args = NULL_TREE;
	tree arg;

	extract_omp_for_data (ws_stmt, &for_data, NULL);

	/* tree_cons prepends, so the arguments are pushed last-first.  */
	if (for_data.chunk_size)
	  {
	    arg = fold_convert_loc (loc, long_integer_type_node,
				    for_data.chunk_size);
	    args = tree_cons (NULL, arg, args);
	  }

	arg = fold_convert_loc (loc, long_integer_type_node,
				for_data.loop.step);
	args = tree_cons (NULL, arg, args);

	arg = fold_convert_loc (loc, long_integer_type_node,
				for_data.loop.n2);
	args = tree_cons (NULL, arg, args);

	arg = fold_convert_loc (loc, long_integer_type_node,
				for_data.loop.n1);
	args = tree_cons (NULL, arg, args);

	return args;
      }

    case GIMPLE_OMP_SECTIONS:
      {
	/* Number of sections is equal to the number of edges from the
	   GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	   the exit of the sections region.  */
	basic_block switch_bb = single_succ (gimple_bb (ws_stmt));
	tree num_sections
	  = build_int_cst (unsigned_type_node,
			   EDGE_COUNT (switch_bb->succs) - 1);

	return tree_cons (NULL, num_sections, NULL);
      }

    default:
      break;
    }

  gcc_unreachable ();
}
/* Decide whether REGION and its first inner region form a combined
   parallel+workshare pair.  When they do, both regions get their
   is_combined_parallel flag set and REGION->ws_args receives the extra
   call arguments; otherwise the regions are left alone, except that a
   loop rejected for its schedule has both flags cleared explicitly.  */
static void
determine_parallel_type (omp_region_p region)
{
  omp_region_p inner;
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;
  gimple ws_stmt;

  /* Both regions must exist and be complete.  */
  if (region == NULL)
    return;

  inner = region->inner;
  if (inner == NULL
      || region->exit == NULL
      || inner->exit == NULL
      || inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL)
    return;

  if (inner->type != GIMPLE_OMP_FOR
      && inner->type != GIMPLE_OMP_SECTIONS)
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = inner->entry;
  ws_exit_bb = inner->exit;

  if (single_succ (par_entry_bb) != ws_entry_bb
      || single_succ (ws_exit_bb) != par_exit_bb
      || !workshare_safe_to_combine_p (ws_entry_bb))
    return;

  /* Either the parallel statement is already flagged as combined, or
     the workshare entry block and the parallel exit block must each
     consist of a single statement.  */
  if (!gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
      && !(last_and_only_stmt (ws_entry_bb)
	   && last_and_only_stmt (par_exit_bb)))
    return;

  ws_stmt = last_stmt (ws_entry_bb);

  if (inner->type == GIMPLE_OMP_FOR)
    {
      /* If this is a combined parallel loop, we need to determine
	 whether or not to use the combined library calls.  There
	 are two cases where we do not apply the transformation:
	 static loops and any kind of ordered loop.  In the first
	 case, we already open code the loop so there is no need
	 to do anything else.  In the latter case, the combined
	 parallel loop call would still need extra synchronization
	 to implement ordered semantics, so there would not be any
	 gain in using the combined call.  */
      tree clauses = gimple_omp_for_clauses (ws_stmt);
      tree sched = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);

      if (sched == NULL
	  || OMP_CLAUSE_SCHEDULE_KIND (sched) == OMP_CLAUSE_SCHEDULE_STATIC
	  || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
	{
	  region->is_combined_parallel = false;
	  inner->is_combined_parallel = false;
	  return;
	}
    }

  region->is_combined_parallel = true;
  inner->is_combined_parallel = true;
  region->ws_args = get_ws_args_for (ws_stmt);
}
/* Tell whether EXPR is variable sized, i.e. whether the byte size of
   its type is not a constant.  */
static inline bool
is_variable_sized (const_tree expr)
{
  tree size_in_bytes = TYPE_SIZE_UNIT (TREE_TYPE (expr));

  if (TREE_CONSTANT (size_in_bytes))
    return false;

  return true;
}
/* Ask the front end, through the omp_privatize_by_reference language
   hook, whether DECL is to be treated as a reference.  */
static inline bool
is_reference (tree decl)
{
  bool by_reference = lang_hooks.decls.omp_privatize_by_reference (decl);

  return by_reference;
}
/* Lookup variables in the decl or field maps.  The "maybe" form
   allows for the variable to not have been entered, otherwise we
   assert that the variable must have been entered.  */

/* Return the replacement recorded for VAR in the decl map of CTX.
   VAR must already have a mapping there; a missing mapping is reported
   through gcc_assert rather than by dereferencing a null slot.  */
static inline tree
lookup_decl (tree var, omp_context *ctx)
{
  tree *n;

  n = (tree *) pointer_map_contains (ctx->cb.decl_map, var);
  /* pointer_map_contains yields NULL for an unknown key.  */
  gcc_assert (n != NULL);
  return *n;
}
/* Return the replacement recorded for VAR in the decl map of CTX, or
   NULL_TREE when VAR has no mapping there.  */
static inline tree
maybe_lookup_decl (const_tree var, omp_context *ctx)
{
  tree *slot = (tree *) pointer_map_contains (ctx->cb.decl_map, var);

  if (!slot)
    return NULL_TREE;

  return *slot;
}
/* Return the field recorded for VAR in the field map of CTX.  VAR must
   already have an entry there; a missing entry is reported through
   gcc_assert rather than by dereferencing a null node.  */
static inline tree
lookup_field (tree var, omp_context *ctx)
{
  splay_tree_node n;

  n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
  /* splay_tree_lookup yields NULL for an unknown key.  */
  gcc_assert (n != NULL);
  return (tree) n->value;
}
/* Return the sender-side field recorded for VAR in CTX.  The lookup
   uses CTX->sfield_map when that map exists and falls back to
   CTX->field_map otherwise.  VAR must already have an entry in the map
   consulted; a missing entry is reported through gcc_assert rather
   than by dereferencing a null node.  */
static inline tree
lookup_sfield (tree var, omp_context *ctx)
{
  splay_tree_node n;

  n = splay_tree_lookup (ctx->sfield_map
			 ? ctx->sfield_map : ctx->field_map,
			 (splay_tree_key) var);
  /* splay_tree_lookup yields NULL for an unknown key.  */
  gcc_assert (n != NULL);
  return (tree) n->value;
}
/* Return the field recorded for VAR in the field map of CTX, or
   NULL_TREE when VAR has no entry there.  */
static inline tree
maybe_lookup_field (tree var, omp_context *ctx)
{
  splay_tree_node node
    = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);

  if (!node)
    return NULL_TREE;

  return (tree) node->value;
}
/* Return true if DECL should be copied by pointer. SHARED_CTX is
the parallel context if DECL is to be shared.
When SHARED_CTX is NULL only the aggregate-type test below applies.
Side effect: when DECL is not read-only and SHARED_CTX is a task
context, the decl found for it in the outer context is, if it is a
gimple register, marked TREE_ADDRESSABLE and its DECL_UID is recorded
in the task_shared_vars bitmap (allocated here on first use). */
static bool
use_pointer_for_field (tree decl, omp_context *shared_ctx)
{
/* Aggregates are always passed by pointer. */
if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
return true;
/* We can only use copy-in/copy-out semantics for shared variables
when we know the value is not accessible from an outer scope. */
if (shared_ctx)
{
/* ??? Trivially accessible from anywhere. But why would we even
be passing an address in this case? Should we simply assert
this to be false, or should we have a cleanup pass that removes
these from the list of mappings? */
if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
return true;
/* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
without analyzing the expression whether or not its location
is accessible to anyone else. In the case of nested parallel
regions it certainly may be. */
if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
return true;
/* Do not use copy-in/copy-out for variables that have their
address taken. */
if (TREE_ADDRESSABLE (decl))
return true;
/* Disallow copy-in/out in nested parallel if
decl is shared in outer parallel, otherwise
each thread could store the shared variable
in its own copy-in location, making the
variable no longer really shared. */
if (!TREE_READONLY (decl) && shared_ctx->is_nested)
{
omp_context *up;
/* Find the nearest enclosing parallel/task context that has a
mapping for DECL. */
for (up = shared_ctx->outer; up; up = up->outer)
if (is_taskreg_ctx (up) && maybe_lookup_decl (decl, up))
break;
if (up)
{
tree c;
/* Look for a shared clause naming DECL on that context. */
for (c = gimple_omp_taskreg_clauses (up->stmt);
c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
&& OMP_CLAUSE_DECL (c) == decl)
break;
if (c)
return true;
}
}
/* For tasks avoid using copy-in/out, unless they are readonly
(in which case just copy-in is used). As tasks can be
deferred or executed in different thread, when GOMP_task
returns, the task hasn't necessarily terminated. */
if (!TREE_READONLY (decl) && is_task_ctx (shared_ctx))
{
tree outer = maybe_lookup_decl_in_outer_ctx (decl, shared_ctx);
if (is_gimple_reg (outer))
{
/* Taking address of OUTER in lower_send_shared_vars
might need regimplification of everything that uses the
variable. */
if (!task_shared_vars)
task_shared_vars = BITMAP_ALLOC (NULL);
bitmap_set_bit (task_shared_vars, DECL_UID (outer));
TREE_ADDRESSABLE (outer) = 1;
}
return true;
}
}
/* Nothing forces a pointer: the value itself can be copied. */
return false;
}
/* Build a fresh VAR_DECL called NAME of type TYPE at the source
   location of VAR, carrying over VAR's context together with its
   addressable, volatile, gimple-register, artificial and ignored
   flags.  The copy is additionally marked as used and as already seen
   in a bind expression.  */
tree
copy_var_decl (tree var, tree name, tree type)
{
  tree new_decl
    = build_decl (DECL_SOURCE_LOCATION (var), VAR_DECL, name, type);

  /* Properties inherited from VAR.  */
  DECL_CONTEXT (new_decl) = DECL_CONTEXT (var);
  DECL_IGNORED_P (new_decl) = DECL_IGNORED_P (var);
  DECL_ARTIFICIAL (new_decl) = DECL_ARTIFICIAL (var);
  DECL_GIMPLE_REG_P (new_decl) = DECL_GIMPLE_REG_P (var);
  TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (var);
  TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (var);

  /* Properties forced on every copy.  */
  DECL_SEEN_IN_BIND_EXPR_P (new_decl) = 1;
  TREE_USED (new_decl) = 1;

  return new_decl;
}
/* Make a copy of VAR named NAME with type TYPE that belongs to the
   current function, and push it on the front of CTX's block_vars
   chain.  Returns the new decl.  */
static tree
omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
{
  tree new_decl = copy_var_decl (var, name, type);

  /* The copy lives in the function being compiled, whatever context
     VAR itself had.  */
  DECL_CONTEXT (new_decl) = current_function_decl;

  /* Link it in as the new head of the context's variable list.  */
  TREE_CHAIN (new_decl) = ctx->block_vars;
  ctx->block_vars = new_decl;

  return new_decl;
}
/* Shorthand for omp_copy_decl_2 that keeps VAR's own name and type.  */
static tree
omp_copy_decl_1 (tree var, omp_context *ctx)
{
  tree same_name = DECL_NAME (var);
  tree same_type = TREE_TYPE (var);

  return omp_copy_decl_2 (var, same_name, same_type, ctx);
}
/* Build the expression that reads VAR's field out of the record that
   CTX->receiver_decl points to.  When BY_REF is true the field holds a
   pointer and the result is dereferenced once more.  */
static tree
build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
{
  tree field = lookup_field (var, ctx);
  tree remapped, ref;

  /* If the receiver record type was remapped in the child function,
     remap the field into the new record type.  */
  remapped = maybe_lookup_field (field, ctx);
  if (remapped != NULL)
    field = remapped;

  ref = build_fold_indirect_ref (ctx->receiver_decl);
  ref = build3 (COMPONENT_REF, TREE_TYPE (field), ref, field, NULL);

  return by_ref ? build_fold_indirect_ref (ref) : ref;
}
/* Build the expression that names VAR as seen from the scope enclosing
   CTX.  For a parallel or task context this is a component reference
   into the receiver record; for other constructs it is the decl
   recorded in the outer context.  Global variables are used
   directly.  */
static tree
build_outer_var_ref (tree var, omp_context *ctx)
{
  tree outer_decl = maybe_lookup_decl_in_outer_ctx (var, ctx);
  tree ref;

  if (is_global_var (outer_decl))
    ref = var;
  else if (is_variable_sized (var))
    {
      /* Go through the base of VAR's value expression: build the outer
	 reference for that base and dereference it.  */
      tree base = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);

      ref = build_outer_var_ref (base, ctx);
      ref = build_fold_indirect_ref (ref);
    }
  else if (is_taskreg_ctx (ctx))
    {
      bool through_pointer = use_pointer_for_field (var, NULL);

      ref = build_receiver_ref (var, through_pointer, ctx);
    }
  else if (ctx->outer != NULL)
    ref = lookup_decl (var, ctx->outer);
  else if (is_reference (var))
    /* This can happen with orphaned constructs.  If var is reference, it
       is possible it is shared and as such valid.  */
    ref = var;
  else
    gcc_unreachable ();

  /* References get one more level of indirection.  */
  return is_reference (var) ? build_fold_indirect_ref (ref) : ref;
}
/* Build the component reference that selects VAR's sender-side field
   inside CTX->sender_decl.  */
static tree
build_sender_ref (tree var, omp_context *ctx)
{
  tree sfield = lookup_sfield (var, ctx);
  tree field_type = TREE_TYPE (sfield);

  return build3 (COMPONENT_REF, field_type, ctx->sender_decl, sfield, NULL);
}
/* Add a new field for VAR inside the structure CTX->SENDER_DECL.
BY_REF makes the field a pointer to VAR's type instead of VAR's type.
MASK selects where the field is registered: bit 1 enters it in
CTX->field_map (and CTX->record_type), bit 2 enters it in
CTX->sfield_map (and CTX->srecord_type). With both bits set a single
field goes into the record type, plus a twin field in the sender
record type if that type already exists. With only one bit set the
separate sender record type is created on demand. */
static void
install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
{
tree field, type, sfield = NULL_TREE;
/* VAR must not already be registered in a map this call will add to. */
gcc_assert ((mask & 1) == 0
|| !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
|| !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var));
/* Choose the field type: pointer to VAR's type when passing by
reference; the referenced type when VAR is a reference and only the
record-side field is requested; VAR's type otherwise. */
type = TREE_TYPE (var);
if (by_ref)
type = build_pointer_type (type);
else if ((mask & 3) == 1 && is_reference (var))
type = TREE_TYPE (type);
field = build_decl (DECL_SOURCE_LOCATION (var),
FIELD_DECL, DECL_NAME (var), type);
/* Remember what variable this field was created for. This does have a
side effect of making dwarf2out ignore this member, so for helpful
debugging we clear it later in delete_omp_context. */
DECL_ABSTRACT_ORIGIN (field) = var;
/* A field with VAR's own type inherits VAR's alignment and volatility;
any other field just gets the alignment of its type. */
if (type == TREE_TYPE (var))
{
DECL_ALIGN (field) = DECL_ALIGN (var);
DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
}
else
DECL_ALIGN (field) = TYPE_ALIGN (type);
if ((mask & 3) == 3)
{
/* Field wanted on both sides. */
insert_field_into_struct (ctx->record_type, field);
if (ctx->srecord_type)
{
/* A separate sender record exists: give it a matching field. */
sfield = build_decl (DECL_SOURCE_LOCATION (var),
FIELD_DECL, DECL_NAME (var), type);
DECL_ABSTRACT_ORIGIN (sfield) = var;
DECL_ALIGN (sfield) = DECL_ALIGN (field);
DECL_USER_ALIGN (sfield) = DECL_USER_ALIGN (field);
TREE_THIS_VOLATILE (sfield) = TREE_THIS_VOLATILE (field);
insert_field_into_struct (ctx->srecord_type, sfield);
}
}
else
{
/* Field wanted on one side only, so the two records diverge. On
first need, create the sender record and its map and seed them with
a copy of every field already present in the record type. */
if (ctx->srecord_type == NULL_TREE)
{
tree t;
ctx->srecord_type = lang_hooks.types.make_type (RECORD_TYPE);
ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
{
sfield = build_decl (DECL_SOURCE_LOCATION (var),
FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
insert_field_into_struct (ctx->srecord_type, sfield);
splay_tree_insert (ctx->sfield_map,
(splay_tree_key) DECL_ABSTRACT_ORIGIN (t),
(splay_tree_value) sfield);
}
}
/* The new field itself goes into whichever record MASK selects. */
sfield = field;
insert_field_into_struct ((mask & 1) ? ctx->record_type
: ctx->srecord_type, field);
}
/* Register the field(s) under VAR in the requested maps. When no
separate sender map exists, nothing is entered for bit 2. */
if (mask & 1)
splay_tree_insert (ctx->field_map, (splay_tree_key) var,
(splay_tree_value) field);
if ((mask & 2) && ctx->sfield_map)
splay_tree_insert (ctx->sfield_map, (splay_tree_key) var,
(splay_tree_value) sfield);
}
/* Create a local copy of VAR for CTX, record the VAR -> copy mapping in
   CTX's decl map, and return the copy.  */
static tree
install_var_local (tree var, omp_context *ctx)
{
  tree local_copy = omp_copy_decl_1 (var, ctx);

  insert_decl_map (&ctx->cb, var, local_copy);

  return local_copy;
}
/* Bring the replacement that CTX holds for DECL in line with the new
   context: remap its type, copy DECL's value expression over when the
   replacement is variable sized or PRIVATE_DEBUG is set, and remap the
   size expressions of a variable-sized replacement.  */
static void
fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
{
  tree remapped = lookup_decl (decl, ctx);
  bool wants_value_expr;
  tree bits, bytes;

  TREE_TYPE (remapped) = remap_type (TREE_TYPE (decl), &ctx->cb);

  /* The value-expression flag of DECL is only inspected once one of
     the two triggers holds.  */
  wants_value_expr = !TREE_CONSTANT (DECL_SIZE (remapped)) || private_debug;
  if (wants_value_expr && DECL_HAS_VALUE_EXPR_P (decl))
    {
      tree value_expr = DECL_VALUE_EXPR (decl);

      walk_tree (&value_expr, copy_tree_body_r, &ctx->cb, NULL);
      SET_DECL_VALUE_EXPR (remapped, value_expr);
      DECL_HAS_VALUE_EXPR_P (remapped) = 1;
    }

  /* Constant-sized replacements need no further work.  */
  if (TREE_CONSTANT (DECL_SIZE (remapped)))
    return;

  /* Remap the size in bits, falling back to the size of the remapped
     type when remap_decl yields error_mark_node.  */
  bits = remap_decl (DECL_SIZE (decl), &ctx->cb);
  if (bits == error_mark_node)
    bits = TYPE_SIZE (TREE_TYPE (remapped));
  DECL_SIZE (remapped) = bits;

  /* Likewise for the size in bytes.  */
  bytes = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
  if (bytes == error_mark_node)
    bytes = TYPE_SIZE_UNIT (TREE_TYPE (remapped));
  DECL_SIZE_UNIT (remapped) = bytes;
}
/* The copy_decl callback installed in every context's copy_body_data.
   CB is cast back to the omp_context it belongs to.  We know VAR has
   no mapping in that context, so the enclosing contexts are searched
   for one; this avoids having to duplicate the maps ahead of time.

   Labels always get a fresh artificial label, which is recorded in the
   context.  For anything else the result is the mapping found in an
   enclosing context, VAR itself when no parallel/task context is in
   the way or when VAR is global or belongs to another function, and
   error_mark_node otherwise.  */
static tree
omp_copy_decl (tree var, copy_body_data *cb)
{
  omp_context *ctx = (omp_context *) cb;

  if (TREE_CODE (var) == LABEL_DECL)
    {
      tree label = create_artificial_label (DECL_SOURCE_LOCATION (var));

      DECL_CONTEXT (label) = current_function_decl;
      insert_decl_map (&ctx->cb, var, label);
      return label;
    }

  /* Climb outwards until a parallel/task context is reached, taking
     the first mapping met on the way.  */
  while (!is_taskreg_ctx (ctx))
    {
      tree mapped;

      ctx = ctx->outer;
      if (ctx == NULL)
	return var;

      mapped = maybe_lookup_decl (var, ctx);
      if (mapped != NULL_TREE)
	return mapped;
    }

  if (is_global_var (var))
    return var;

  if (decl_function_context (var) != ctx->cb.src_fn)
    return var;

  return error_mark_node;
}
/* Return the parallel region associated with STMT. */
/* Debugging dumps for parallel regions. */
void dump_omp_region (FILE *, omp_region_p , int);
void debug_omp_region (omp_region_p );
void debug_all_omp_regions (void);
/* Print to FILE the region tree rooted at REGION: REGION itself, its
   inner regions indented by four more columns, and then its following
   siblings at the same INDENT.  REGION must not be NULL.  */
void
dump_omp_region (FILE *file, omp_region_p region, int indent)
{
  omp_region_p cur = region;

  /* Siblings are visited iteratively, children recursively.  */
  do
    {
      fprintf (file, "%*sbb %d: %s\n", indent, "", cur->entry->index,
	       gimple_code_name[cur->type]);

      if (cur->inner)
	dump_omp_region (file, cur->inner, indent + 4);

      if (cur->cont)
	fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
		 cur->cont->index);

      if (cur->exit)
	fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
		 cur->exit->index);
      else
	fprintf (file, "%*s[no exit marker]\n", indent, "");

      cur = cur->next;
    }
  while (cur);
}
/* Dump the region tree rooted at REGION to stderr, starting at column
   zero.  */
void
debug_omp_region (omp_region_p region)
{
  const int top_level_indent = 0;

  dump_omp_region (stderr, region, top_level_indent);
}
/* Dump every toplevel region, with everything nested inside, to
   stderr.  Does nothing when no region tree currently exists.  */
void
debug_all_omp_regions (void)
{
  /* dump_omp_region dereferences its argument unconditionally, and
     ROOT_OMP_REGION is NULL whenever free_omp_regions has run, so
     guard against the empty case.  */
  if (root_omp_region == NULL)
    return;

  dump_omp_region (stderr, root_omp_region, 0);
}
/* Allocate a zero-initialized region of kind TYPE whose entry block is
   BB and link it at the head of the proper sibling list: the inner
   regions of PARENT when PARENT is given, the toplevel list headed by
   ROOT_OMP_REGION otherwise.  Returns the new region.  */
omp_region_p
new_omp_region (basic_block bb, enum gimple_code type,
		omp_region_p parent)
{
  omp_region_p region = XCNEW (struct omp_region);
  /* Head of the list the new region joins.  */
  omp_region_p *list_head = parent ? &parent->inner : &root_omp_region;

  region->type = type;
  region->entry = bb;
  region->outer = parent;

  region->next = *list_head;
  *list_head = region;

  return region;
}
/* Free REGION after recursively freeing every region nested inside
   it.  */
static void
free_omp_region_1 (omp_region_p region)
{
  omp_region_p child = region->inner;

  while (child)
    {
      /* Fetch the sibling link before the child goes away.  */
      omp_region_p sibling = child->next;

      free_omp_region_1 (child);
      child = sibling;
    }

  free (region);
}
/* Free every toplevel region together with its nested regions, then
   reset ROOT_OMP_REGION to NULL.  */
void
free_omp_regions (void)
{
  omp_region_p cur = root_omp_region;

  while (cur)
    {
      /* Fetch the sibling link before the region goes away.  */
      omp_region_p following = cur->next;

      free_omp_region_1 (cur);
      cur = following;
    }

  root_omp_region = NULL;
}
/* Allocate a context for STMT, register it in ALL_CONTEXTS under STMT,
   and return it.  With a surrounding context OUTER_CTX the copy-body
   settings are inherited from it and the depth is one more than the
   outer depth; without one they are initialized for the current
   function and the depth is 1.  Every context gets its own decl
   map.  */
static omp_context *
new_omp_context (gimple stmt, omp_context *outer_ctx)
{
  omp_context *ctx = XCNEW (omp_context);

  splay_tree_insert (all_contexts, (splay_tree_key) stmt,
		     (splay_tree_value) ctx);
  ctx->stmt = stmt;

  if (outer_ctx == NULL)
    {
      /* Outermost context: source and destination are both the
	 function being compiled.  */
      struct cgraph_node *node;

      ctx->cb.src_fn = current_function_decl;
      ctx->cb.dst_fn = current_function_decl;
      node = cgraph_node (current_function_decl);
      ctx->cb.src_node = node;
      ctx->cb.dst_node = node;
      ctx->cb.src_cfun = cfun;
      ctx->cb.copy_decl = omp_copy_decl;
      ctx->cb.eh_lp_nr = 0;
      ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
      ctx->depth = 1;
    }
  else
    {
      /* Nested context: start from a copy of the outer settings, minus
	 the block.  */
      ctx->outer = outer_ctx;
      ctx->cb = outer_ctx->cb;
      ctx->cb.block = NULL;
      ctx->depth = outer_ctx->depth + 1;
    }

  ctx->cb.decl_map = pointer_map_create ();

  return ctx;
}
static gimple_seq maybe_catch_exception (gimple_seq);
/* Finalize task copyfn.
Does nothing when TASK_STMT has no copy function. Otherwise the saved
tree body of the copy function is gimplified with that function made
current, passed through maybe_catch_exception, installed as the
function's gimple body, and the function is handed to the callgraph. */
static void
finalize_task_copyfn (gimple task_stmt)
{
struct function *child_cfun;
tree child_fn, old_fn;
gimple_seq seq, new_seq;
gimple bind;
child_fn = gimple_omp_task_copy_fn (task_stmt);
if (child_fn == NULL_TREE)
return;
child_cfun = DECL_STRUCT_FUNCTION (child_fn);
/* Inform the callgraph about the new function. */
DECL_STRUCT_FUNCTION (child_fn)->curr_properties
= cfun->curr_properties;
/* Make the copy function current while its body is gimplified; the
previous function is restored below. */
old_fn = current_function_decl;
push_cfun (child_cfun);
current_function_decl = child_fn;
bind = gimplify_body (&DECL_SAVED_TREE (child_fn), child_fn, false);
seq = gimple_seq_alloc ();
gimple_seq_add_stmt (&seq, bind);
/* If maybe_catch_exception handed back a different sequence, wrap it
in a fresh bind so the body is again a single bind statement. */
new_seq = maybe_catch_exception (seq);
if (new_seq != seq)
{
bind = gimple_build_bind (NULL, new_seq, NULL);
seq = gimple_seq_alloc ();
gimple_seq_add_stmt (&seq, bind);
}
gimple_set_body (child_fn, seq);
pop_cfun ();
current_function_decl = old_fn;
cgraph_add_new_function (child_fn, false);
}
/* Tear down the omp_context passed as VALUE.  Called through the splay
   tree value delete callback.  Releases the decl map and the field
   maps, clears the DECL_ABSTRACT_ORIGIN of every field of the record
   types, finalizes the copy function if the context is for a task, and
   finally frees the context itself.  */
static void
delete_omp_context (splay_tree_value value)
{
  omp_context *ctx = (omp_context *) value;
  tree fld;

  pointer_map_destroy (ctx->cb.decl_map);

  if (ctx->field_map)
    splay_tree_delete (ctx->field_map);

  if (ctx->sfield_map)
    splay_tree_delete (ctx->sfield_map);

  /* We hijacked DECL_ABSTRACT_ORIGIN earlier.  We need to clear it before
     it produces corrupt debug information.  */
  if (ctx->record_type)
    {
      fld = TYPE_FIELDS (ctx->record_type);
      while (fld)
	{
	  DECL_ABSTRACT_ORIGIN (fld) = NULL;
	  fld = TREE_CHAIN (fld);
	}
    }

  if (ctx->srecord_type)
    {
      fld = TYPE_FIELDS (ctx->srecord_type);
      while (fld)
	{
	  DECL_ABSTRACT_ORIGIN (fld) = NULL;
	  fld = TREE_CHAIN (fld);
	}
    }

  if (is_task_ctx (ctx))
    finalize_task_copyfn (ctx->stmt);

  XDELETE (ctx);
}
/* Give CTX->receiver_decl a pointer type whose target is valid in the
   child context.  If no field of CTX->record_type has a variably
   modified type, the record type is used as is.  Otherwise a new
   record type with remapped fields is built, and each original field
   is entered in CTX->field_map with its remapped counterpart as
   value.  */
static void
fixup_child_record_type (omp_context *ctx)
{
  tree child_type = ctx->record_type;
  tree fld;

  /* ??? It isn't sufficient to just call remap_type here, because
     variably_modified_type_p doesn't work the way we expect for
     record types.  Testing each field for whether it needs remapping
     and creating a new record by hand works, however.  */
  fld = TYPE_FIELDS (child_type);
  while (fld && !variably_modified_type_p (TREE_TYPE (fld), ctx->cb.src_fn))
    fld = TREE_CHAIN (fld);

  /* FLD is non-null exactly when some field needs remapping.  */
  if (fld)
    {
      tree type_name, reversed_fields = NULL;

      child_type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = DECL_NAME (TYPE_NAME (ctx->record_type));
      type_name = build_decl (DECL_SOURCE_LOCATION (ctx->receiver_decl),
			      TYPE_DECL, type_name, child_type);
      TYPE_NAME (child_type) = type_name;

      /* Copy every field, collecting the copies in reverse order.  */
      for (fld = TYPE_FIELDS (ctx->record_type); fld; fld = TREE_CHAIN (fld))
	{
	  tree field_copy = copy_node (fld);

	  DECL_CONTEXT (field_copy) = child_type;
	  TREE_TYPE (field_copy) = remap_type (TREE_TYPE (fld), &ctx->cb);
	  TREE_CHAIN (field_copy) = reversed_fields;

	  walk_tree (&DECL_SIZE (field_copy), copy_tree_body_r,
		     &ctx->cb, NULL);
	  walk_tree (&DECL_SIZE_UNIT (field_copy), copy_tree_body_r,
		     &ctx->cb, NULL);
	  walk_tree (&DECL_FIELD_OFFSET (field_copy), copy_tree_body_r,
		     &ctx->cb, NULL);

	  reversed_fields = field_copy;

	  /* Arrange to be able to look up the receiver field
	     given the sender field.  */
	  splay_tree_insert (ctx->field_map, (splay_tree_key) fld,
			     (splay_tree_value) field_copy);
	}

      TYPE_FIELDS (child_type) = nreverse (reversed_fields);
      layout_type (child_type);
    }

  TREE_TYPE (ctx->receiver_decl) = build_pointer_type (child_type);
}
/* Instantiate decls as necessary in CTX to satisfy the data sharing
specified by CLAUSES. */
static void
scan_sharing_clauses (tree clauses, omp_context *ctx)
{
tree c, decl;
bool scan_array_reductions = false;
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
bool install_firstprivate_field = true;
bool by_ref;
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_PRIVATE:
decl = OMP_CLAUSE_DECL (c);
if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
goto do_private;
else if (!is_variable_sized (decl))
install_var_local (decl, ctx);
break;
case OMP_CLAUSE_SHARED:
gcc_assert (is_taskreg_ctx (ctx));
decl = OMP_CLAUSE_DECL (c);
gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl))
|| !is_variable_sized (decl));
/* Global variables don't need to be copied,
the receiver side will use them directly. */
if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
break;
by_ref = use_pointer_for_field (decl, ctx);
if (! TREE_READONLY (decl)
|| TREE_ADDRESSABLE (decl)
|| by_ref
|| is_reference (decl))
{
install_var_field (decl, by_ref, 3, ctx);
install_var_local (decl, ctx);
break;
}
/* We don't need to copy const scalar vars back. */
OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
goto do_private;
case OMP_CLAUSE_INPUT:
case OMP_CLAUSE_OUTPUT:
{
/* The variable itself should be made private in the task's
context. FIXME_stream: add varible-sized or ptr
cases. */
tree *new_var;
stream_p vs;
decl = OMP_CLAUSE_DECL (c);
vs = lookup_stream (decl);
install_var_field (vs->stream, false, 3, ctx);
install_var_local (vs->stream, ctx);
install_var_local (decl, ctx);
new_var = lookup_var_for_clause (c);
*new_var = lookup_decl (decl, ctx);
}
break;
case OMP_CLAUSE_LASTPRIVATE:
/* Let the corresponding firstprivate clause create
the variable. */
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
break;
/* FALLTHRU */
case OMP_CLAUSE_FIRSTPRIVATE:
install_firstprivate_field =
(OMP_CLAUSE_VIEW_VAR_KIND (c) != OMP_CLAUSE_VIEW_VAR_DISCARD);
case OMP_CLAUSE_REDUCTION:
decl = OMP_CLAUSE_DECL (c);
do_private:
if (is_variable_sized (decl))
{
if (is_task_ctx (ctx) && install_firstprivate_field)
install_var_field (decl, false, 1, ctx);
break;
}
else if (is_taskreg_ctx (ctx) && install_firstprivate_field)
{
bool global
= is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx));
by_ref = use_pointer_for_field (decl, NULL);
if (is_task_ctx (ctx)
&& (global || by_ref || is_reference (decl)))
{
install_var_field (decl, false, 1, ctx);
if (!global)
install_var_field (decl, by_ref, 2, ctx);
}
else if (!global)
install_var_field (decl, by_ref, 3, ctx);
}
install_var_local (decl, ctx);
break;
case OMP_CLAUSE_COPYPRIVATE:
case OMP_CLAUSE_COPYIN:
decl = OMP_CLAUSE_DECL (c);
by_ref = use_pointer_for_field (decl, NULL);
install_var_field (decl, by_ref, 3, ctx);
break;
case OMP_CLAUSE_DEFAULT:
ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
break;
case OMP_CLAUSE_IF:
case OMP_CLAUSE_NUM_THREADS:
case OMP_CLAUSE_SCHEDULE:
if (ctx->outer)
scan_omp_op (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
break;
case OMP_CLAUSE_NOWAIT:
case OMP_CLAUSE_ORDERED:
case OMP_CLAUSE_COLLAPSE:
case OMP_CLAUSE_UNTIED:
break;
default:
gcc_unreachable ();
}
}
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_LASTPRIVATE:
/* Let the corresponding firstprivate clause create
the variable. */
if (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
scan_array_reductions = true;
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
break;
/* FALLTHRU */
case OMP_CLAUSE_PRIVATE:
case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_REDUCTION:
decl = OMP_CLAUSE_DECL (c);
if (is_variable_sized (decl))
install_var_local (decl, ctx);
fixup_remapped_decl (decl, ctx,
OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
&& OMP_CLAUSE_PRIVATE_DEBUG (c));
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
scan_array_reductions = true;
break;
case OMP_CLAUSE_SHARED:
decl = OMP_CLAUSE_DECL (c);
if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
fixup_remapped_decl (decl, ctx, false);
break;
case OMP_CLAUSE_INPUT:
case OMP_CLAUSE_OUTPUT:
{
tree view = OMP_CLAUSE_VIEW_ID (c);
tree burst = OMP_CLAUSE_BURST_SIZE (c);
tree size = OMP_CLAUSE_VIEW_SIZE (c);
/* The update of the view must happen in the second pass
as the view variable may be updated by later
firstprivate clauses. */
if (view != NULL)
OMP_CLAUSE_VIEW_ID (c) = lookup_decl (view, ctx);
else
{
decl = OMP_CLAUSE_DECL (c);
OMP_CLAUSE_VIEW_ID (c) = lookup_decl (lookup_stream (decl)->var, ctx);
}
if (burst != NULL_TREE && DECL_P (burst))
OMP_CLAUSE_BURST_SIZE (c) = lookup_decl (burst, ctx);
if (size != NULL_TREE && DECL_P (size))
OMP_CLAUSE_VIEW_SIZE (c) = lookup_decl (size, ctx);
}
break;
case OMP_CLAUSE_COPYPRIVATE:
case OMP_CLAUSE_COPYIN:
case OMP_CLAUSE_DEFAULT:
case OMP_CLAUSE_IF:
case OMP_CLAUSE_NUM_THREADS:
case OMP_CLAUSE_SCHEDULE:
case OMP_CLAUSE_NOWAIT:
case OMP_CLAUSE_ORDERED:
case OMP_CLAUSE_COLLAPSE:
case OMP_CLAUSE_UNTIED:
break;
default:
gcc_unreachable ();
}
}
if (scan_array_reductions)
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
scan_omp (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c), ctx);
scan_omp (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
}
else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
&& OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
scan_omp (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
}
/* Produce a private identifier for an OpenMP child function.  The name
   is built from the assembler name of the current function followed by
   "_omp_fn" (or "_omp_cpyfn" when TASK_COPY is true); the first
   character of that suffix is replaced by '.' or '$' when the target
   allows such characters in labels.  The running counter below is
   passed to ASM_FORMAT_PRIVATE_NAME together with this prefix.
   Returns an identifier.  */
static GTY(()) unsigned int tmp_ompfn_id_num;
static tree
create_omp_child_function_name (bool task_copy)
{
  tree fn_asm_name = DECL_ASSEMBLER_NAME (current_function_decl);
  size_t base_len = IDENTIFIER_LENGTH (fn_asm_name);
  const char *suffix = task_copy ? "_omp_cpyfn" : "_omp_fn";
  size_t suffix_len = strlen (suffix);
  char *prefix = XALLOCAVEC (char, base_len + suffix_len + 1);
  char *unique_name;

  memcpy (prefix, IDENTIFIER_POINTER (fn_asm_name), base_len);
  /* The "+ 1" copies the terminating NUL of SUFFIX as well.  */
  memcpy (prefix + base_len, suffix, suffix_len + 1);

  /* Overwrite the leading '_' of the suffix with a separator that
     cannot appear in a user identifier, when one is available.  */
#ifndef NO_DOT_IN_LABEL
  prefix[base_len] = '.';
#elif !defined NO_DOLLAR_IN_LABEL
  prefix[base_len] = '$';
#endif

  ASM_FORMAT_PRIVATE_NAME (unique_name, prefix, tmp_ompfn_id_num++);
  return get_identifier (unique_name);
}
/* Build the FUNCTION_DECL for the child function of the directive in
   CTX.  Only the bare decl is created here (result, parameters and an
   empty BLOCK); no body is attached yet.

   When TASK_COPY is false the function takes one pointer argument
   (".omp_data_i"); the decl is stored in CTX->cb.dst_fn and the
   parameter in CTX->receiver_decl.  When TASK_COPY is true the function
   takes two pointer arguments (".omp_data_o" first, then ".omp_data_i")
   and is recorded as the copy function of the task statement.  */
static void
create_omp_child_function (omp_context *ctx, bool task_copy)
{
  tree fn_name = create_omp_child_function_name (task_copy);
  tree fn_type, fndecl, result, data_in;

  if (!task_copy)
    fn_type = build_function_type_list (void_type_node, ptr_type_node,
					NULL_TREE);
  else
    fn_type = build_function_type_list (void_type_node, ptr_type_node,
					ptr_type_node, NULL_TREE);

  fndecl = build_decl (gimple_location (ctx->stmt),
		       FUNCTION_DECL, fn_name, fn_type);

  if (task_copy)
    gimple_omp_task_set_copy_fn (ctx->stmt, fndecl);
  else
    ctx->cb.dst_fn = fndecl;

  /* A file-local, compiler-generated function that is defined in this
     translation unit and must not be inlined.  */
  TREE_STATIC (fndecl) = 1;
  TREE_USED (fndecl) = 1;
  DECL_ARTIFICIAL (fndecl) = 1;
  DECL_IGNORED_P (fndecl) = 0;
  TREE_PUBLIC (fndecl) = 0;
  DECL_UNINLINABLE (fndecl) = 1;
  DECL_EXTERNAL (fndecl) = 0;
  DECL_CONTEXT (fndecl) = NULL_TREE;
  DECL_INITIAL (fndecl) = make_node (BLOCK);

  /* The function returns void.  */
  result = build_decl (DECL_SOURCE_LOCATION (fndecl),
		       RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (result) = 1;
  DECL_IGNORED_P (result) = 1;
  DECL_CONTEXT (result) = fndecl;
  DECL_RESULT (fndecl) = result;

  /* Incoming data block pointer.  */
  data_in = build_decl (DECL_SOURCE_LOCATION (fndecl),
			PARM_DECL, get_identifier (".omp_data_i"),
			ptr_type_node);
  DECL_ARTIFICIAL (data_in) = 1;
  DECL_ARG_TYPE (data_in) = ptr_type_node;
  DECL_CONTEXT (data_in) = current_function_decl;
  TREE_USED (data_in) = 1;
  DECL_ARGUMENTS (fndecl) = data_in;

  if (task_copy)
    {
      /* The copy function gets a second pointer, chained in front of
	 ".omp_data_i" so that it becomes the first argument.  */
      tree data_out = build_decl (DECL_SOURCE_LOCATION (fndecl),
				  PARM_DECL, get_identifier (".omp_data_o"),
				  ptr_type_node);
      DECL_ARTIFICIAL (data_out) = 1;
      DECL_ARG_TYPE (data_out) = ptr_type_node;
      DECL_CONTEXT (data_out) = current_function_decl;
      TREE_USED (data_out) = 1;
      TREE_ADDRESSABLE (data_out) = 1;
      TREE_CHAIN (data_out) = DECL_ARGUMENTS (fndecl);
      DECL_ARGUMENTS (fndecl) = data_out;
    }
  else
    ctx->receiver_decl = data_in;

  /* Allocate the function structure for the new decl.  This switches
     CFUN to the new function, so the previous one is restored with
     pop_cfun once the end locus has been recorded.  */
  push_struct_function (fndecl);
  cfun->function_end_locus = gimple_location (ctx->stmt);
  pop_cfun ();
}
/* Make VAR firstprivate in the directive of CTX and in every directive
   enclosing it.  Contexts are processed from the outermost one inwards
   (the recursion on CTX->outer happens first).  For each context this

     - prepends a new FIRSTPRIVATE clause for VAR to the clause list of
       the context's statement (parallel, task, single, sections or for;
       any other statement kind aborts),
     - performs the part of scan_sharing_clauses that such a clause
       requires: record fields, one local copy, and its fixup,
     - records the VAR <-> local copy association in the var_map and
       reverse_var_map of the statement's omp_stmt descriptor.

   Does nothing when CTX is NULL.  */
static void
install_firstprivate_in_enclosing_contexts (omp_context *ctx, tree var)
{
  tree clause, local;
  omp_stmt_p omp_stmt;
  gimple stmt;

  if (ctx == NULL)
    return;

  /* Outer directives first, so that their mappings already exist when
     this context looks VAR up in its enclosing contexts.  */
  install_firstprivate_in_enclosing_contexts (ctx->outer, var);

  stmt = ctx->stmt;
  clause = build_omp_clause (input_location, OMP_CLAUSE_FIRSTPRIVATE);
  OMP_CLAUSE_DECL (clause) = var;

  /* Prepend the clause to the statement's clause chain.  */
  switch (gimple_code (stmt))
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
      OMP_CLAUSE_CHAIN (clause) = gimple_omp_taskreg_clauses (stmt);
      gimple_omp_taskreg_set_clauses (stmt, clause);
      break;
    case GIMPLE_OMP_SINGLE:
      OMP_CLAUSE_CHAIN (clause) = gimple_omp_single_clauses (stmt);
      gimple_omp_single_set_clauses (stmt, clause);
      break;
    case GIMPLE_OMP_SECTIONS:
      OMP_CLAUSE_CHAIN (clause) = gimple_omp_sections_clauses (stmt);
      gimple_omp_sections_set_clauses (stmt, clause);
      break;
    case GIMPLE_OMP_FOR:
      OMP_CLAUSE_CHAIN (clause) = gimple_omp_for_clauses (stmt);
      gimple_omp_for_set_clauses (stmt, clause);
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the required part of scan_sharing_clauses from the
     outermost directive in.  */
  if (is_variable_sized (var))
    {
      if (is_task_ctx (ctx))
	install_var_field (var, false, 1, ctx);
    }
  else if (is_taskreg_ctx (ctx))
    {
      bool global
	= is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx));
      bool by_ref = use_pointer_for_field (var, NULL);

      if (is_task_ctx (ctx)
	  && (global || by_ref || is_reference (var)))
	{
	  install_var_field (var, false, 1, ctx);
	  if (!global)
	    install_var_field (var, by_ref, 2, ctx);
	}
      else if (!global)
	install_var_field (var, by_ref, 3, ctx);
    }

  /* Create exactly one local copy.  scan_sharing_clauses installs the
     local for a variable-sized decl only in its second pass; here both
     passes are merged for a single decl, so a second install_var_local
     call for the variable-sized case would merely create a duplicate
     copy and overwrite the mapping of the first one.  */
  install_var_local (var, ctx);
  fixup_remapped_decl (var, ctx, false);

  /* Insert mappings to local variables for each context.  */
  omp_stmt = lookup_omp_stmt (stmt);
  local = lookup_decl (var, ctx);
  *pointer_map_insert (omp_stmt->var_map, var) = local;
  *pointer_map_insert (omp_stmt->reverse_var_map, local) = var;
}
/* Scan an OpenMP parallel directive.

   GSI points at the GIMPLE_OMP_PARALLEL statement and OUTER_CTX is the
   context of the enclosing directive, or NULL.  A new context is
   created for the statement together with the record type used to pass
   data to the child function and the child function decl itself; the
   sharing clauses and then the body are scanned in that context.

   When optimizing, a parallel with an empty body and no copyin clause
   is replaced by a nop and nothing else is done.  */
static void
scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
{
  omp_context *ctx;
  tree name;
  gimple stmt = gsi_stmt (*gsi);
  omp_stmt_p omp_stmt = lookup_omp_stmt (stmt);

  /* Ignore parallel directives with empty bodies, unless there
     are copyin clauses.  */
  if (optimize > 0
      && empty_body_p (gimple_omp_body (stmt))
      && find_omp_clause (gimple_omp_parallel_clauses (stmt),
			  OMP_CLAUSE_COPYIN) == NULL)
    {
      gsi_replace (gsi, gimple_build_nop (), false);
      return;
    }

  ctx = new_omp_context (stmt, outer_ctx);
  if (taskreg_nesting_level > 1)
    ctx->is_nested = true;
  ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
  ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
  ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
  name = create_tmp_var_name (".omp_data_s");
  name = build_decl (gimple_location (stmt),
		     TYPE_DECL, name, ctx->record_type);
  TYPE_NAME (ctx->record_type) = name;
  create_omp_child_function (ctx, false);
  gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);

  scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);

  /* We only need one single task structure for the generation of
     control flow, though there will be more instances.  And we can
     skip them from nested parallel regions.  We will only add it for
     the outermost for now.  */
  {
    omp_context *ictx = ctx, *outermost_parallel_ctx = NULL;

    /* Walk up the context chain; the last parallel context seen is the
       outermost one.  CTX itself is a parallel, so one is always
       found.  */
    while (ictx)
      {
	if (gimple_code (ictx->stmt) == GIMPLE_OMP_PARALLEL)
	  outermost_parallel_ctx = ictx;
	ictx = ictx->outer;
      }
    gcc_assert (outermost_parallel_ctx);

    if (ctx == outermost_parallel_ctx)
      {
	omp_stmt->task_decl
	  = create_tmp_var (ptr_type_node, "parallel_region_task");
	install_firstprivate_in_enclosing_contexts (ctx,
						    omp_stmt->task_decl);
      }
  }

  scan_omp (gimple_omp_body (stmt), ctx);

  /* Nothing to marshal: drop the record type and the receiver.
     Otherwise lay the record out and fix up the child's view of it.  */
  if (TYPE_FIELDS (ctx->record_type) == NULL)
    ctx->record_type = ctx->receiver_decl = NULL;
  else
    {
      layout_type (ctx->record_type);
      fixup_child_record_type (ctx);
    }
}
/* Scan an OpenMP task directive.
GSI points at the GIMPLE_OMP_TASK statement, OUTER_CTX is the context
of the enclosing directive or NULL. A context, a data record type and
a child function are created for the task; its sharing clauses and
body are scanned; finally the size and alignment of the data record
are stored on the statement.
Tasks carrying at least one input or output clause are treated as
streaming tasks: a "task" pointer temporary, and one view pointer per
input clause that has OMP_CLAUSE_FIRSTPRIVATE_INPUT set, are created
and made firstprivate in this and all enclosing contexts. */
static void
scan_omp_task (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
{
omp_context *ctx;
tree name, t;
gimple stmt = gsi_stmt (*gsi);
location_t loc = gimple_location (stmt);
/* Ignore task directives with empty bodies. */
if (optimize > 0
&& empty_body_p (gimple_omp_body (stmt)))
{
gsi_replace (gsi, gimple_build_nop (), false);
return;
}
ctx = new_omp_context (stmt, outer_ctx);
if (taskreg_nesting_level > 1)
ctx->is_nested = true;
ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
/* Record type describing the data block handed to the child function;
it is given an artificial ".omp_data_s" type name. */
ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
name = create_tmp_var_name (".omp_data_s");
name = build_decl (gimple_location (stmt),
TYPE_DECL, name, ctx->record_type);
TYPE_NAME (ctx->record_type) = name;
create_omp_child_function (ctx, false);
gimple_omp_task_set_child_fn (stmt, ctx->cb.dst_fn);
scan_sharing_clauses (gimple_omp_task_clauses (stmt), ctx);
{
tree clause = gimple_omp_task_clauses (stmt);
bool is_streaming = false;
/* A task is a streaming task as soon as it has one input or output
clause. */
for (; clause ; clause = OMP_CLAUSE_CHAIN (clause))
if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_INPUT
|| OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_OUTPUT)
{
is_streaming = true;
break;
}
if (is_streaming)
{
omp_stmt_p omp_stmt = lookup_omp_stmt (stmt);
#if 0 /*zzzz*/
omp_stmt->termination_flag_p = create_tmp_var (build_pointer_type (boolean_type_node),
"gomp_termination_flag_p");
install_firstprivate_in_enclosing_contexts (ctx, omp_stmt->termination_flag_p);
omp_stmt->activation_counter_p = create_tmp_var (build_pointer_type (long_long_unsigned_type_node),
"gomp_activation_counter_p");
install_firstprivate_in_enclosing_contexts (ctx, omp_stmt->activation_counter_p);
#endif
/* Pointer temporary recorded as the task's task_decl, made
firstprivate in this context and all enclosing ones. */
omp_stmt->task_decl = create_tmp_var (ptr_type_node, "task");
install_firstprivate_in_enclosing_contexts (ctx, omp_stmt->task_decl);
/* Add a view pointer for each firstprivate clause that was
promoted to an input clause. */
/* The clause list is re-read because the call above prepended a
firstprivate clause to it. The calls inside the loop prepend as
well, which does not disturb the walk towards the tail. */
clause = gimple_omp_task_clauses (stmt);
for (; clause ; clause = OMP_CLAUSE_CHAIN (clause))
if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_INPUT
&& OMP_CLAUSE_FIRSTPRIVATE_INPUT (clause) != NULL_TREE)
{
tree fp_view = create_tmp_var (ptr_type_node,
"gomp_firstprivate_view_p");
OMP_CLAUSE_FIRSTPRIVATE_INPUT (clause) = fp_view;
install_firstprivate_in_enclosing_contexts (ctx, fp_view);
}
}
}
/* If a second record type (srecord_type) was created while installing
fields, name it ".omp_data_a" and create the task copy function. */
if (ctx->srecord_type)
{
name = create_tmp_var_name (".omp_data_a");
name = build_decl (gimple_location (stmt),
TYPE_DECL, name, ctx->srecord_type);
TYPE_NAME (ctx->srecord_type) = name;
create_omp_child_function (ctx, true);
}
scan_omp (gimple_omp_body (stmt), ctx);
if (TYPE_FIELDS (ctx->record_type) == NULL)
{
/* No data to pass: drop the record and the receiver, and report an
argument block of size 0 with alignment 1. */
ctx->record_type = ctx->receiver_decl = NULL;
t = build_int_cst (long_integer_type_node, 0);
gimple_omp_task_set_arg_size (stmt, t);
t = build_int_cst (long_integer_type_node, 1);
gimple_omp_task_set_arg_align (stmt, t);
}
else
{
tree *p, vla_fields = NULL_TREE, *q = &vla_fields;
/* Move VLA fields to the end. */
/* P walks the field chain; every field whose type has no constant
TYPE_SIZE_UNIT is unlinked and appended to the VLA_FIELDS list
through Q, keeping relative order. When the walk ends P addresses
the terminating link, where the VLA list is spliced back in. */
p = &TYPE_FIELDS (ctx->record_type);
while (*p)
if (!TYPE_SIZE_UNIT (TREE_TYPE (*p))
|| ! TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (*p))))
{
*q = *p;
*p = TREE_CHAIN (*p);
TREE_CHAIN (*q) = NULL_TREE;
q = &TREE_CHAIN (*q);
}
else
p = &TREE_CHAIN (*p);
*p = vla_fields;
layout_type (ctx->record_type);
fixup_child_record_type (ctx);
if (ctx->srecord_type)
layout_type (ctx->srecord_type);
/* Record size and alignment (in units) of the data block on the
task statement. */
t = fold_convert_loc (loc, long_integer_type_node,
TYPE_SIZE_UNIT (ctx->record_type));
gimple_omp_task_set_arg_size (stmt, t);
t = build_int_cst (long_integer_type_node,
TYPE_ALIGN_UNIT (ctx->record_type));
gimple_omp_task_set_arg_align (stmt, t);
}
}
/* Scan an OpenMP loop directive STMT nested in OUTER_CTX: create its
   context, then scan the clauses, the pre-body, the index / initial /
   final / increment operands of every collapsed loop level, and
   finally the loop body.  */
static void
scan_omp_for (gimple stmt, omp_context *outer_ctx)
{
  omp_context *loop_ctx = new_omp_context (stmt, outer_ctx);
  size_t level;

  scan_sharing_clauses (gimple_omp_for_clauses (stmt), loop_ctx);
  scan_omp (gimple_omp_for_pre_body (stmt), loop_ctx);

  level = 0;
  while (level < gimple_omp_for_collapse (stmt))
    {
      scan_omp_op (gimple_omp_for_index_ptr (stmt, level), loop_ctx);
      scan_omp_op (gimple_omp_for_initial_ptr (stmt, level), loop_ctx);
      scan_omp_op (gimple_omp_for_final_ptr (stmt, level), loop_ctx);
      scan_omp_op (gimple_omp_for_incr_ptr (stmt, level), loop_ctx);
      level++;
    }

  scan_omp (gimple_omp_body (stmt), loop_ctx);
}
/* Scan an OpenMP sections directive STMT nested in OUTER_CTX: a fresh
   context is created, in which first the clauses and then the body of
   the directive are scanned.  */
static void
scan_omp_sections (gimple stmt, omp_context *outer_ctx)
{
  omp_context *sections_ctx = new_omp_context (stmt, outer_ctx);

  scan_sharing_clauses (gimple_omp_sections_clauses (stmt), sections_ctx);
  scan_omp (gimple_omp_body (stmt), sections_ctx);
}
/* Scan an OpenMP single directive STMT nested in OUTER_CTX.  The new
   context gets a field map and a record type named ".omp_copy_s";
   clauses and body are then scanned.  The record type is laid out if
   the scan gave it any field, and discarded otherwise.  */
static void
scan_omp_single (gimple stmt, omp_context *outer_ctx)
{
  omp_context *single_ctx = new_omp_context (stmt, outer_ctx);
  tree type_id, type_decl;

  single_ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
  single_ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);

  type_id = create_tmp_var_name (".omp_copy_s");
  type_decl = build_decl (gimple_location (stmt),
			  TYPE_DECL, type_id, single_ctx->record_type);
  TYPE_NAME (single_ctx->record_type) = type_decl;

  scan_sharing_clauses (gimple_omp_single_clauses (stmt), single_ctx);
  scan_omp (gimple_omp_body (stmt), single_ctx);

  if (TYPE_FIELDS (single_ctx->record_type) != NULL)
    layout_type (single_ctx->record_type);
  else
    single_ctx->record_type = NULL;
}
/* Check OpenMP nesting restrictions for STMT, which appears inside the
   directive described by CTX.  The chain of enclosing contexts is
   walked outwards and a warning is issued when STMT is nested in a
   region where it is not allowed.  For work-sharing, barrier, master
   and ordered statements the walk stops at the first enclosing
   parallel region.  Statements of any other kind are not checked.  */
static void
check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
{
  omp_context *up;

  switch (gimple_code (stmt))
    {
    /* Work-sharing constructs, and calls (the caller passes barrier
       calls here).  */
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_CALL:
      for (up = ctx; up != NULL; up = up->outer)
	switch (gimple_code (up->stmt))
	  {
	  case GIMPLE_OMP_FOR:
	  case GIMPLE_OMP_SECTIONS:
	  case GIMPLE_OMP_SINGLE:
	  case GIMPLE_OMP_ORDERED:
	  case GIMPLE_OMP_MASTER:
	  case GIMPLE_OMP_TASK:
	    if (is_gimple_call (stmt))
	      warning (0, "barrier region may not be closely nested inside "
		       "of work-sharing, critical, ordered, master or "
		       "explicit task region");
	    else
	      warning (0, "work-sharing region may not be closely nested inside "
		       "of work-sharing, critical, ordered, master or explicit "
		       "task region");
	    return;

	  case GIMPLE_OMP_PARALLEL:
	    return;

	  default:
	    break;
	  }
      break;

    case GIMPLE_OMP_MASTER:
      for (up = ctx; up != NULL; up = up->outer)
	switch (gimple_code (up->stmt))
	  {
	  case GIMPLE_OMP_FOR:
	  case GIMPLE_OMP_SECTIONS:
	  case GIMPLE_OMP_SINGLE:
	  case GIMPLE_OMP_TASK:
	    warning (0, "master region may not be closely nested inside "
		     "of work-sharing or explicit task region");
	    return;

	  case GIMPLE_OMP_PARALLEL:
	    return;

	  default:
	    break;
	  }
      break;

    case GIMPLE_OMP_ORDERED:
      for (up = ctx; up != NULL; up = up->outer)
	switch (gimple_code (up->stmt))
	  {
	  case GIMPLE_OMP_CRITICAL:
	  case GIMPLE_OMP_TASK:
	    warning (0, "ordered region may not be closely nested inside "
		     "of critical or explicit task region");
	    return;

	  case GIMPLE_OMP_FOR:
	    /* The innermost enclosing loop must carry an ordered
	       clause.  */
	    if (find_omp_clause (gimple_omp_for_clauses (up->stmt),
				 OMP_CLAUSE_ORDERED) == NULL)
	      warning (0, "ordered region must be closely nested inside "
		       "a loop region with an ordered clause");
	    return;

	  case GIMPLE_OMP_PARALLEL:
	    return;

	  default:
	    break;
	  }
      break;

    case GIMPLE_OMP_CRITICAL:
      /* Unlike the cases above, this walk does not stop at a parallel
	 region: every enclosing critical is compared by name.  */
      for (up = ctx; up != NULL; up = up->outer)
	{
	  if (gimple_code (up->stmt) != GIMPLE_OMP_CRITICAL)
	    continue;
	  if (gimple_omp_critical_name (stmt)
	      != gimple_omp_critical_name (up->stmt))
	    continue;

	  warning (0, "critical region may not be nested inside a critical "
		   "region with the same name");
	  return;
	}
      break;

    default:
      break;
    }
}
/* Helper function for scan_omp.

   Callback for walk_tree or operators in walk_gimple_stmt used to
   scan for OpenMP directives in TP.  DATA is the walk_stmt_info whose
   info field holds the current omp_context (possibly NULL).

   Variable, parameter, label and result decls are remapped through
   the context.  Types are remapped as well.  For any other non-decl
   tree the walk descends into the operands and, when a context exists,
   the type of the node is remapped.  Always returns NULL_TREE.  */
static tree
scan_omp_1_op (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *info = (struct walk_stmt_info *) data;
  omp_context *ctx = (omp_context *) info->info;
  tree node = *tp;
  enum tree_code code = TREE_CODE (node);

  if (code == VAR_DECL
      || code == PARM_DECL
      || code == LABEL_DECL
      || code == RESULT_DECL)
    {
      if (ctx)
	*tp = remap_decl (node, &ctx->cb);
    }
  else if (ctx && TYPE_P (node))
    *tp = remap_type (node, &ctx->cb);
  else if (!DECL_P (node))
    {
      *walk_subtrees = 1;
      if (ctx)
	TREE_TYPE (node) = remap_type (TREE_TYPE (node), &ctx->cb);
    }

  return NULL_TREE;
}
/* Helper function for scan_omp.

   Callback for walk_gimple_stmt used to scan for OpenMP directives in
   the current statement in GSI.  WI->info holds the enclosing
   omp_context, or NULL.  OpenMP statements are dispatched to their
   scan routine and reported as fully handled through *HANDLED_OPS_P;
   for every other statement the walker is asked to visit the operands
   itself.  Always returns NULL_TREE.  */
static tree
scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
		 struct walk_stmt_info *wi)
{
  omp_context *ctx = (omp_context *) wi->info;
  gimple stmt = gsi_stmt (*gsi);
  bool handled = true;

  if (gimple_has_location (stmt))
    input_location = gimple_location (stmt);

  /* Check the OpenMP nesting restrictions.  Inside a directive, both
     OpenMP statements and calls to the GOMP_barrier builtin are
     subject to them.  */
  if (ctx != NULL)
    {
      bool check_nesting = is_gimple_omp (stmt);

      if (!check_nesting && is_gimple_call (stmt))
	{
	  tree callee = gimple_call_fndecl (stmt);

	  check_nesting
	    = (callee != NULL_TREE
	       && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL
	       && DECL_FUNCTION_CODE (callee) == BUILT_IN_GOMP_BARRIER);
	}

      if (check_nesting)
	check_omp_nesting_restrictions (stmt, ctx);
    }

  switch (gimple_code (stmt))
    {
    case GIMPLE_OMP_PARALLEL:
      /* Track the nesting depth of parallel/task regions while the
	 region is being scanned.  */
      taskreg_nesting_level++;
      scan_omp_parallel (gsi, ctx);
      taskreg_nesting_level--;
      break;

    case GIMPLE_OMP_TASK:
      taskreg_nesting_level++;
      scan_omp_task (gsi, ctx);
      taskreg_nesting_level--;
      break;

    case GIMPLE_OMP_FOR:
      scan_omp_for (stmt, ctx);
      break;

    case GIMPLE_OMP_SECTIONS:
      scan_omp_sections (stmt, ctx);
      break;

    case GIMPLE_OMP_SINGLE:
      scan_omp_single (stmt, ctx);
      break;

    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_ORDERED:
    case GIMPLE_OMP_CRITICAL:
      /* These only need a context of their own for the body scan.  */
      scan_omp (gimple_omp_body (stmt), new_omp_context (stmt, ctx));
      break;

    case GIMPLE_BIND:
      /* Map the variables declared by the bind to themselves in the
	 current context, then let the walker look inside.  */
      handled = false;
      if (ctx)
	{
	  tree var = gimple_bind_vars (stmt);

	  while (var)
	    {
	      insert_decl_map (&ctx->cb, var, var);
	      var = TREE_CHAIN (var);
	    }
	}
      break;

    default:
      handled = false;
      break;
    }

  *handled_ops_p = handled;
  return NULL_TREE;
}
/* Scan all the statements of the sequence BODY.  CTX contains context
   information about the OpenMP directives and clauses found during
   the scan; it may be NULL.  The value of input_location is saved
   before the walk and restored afterwards, since the statement
   callback changes it.  */
static void
scan_omp (gimple_seq body, omp_context *ctx)
{
  location_t outer_location = input_location;
  struct walk_stmt_info walk;

  memset (&walk, 0, sizeof (walk));
  walk.info = ctx;
  walk.want_locations = true;

  walk_gimple_seq (body, scan_omp_1_stmt, scan_omp_1_op, &walk);

  input_location = outer_location;
}
/* Re-gimplification and code generation routines. */
/* Build and return a call expression, with no arguments, to the
   GOMP_barrier builtin.  */
static tree
build_omp_barrier (void)
{
  tree barrier_fn = built_in_decls[BUILT_IN_GOMP_BARRIER];

  return build_call_expr (barrier_fn, 0);
}
/* Return the context that was created for STMT when it was scanned,
   or NULL if ALL_CONTEXTS holds no entry for it.  */
static omp_context *
maybe_lookup_ctx (gimple stmt)
{
  splay_tree_node entry
    = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);

  if (entry == NULL)
    return NULL;

  return (omp_context *) entry->value;
}
/* Find the mapping for DECL in CTX or the immediately enclosing
context that has a mapping for DECL.
If CTX is a nested parallel directive, we may have to use the decl
mappings created in CTX's parent context. Suppose that we have the
following parallel nesting (variable UIDs shown for clarity):
iD.1562 = 0;
#omp parallel shared(iD.1562) -> outer parallel
iD.1562 = iD.1562 + 1;
#omp parallel shared (iD.1562) -> inner parallel
iD.1562 = iD.1562 - 1;
Each parallel structure will create a distinct .omp_data_s structure
for copying iD.1562 in/out of the directive:
outer parallel .omp_data_s.1.i -> iD.1562
inner parallel .omp_data_s.2.i -> iD.1562
A shared variable mapping will produce a copy-out operation before
the parallel directive and a copy-in operation after it. So, in
this case we would have:
iD.1562 = 0;
.omp_data_o.1.i = iD.1562;
#omp parallel shared(iD.1562) -> outer parallel
.omp_data_i.1 = &.omp_data_o.1
.omp_data_i.1->i = .omp_data_i.1->i + 1;
.omp_data_o.2.i = iD.1562; -> **
#omp parallel shared(iD.1562) -> inner parallel
.omp_data_i.2 = &.omp_data_o.2
.omp_data_i.2->i = .omp_data_i.2->i - 1;
** This is a problem. The symbol iD.1562 cannot be referenced
inside the body of the outer parallel region. But since we are
emitting this copy operation while expanding the inner parallel
directive, we need to access the CTX structure of the outer
parallel directive to get the correct mapping:
.omp_data_o.2.i = .omp_data_i.1->i
Since there may be other workshare or parallel directives enclosing
the parallel directive, it may be necessary to walk up the context
parent chain. This is not a problem in general because nested
parallelism happens only rarely. */
static tree
lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
{
  tree mapped = NULL;
  omp_context *outer = ctx->outer;

  /* Climb the chain of enclosing contexts and stop at the first one
     that has a mapping for DECL.  */
  while (outer && mapped == NULL)
    {
      mapped = maybe_lookup_decl (decl, outer);
      outer = outer->outer;
    }

  /* Within a nested region, anything that is not a global variable
     must have been mapped by some enclosing context.  */
  gcc_assert (!ctx->is_nested || mapped || is_global_var (decl));

  if (mapped)
    return mapped;
  return decl;
}
/* Like lookup_decl_in_outer_ctx, but without any consistency check:
   return the mapping for DECL found in the nearest enclosing context
   of CTX, or DECL itself when no enclosing context maps it.  */
static tree
maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
{
  tree mapped = NULL;
  omp_context *outer = ctx->outer;

  while (outer && mapped == NULL)
    {
      mapped = maybe_lookup_decl (decl, outer);
      outer = outer->outer;
    }

  if (mapped)
    return mapped;
  return decl;
}
/* Construct the initialization value for reduction CLAUSE, i.e. the
   identity element of the clause's reduction operator, expressed in
   TYPE:

     +  -  |  ^  ||  !=         ->  0
     *  &&  ==                  ->  1
     &                          ->  -1 (all bits set)
     max                        ->  lowest value of TYPE
     min                        ->  highest value of TYPE

   For floating-point types max/min use -inf/+inf when the mode honors
   infinities, and the largest finite magnitude otherwise.  For max and
   min any other type must be integral.  */
tree
omp_reduction_init (tree clause, tree type)
{
  location_t loc = OMP_CLAUSE_LOCATION (clause);
  tree identity;

  switch (OMP_CLAUSE_REDUCTION_CODE (clause))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case TRUTH_OR_EXPR:
    case TRUTH_ORIF_EXPR:
    case TRUTH_XOR_EXPR:
    case NE_EXPR:
      identity = integer_zero_node;
      break;

    case MULT_EXPR:
    case TRUTH_AND_EXPR:
    case TRUTH_ANDIF_EXPR:
    case EQ_EXPR:
      identity = integer_one_node;
      break;

    case BIT_AND_EXPR:
      identity = integer_minus_one_node;
      break;

    case MAX_EXPR:
      if (!SCALAR_FLOAT_TYPE_P (type))
	{
	  gcc_assert (INTEGRAL_TYPE_P (type));
	  return TYPE_MIN_VALUE (type);
	}
      else
	{
	  REAL_VALUE_TYPE lowest;

	  if (HONOR_INFINITIES (TYPE_MODE (type)))
	    {
	      REAL_VALUE_TYPE inf;

	      real_inf (&inf);
	      real_arithmetic (&lowest, NEGATE_EXPR, &inf, NULL);
	    }
	  else
	    real_maxval (&lowest, 1, TYPE_MODE (type));
	  return build_real (type, lowest);
	}

    case MIN_EXPR:
      if (!SCALAR_FLOAT_TYPE_P (type))
	{
	  gcc_assert (INTEGRAL_TYPE_P (type));
	  return TYPE_MAX_VALUE (type);
	}
      else
	{
	  REAL_VALUE_TYPE highest;

	  if (HONOR_INFINITIES (TYPE_MODE (type)))
	    real_inf (&highest);
	  else
	    real_maxval (&highest, 0, TYPE_MODE (type));
	  return build_real (type, highest);
	}

    default:
      gcc_unreachable ();
    }

  return fold_convert_loc (loc, type, identity);
}
/* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
from the receiver (aka child) side and initializers for REFERENCE_TYPE
private variables. Initialization statements go in ILIST, while calls
to destructors go in DLIST.
CLAUSES is the clause chain of the directive described by CTX. *DLIST
is allocated here. The streaming INPUT and OUTPUT clauses are handled
as well: the stream variable attached to the clause's decl is
initialized from its outer-context reference. */
static void
lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
omp_context *ctx)
{
gimple_stmt_iterator diter;
tree c, dtor, copyin_seq, x, ptr;
bool copyin_by_ref = false;
bool lastprivate_firstprivate = false;
int pass;
*dlist = gimple_seq_alloc ();
diter = gsi_start (*dlist);
copyin_seq = NULL;
/* Do all the fixed sized types in the first pass, and the variable sized
types in the second pass. This makes sure that the scalar arguments to
the variable sized types are processed before we use them in the
variable sized operations. */
for (pass = 0; pass < 2; ++pass)
{
for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
{
enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
tree var, new_var;
bool by_ref;
location_t clause_loc = OMP_CLAUSE_LOCATION (c);
/* First filter: select the clause kinds handled by this function
and skip everything else. */
switch (c_kind)
{
case OMP_CLAUSE_PRIVATE:
if (OMP_CLAUSE_PRIVATE_DEBUG (c))
continue;
break;
case OMP_CLAUSE_SHARED:
/* A shared decl without a mapping in CTX must be a global; it
needs no setup here. */
if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
{
gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
continue;
}
/* FALLTHRU */
case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_INPUT:
case OMP_CLAUSE_OUTPUT:
case OMP_CLAUSE_COPYIN:
case OMP_CLAUSE_REDUCTION:
break;
case OMP_CLAUSE_LASTPRIVATE:
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
{
lastprivate_firstprivate = true;
if (pass != 0)
continue;
}
break;
default:
continue;
}
/* VAR is the decl named by the clause, NEW_VAR its copy in CTX.
COPYIN operates on the original decl directly. */
new_var = var = OMP_CLAUSE_DECL (c);
if (c_kind != OMP_CLAUSE_COPYIN)
new_var = lookup_decl (var, ctx);
/* Second filter: decide in which pass the clause is processed.
SHARED and COPYIN always go in pass 0; variable sized decls,
references and reductions with a placeholder in pass 1;
everything else in pass 0. */
if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
{
if (pass != 0)
continue;
}
else if (is_variable_sized (var))
{
/* For variable sized types, we need to allocate the
actual storage here. Call alloca and store the
result in the pointer decl that we created elsewhere. */
if (pass == 0)
continue;
if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx))
{
gimple stmt;
tree tmp;
ptr = DECL_VALUE_EXPR (new_var);
gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
ptr = TREE_OPERAND (ptr, 0);
gcc_assert (DECL_P (ptr));
x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
/* void *tmp = __builtin_alloca */
stmt
= gimple_build_call (built_in_decls[BUILT_IN_ALLOCA], 1, x);
tmp = create_tmp_var_raw (ptr_type_node, NULL);
gimple_add_tmp_var (tmp);
gimple_call_set_lhs (stmt, tmp);
gimple_seq_add_stmt (ilist, stmt);
x = fold_convert_loc (clause_loc, TREE_TYPE (ptr), tmp);
gimplify_assign (ptr, x, ilist);
}
}
else if (is_reference (var))
{
/* For references that are being privatized for Fortran,
allocate new backing storage for the new pointer
variable. This allows us to avoid changing all the
code that expects a pointer to something that expects
a direct variable. Note that this doesn't apply to
C++, since reference types are disallowed in data
sharing clauses there, except for NRV optimized
return values. */
if (pass == 0)
continue;
x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
/* Three sources for the backing storage: the field of the
receiver record (firstprivate in a task, unless the view
variable is discarded), a new addressable temporary when the
size is constant, or alloca otherwise. */
if (c_kind == OMP_CLAUSE_FIRSTPRIVATE && is_task_ctx (ctx)
&& OMP_CLAUSE_VIEW_VAR_KIND (c) != OMP_CLAUSE_VIEW_VAR_DISCARD)
{
x = build_receiver_ref (var, false, ctx);
x = build_fold_addr_expr_loc (clause_loc, x);
}
else if (TREE_CONSTANT (x))
{
const char *name = NULL;
if (DECL_NAME (var))
name = IDENTIFIER_POINTER (DECL_NAME (new_var));
x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
name);
gimple_add_tmp_var (x);
TREE_ADDRESSABLE (x) = 1;
x = build_fold_addr_expr_loc (clause_loc, x);
}
else
{
x = build_call_expr_loc (clause_loc,
built_in_decls[BUILT_IN_ALLOCA], 1, x);
}
x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
gimplify_assign (new_var, x, ilist);
/* From here on NEW_VAR denotes the referenced object. */
new_var = build_fold_indirect_ref_loc (clause_loc, new_var);
}
else if (c_kind == OMP_CLAUSE_REDUCTION
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
if (pass == 0)
continue;
}
else if (pass != 0)
continue;
/* Emit the per-clause initialization, and the destructor call
where the language provides one. */
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_SHARED:
/* Shared global vars are just accessed directly. */
if (is_global_var (new_var))
break;
/* Set up the DECL_VALUE_EXPR for shared variables now. This
needs to be delayed until after fixup_child_record_type so
that we get the correct type during the dereference. */
by_ref = use_pointer_for_field (var, ctx);
x = build_receiver_ref (var, by_ref, ctx);
SET_DECL_VALUE_EXPR (new_var, x);
DECL_HAS_VALUE_EXPR_P (new_var) = 1;
/* ??? If VAR is not passed by reference, and the variable
hasn't been initialized yet, then we'll get a warning for
the store into the omp_data_s structure. Ideally, we'd be
able to notice this and not store anything at all, but
we're generating code too early. Suppress the warning. */
if (!by_ref)
TREE_NO_WARNING (var) = 1;
break;
case OMP_CLAUSE_LASTPRIVATE:
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
break;
/* FALLTHRU */
case OMP_CLAUSE_PRIVATE:
/* X is the object passed to the default constructor hook as
the outer variable: always for lastprivate, only with
OUTER_REF for private, NULL otherwise. */
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE)
x = build_outer_var_ref (var, ctx);
else if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
{
if (is_task_ctx (ctx))
x = build_receiver_ref (var, false, ctx);
else
x = build_outer_var_ref (var, ctx);
}
else
x = NULL;
x = lang_hooks.decls.omp_clause_default_ctor (c, new_var, x);
if (x)
gimplify_and_add (x, ilist);
/* FALLTHRU */
do_dtor:
/* Shared tail: gimplify the destructor call, if any, and insert
it into *DLIST before DITER. */
x = lang_hooks.decls.omp_clause_dtor (c, new_var);
if (x)
{
gimple_seq tseq = NULL;
dtor = x;
gimplify_stmt (&dtor, &tseq);
gsi_insert_seq_before (&diter, tseq, GSI_SAME_STMT);
}
break;
case OMP_CLAUSE_FIRSTPRIVATE:
/* Nothing is generated for a firstprivate whose view variable
kind is DISCARD. */
if (OMP_CLAUSE_VIEW_VAR_KIND (c) == OMP_CLAUSE_VIEW_VAR_DISCARD)
break;
if (is_task_ctx (ctx))
{
/* In a task, references and variable sized decls get no copy
construction here; globals and decls passed by pointer are
accessed through the receiver record instead of copied. */
if (is_reference (var) || is_variable_sized (var))
goto do_dtor;
else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var,
ctx))
|| use_pointer_for_field (var, NULL))
{
x = build_receiver_ref (var, false, ctx);
SET_DECL_VALUE_EXPR (new_var, x);
DECL_HAS_VALUE_EXPR_P (new_var) = 1;
goto do_dtor;
}
}
/* Copy-construct the private copy from the outer variable. */
x = build_outer_var_ref (var, ctx);
x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
gimplify_and_add (x, ilist);
goto do_dtor;
/* Not reached: the goto above always leaves this case. */
break;
case OMP_CLAUSE_INPUT:
case OMP_CLAUSE_OUTPUT:
{
/* Initialize the stream variable registered for VAR from its
outer-context reference, using the copy constructor hook. */
stream_p vs = lookup_stream (var);
x = build_outer_var_ref (vs->stream, ctx);
x = lang_hooks.decls.omp_clause_copy_ctor (c, vs->stream, x);
gimplify_and_add (x, ilist);
#if 0
tree view = OMP_CLAUSE_VIEW_ID (c);
tree burst = OMP_CLAUSE_BURST_SIZE (c);
if (view != NULL_TREE)
{
x = build_outer_var_ref (view, ctx);
x = lang_hooks.decls.omp_clause_copy_ctor (c, view, x);
gimplify_and_add (x, ilist);
}
if (burst != NULL_TREE && DECL_P (burst))
{
x = build_outer_var_ref (burst, ctx);
x = lang_hooks.decls.omp_clause_copy_ctor (c, burst, x);
gimplify_and_add (x, ilist);
}
#endif
}
break;
case OMP_CLAUSE_COPYIN:
/* The assignments are collected in COPYIN_SEQ and emitted after
the loops, guarded by a thread number test. */
by_ref = use_pointer_for_field (var, NULL);
x = build_receiver_ref (var, by_ref, ctx);
x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
append_to_statement_list (x, &copyin_seq);
copyin_by_ref |= by_ref;
break;
case OMP_CLAUSE_REDUCTION:
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
{
/* Temporarily give the placeholder a value expression that
refers to the outer variable while the init sequence is
lowered, then move that sequence into ILIST. */
tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
x = build_outer_var_ref (var, ctx);
if (is_reference (var))
x = build_fold_addr_expr_loc (clause_loc, x);
SET_DECL_VALUE_EXPR (placeholder, x);
DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
lower_omp (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c), ctx);
gimple_seq_add_seq (ilist,
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c));
OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
}
else
{
/* Plain reduction: assign the operator's initial value. */
x = omp_reduction_init (c, TREE_TYPE (new_var));
gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
gimplify_assign (new_var, x, ilist);
}
break;
default:
gcc_unreachable ();
}
}
}
/* The copyin sequence is not to be executed by the main thread, since
that would result in self-copies. Perhaps not visible to scalars,
but it certainly is to C++ operator=. */
if (copyin_seq)
{
x = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
x = build2 (NE_EXPR, boolean_type_node, x,
build_int_cst (TREE_TYPE (x), 0));
x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
gimplify_and_add (x, ilist);
}
/* If any copyin variable is passed by reference, we must ensure the
master thread doesn't modify it before it is copied over in all
threads. Similarly for variables in both firstprivate and
lastprivate clauses we need to ensure the lastprivate copying
happens after firstprivate copying in all threads. */
if (copyin_by_ref || lastprivate_firstprivate)
gimplify_and_add (build_omp_barrier (), ilist);
}
/* Emit into STMT_LIST the copy-back code for the LASTPRIVATE clauses in
   CLAUSES.  This serves both parallel and workshare constructs: when CTX
   is a workshare whose enclosing context is a parallel, the LASTPRIVATE
   clauses on that parallel statement are handled as well.  PREDICATE, when
   non-NULL, is a binary comparison that guards the copy-back; a NULL
   PREDICATE means the copy-back is unconditional.  */
static void
lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
                           omp_context *ctx)
{
  tree clause;
  tree skip_label = NULL;
  bool on_parallel = false;

  /* Bail out quickly when there is nothing to copy back.  */
  clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
  if (clauses == NULL)
    {
      /* The construct itself has no lastprivate clause.  If it is a
         workshare nested directly in a parallel, the clauses may sit on
         the parallel statement instead.  */
      if (is_parallel_ctx (ctx))
        return;
      ctx = ctx->outer;
      if (ctx == NULL || !is_parallel_ctx (ctx))
        return;
      clauses = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
                                 OMP_CLAUSE_LASTPRIVATE);
      if (clauses == NULL)
        return;
      on_parallel = true;
    }

  if (predicate)
    {
      gimple branch;
      tree taken_label, lhs, rhs;

      /* Build "if (lhs <op> rhs) goto taken; else goto skip; taken:".
         The skip label is emitted after all the copies.  */
      skip_label = create_artificial_label (UNKNOWN_LOCATION);
      taken_label = create_artificial_label (UNKNOWN_LOCATION);
      lhs = TREE_OPERAND (predicate, 0);
      rhs = TREE_OPERAND (predicate, 1);
      gimplify_expr (&lhs, stmt_list, NULL, is_gimple_val, fb_rvalue);
      gimplify_expr (&rhs, stmt_list, NULL, is_gimple_val, fb_rvalue);
      branch = gimple_build_cond (TREE_CODE (predicate), lhs, rhs,
                                  taken_label, skip_label);
      gimple_seq_add_stmt (stmt_list, branch);
      gimple_seq_add_stmt (stmt_list, gimple_build_label (taken_label));
    }

  clause = clauses;
  while (clause)
    {
      if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_LASTPRIVATE)
        {
          location_t clause_loc = OMP_CLAUSE_LOCATION (clause);
          tree orig_var = OMP_CLAUSE_DECL (clause);
          tree priv_var = lookup_decl (orig_var, ctx);
          tree outer_ref, copy;

          /* Lower and emit any statement sequence attached to the
             clause, then detach it from the clause.  */
          if (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause))
            {
              lower_omp (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause), ctx);
              gimple_seq_add_seq (stmt_list,
                                  OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause));
            }
          OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause) = NULL;

          /* outer = private, through the language's assignment hook.  */
          outer_ref = build_outer_var_ref (orig_var, ctx);
          if (is_reference (orig_var))
            priv_var = build_fold_indirect_ref_loc (clause_loc, priv_var);
          copy = lang_hooks.decls.omp_clause_assign_op (clause, outer_ref,
                                                        priv_var);
          gimplify_and_add (copy, stmt_list);
        }

      clause = OMP_CLAUSE_CHAIN (clause);
      if (clause != NULL || on_parallel)
        continue;

      /* The construct's own clause chain is exhausted.  If this is a
         workshare directly inside a parallel, continue with the
         lastprivate clauses of the parallel statement.  */
      if (is_parallel_ctx (ctx))
        break;
      ctx = ctx->outer;
      if (ctx == NULL || !is_parallel_ctx (ctx))
        break;
      clause = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
                                OMP_CLAUSE_LASTPRIVATE);
      on_parallel = true;
    }

  if (skip_label)
    gimple_seq_add_stmt (stmt_list, gimple_build_label (skip_label));
}
/* Emit into STMT_SEQP the code that folds the private copies of the
   REDUCTION variables in CLAUSES back into the outer variables.  A single
   non-placeholder reduction is emitted as one OMP_ATOMIC update; any other
   combination is emitted between GOMP_atomic_start and GOMP_atomic_end
   calls.  */
static void
lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
{
  gimple_seq merge_seq = NULL;
  gimple call;
  tree clause;
  int n_reductions = 0;

  /* Decide between the OMP_ATOMIC form (exactly one reduction) and the
     locked form.  Counting stops at two; -1 marks a placeholder
     reduction, which never uses OMP_ATOMIC.  */
  for (clause = clauses;
       clause && n_reductions < 2;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE_REDUCTION)
        continue;
      if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (clause))
        {
          n_reductions = -1;
          break;
        }
      n_reductions++;
    }

  if (n_reductions == 0)
    return;

  for (clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
    {
      tree orig_var, outer_ref, priv_var, merged;
      enum tree_code op;
      location_t clause_loc;

      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE_REDUCTION)
        continue;

      clause_loc = OMP_CLAUSE_LOCATION (clause);
      orig_var = OMP_CLAUSE_DECL (clause);
      priv_var = lookup_decl (orig_var, ctx);
      if (is_reference (orig_var))
        priv_var = build_fold_indirect_ref_loc (clause_loc, priv_var);
      outer_ref = build_outer_var_ref (orig_var, ctx);

      /* Partial results of reduction(-:var) are summed, exactly like
         reduction(+:var).  */
      op = OMP_CLAUSE_REDUCTION_CODE (clause);
      if (op == MINUS_EXPR)
        op = PLUS_EXPR;

      if (n_reductions == 1)
        {
          /* Sole reduction: *addr = *addr <op> private, atomically.  */
          tree addr = save_expr (build_fold_addr_expr_loc (clause_loc,
                                                           outer_ref));

          outer_ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)),
                              addr);
          merged = fold_build2_loc (clause_loc, op, TREE_TYPE (outer_ref),
                                    outer_ref, priv_var);
          merged = build2 (OMP_ATOMIC, void_type_node, addr, merged);
          gimplify_and_add (merged, stmt_seqp);
          return;
        }

      if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (clause))
        {
          tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (clause);

          /* Point the placeholder at the outer variable while the merge
             sequence is lowered, then detach both from the clause.  */
          if (is_reference (orig_var))
            outer_ref = build_fold_addr_expr_loc (clause_loc, outer_ref);
          SET_DECL_VALUE_EXPR (placeholder, outer_ref);
          DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
          lower_omp (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (clause), ctx);
          gimple_seq_add_seq (&merge_seq,
                              OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (clause));
          OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (clause) = NULL;
          OMP_CLAUSE_REDUCTION_PLACEHOLDER (clause) = NULL;
        }
      else
        {
          /* outer = outer <op> private; a fresh outer reference is built
             for the store side.  */
          merged = build2 (op, TREE_TYPE (outer_ref), outer_ref, priv_var);
          outer_ref = build_outer_var_ref (orig_var, ctx);
          gimplify_assign (outer_ref, merged, &merge_seq);
        }
    }

  /* Bracket the collected merges with the atomic start/end calls.  */
  call = gimple_build_call (built_in_decls[BUILT_IN_GOMP_ATOMIC_START], 0);
  gimple_seq_add_stmt (stmt_seqp, call);
  gimple_seq_add_seq (stmt_seqp, merge_seq);
  call = gimple_build_call (built_in_decls[BUILT_IN_GOMP_ATOMIC_END], 0);
  gimple_seq_add_stmt (stmt_seqp, call);
}
/* Emit the code for the COPYPRIVATE clauses in CLAUSES.  For each such
   clause, a store of the variable (or of its address) into the sender
   record is appended to SLIST, and an assignment from the receiver record
   back into the variable is appended to RLIST.  */
static void
lower_copyprivate_clauses (tree clauses, gimple_seq *slist, gimple_seq *rlist,
                           omp_context *ctx)
{
  tree clause;

  for (clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
    {
      tree orig_var, outer_var, send_val, slot, copy;
      bool via_pointer;
      location_t clause_loc;

      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE_COPYPRIVATE)
        continue;

      clause_loc = OMP_CLAUSE_LOCATION (clause);
      orig_var = OMP_CLAUSE_DECL (clause);
      via_pointer = use_pointer_for_field (orig_var, NULL);

      /* Sender side: store the value, or its address converted to the
         field type when the field is a pointer.  */
      slot = build_sender_ref (orig_var, ctx);
      outer_var = lookup_decl_in_outer_ctx (orig_var, ctx);
      send_val = outer_var;
      if (via_pointer)
        {
          send_val = build_fold_addr_expr_loc (clause_loc, outer_var);
          send_val = fold_convert_loc (clause_loc, TREE_TYPE (slot),
                                       send_val);
        }
      gimplify_assign (slot, send_val, slist);

      /* Receiver side: read the field back, dereferencing as needed.  */
      slot = build_receiver_ref (orig_var, false, ctx);
      if (via_pointer)
        {
          tree ptr_type = build_pointer_type (TREE_TYPE (outer_var));

          slot = fold_convert_loc (clause_loc, ptr_type, slot);
          slot = build_fold_indirect_ref_loc (clause_loc, slot);
        }
      if (is_reference (orig_var))
        {
          slot = fold_convert_loc (clause_loc, TREE_TYPE (outer_var), slot);
          slot = build_fold_indirect_ref_loc (clause_loc, slot);
          outer_var = build_fold_indirect_ref_loc (clause_loc, outer_var);
        }

      copy = lang_hooks.decls.omp_clause_assign_op (clause, outer_var, slot);
      gimplify_and_add (copy, rlist);
    }
}
/* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE,
and REDUCTION from the sender (aka parent) side. PRIVATE clauses with an
outer reference and the streaming INPUT / OUTPUT clauses are handled here
as well.
CLAUSES is the clause chain to walk. Stores into the sender record are
appended to ILIST; copies from the sender record back into the outer
variable are appended to OLIST. CTX is the context of the construct
being lowered. */
static void
lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
omp_context *ctx)
{
tree c;
for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
{
tree val, ref, x, var;
bool by_ref, do_in = false, do_out = false;
location_t clause_loc = OMP_CLAUSE_LOCATION (c);
/* First filter: keep only the clause kinds that need sender-side code. */
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_PRIVATE:
/* A PRIVATE clause is only of interest when it carries an outer
reference. */
if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
break;
continue;
case OMP_CLAUSE_FIRSTPRIVATE:
case OMP_CLAUSE_COPYIN:
case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_INPUT:
case OMP_CLAUSE_OUTPUT:
break;
default:
continue;
}
/* VAL is the clause's decl; VAR is its counterpart in the outer
context. */
val = OMP_CLAUSE_DECL (c);
var = lookup_decl_in_outer_ctx (val, ctx);
/* Except for COPYIN, global variables are skipped. */
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
&& is_global_var (var))
continue;
/* Variable-sized decls are skipped here. */
if (is_variable_sized (val))
continue;
by_ref = use_pointer_for_field (val, NULL);
/* Second switch: decide per clause kind whether to copy in (DO_IN),
copy out (DO_OUT), or both. */
switch (OMP_CLAUSE_CODE (c))
{
case OMP_CLAUSE_FIRSTPRIVATE:
/* Copy in unless the clause's view-var kind is DISCARD. */
do_in = (OMP_CLAUSE_VIEW_VAR_KIND (c) != OMP_CLAUSE_VIEW_VAR_DISCARD);
break;
case OMP_CLAUSE_PRIVATE:
case OMP_CLAUSE_COPYIN:
do_in = true;
break;
case OMP_CLAUSE_INPUT:
case OMP_CLAUSE_OUTPUT:
{
omp_stmt_p omp_stmt = lookup_omp_stmt (ctx->stmt);
/* NOTE(review): VS is dereferenced below without a NULL check;
presumably every INPUT/OUTPUT decl has a registered stream --
confirm against lookup_stream. */
stream_p vs = lookup_stream (val);
/* NOTE(review): STMT and FN are declared but never used in this
block. */
gimple stmt;
tree fn;
/* NOTE(review): VAR already is the result of
lookup_decl_in_outer_ctx (val, ctx); this looks it up a second
time. The value computed here is only used for the
firstprivate-view map below, since VAR is overwritten with
vs->stream before the common code runs. */
var = lookup_decl_in_outer_ctx (var, ctx);
/* Strip a value-expr and one level of indirection, if present. */
if(DECL_HAS_VALUE_EXPR_P (var))
{
var = DECL_VALUE_EXPR (var);
if(TREE_CODE(var) == INDIRECT_REF)
var = TREE_OPERAND (var, 0);
}
/* Handle views. For an INPUT clause with an associated
firstprivate input, record the mapping from that view to VAR in
the statement's firstprivate_view_var_pmap. */
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INPUT
&& OMP_CLAUSE_FIRSTPRIVATE_INPUT (c) != NULL_TREE)
{
tree fp_view = OMP_CLAUSE_FIRSTPRIVATE_INPUT (c);
*pointer_map_insert (omp_stmt->firstprivate_view_var_pmap, fp_view) = var;
}
/* Disabled code: burst-size sending and view-variable mapping. */
#if 0
tree view = OMP_CLAUSE_VIEW_ID (c);
tree burst = OMP_CLAUSE_BURST_SIZE (c);
if (burst != NULL_TREE && DECL_P (burst))
{
ref = build_sender_ref (burst, ctx);
x = lookup_decl (burst, ctx);
gimplify_assign (ref, x, ilist);
if (is_task_ctx (ctx))
DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL;
}
if (OMP_CLAUSE_VIEW_ID (c) != NULL_TREE)
{
tree view_var = OMP_CLAUSE_VIEW_ID (c);
tree local_view_var = lookup_decl (view_var, ctx);
*pointer_map_insert (omp_stmt->var_map, view_var) = local_view_var;
*pointer_map_insert (omp_stmt->reverse_var_map, local_view_var) = view_var;
}
#endif
/* What is actually sent is the stream itself, by value and inbound
only. */
val = vs->stream;
var = vs->stream;
by_ref = false;
do_in = true;
do_out = false;
}
break;
case OMP_CLAUSE_LASTPRIVATE:
if (by_ref || is_reference (val))
{
/* Nothing to do when the clause is also marked
LASTPRIVATE_FIRSTPRIVATE. */
if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
continue;
do_in = true;
}
else
{
do_out = true;
if (lang_hooks.decls.omp_private_outer_ref (val))
do_in = true;
}
break;
case OMP_CLAUSE_REDUCTION:
/* Always copy in; copy out only when the value is held directly in
the record (neither by pointer nor a reference). */
do_in = true;
do_out = !(by_ref || is_reference (val));
break;
default:
gcc_unreachable ();
}
if (do_in)
{
/* sender.field = var (or &var when passed by reference). */
ref = build_sender_ref (val, ctx);
x = by_ref ? build_fold_addr_expr_loc (clause_loc, var) : var;
gimplify_assign (ref, x, ilist);
/* In a task context, clear the abstract origin of the field decl
(operand 1 of the reference just built). */
if (is_task_ctx (ctx))
DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL;
}
if (do_out)
{
/* var = sender.field. */
ref = build_sender_ref (val, ctx);
gimplify_assign (var, ref, olist);
}
}
}
/* Emit the sender (parent) side code for SHARED variables.  The clause
   list of the construct does not mention variables that were shared
   automatically, so the fields of the context's record type are walked
   instead.  Stores into the sender record go to ILIST; copies back into
   the outer variable go to OLIST.  */
static void
lower_send_shared_vars (gimple_seq *ilist, gimple_seq *olist, omp_context *ctx)
{
  tree field, fields_type;

  if (ctx->record_type == NULL)
    return;

  /* Prefer the sender record type when there is one.  */
  if (ctx->srecord_type)
    fields_type = ctx->srecord_type;
  else
    fields_type = ctx->record_type;

  for (field = TYPE_FIELDS (fields_type); field; field = TREE_CHAIN (field))
    {
      tree orig_var = DECL_ABSTRACT_ORIGIN (field);
      tree inner_var = maybe_lookup_decl (orig_var, ctx);
      tree outer_var, slot;

      /* Only variables remapped through a value-expr are of interest.  */
      if (inner_var == NULL_TREE || !DECL_HAS_VALUE_EXPR_P (inner_var))
        continue;

      /* CTX may be a nested parallel directive: find the mapping for the
         variable in the closest enclosing parallel or workshare
         construct.  */
      outer_var = lookup_decl_in_outer_ctx (orig_var, ctx);

      if (use_pointer_for_field (orig_var, ctx))
        {
          /* Passed by address: only the pointer is stored.  */
          slot = build_sender_ref (orig_var, ctx);
          outer_var = build_fold_addr_expr (outer_var);
          gimplify_assign (slot, outer_var, ilist);
          continue;
        }

      /* Passed by value: copy in...  */
      slot = build_sender_ref (orig_var, ctx);
      gimplify_assign (slot, outer_var, ilist);

      /* ...and copy back out, unless the variable is read-only.  */
      if (TREE_READONLY (outer_var))
        continue;

      /* A by-reference result or parm decl needs no new reference, and
         must not be stored to: that would invalidate any pending RSO and
         produce wrong gimple during inlining.  */
      if ((TREE_CODE (outer_var) == RESULT_DECL
           || TREE_CODE (outer_var) == PARM_DECL)
          && DECL_BY_REFERENCE (outer_var))
        continue;

      slot = build_sender_ref (orig_var, ctx);
      gimplify_assign (outer_var, slot, olist);
    }
}
/* Build a GIMPLE_COND from the comparison tree COND, leaving both the
   true and the false label unset.  */
static gimple
gimple_build_cond_empty (tree cond)
{
  enum tree_code cmp;
  tree op0, op1;

  /* Decompose COND into a comparison code and its two operands.  */
  gimple_cond_get_ops_from_tree (cond, &cmp, &op0, &op1);

  return gimple_build_cond (cmp, op0, op1, NULL_TREE, NULL_TREE);
}
/* Build the function calls to GOMP_parallel_start etc to actually
generate the parallel operation. REGION is the parallel region
being expanded. BB is the block where to insert the code.
ENTRY_STMT is the GIMPLE_OMP_PARALLEL statement, from which the clauses,
the data argument and the child function are read. WS_ARGS
will be set if this is a call to a combined parallel+workshare
construct, it contains the list of additional arguments needed by
the workshare construct.
The code emitted at the end of BB is, in order: the start call, a direct
call to the child function, and a call to GOMP_parallel_end. */
static void
expand_parallel_call (omp_region_p region, basic_block bb,
gimple entry_stmt, tree ws_args)
{
tree t, t1, t2, val, cond, c, clauses;
gimple_stmt_iterator gsi;
gimple stmt;
int start_ix;
location_t clause_loc;
clauses = gimple_omp_parallel_clauses (entry_stmt);
/* Determine what flavor of GOMP_parallel_start we will be
emitting. */
start_ix = BUILT_IN_GOMP_PARALLEL_START;
if (is_combined_parallel (region))
{
switch (region->inner->type)
{
case GIMPLE_OMP_FOR:
gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
/* The builtin is selected by offsetting from the STATIC variant: by
the schedule kind itself, or by 3 for a runtime schedule. */
start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
+ (region->inner->sched_kind
== OMP_CLAUSE_SCHEDULE_RUNTIME
? 3 : region->inner->sched_kind);
break;
case GIMPLE_OMP_SECTIONS:
start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
break;
default:
gcc_unreachable ();
}
}
/* By default, the value of NUM_THREADS is zero (selected at run time)
and there is no conditional. */
cond = NULL_TREE;
val = build_int_cst (unsigned_type_node, 0);
c = find_omp_clause (clauses, OMP_CLAUSE_IF);
if (c)
cond = OMP_CLAUSE_IF_EXPR (c);
c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
if (c)
{
val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
clause_loc = OMP_CLAUSE_LOCATION (c);
}
else
clause_loc = gimple_location (entry_stmt);
/* Ensure 'val' is of the correct type. */
val = fold_convert_loc (clause_loc, unsigned_type_node, val);
/* If we found the clause 'if (cond)', build either
(cond == 0) or (cond ? val : 1u). The first form is used when VAL
is zero: it evaluates to 0 (selected at run time) when COND holds and
to 1 when it does not. */
if (cond)
{
/* Note: this declaration shadows the function-level GSI. */
gimple_stmt_iterator gsi;
cond = gimple_boolify (cond);
if (integer_zerop (val))
val = fold_build2_loc (clause_loc,
EQ_EXPR, unsigned_type_node, cond,
build_int_cst (TREE_TYPE (cond), 0));
else
{
/* Materialize (cond ? val : 1u) as control flow: split BB into
COND_BB and the remainder, and insert THEN_BB / ELSE_BB between
them, each assigning its value to a temporary. */
basic_block cond_bb, then_bb, else_bb;
edge e, e_then, e_else;
tree tmp_then, tmp_else, tmp_join, tmp_var;
tmp_var = create_tmp_var (TREE_TYPE (val), NULL);
if (gimple_in_ssa_p (cfun))
{
/* In SSA form each arm and the join get their own SSA name. */
tmp_then = make_ssa_name (tmp_var, NULL);
tmp_else = make_ssa_name (tmp_var, NULL);
tmp_join = make_ssa_name (tmp_var, NULL);
}
else
{
tmp_then = tmp_var;
tmp_else = tmp_var;
tmp_join = tmp_var;
}
e = split_block (bb, NULL);
cond_bb = e->src;
bb = e->dest;
remove_edge (e);
then_bb = create_empty_bb (cond_bb);
else_bb = create_empty_bb (then_bb);
set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
stmt = gimple_build_cond_empty (cond);
gsi = gsi_start_bb (cond_bb);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* then: tmp = val. */
gsi = gsi_start_bb (then_bb);
stmt = gimple_build_assign (tmp_then, val);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* else: tmp = 1u. */
gsi = gsi_start_bb (else_bb);
stmt = gimple_build_assign
(tmp_else, build_int_cst (unsigned_type_node, 1));
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
if (gimple_in_ssa_p (cfun))
{
/* Merge the two arms with a PHI node at the start of BB. */
gimple phi = create_phi_node (tmp_join, bb);
SSA_NAME_DEF_STMT (tmp_join) = phi;
add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
}
val = tmp_join;
}
/* Make VAL a valid gimple operand, emitting any needed statements at
the start of BB. */
gsi = gsi_start_bb (bb);
val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
}
/* Emit the start call at the end of BB. T1 is the address of the data
argument (or a null pointer), T2 the address of the child function. */
gsi = gsi_last_bb (bb);
t = gimple_omp_parallel_data_arg (entry_stmt);
if (t == NULL)
t1 = null_pointer_node;
else
t1 = build_fold_addr_expr (t);
t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
if (ws_args)
{
/* Combined construct: (fn, data, num_threads) followed by WS_ARGS. */
tree args = tree_cons (NULL, t2,
tree_cons (NULL, t1,
tree_cons (NULL, val, ws_args)));
t = build_function_call_expr (UNKNOWN_LOCATION,
built_in_decls[start_ix], args);
}
else
t = build_call_expr (built_in_decls[start_ix], 3, t2, t1, val);
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* Call the child function directly as well, with the same data
argument. */
t = gimple_omp_parallel_data_arg (entry_stmt);
if (t == NULL)
t = null_pointer_node;
else
t = build_fold_addr_expr (t);
t = build_call_expr_loc (gimple_location (entry_stmt),
gimple_omp_parallel_child_fn (entry_stmt), 1, t);
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* Finally emit the call to GOMP_parallel_end. */
t = build_call_expr_loc (gimple_location (entry_stmt),
built_in_decls[BUILT_IN_GOMP_PARALLEL_END], 0);
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
}
/* Emit the runtime call that spawns the task described by ENTRY_STMT.
   The call is appended to BB, except for a streaming REGION, where it is
   appended to the initialization block of the outermost enclosing
   parallel's streamization info.  A streaming region that is not a
   pre-task is spawned through the GOMP_STREAM_TASK builtin; everything
   else goes through GOMP_TASK.  */
static void
expand_task_call (omp_region_p region, basic_block bb, gimple entry_stmt)
{
  tree call, fn_addr, data_addr, cpyfn_addr, flags, cond, clause, clauses;
  tree tmp;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  /* Streaming tasks are created in the outermost parallel's
     initialization block rather than in place.  */
  if (is_streaming_region (region))
    bb = get_outermost_parallel_streamization_info (region)->initialization_bb;

  clauses = gimple_omp_task_clauses (entry_stmt);

  /* IF clause: defaults to true.  */
  clause = find_omp_clause (clauses, OMP_CLAUSE_IF);
  cond = clause
         ? gimple_boolify (OMP_CLAUSE_IF_EXPR (clause))
         : boolean_true_node;

  /* UNTIED clause: the flags argument is 1 when present, 0 otherwise.  */
  clause = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
  flags = build_int_cst (unsigned_type_node, clause != NULL ? 1 : 0);

  gsi = gsi_last_bb (bb);

  /* Addresses of the data block, the child function and the copy
     function; missing data or copy function become null pointers.  */
  tmp = gimple_omp_task_data_arg (entry_stmt);
  if (tmp == NULL)
    data_addr = null_pointer_node;
  else
    data_addr = build_fold_addr_expr_loc (loc, tmp);

  fn_addr = build_fold_addr_expr_loc (loc,
                                      gimple_omp_task_child_fn (entry_stmt));

  tmp = gimple_omp_task_copy_fn (entry_stmt);
  if (tmp == NULL)
    cpyfn_addr = null_pointer_node;
  else
    cpyfn_addr = build_fold_addr_expr_loc (loc, tmp);

  if (is_streaming_region (region)
      && !get_streamization_info (region)->is_pre_task)
    {
      streamization_info_p sinfo = get_streamization_info (region);

      /* The last two arguments are the instance count and the
         replication flag instead of the IF condition and flags.  */
      call = build_call_expr (built_in_decls[BUILT_IN_GOMP_STREAM_TASK], 7,
                              fn_addr, data_addr, cpyfn_addr,
                              gimple_omp_task_arg_size (entry_stmt),
                              gimple_omp_task_arg_align (entry_stmt),
                              sinfo->num_instances, sinfo->can_replicate);
    }
  else
    {
      call = build_call_expr (built_in_decls[BUILT_IN_GOMP_TASK], 7,
                              fn_addr, data_addr, cpyfn_addr,
                              gimple_omp_task_arg_size (entry_stmt),
                              gimple_omp_task_arg_align (entry_stmt),
                              cond, flags);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}
/* When exceptions are enabled, return a sequence holding BODY wrapped in
   a try/catch whose handler is a MUST_NOT_THROW region, so that a throw
   cannot break the structured block semantics.  With exceptions disabled
   BODY is returned unchanged.  */
static gimple_seq
maybe_catch_exception (gimple_seq body)
{
  gimple handler, wrapper;
  tree fail_decl;

  if (!flag_exceptions)
    return body;

  /* Use the language's cleanup action if it provides one, otherwise
     fall back to the trap builtin.  */
  fail_decl = lang_protect_cleanup_actions
              ? lang_protect_cleanup_actions ()
              : built_in_decls[BUILT_IN_TRAP];

  handler = gimple_build_eh_must_not_throw (fail_decl);
  wrapper = gimple_build_try (body, gimple_seq_alloc_with_stmt (handler),
                              GIMPLE_TRY_CATCH);

  return gimple_seq_alloc_with_stmt (wrapper);
}
/* Link the DECLs stored as the values of LIST to each other through their
   TREE_CHAIN fields, in list order, terminating the chain with NULL_TREE.
   Return the first DECL, or NULL_TREE for an empty LIST.  */
static tree
list2chain (tree list)
{
  tree node;

  if (list == NULL_TREE)
    return NULL_TREE;

  for (node = list; node; node = TREE_CHAIN (node))
    {
      tree next = TREE_CHAIN (node);

      /* Point this element's DECL at the next element's DECL.  */
      TREE_CHAIN (TREE_VALUE (node)) = next ? TREE_VALUE (next) : NULL_TREE;
    }

  return TREE_VALUE (list);
}
/* Remove barriers in REGION->EXIT's block. Note that this is only
valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
removed.
The removal is done by marking the workshare's GIMPLE_OMP_RETURN as
nowait, and only when neither the child function's local decls nor the
block-scope variables examined below are addressable. */
static void
remove_exit_barrier (omp_region_p region)
{
gimple_stmt_iterator gsi;
basic_block exit_bb;
edge_iterator ei;
edge e;
gimple stmt;
/* Tri-state, computed lazily: -1 not yet computed, 0 none found,
1 at least one addressable variable found. */
int any_addressable_vars = -1;
exit_bb = region->exit;
/* If the parallel region doesn't return, we don't have REGION->EXIT
block at all. */
if (! exit_bb)
return;
/* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
statements that can appear in between are extremely limited -- no
memory operations at all. Here, we allow nothing at all, so the
only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
gsi = gsi_last_bb (exit_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
gsi_prev (&gsi);
if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
return;
/* Look at the last statement of every predecessor of the exit block. */
FOR_EACH_EDGE (e, ei, exit_bb->preds)
{
gsi = gsi_last_bb (e->src);
if (gsi_end_p (gsi))
continue;
stmt = gsi_stmt (gsi);
/* Only a GIMPLE_OMP_RETURN that still implies a barrier is of
interest. */
if (gimple_code (stmt) == GIMPLE_OMP_RETURN
&& !gimple_omp_return_nowait_p (stmt))
{
/* OpenMP 3.0 tasks unfortunately prevent this optimization
in many cases. If there could be tasks queued, the barrier
might be needed to let the tasks run before some local
variable of the parallel that the task uses as shared
runs out of scope. The task can be spawned either
from within current function (this would be easy to check)
or from some function it calls and gets passed an address
of such a variable. */
if (any_addressable_vars < 0)
{
gimple parallel_stmt = last_stmt (region->entry);
tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
tree local_decls = DECL_STRUCT_FUNCTION (child_fun)->local_decls;
tree block;
any_addressable_vars = 0;
/* First scan the child function's local decls (a TREE_LIST, hence
TREE_VALUE). */
for (; local_decls; local_decls = TREE_CHAIN (local_decls))
if (TREE_ADDRESSABLE (TREE_VALUE (local_decls)))
{
any_addressable_vars = 1;
break;
}
/* Then walk the lexical blocks outward from STMT's block, checking
each block's variables, and stop after the block of the parallel
statement itself has been examined. */
for (block = gimple_block (stmt);
!any_addressable_vars
&& block
&& TREE_CODE (block) == BLOCK;
block = BLOCK_SUPERCONTEXT (block))
{
for (local_decls = BLOCK_VARS (block);
local_decls;
local_decls = TREE_CHAIN (local_decls))
if (TREE_ADDRESSABLE (local_decls))
{
any_addressable_vars = 1;
break;
}
if (block == gimple_block (parallel_stmt))
break;
}
}
/* No addressable variable found: the barrier is redundant. */
if (!any_addressable_vars)
gimple_omp_return_set_nowait (stmt);
}
}
}
/* Walk the region tree rooted at REGION and apply remove_exit_barrier to
   every GIMPLE_OMP_PARALLEL region in it.  REGION's own siblings are not
   visited; only REGION and its descendants are.  */
static void
remove_exit_barriers (omp_region_p region)
{
  omp_region_p child;

  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  /* Recurse into each directly nested region in turn.  */
  for (child = region->inner; child; child = child->next)
    remove_exit_barriers (child);
}
/* Redirect calls to omp_get_thread_num () and omp_get_num_threads () to
   the corresponding __builtin_omp_* decls.  The library functions cannot
   be declared const, but within one parallel body their results are
   constant, and the builtins are declared const.  The same holds for a
   task body, except that in an untied task omp_get_thread_num () may
   change at any task scheduling point, so it is left alone there.
   ENTRY_STMT is the statement heading the body being optimized.  */
static void
optimize_omp_library_calls (gimple entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thread_num_name
    = DECL_ASSEMBLER_NAME (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]);
  tree num_threads_name
    = DECL_ASSEMBLER_NAME (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS]);
  bool in_untied_task = false;

  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && find_omp_clause (gimple_omp_task_clauses (entry_stmt),
                          OMP_CLAUSE_UNTIED) != NULL)
    in_untied_task = true;

  FOR_EACH_BB (bb)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple call = gsi_stmt (gsi);
        tree callee, replacement;

        /* Only direct calls to external, public, body-less functions
           are candidates.  */
        if (!is_gimple_call (call))
          continue;
        callee = gimple_call_fndecl (call);
        if (callee == NULL_TREE
            || !DECL_EXTERNAL (callee)
            || !TREE_PUBLIC (callee)
            || DECL_INITIAL (callee) != NULL)
          continue;

        /* Pick the builtin matching the callee's name, if any.  */
        if (DECL_NAME (callee) == thread_num_name)
          {
            /* Inside "#pragma omp task untied" the thread number can
               change while the task region executes.  */
            if (in_untied_task)
              continue;
            replacement = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
          }
        else if (DECL_NAME (callee) == num_threads_name)
          replacement = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS];
        else
          continue;

        /* The callee must really be the library function: same
           assembler name, no arguments, nothrow when exceptions are on,
           and a compatible return type.  */
        if (DECL_ASSEMBLER_NAME (callee) != DECL_ASSEMBLER_NAME (replacement))
          continue;
        if (gimple_call_num_args (call) != 0)
          continue;
        if (flag_exceptions && !TREE_NOTHROW (callee))
          continue;
        if (TREE_CODE (TREE_TYPE (callee)) != FUNCTION_TYPE)
          continue;
        if (!types_compatible_p (TREE_TYPE (TREE_TYPE (callee)),
                                 TREE_TYPE (TREE_TYPE (replacement))))
          continue;

        gimple_call_set_fndecl (call, replacement);
      }
}
/* Tell whether REGION appears in a concurrent context, i.e. whether
   several threads will execute the enclosing context.  The enclosing
   regions are examined from the innermost outwards and the first one
   decides: a parallel or a loop region means concurrent, a single or a
   section region means not concurrent.  When no enclosing region exists,
   conservatively answer TRUE.  */
static bool
is_concurrent_context (omp_region_p region)
{
  for (region = region->outer; region != NULL; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_FOR:
        return true;

      case GIMPLE_OMP_SINGLE:
      case GIMPLE_OMP_SECTION:
        return false;

      case GIMPLE_OMP_TASK:
        /* Nesting inside tasks (streaming or not) is not allowed yet; it
           requires much more analysis.  */
        gcc_assert (0);
        /* Fall through.  */

      default:
        gcc_unreachable ();
      }

  return true;
}
/* After the outlining of the task has occurred, this function
generates the proper code in the enclosing context at the position
where the task code used to be.
REGION is the streaming region being expanded; ORIGINAL_BB is used as
the insertion point for the non-pre-task case (presumably the block the
task body was outlined from -- confirm against the caller).
Four cases are distinguished on REGION's streamization info:
- it is the outermost parallel's info: create the runtime task
structures and the firstprivate write views at the region entry;
- any other GIMPLE_OMP_PARALLEL region: nothing is generated;
- a task that is not a pre-task: generate activation-counter handling
and, in a non-concurrent context, the firstprivate stream writes;
- a pre-task: nothing is generated. */
static void
expand_steaming_taskreg_in_outer_context (omp_region_p region,
basic_block original_bb)
{
streamization_info_p sinfo = get_streamization_info (region);
streamization_info_p outermost_parallel_sinfo =
get_outermost_parallel_streamization_info (region);
gimple_stmt_iterator gsi;
gimple stmt;
tree fn;
view_p view;
int i;
tree fp_var;
if (sinfo == outermost_parallel_sinfo)
{
task_p task;
/* Build and initialize an activation counter and a termination
flag per task. Take their addresses and pass them as
firstprivate in the tasks and all enclosing contexts.
(The counter/flag creation itself is currently disabled below;
only the task structure is created.) */
gsi = gsi_start_bb (region->entry);
for (i = 0; VEC_iterate (task_p, sinfo->tasks, i, task); ++i)
{
#if 0 /*zzzz*/
task->activation_counter = create_tmp_var (long_long_unsigned_type_node,
"activation_counter");
TREE_ADDRESSABLE (task->activation_counter) = 1;
task->termination_flag = create_tmp_var (boolean_type_node,
"termination_flag");
TREE_ADDRESSABLE (task->termination_flag) = 1;
stmt = gimple_build_assign (task->activation_counter,
build_int_cst (long_long_unsigned_type_node, 0));
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
stmt = gimple_build_assign (task->activation_counter_p,
build_fold_addr_expr (task->activation_counter));
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
stmt = gimple_build_assign (task->termination_flag, boolean_false_node);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
stmt = gimple_build_assign (task->termination_flag_p,
build_fold_addr_expr (task->termination_flag));
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
#endif
/* Create one instance of the task data structure. */
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_TASK];
//stmt = gimple_build_call (fn, 2, task->activation_counter_p, task->termination_flag_p);
stmt = gimple_build_call (fn, 0);
gimple_call_set_lhs (stmt, task->task_decl);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Create a fake task for the parallel region (only used
for initialization and finalization
synchronization). */
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_TASK];
//stmt = gimple_build_call (fn, 2, null_pointer_node, null_pointer_node);
stmt = gimple_build_call (fn, 0);
gimple_call_set_lhs (stmt, sinfo->task_decl);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
/* Create firstprivate views and pass the pointers as well. Each view
is created with its size and burst size, and the result is stored in
view->view. */
for (i = 0; VEC_iterate (view_p, sinfo->fpviews, i, view); ++i)
{
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW];
stmt = gimple_build_call (fn, 2, view->view_size, get_view_burst_size (view, &gsi));
gimple_call_set_lhs (stmt, view->view);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
}
else if (region->type == GIMPLE_OMP_PARALLEL)
{
/* Nested (non-outermost) parallel regions: nothing is generated. */
//gcc_assert (0);
}
else if (!sinfo->is_pre_task)
{
streamization_info_p outer_sinfo = get_outer_streamization_info (region);
tree agg_fact = build_int_cst (long_long_unsigned_type_node, AGGREGATION_FACTOR);
/* NOTE(review): OUTER_ENTRY_BB and OUTER_EXIT_BB are only referenced
from the commented-out code below and are otherwise unused. */
basic_block init_bb, outer_entry_bb, outer_exit_bb, join_bb, cond_bb, commit_bb;
edge e;
streamization_info_p task = sinfo;
/* We disallow injecting back streams (lastprivate) in the outer
context. */
gcc_assert (VEC_length (view_p, outermost_parallel_sinfo->rviews) == 0);
/* Prepare blocks for code generation. After this, INIT_BB holds the
labels of the original block and ORIGINAL_BB is a new block on the
edge leaving it. */
e = split_block_after_labels (original_bb);
init_bb = e->src;
original_bb = split_edge (e);
/*
e = single_succ_edge (outer_sinfo->region->entry);
outer_entry_bb = split_edge (e);
e = split_block_after_labels (outer_sinfo->region->exit);
outer_exit_bb = split_edge (e);
*/
/* Temporaries used by the generated code. They are stored in the
task's info so the enclosing directive's expansion can refer to
them. */
task->local_activation_counter = create_tmp_var (long_long_unsigned_type_node, "local_activation_counter");
task->local_activation_index = create_tmp_var (long_long_unsigned_type_node, "local_activation_index");
task->local_activation_index_next = create_tmp_var (long_long_unsigned_type_node, "local_activation_index_next");
task->local_act_ctr_p = create_tmp_var (build_pointer_type (long_long_unsigned_type_node), "activation_counter_p");
#if 0 /*zzzz*/
local_act_ctr_p = lookup_var (task->activation_counter_p, outer_sinfo->stmt);
#endif
/* Some code will be generated by the enclosing directive, so we
will pass the info on each task requiring further handling up
until the enclosing directive is expanded itself. */
/* Initialization and finalization, as well as activation index
selection for concurrent firstprivate clauses can only be
generated in the enclosing. */
/* Add this task to the tasks to be handled at the enclosing
region level (for handling activation counters and
termination flags). */
VEC_safe_push (task_p, heap, outer_sinfo->tasks, task);
/* Handle control-flow (activation counters). */
/* Increment counter. */
gsi = gsi_last_bb (init_bb);
stmt = gimple_build_assign_with_ops (PLUS_EXPR, task->local_activation_counter,
task->local_activation_counter,
build_int_cst (long_long_unsigned_type_node, 1));
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Handle the periodic update of the global counter. Two blocks are
inserted after ORIGINAL_BB: COND_BB tests whether the local counter
has reached the aggregation factor and branches to COMMIT_BB if so,
or straight to JOIN_BB otherwise. */
e = single_succ_edge (original_bb);
join_bb = e->dest;
cond_bb = split_edge (e);
e = single_succ_edge (cond_bb);
commit_bb = split_edge (e);
remove_edge (find_edge (cond_bb, commit_bb));
make_edge (cond_bb, commit_bb, EDGE_TRUE_VALUE);
make_edge (cond_bb, join_bb, EDGE_FALSE_VALUE);
gsi = gsi_last_bb (cond_bb);
stmt = gimple_build_cond (EQ_EXPR, task->local_activation_counter, agg_fact,
NULL_TREE, NULL_TREE);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Commit the activation counter either atomically or just by
adding. */
gsi = gsi_last_bb (commit_bb);
if (is_concurrent_context (region))
{
/* Select the sized fetch-and-add builtin from the byte size of the
counter type: log2 of the size, plus one, offset from the _N
variant. */
HOST_WIDE_INT sync_builtin_idx;
tree sync_builtin;
sync_builtin_idx = tree_low_cst (TYPE_SIZE_UNIT (long_long_unsigned_type_node), 1);
sync_builtin_idx = exact_log2 (sync_builtin_idx);
gcc_assert (sync_builtin_idx < 5);
sync_builtin = built_in_decls[BUILT_IN_FETCH_AND_ADD_N + sync_builtin_idx + 1];
stmt = gimple_build_call (sync_builtin, 2, task->local_act_ctr_p, agg_fact);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
else
{
/* Plain read-modify-write: temp = *p; temp += agg_fact; *p = temp. */
tree temp = create_tmp_var (long_long_unsigned_type_node, NULL);
stmt = gimple_build_assign (temp, build_fold_indirect_ref (task->local_act_ctr_p));
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign_with_ops (PLUS_EXPR, temp, temp, agg_fact);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (build_fold_indirect_ref (task->local_act_ctr_p), temp);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Reset the local counter. */
stmt = gimple_build_assign (task->local_activation_counter,
build_int_cst (long_long_unsigned_type_node, 0));
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Handle data-flow (firstprivate clauses). */
if (!is_concurrent_context (region))
{
/* Compute a local activation index for stall/commit calls. */
/* If there are firstprivate views. */
if (!VEC_empty (view_p, task->fpviews))
{
/* Update the local counters for this task. */
/* Increment local_activation_index for commits and
local_activation_index_next for stalls. The index advances by the
aggregation factor, and the next index is set to the updated index
plus the aggregation factor. */
gsi = gsi_last_bb (commit_bb);
stmt = gimple_build_assign_with_ops (PLUS_EXPR, task->local_activation_index,
task->local_activation_index, agg_fact);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign_with_ops (PLUS_EXPR, task->local_activation_index_next,
task->local_activation_index, agg_fact);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Issue STALL/COMMIT calls for firstprivate write views for
first and last blocks in this region. */
for (i = 0; VEC_iterate (view_p, task->fpviews, i, view); ++i)
{
/* We need to use the proper view pointer, in the nesting
context of the task. */
tree view_decl = lookup_var (view->view, outer_sinfo->stmt);
stream_p stream = view->stream;
/* COMMIT: emitted at the end of COMMIT_BB with the current
activation index. */
gsi = gsi_last_bb (commit_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT];
stmt = gimple_build_call (fn, 2, view_decl, task->local_activation_index);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* STALL: emitted right after the commit; its result becomes the
view's buffer pointer. */
fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL];
stmt = gimple_build_call (fn, 3, view_decl, task->local_activation_index,
task->local_activation_index_next);
gimple_call_set_lhs (stmt, view->buffer_pointer);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Copy the FIRSTPRIVATE data to the stream buffer. */
/* If the variable is non-register, we need to add in a
temporary assignment in a register. */
gsi = gsi_last_bb (original_bb);
fp_var = lookup_fp_view_var (view->view, task->stmt);
if (AGGREGATE_TYPE_P (TREE_TYPE (fp_var)))
{
/* If the type is not assign-copyable, use memcpy. */
fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMCPY]);
stmt = gimple_build_call (fn, 3, view->buffer_pointer, build_fold_addr_expr (fp_var), view->burst_size);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
else
{
/* NOTE(review): the register test and the temporary copy use
stream->var, while the aggregate test above uses FP_VAR from
lookup_fp_view_var; confirm the two are meant to denote the same
variable. */
if (!is_gimple_reg (stream->var))
{
fp_var = create_tmp_var (TREE_TYPE (stream->var), "fp_var_reg");
stmt = gimple_build_assign (fp_var, stream->var);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
stmt = gimple_build_assign (build_fold_indirect_ref (view->buffer_pointer), fp_var);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Increment the buffer pointer for next time. */
stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
view->buffer_pointer,
view->buffer_pointer,
view->burst_size);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
}
else
{
/* Firstprivate clauses in concurrent contexts are only
allowed within parallel loops for now. As written, the assertion
rejects any firstprivate view in a concurrent context; the
parallel-loop exception is commented out. */
gcc_assert (VEC_length (view_p, task->fpviews) == 0
/*|| region->outer->type == GIMPLE_OMP_FOR*/);
}
}
else
{
/* Pre-tasks: nothing is generated in the outer context. */
}
}
/* Emit the bookkeeping code that surrounds the streaming tasks nested
   in REGION.

   For every task recorded in REGION's streamization info:
     - INITIALIZATION_BB receives the zero-initialization of the task's
       local activation counter and the call to
       BUILT_IN_GOMP_STREAM_GET_TASK_ACTIVATION_COUNTER whose result is
       stored in the task's local_act_ctr_p;
     - if the task has firstprivate views, INITIALIZATION_BB also
       receives the set-up of the local activation indices followed by
       one STALL call per view, and FINALIZATION_BB receives the final
       index update followed by one COMMIT call per view;
     - FINALIZATION_BB finally receives the code that adds the local
       activation counter to the counter designated by local_act_ctr_p,
       through an atomic fetch-and-add in concurrent contexts and
       through a plain load/add/store sequence otherwise.

   Every statement is appended at the end of its target block.  */
static void
handle_nested_streaming_tasks (omp_region_p region,
                               basic_block initialization_bb,
                               basic_block finalization_bb)
{
  tree agg_fact = build_int_cst (long_long_unsigned_type_node,
                                 AGGREGATION_FACTOR);
  streamization_info_p sinfo = get_streamization_info (region);
  gimple_stmt_iterator gsi;
  gimple stmt;
  task_p task;
  view_p view;
  tree fn;
  int i, j;

  for (i = 0; VEC_iterate (task_p, sinfo->tasks, i, task); ++i)
    {
      /* Initialize a local activation counter for each task.  */
      gsi = gsi_last_bb (initialization_bb);
      stmt = gimple_build_assign (task->local_activation_counter,
                                  build_int_cst (long_long_unsigned_type_node,
                                                 0));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      /* Retrieve the pointer to the task's activation counter, using
         the task declaration as seen from REGION's statement.  */
      fn = built_in_decls[BUILT_IN_GOMP_STREAM_GET_TASK_ACTIVATION_COUNTER];
      stmt = gimple_build_call (fn, 1,
                                lookup_var (task->task_decl, sinfo->stmt));
      gimple_call_set_lhs (stmt, task->local_act_ctr_p);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      /* If there are firstprivate views.  */
      if (!VEC_empty (view_p, task->fpviews))
        {
          if (!is_concurrent_context (task->region))
            {
              /* Initialize the local activation index from the current
                 value of the task's activation counter.  */
              gsi = gsi_last_bb (initialization_bb);
              stmt = gimple_build_assign
                (task->local_activation_index,
                 build_fold_indirect_ref (task->local_act_ctr_p));
              gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

              /* Update the local counters for this task.  */
              /* For initial STALL.  */
              gsi = gsi_last_bb (initialization_bb);
              stmt = gimple_build_assign_with_ops
                (PLUS_EXPR, task->local_activation_index_next,
                 task->local_activation_index, agg_fact);
              gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

              /* For final COMMIT.  */
              gsi = gsi_last_bb (finalization_bb);
              stmt = gimple_build_assign_with_ops
                (PLUS_EXPR, task->local_activation_index,
                 task->local_activation_index,
                 task->local_activation_counter);
              gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
            }
          else
            {
              /* Firstprivate views are not handled in concurrent
                 contexts.  Use gcc_unreachable rather than
                 gcc_assert (0) so that the failure is still reported
                 when assertion checking is disabled; otherwise the
                 STALL/COMMIT calls below would be emitted with
                 activation indices that were never set up.  */
              gcc_unreachable ();
            }
        }

      /* Issue STALL/COMMIT calls for firstprivate write views for
         first and last blocks in this region.  */
      for (j = 0; VEC_iterate (view_p, task->fpviews, j, view); ++j)
        {
          /* We need to use the proper view pointer, in the nesting
             context of the task.  */
          tree view_decl = lookup_var (view->view, sinfo->stmt);

          /* STALL */
          gsi = gsi_last_bb (initialization_bb);
          fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL];
          stmt = gimple_build_call (fn, 3, view_decl,
                                    task->local_activation_index,
                                    task->local_activation_index_next);
          gimple_call_set_lhs (stmt, view->buffer_pointer);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

          /* COMMIT */
          gsi = gsi_last_bb (finalization_bb);
          fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT];
          stmt = gimple_build_call (fn, 2, view_decl,
                                    task->local_activation_index);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
        }

      /* Commit the local activation counter's value either atomically
         or just by adding.  */
      gsi = gsi_last_bb (finalization_bb);
      if (is_concurrent_context (task->region))
        {
          HOST_WIDE_INT sync_builtin_idx;
          tree sync_builtin;

          /* Select the fetch-and-add builtin matching the byte size of
             the counter type: the sized variants follow the generic _N
             entry, indexed by log2 of the size.  exact_log2 returns -1
             when the size is not a power of two, which would select
             the generic _N entry, so both bounds are checked.  */
          sync_builtin_idx
            = tree_low_cst (TYPE_SIZE_UNIT (long_long_unsigned_type_node), 1);
          sync_builtin_idx = exact_log2 (sync_builtin_idx);
          gcc_assert (sync_builtin_idx >= 0 && sync_builtin_idx < 5);
          sync_builtin
            = built_in_decls[BUILT_IN_FETCH_AND_ADD_N + sync_builtin_idx + 1];
          stmt = gimple_build_call (sync_builtin, 2, task->local_act_ctr_p,
                                    task->local_activation_counter);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
        }
      else
        {
          /* Non-atomic update: load, add, store back.  */
          tree temp = create_tmp_var (long_long_unsigned_type_node, NULL);

          stmt = gimple_build_assign
            (temp, build_fold_indirect_ref (task->local_act_ctr_p));
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
          stmt = gimple_build_assign_with_ops (PLUS_EXPR, temp, temp,
                                               task->local_activation_counter);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
          stmt = gimple_build_assign
            (build_fold_indirect_ref (task->local_act_ctr_p), temp);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
        }
    }
}
/* Expand the OpenMP parallel or task directive starting at REGION.
The body of the region is moved into the child function recorded on the
directive statement (unless that function already has a CFG, in which
case the region's sub-graph is simply removed from the current
function), and a launch call is emitted through expand_parallel_call or
expand_task_call. For streaming regions,
expand_steaming_taskreg_in_outer_context is run on the block that
replaces the region before the launch call is emitted. */
static void
expand_omp_taskreg (omp_region_p region)
{
basic_block entry_bb, exit_bb, new_bb;
struct function *child_cfun;
tree child_fn, block, t, ws_args, *tp;
tree save_current;
gimple_stmt_iterator gsi;
gimple entry_stmt, stmt;
edge e;
/* The directive is the last statement of the region's entry block; it
names the child function that receives the region body. */
entry_stmt = last_stmt (region->entry);
child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
child_cfun = DECL_STRUCT_FUNCTION (child_fn);
/* If this function has been already instrumented, make sure
the child function isn't instrumented again. */
child_cfun->after_tree_profile = cfun->after_tree_profile;
entry_bb = region->entry;
exit_bb = region->exit;
/* Only combined parallel regions forward workshare arguments to the
launch call. */
if (is_combined_parallel (region))
ws_args = region->ws_args;
else
ws_args = NULL_TREE;
if (child_cfun->cfg)
{
/* Due to inlining, it may happen that we have already outlined
the region, in which case all we need to do is make the
sub-graph unreachable and emit the parallel call. */
edge entry_succ_e, exit_succ_e;
gimple_stmt_iterator gsi;
entry_succ_e = single_succ_edge (entry_bb);
gsi = gsi_last_bb (entry_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
|| gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
/* Drop the directive; the launch call is later emitted in ENTRY_BB
itself, which therefore becomes NEW_BB. */
gsi_remove (&gsi, true);
new_bb = entry_bb;
if (exit_bb)
{
/* Connect ENTRY_BB directly to the block following the region
before the region's blocks are deleted. */
exit_succ_e = single_succ_edge (exit_bb);
make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
}
remove_edge_and_dominated_blocks (entry_succ_e);
}
else
{
/* If the parallel region needs data sent from the parent
function, then the very first statement (except possible
tree profile counter updates) of the parallel body
is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
&.OMP_DATA_O is passed as an argument to the child function,
we need to replace it with the argument as seen by the child
function.
In most cases, this will end up being the identity assignment
.OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
a function call that has been inlined, the original PARM_DECL
.OMP_DATA_I may have been converted into a different local
variable. In which case, we need to keep the assignment. */
if (gimple_omp_taskreg_data_arg (entry_stmt))
{
basic_block entry_succ_bb = single_succ (entry_bb);
gimple_stmt_iterator gsi;
tree arg, narg;
gimple parcopy_stmt = NULL;
/* For streaming task regions the copy assignment is searched for
in the streamization initialization block instead of the
successor of ENTRY_BB. */
if (is_streaming_region (region))
{
streamization_info_p sinfo = get_streamization_info (region);
if (region->type == GIMPLE_OMP_TASK)
entry_succ_bb = sinfo->initialization_bb;
}
/* Scan forward for the copy assignment. The loop has no exit
condition of its own: the assert fires if the block ends before
the assignment is found. */
for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
{
gimple stmt;
gcc_assert (!gsi_end_p (gsi));
stmt = gsi_stmt (gsi);
if (gimple_code (stmt) != GIMPLE_ASSIGN)
continue;
if (gimple_num_ops (stmt) == 2)
{
tree arg = gimple_assign_rhs1 (stmt);
/* We ignore the subcode because we're
effectively doing a STRIP_NOPS. */
if (TREE_CODE (arg) == ADDR_EXPR
&& TREE_OPERAND (arg, 0)
== gimple_omp_taskreg_data_arg (entry_stmt))
{
parcopy_stmt = stmt;
break;
}
}
}
gcc_assert (parcopy_stmt != NULL);
/* The data block is received through the first argument of the
child function. */
arg = DECL_ARGUMENTS (child_fn);
if (!gimple_in_ssa_p (cfun))
{
/* GSI still points at PARCOPY_STMT here. An identity
assignment is removed; otherwise its RHS is redirected to
the child function's argument. */
if (gimple_assign_lhs (parcopy_stmt) == arg)
gsi_remove (&gsi, true);
else
{
/* ?? Is setting the subcode really necessary ?? */
gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
gimple_assign_set_rhs1 (parcopy_stmt, arg);
}
}
else
{
/* If we are in ssa form, we must load the value from the default
definition of the argument. That should not be defined now,
since the argument is not used uninitialized. */
gcc_assert (gimple_default_def (cfun, arg) == NULL);
narg = make_ssa_name (arg, gimple_build_nop ());
set_default_def (arg, narg);
/* ?? Is setting the subcode really necessary ?? */
gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (narg));
gimple_assign_set_rhs1 (parcopy_stmt, narg);
update_stmt (parcopy_stmt);
}
}
/* We need to update those as the streamization might have moved
them around. */
entry_bb = region->entry;
exit_bb = region->exit;
/* Declare local variables needed in CHILD_CFUN. */
block = DECL_INITIAL (child_fn);
BLOCK_VARS (block) = list2chain (child_cfun->local_decls);
/* The gimplifier could record temporaries in parallel/task block
rather than in containing function's local_decls chain,
which would mean cgraph missed finalizing them. Do it now. */
for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t))
if (TREE_CODE (t) == VAR_DECL
&& TREE_STATIC (t)
&& !DECL_EXTERNAL (t))
varpool_finalize_decl (t);
DECL_SAVED_TREE (child_fn) = NULL;
gimple_set_body (child_fn, bb_seq (single_succ (entry_bb)));
TREE_USED (block) = 1;
/* Reset DECL_CONTEXT on function arguments. */
for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
DECL_CONTEXT (t) = child_fn;
/* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
so that it can be moved to the child function. */
gsi = gsi_last_bb (entry_bb);
stmt = gsi_stmt (gsi);
gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
|| gimple_code (stmt) == GIMPLE_OMP_TASK));
gsi_remove (&gsi, true);
/* From here on ENTRY_BB designates the new block created by the
split, i.e. the first block handed to move_sese_region_to_fn. */
e = split_block (entry_bb, stmt);
entry_bb = e->dest;
single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
/* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
if (exit_bb)
{
gsi = gsi_last_bb (exit_bb);
gcc_assert (!gsi_end_p (gsi)
&& gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
stmt = gimple_build_return (NULL);
gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
}
/* Move the parallel region into CHILD_CFUN. */
if (gimple_in_ssa_p (cfun))
{
/* Between push_cfun and pop_cfun, cfun is CHILD_CFUN, so the SSA
data structures set up here belong to the child function. */
push_cfun (child_cfun);
init_tree_ssa (child_cfun);
init_ssa_operands ();
cfun->gimple_df->in_ssa_p = true;
pop_cfun ();
block = NULL_TREE;
}
else
block = gimple_block (entry_stmt);
/* NEW_BB is the block left in the current function in place of the
moved region. */
new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
if (exit_bb)
single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
/* Remove non-local VAR_DECLs from child_cfun->local_decls list.
Entries whose DECL_CONTEXT is still the current function
(cfun->decl) are unlinked; all other entries are kept. */
for (tp = &child_cfun->local_decls; *tp; )
if (DECL_CONTEXT (TREE_VALUE (*tp)) != cfun->decl)
tp = &TREE_CHAIN (*tp);
else
*tp = TREE_CHAIN (*tp);
/* Inform the callgraph about the new function. */
DECL_STRUCT_FUNCTION (child_fn)->curr_properties
= cfun->curr_properties;
cgraph_add_new_function (child_fn, true);
/* Fix the callgraph edges for child_cfun. Those for cfun will be
fixed in a following pass. */
push_cfun (child_cfun);
save_current = current_function_decl;
current_function_decl = child_fn;
if (optimize)
optimize_omp_library_calls (entry_stmt);
rebuild_cgraph_edges ();
/* Some EH regions might become dead, see PR34608. If
pass_cleanup_cfg isn't the first pass to happen with the
new child, these dead EH edges might cause problems.
Clean them up now. */
if (flag_exceptions)
{
basic_block bb;
bool changed = false;
FOR_EACH_BB (bb)
changed |= gimple_purge_dead_eh_edges (bb);
if (changed)
cleanup_tree_cfg ();
}
if (gimple_in_ssa_p (cfun))
update_ssa (TODO_update_ssa);
/* Restore the parent function as the current one. */
current_function_decl = save_current;
pop_cfun ();
}
/* If this task is streaming, we need to handle the activation
counter incrementation at the place where the outlined task used
to be. */
if (is_streaming_region (region))
expand_steaming_taskreg_in_outer_context (region, new_bb);
/* Emit a library call to launch the children threads. */
if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
expand_parallel_call (region, new_bb, entry_stmt, ws_args);
else
expand_task_call (region, new_bb, entry_stmt);
/* Request an SSA update restricted to virtual operands now that the
launch call has been emitted. */
update_ssa (TODO_update_ssa_only_virtuals);
}
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with any schedule. Given parameters:
for (V = N1; V cond N2; V += STEP) BODY;
where COND is "<" or ">", we generate pseudocode
more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
if (more) goto L0; else goto L3;
L0:
V = istart0;
iend = iend0;
L1:
BODY;
V += STEP;
if (V cond iend) goto L1; else goto L2;
L2:
if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
L3:
If this is a combined omp parallel loop, instead of the call to
GOMP_loop_foo_start, we call GOMP_loop_foo_next.
For collapsed loops, given parameters:
collapse(3)
for (V1 = N11; V1 cond1 N12; V1 += STEP1)
for (V2 = N21; V2 cond2 N22; V2 += STEP2)
for (V3 = N31; V3 cond3 N32; V3 += STEP3)
BODY;
we generate pseudocode
if (cond3 is <)
adj = STEP3 - 1;
else
adj = STEP3 + 1;
count3 = (adj + N32 - N31) / STEP3;
if (cond2 is <)
adj = STEP2 - 1;
else
adj = STEP2 + 1;
count2 = (adj + N22 - N21) / STEP2;
if (cond1 is <)
adj = STEP1 - 1;
else
adj = STEP1 + 1;
count1 = (adj + N12 - N11) / STEP1;
count = count1 * count2 * count3;
more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
if (more) goto L0; else goto L3;
L0:
V = istart0;
T = V;
V3 = N31 + (T % count3) * STEP3;
T = T / count3;
V2 = N21 + (T % count2) * STEP2;
T = T / count2;
V1 = N11 + T * STEP1;
iend = iend0;
L1:
BODY;
V += 1;
if (V < iend) goto L10; else goto L2;
L10:
V3 += STEP3;
if (V3 cond3 N32) goto L1; else goto L11;
L11:
V3 = N31;
V2 += STEP2;
if (V2 cond2 N22) goto L1; else goto L12;
L12:
V2 = N21;
V1 += STEP1;
goto L1;
L2:
if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
L3:
REGION is the loop region being expanded and FD its loop description.
START_FN and NEXT_FN index built_in_decls and select the runtime
entry points used for GOMP_loop_foo_start and GOMP_loop_foo_next.
When REGION is a streaming region, two extra blocks are split off
after L0 and inside L2 and handed to handle_nested_streaming_tasks.
*/
static void
expand_omp_for_generic (omp_region_p region,
struct omp_for_data *fd,
enum built_in_function start_fn,
enum built_in_function next_fn)
{
tree type, istart0, iend0, iend;
tree t, vmain, vback, bias = NULL_TREE;
basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
basic_block l2_bb = NULL, l3_bb = NULL;
gimple_stmt_iterator gsi;
gimple stmt;
bool in_combined_parallel = is_combined_parallel (region);
/* A loop is "broken" when the region has no continue block. */
bool broken_loop = region->cont == NULL;
edge e, ne;
tree *counts = NULL;
int i;
/* Broken loops are not expected in combined parallel form, and
combined parallel loops must use long as iteration type. */
gcc_assert (!broken_loop || !in_combined_parallel);
gcc_assert (fd->iter_type == long_integer_type_node
|| !in_combined_parallel);
type = TREE_TYPE (fd->loop.v);
/* ISTART0 and IEND0 are filled in by the runtime calls, which
receive their addresses; hence they are marked addressable. */
istart0 = create_tmp_var (fd->iter_type, ".istart0");
iend0 = create_tmp_var (fd->iter_type, ".iend0");
TREE_ADDRESSABLE (istart0) = 1;
TREE_ADDRESSABLE (iend0) = 1;
if (gimple_in_ssa_p (cfun))
{
add_referenced_var (istart0);
add_referenced_var (iend0);
}
/* See if we need to bias by LLONG_MIN. This applies to a signed
integer iteration variable handled through the unsigned long long
runtime interface; the bias is skipped only when both bounds
computed below are integer constants that are either both negative
or both non-negative. */
if (fd->iter_type == long_long_unsigned_type_node
&& TREE_CODE (type) == INTEGER_TYPE
&& !TYPE_UNSIGNED (type))
{
tree n1, n2;
if (fd->loop.cond_code == LT_EXPR)
{
n1 = fd->loop.n1;
n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
}
else
{
n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
n2 = fd->loop.n1;
}
if (TREE_CODE (n1) != INTEGER_CST
|| TREE_CODE (n2) != INTEGER_CST
|| ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
}
entry_bb = region->entry;
cont_bb = region->cont;
collapse_bb = NULL;
gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
gcc_assert (broken_loop
|| BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
/* L0_BB is a new block on the fallthru edge out of ENTRY_BB and
L1_BB is its successor. L2_BB is only created for loops that have
a continue block. L3_BB is the branch destination of ENTRY_BB. */
l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
l1_bb = single_succ (l0_bb);
if (!broken_loop)
{
l2_bb = create_empty_bb (cont_bb);
gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb);
gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
}
else
l2_bb = NULL;
l3_bb = BRANCH_EDGE (entry_bb)->dest;
exit_bb = region->exit;
gsi = gsi_last_bb (entry_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
if (fd->collapse > 1)
{
/* collapsed loops need work for expansion in SSA form. */
gcc_assert (!gimple_in_ssa_p (cfun));
/* COUNTS[i] receives the iteration count of the i-th collapsed
loop, computed as (adj + N2 - N1) / STEP in an integer type. */
counts = (tree *) alloca (fd->collapse * sizeof (tree));
for (i = 0; i < fd->collapse; i++)
{
tree itype = TREE_TYPE (fd->loops[i].v);
if (POINTER_TYPE_P (itype))
itype = lang_hooks.types.type_for_size (TYPE_PRECISION (itype), 0);
t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype,
fold_convert (itype, fd->loops[i].step), t);
t = fold_build2 (PLUS_EXPR, itype, t,
fold_convert (itype, fd->loops[i].n2));
t = fold_build2 (MINUS_EXPR, itype, t,
fold_convert (itype, fd->loops[i].n1));
/* For unsigned types with a ">" condition both operands of the
division are negated first. */
if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype,
fold_convert (itype,
fd->loops[i].step)));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
fold_convert (itype, fd->loops[i].step));
t = fold_convert (type, t);
/* Constant counts are used directly; others are stored in a
temporary assigned before the GIMPLE_OMP_FOR statement. */
if (TREE_CODE (t) == INTEGER_CST)
counts[i] = t;
else
{
counts[i] = create_tmp_var (type, ".count");
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
true, GSI_SAME_STMT);
stmt = gimple_build_assign (counts[i], t);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* When the total count fd->loop.n2 is a variable, accumulate the
product of the per-loop counts into it. */
if (SSA_VAR_P (fd->loop.n2))
{
if (i == 0)
t = counts[0];
else
{
t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
true, GSI_SAME_STMT);
}
stmt = gimple_build_assign (fd->loop.n2, t);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
}
}
if (in_combined_parallel)
{
/* In a combined parallel loop, emit a call to
GOMP_loop_foo_next. */
t = build_call_expr (built_in_decls[next_fn], 2,
build_fold_addr_expr (istart0),
build_fold_addr_expr (iend0));
}
else
{
tree t0, t1, t2, t3, t4;
/* If this is not a combined parallel loop, emit a call to
GOMP_loop_foo_start in ENTRY_BB. T0/T1 are the (possibly
biased) bounds, T2 the step, T3/T4 the addresses of ISTART0 and
IEND0. */
t4 = build_fold_addr_expr (iend0);
t3 = build_fold_addr_expr (istart0);
t2 = fold_convert (fd->iter_type, fd->loop.step);
if (POINTER_TYPE_P (type)
&& TYPE_PRECISION (type) != TYPE_PRECISION (fd->iter_type))
{
/* Avoid casting pointers to integer of a different size. */
tree itype
= lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);
t1 = fold_convert (fd->iter_type, fold_convert (itype, fd->loop.n2));
t0 = fold_convert (fd->iter_type, fold_convert (itype, fd->loop.n1));
}
else
{
t1 = fold_convert (fd->iter_type, fd->loop.n2);
t0 = fold_convert (fd->iter_type, fd->loop.n1);
}
if (bias)
{
t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
}
if (fd->iter_type == long_integer_type_node)
{
/* The chunk size, when present, is passed between the step and
the two result addresses. */
if (fd->chunk_size)
{
t = fold_convert (fd->iter_type, fd->chunk_size);
t = build_call_expr (built_in_decls[start_fn], 6,
t0, t1, t2, t, t3, t4);
}
else
t = build_call_expr (built_in_decls[start_fn], 5,
t0, t1, t2, t3, t4);
}
else
{
tree t5;
tree c_bool_type;
/* The GOMP_loop_ull_*start functions have additional boolean
argument, true for < loops and false for > loops.
In Fortran, the C bool type can be different from
boolean_type_node. */
c_bool_type = TREE_TYPE (TREE_TYPE (built_in_decls[start_fn]));
t5 = build_int_cst (c_bool_type,
fd->loop.cond_code == LT_EXPR ? 1 : 0);
if (fd->chunk_size)
{
t = fold_convert (fd->iter_type, fd->chunk_size);
t = build_call_expr (built_in_decls[start_fn], 7,
t5, t0, t1, t2, t, t3, t4);
}
else
t = build_call_expr (built_in_decls[start_fn], 6,
t5, t0, t1, t2, t3, t4);
}
}
/* Turn the call result into a boolean condition and branch on it at
the end of ENTRY_BB. */
if (TREE_TYPE (t) != boolean_type_node)
t = fold_build2 (NE_EXPR, boolean_type_node,
t, build_int_cst (TREE_TYPE (t), 0));
t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
true, GSI_SAME_STMT);
gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
/* Remove the GIMPLE_OMP_FOR statement. */
gsi_remove (&gsi, true);
/* Iteration setup for sequential loop goes in L0_BB. */
gsi = gsi_start_bb (l0_bb);
/* V = istart0, with the bias removed and converted to V's type. */
t = istart0;
if (bias)
t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
if (POINTER_TYPE_P (type))
t = fold_convert (lang_hooks.types.type_for_size (TYPE_PRECISION (type),
0), t);
t = fold_convert (type, t);
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loop.v, t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* iend = iend0, with the same adjustments. */
t = iend0;
if (bias)
t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
if (POINTER_TYPE_P (type))
t = fold_convert (lang_hooks.types.type_for_size (TYPE_PRECISION (type),
0), t);
t = fold_convert (type, t);
iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
if (fd->collapse > 1)
{
/* Recover the individual iteration variables from the linear
index, innermost loop first: Vi = N1i + (T % counti) * STEPi,
then T = T / counti. */
tree tem = create_tmp_var (type, ".tem");
stmt = gimple_build_assign (tem, fd->loop.v);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
for (i = fd->collapse - 1; i >= 0; i--)
{
tree vtype = TREE_TYPE (fd->loops[i].v), itype;
itype = vtype;
if (POINTER_TYPE_P (vtype))
itype = lang_hooks.types.type_for_size (TYPE_PRECISION (vtype), 0);
t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
t = fold_convert (itype, t);
t = fold_build2 (MULT_EXPR, itype, t,
fold_convert (itype, fd->loops[i].step));
if (POINTER_TYPE_P (vtype))
t = fold_build2 (POINTER_PLUS_EXPR, vtype,
fd->loops[i].n1, fold_convert (sizetype, t));
else
t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loops[i].v, t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
if (i != 0)
{
t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (tem, t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
}
}
if (!broken_loop)
{
/* Code to control the increment and predicate for the sequential
loop goes in the CONT_BB. */
gsi = gsi_last_bb (cont_bb);
stmt = gsi_stmt (gsi);
gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
vmain = gimple_omp_continue_control_use (stmt);
vback = gimple_omp_continue_control_def (stmt);
if (POINTER_TYPE_P (type))
t = fold_build2 (POINTER_PLUS_EXPR, type, vmain,
fold_convert (sizetype, fd->loop.step));
else
t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
true, GSI_SAME_STMT);
stmt = gimple_build_assign (vback, t);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
t = build2 (fd->loop.cond_code, boolean_type_node, vback, iend);
stmt = gimple_build_cond_empty (t);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
/* Remove GIMPLE_OMP_CONTINUE. */
gsi_remove (&gsi, true);
if (fd->collapse > 1)
{
/* Build the chain of blocks (L10, L11, ... in the pseudocode
above) that step the collapsed iteration variables, starting
with the innermost loop. The first block created is recorded
in COLLAPSE_BB. */
basic_block last_bb, bb;
last_bb = cont_bb;
for (i = fd->collapse - 1; i >= 0; i--)
{
tree vtype = TREE_TYPE (fd->loops[i].v);
bb = create_empty_bb (last_bb);
gsi = gsi_start_bb (bb);
if (i < fd->collapse - 1)
{
/* Reached when the next-inner loop is exhausted: reset its
variable to its lower bound. */
e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
e->probability = REG_BR_PROB_BASE / 8;
t = fd->loops[i + 1].n1;
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loops[i + 1].v, t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
else
collapse_bb = bb;
set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
if (POINTER_TYPE_P (vtype))
t = fold_build2 (POINTER_PLUS_EXPR, vtype,
fd->loops[i].v,
fold_convert (sizetype, fd->loops[i].step));
else
t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v,
fd->loops[i].step);
t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loops[i].v, t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Every loop but the outermost tests its bound; the outermost
one falls through to L1_BB unconditionally. */
if (i > 0)
{
t = fd->loops[i].n2;
t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
t = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
fd->loops[i].v, t);
stmt = gimple_build_cond_empty (t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
e = make_edge (bb, l1_bb, EDGE_TRUE_VALUE);
e->probability = REG_BR_PROB_BASE * 7 / 8;
}
else
make_edge (bb, l1_bb, EDGE_FALLTHRU);
last_bb = bb;
}
}
/* Emit code to get the next parallel iteration in L2_BB. */
gsi = gsi_start_bb (l2_bb);
t = build_call_expr (built_in_decls[next_fn], 2,
build_fold_addr_expr (istart0),
build_fold_addr_expr (iend0));
t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
if (TREE_TYPE (t) != boolean_type_node)
t = fold_build2 (NE_EXPR, boolean_type_node,
t, build_int_cst (TREE_TYPE (t), 0));
stmt = gimple_build_cond_empty (t);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Add the loop cleanup function. It replaces the GIMPLE_OMP_RETURN
at the end of EXIT_BB. */
gsi = gsi_last_bb (exit_bb);
if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT];
else
t = built_in_decls[BUILT_IN_GOMP_LOOP_END];
stmt = gimple_build_call (t, 0);
gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
/* Connect the new blocks. */
find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
if (!broken_loop)
{
gimple_seq phis;
/* The exit edge CONT_BB -> L3_BB is replaced by L2_BB -> L3_BB;
the PHI arguments of L3_BB are copied over to the new edge before
the old one is removed. */
e = find_edge (cont_bb, l3_bb);
ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
phis = phi_nodes (l3_bb);
for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple phi = gsi_stmt (gsi);
SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
PHI_ARG_DEF_FROM_EDGE (phi, e));
}
remove_edge (e);
make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
/* For collapsed loops the back edge goes through COLLAPSE_BB
instead of directly to L1_BB. */
if (fd->collapse > 1)
{
e = find_edge (cont_bb, l1_bb);
remove_edge (e);
e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
}
else
{
e = find_edge (cont_bb, l1_bb);
e->flags = EDGE_TRUE_VALUE;
}
e->probability = REG_BR_PROB_BASE * 7 / 8;
find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
set_immediate_dominator (CDI_DOMINATORS, l2_bb,
recompute_dominator (CDI_DOMINATORS, l2_bb));
set_immediate_dominator (CDI_DOMINATORS, l3_bb,
recompute_dominator (CDI_DOMINATORS, l3_bb));
set_immediate_dominator (CDI_DOMINATORS, l0_bb,
recompute_dominator (CDI_DOMINATORS, l0_bb));
set_immediate_dominator (CDI_DOMINATORS, l1_bb,
recompute_dominator (CDI_DOMINATORS, l1_bb));
}
/* For streaming regions, split off an initialization block after
L0_BB and a finalization block after the labels of L2_BB, and let
handle_nested_streaming_tasks fill them in.
NOTE(review): L2_BB is NULL when the loop is broken (no continue
block), so this path appears to assume that streaming regions are
never broken loops -- confirm, or guard against it. */
if (is_streaming_region (region))
{
basic_block init_bb, fin_bb;
init_bb = split_edge (single_succ_edge (l0_bb));
fin_bb = split_edge (split_block_after_labels (l2_bb));
handle_nested_streaming_tasks (region, init_bb, fin_bb);
}
}
/* A subroutine of expand_omp_for.  Expand a parallel loop that uses a
   static schedule without an explicit chunk size.  For a loop

        for (V = N1; V cond N2; V += STEP) BODY;

   with COND being "<" or ">", the emitted code corresponds to

        if (cond is <)
          adj = STEP - 1;
        else
          adj = STEP + 1;
        if ((__typeof (V)) -1 > 0 && cond is >)
          n = -(adj + N2 - N1) / -STEP;
        else
          n = (adj + N2 - N1) / STEP;
        q = n / nthreads;
        q += (q * nthreads != n);
        s0 = q * threadid;
        e0 = min(s0 + q, n);
        if (s0 >= e0) goto L2; else goto L0;
    L0:
        V = s0 * STEP + N1;
        e = e0 * STEP + N1;
    L1:
        BODY;
        V += STEP;
        if (V cond e) goto L1;
    L2:

   REGION is the loop region being expanded and FD its loop
   description; N1, N2 and STEP in FD are replaced by their gimplified
   values.  */
static void
expand_omp_for_static_nochunk (omp_region_p region,
                               struct omp_for_data *fd)
{
  tree type = TREE_TYPE (fd->loop.v);
  tree itype = type;
  tree num_threads, thread_num;
  tree niter, share, lo_idx, hi_idx, hi_val, expr;
  tree ctl_use, ctl_def;
  basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb, fin_bb;
  gimple_stmt_iterator gsi;
  gimple stmt;

  /* Pointer iteration variables do their index arithmetic in an
     integer type of the same precision.  */
  if (POINTER_TYPE_P (type))
    itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);

  /* Identify the blocks of the loop skeleton, creating SEQ_START_BB on
     the fallthru edge leaving ENTRY_BB.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
  gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
  fin_bb = FALLTHRU_EDGE (cont_bb)->dest;
  exit_bb = region->exit;

  /* ENTRY_BB: partition the iteration space.  Everything is inserted
     in front of the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  expr = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
  num_threads = force_gimple_operand_gsi (&gsi, fold_convert (itype, expr),
                                          true, NULL_TREE,
                                          true, GSI_SAME_STMT);

  expr = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
  thread_num = force_gimple_operand_gsi (&gsi, fold_convert (itype, expr),
                                         true, NULL_TREE,
                                         true, GSI_SAME_STMT);

  /* Gimplify the loop bounds and the step once, up front.  */
  expr = fold_convert (type, fd->loop.n1);
  fd->loop.n1 = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                          true, GSI_SAME_STMT);
  expr = fold_convert (itype, fd->loop.n2);
  fd->loop.n2 = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                          true, GSI_SAME_STMT);
  expr = fold_convert (itype, fd->loop.step);
  fd->loop.step = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

  /* n = (adj + N2 - N1) / STEP, negating both operands of the division
     for unsigned types iterating downwards.  */
  expr = build_int_cst (itype, fd->loop.cond_code == LT_EXPR ? -1 : 1);
  expr = fold_build2 (PLUS_EXPR, itype, fd->loop.step, expr);
  expr = fold_build2 (PLUS_EXPR, itype, expr, fd->loop.n2);
  expr = fold_build2 (MINUS_EXPR, itype, expr,
                      fold_convert (itype, fd->loop.n1));
  if (fd->loop.cond_code == GT_EXPR && TYPE_UNSIGNED (itype))
    {
      tree neg_span = fold_build1 (NEGATE_EXPR, itype, expr);
      tree neg_step = fold_build1 (NEGATE_EXPR, itype, fd->loop.step);

      expr = fold_build2 (TRUNC_DIV_EXPR, itype, neg_span, neg_step);
    }
  else
    expr = fold_build2 (TRUNC_DIV_EXPR, itype, expr, fd->loop.step);
  niter = force_gimple_operand_gsi (&gsi, fold_convert (itype, expr),
                                    true, NULL_TREE, true, GSI_SAME_STMT);

  /* q = n / nthreads, rounded up.  */
  expr = fold_build2 (TRUNC_DIV_EXPR, itype, niter, num_threads);
  share = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
  expr = fold_build2 (MULT_EXPR, itype, share, num_threads);
  expr = fold_build2 (NE_EXPR, itype, expr, niter);
  expr = fold_build2 (PLUS_EXPR, itype, share, expr);
  share = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

  /* s0 = q * threadid.  */
  expr = build2 (MULT_EXPR, itype, share, thread_num);
  lo_idx = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  /* e0 = min (s0 + q, n).  */
  expr = fold_build2 (PLUS_EXPR, itype, lo_idx, share);
  expr = fold_build2 (MIN_EXPR, itype, expr, niter);
  hi_idx = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  /* Threads with an empty share skip the loop.  The condition takes
     the place of the GIMPLE_OMP_FOR statement.  */
  expr = build2 (GE_EXPR, boolean_type_node, lo_idx, hi_idx);
  gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* SEQ_START_BB: V = s0 * STEP + N1 and e = e0 * STEP + N1.  */
  gsi = gsi_start_bb (seq_start_bb);

  expr = fold_build2 (MULT_EXPR, itype, fold_convert (itype, lo_idx),
                      fd->loop.step);
  if (!POINTER_TYPE_P (type))
    expr = fold_build2 (PLUS_EXPR, type, expr, fd->loop.n1);
  else
    expr = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
                        fold_convert (sizetype, expr));
  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                   false, GSI_CONTINUE_LINKING);
  stmt = gimple_build_assign (fd->loop.v, expr);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  expr = fold_build2 (MULT_EXPR, itype, fold_convert (itype, hi_idx),
                      fd->loop.step);
  if (!POINTER_TYPE_P (type))
    expr = fold_build2 (PLUS_EXPR, type, expr, fd->loop.n1);
  else
    expr = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
                        fold_convert (sizetype, expr));
  hi_val = force_gimple_operand_gsi (&gsi, expr, true, NULL_TREE,
                                     false, GSI_CONTINUE_LINKING);

  /* CONT_BB: the increment and the loop test take the place of the
     GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_bb (cont_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
  ctl_use = gimple_omp_continue_control_use (stmt);
  ctl_def = gimple_omp_continue_control_def (stmt);

  if (!POINTER_TYPE_P (type))
    expr = fold_build2 (PLUS_EXPR, type, ctl_use, fd->loop.step);
  else
    expr = fold_build2 (POINTER_PLUS_EXPR, type, ctl_use,
                        fold_convert (sizetype, fd->loop.step));
  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
                                   true, GSI_SAME_STMT);
  stmt = gimple_build_assign (ctl_def, expr);
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

  expr = build2 (fd->loop.cond_code, boolean_type_node, ctl_def, hi_val);
  gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* EXIT_BB: the GIMPLE_OMP_RETURN becomes a barrier, unless it is
     marked nowait, in which case it is simply dropped.  */
  gsi = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    force_gimple_operand_gsi (&gsi, build_omp_barrier (), false, NULL_TREE,
                              false, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Give the edges their final flags and refresh the dominators of
     the affected blocks.  */
  find_edge (entry_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (entry_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
                           recompute_dominator (CDI_DOMINATORS, fin_bb));
}
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with static schedule and a specified chunk size. Given
parameters:
for (V = N1; V cond N2; V += STEP) BODY;
where COND is "<" or ">", we generate pseudocode
if (cond is <)
adj = STEP - 1;
else
adj = STEP + 1;
if ((__typeof (V)) -1 > 0 && cond is >)
n = -(adj + N2 - N1) / -STEP;
else
n = (adj + N2 - N1) / STEP;
trip = 0;
V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
here so that V is defined
if the loop is not entered
L0:
s0 = (trip * nthreads + threadid) * CHUNK;
e0 = min(s0 + CHUNK, n);
if (s0 < n) goto L1; else goto L4;
L1:
V = s0 * STEP + N1;
e = e0 * STEP + N1;
L2:
BODY;
V += STEP;
if (V cond e) goto L2; else goto L3;
L3:
trip += 1;
goto L0;
L4:

REGION supplies the entry, continue and exit blocks of the loop
(region->entry, region->cont, region->exit). FD supplies the loop
description: fd->loop.v, fd->loop.n1, fd->loop.n2, fd->loop.step,
fd->loop.cond_code and fd->chunk_size. The n1, n2, step and
chunk_size fields of FD are overwritten with gimplified operands.
*/
static void
expand_omp_for_static_chunk (omp_region_p region, struct omp_for_data *fd)
{
tree n, s0, e0, e, t;
tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
tree type, itype, v_main, v_back, v_extra;
basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
basic_block trip_update_bb, cont_bb, fin_bb;
gimple_stmt_iterator si;
gimple stmt;
edge se;
/* TYPE is the type of the iteration variable; ITYPE is the type used for
the iteration arithmetic. For a pointer iteration variable ITYPE is an
integer type of the same precision. */
itype = type = TREE_TYPE (fd->loop.v);
if (POINTER_TYPE_P (type))
itype = lang_hooks.types.type_for_size (TYPE_PRECISION (type), 0);
/* Split the entry block after its last statement: the first half stays
ENTRY_BB, the second half becomes ITER_PART_BB (L0 in the pseudocode). */
entry_bb = region->entry;
se = split_block (entry_bb, last_stmt (entry_bb));
entry_bb = se->src;
iter_part_bb = se->dest;
cont_bb = region->cont;
gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
gcc_assert (BRANCH_EDGE (iter_part_bb)->dest
== FALLTHRU_EDGE (cont_bb)->dest);
/* SEQ_START_BB (L1) is created on the fallthru edge out of ITER_PART_BB,
directly in front of the loop body. */
seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
body_bb = single_succ (seq_start_bb);
gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
/* FIN_BB (L4) is where the loop is left; TRIP_UPDATE_BB (L3) is created on
the fallthru edge from CONT_BB towards it. */
fin_bb = FALLTHRU_EDGE (cont_bb)->dest;
trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
exit_bb = region->exit;
/* Trip and adjustment setup goes in ENTRY_BB. */
si = gsi_last_bb (entry_bb);
gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_FOR);
t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
t = fold_convert (itype, t);
nthreads = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
true, GSI_SAME_STMT);
t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
t = fold_convert (itype, t);
threadid = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
true, GSI_SAME_STMT);
/* Gimplify the loop bounds, step and chunk size once, in front of the
GIMPLE_OMP_FOR. N1 keeps TYPE; the others are converted to ITYPE. */
fd->loop.n1
= force_gimple_operand_gsi (&si, fold_convert (type, fd->loop.n1),
true, NULL_TREE, true, GSI_SAME_STMT);
fd->loop.n2
= force_gimple_operand_gsi (&si, fold_convert (itype, fd->loop.n2),
true, NULL_TREE, true, GSI_SAME_STMT);
fd->loop.step
= force_gimple_operand_gsi (&si, fold_convert (itype, fd->loop.step),
true, NULL_TREE, true, GSI_SAME_STMT);
fd->chunk_size
= force_gimple_operand_gsi (&si, fold_convert (itype, fd->chunk_size),
true, NULL_TREE, true, GSI_SAME_STMT);
/* Compute the iteration count N = (adj + N2 - N1) / STEP, where adj is
STEP - 1 for "<" and STEP + 1 otherwise. */
t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype, fd->loop.step, t);
t = fold_build2 (PLUS_EXPR, itype, t, fd->loop.n2);
t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, fd->loop.n1));
/* For an unsigned ITYPE with a ">" condition, negate both dividend and
divisor before dividing. */
if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype, fd->loop.step));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd->loop.step);
t = fold_convert (itype, t);
n = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
true, GSI_SAME_STMT);
/* The trip counter. In SSA form it needs three names: the initial value,
the value merged at ITER_PART_BB and the incremented value. Outside
SSA form the single variable serves all three roles. */
trip_var = create_tmp_var (itype, ".trip");
if (gimple_in_ssa_p (cfun))
{
add_referenced_var (trip_var);
trip_init = make_ssa_name (trip_var, NULL);
trip_main = make_ssa_name (trip_var, NULL);
trip_back = make_ssa_name (trip_var, NULL);
}
else
{
trip_init = trip_var;
trip_main = trip_var;
trip_back = trip_var;
}
/* trip = 0. */
stmt = gimple_build_assign (trip_init, build_int_cst (itype, 0));
gsi_insert_before (&si, stmt, GSI_SAME_STMT);
/* V_EXTRA = threadid * CHUNK * STEP + N1, the value of V to use where the
loop body has not been entered (see the PHI fix-up below). */
t = fold_build2 (MULT_EXPR, itype, threadid, fd->chunk_size);
t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
if (POINTER_TYPE_P (type))
t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
fold_convert (sizetype, t));
else
t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
v_extra = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
true, GSI_SAME_STMT);
/* Remove the GIMPLE_OMP_FOR. */
gsi_remove (&si, true);
/* Iteration space partitioning goes in ITER_PART_BB. */
si = gsi_last_bb (iter_part_bb);
/* s0 = (trip * nthreads + threadid) * CHUNK. */
t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
t = fold_build2 (PLUS_EXPR, itype, t, threadid);
t = fold_build2 (MULT_EXPR, itype, t, fd->chunk_size);
s0 = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* e0 = min (s0 + CHUNK, n). */
t = fold_build2 (PLUS_EXPR, itype, s0, fd->chunk_size);
t = fold_build2 (MIN_EXPR, itype, t, n);
e0 = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* if (s0 < n) goto L1; else goto L4. */
t = build2 (LT_EXPR, boolean_type_node, s0, n);
gsi_insert_after (&si, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
/* Setup code for sequential iteration goes in SEQ_START_BB. */
si = gsi_start_bb (seq_start_bb);
/* V = s0 * STEP + N1. */
t = fold_convert (itype, s0);
t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
if (POINTER_TYPE_P (type))
t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
fold_convert (sizetype, t));
else
t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
t = force_gimple_operand_gsi (&si, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loop.v, t);
gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
/* e = e0 * STEP + N1. */
t = fold_convert (itype, e0);
t = fold_build2 (MULT_EXPR, itype, t, fd->loop.step);
if (POINTER_TYPE_P (type))
t = fold_build2 (POINTER_PLUS_EXPR, type, fd->loop.n1,
fold_convert (sizetype, t));
else
t = fold_build2 (PLUS_EXPR, type, t, fd->loop.n1);
e = force_gimple_operand_gsi (&si, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* The code controlling the sequential loop goes in CONT_BB,
replacing the GIMPLE_OMP_CONTINUE. */
si = gsi_last_bb (cont_bb);
stmt = gsi_stmt (si);
gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
v_main = gimple_omp_continue_control_use (stmt);
v_back = gimple_omp_continue_control_def (stmt);
/* V += STEP, written as v_back = v_main + STEP. */
if (POINTER_TYPE_P (type))
t = fold_build2 (POINTER_PLUS_EXPR, type, v_main,
fold_convert (sizetype, fd->loop.step));
else
t = fold_build2 (PLUS_EXPR, type, v_main, fd->loop.step);
stmt = gimple_build_assign (v_back, t);
gsi_insert_before (&si, stmt, GSI_SAME_STMT);
/* if (V cond e) goto L2; else goto L3. */
t = build2 (fd->loop.cond_code, boolean_type_node, v_back, e);
gsi_insert_before (&si, gimple_build_cond_empty (t), GSI_SAME_STMT);
/* Remove GIMPLE_OMP_CONTINUE. */
gsi_remove (&si, true);
/* Trip update code goes into TRIP_UPDATE_BB. */
si = gsi_start_bb (trip_update_bb);
t = build_int_cst (itype, 1);
t = build2 (PLUS_EXPR, itype, trip_main, t);
stmt = gimple_build_assign (trip_back, t);
gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
/* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
si = gsi_last_bb (exit_bb);
if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
force_gimple_operand_gsi (&si, build_omp_barrier (), false, NULL_TREE,
false, GSI_SAME_STMT);
gsi_remove (&si, true);
/* Connect the new blocks. */
find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
find_edge (cont_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE;
/* goto L0: the edge leaving TRIP_UPDATE_BB now targets ITER_PART_BB. */
redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
if (gimple_in_ssa_p (cfun))
{
gimple_stmt_iterator psi;
gimple phi;
edge re, ene;
edge_var_map_vector head;
edge_var_map *vm;
size_t i;
/* When we redirect the edge from trip_update_bb to iter_part_bb, we
remove arguments of the phi nodes in fin_bb. We need to create
appropriate phi nodes in iter_part_bb instead. */
se = single_pred_edge (fin_bb);
re = single_succ_edge (trip_update_bb);
head = redirect_edge_var_map_vector (re);
ene = single_succ_edge (entry_bb);
psi = gsi_start_phis (fin_bb);
/* Walk the PHIs of FIN_BB in step with the variable map recorded for the
redirected edge RE. */
for (i = 0; !gsi_end_p (psi) && VEC_iterate (edge_var_map, head, i, vm);
gsi_next (&psi), ++i)
{
gimple nphi;
source_location locus;
phi = gsi_stmt (psi);
t = gimple_phi_result (phi);
gcc_assert (t == redirect_edge_var_map_result (vm));
/* The PHI result is now defined by a new PHI in ITER_PART_BB. */
nphi = create_phi_node (t, iter_part_bb);
SSA_NAME_DEF_STMT (t) = nphi;
t = PHI_ARG_DEF_FROM_EDGE (phi, se);
locus = gimple_phi_arg_location_from_edge (phi, se);
/* A special case -- fd->loop.v is not yet computed in
iter_part_bb, we need to use v_extra instead. */
if (t == fd->loop.v)
t = v_extra;
add_phi_arg (nphi, t, ene, locus);
locus = redirect_edge_var_map_location (vm);
add_phi_arg (nphi, redirect_edge_var_map_def (vm), re, locus);
}
/* NOTE(review): this requires that at least one PHI of FIN_BB is left
unvisited once the whole map has been consumed -- confirm that this is
the intended invariant. */
gcc_assert (!gsi_end_p (psi) && i == VEC_length (edge_var_map, head));
redirect_edge_var_map_clear (re);
/* Delete every PHI node still present in FIN_BB. */
while (1)
{
psi = gsi_start_phis (fin_bb);
if (gsi_end_p (psi))
break;
remove_phi_node (&psi, false);
}
/* Make phi node for trip. */
phi = create_phi_node (trip_main, iter_part_bb);
SSA_NAME_DEF_STMT (trip_main) = phi;
add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
UNKNOWN_LOCATION);
add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
UNKNOWN_LOCATION);
}
/* Bring the dominator information up to date for the blocks whose
predecessors changed. */
set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
recompute_dominator (CDI_DOMINATORS, iter_part_bb));
set_immediate_dominator (CDI_DOMINATORS, fin_bb,
recompute_dominator (CDI_DOMINATORS, fin_bb));
set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
recompute_dominator (CDI_DOMINATORS, seq_start_bb));
set_immediate_dominator (CDI_DOMINATORS, body_bb,
recompute_dominator (CDI_DOMINATORS, body_bb));
}
/* Expand the OpenMP loop construct described by REGION.

   The loop description is extracted from the GIMPLE_OMP_FOR that ends
   REGION's entry block and the schedule kind is recorded in REGION.
   A static schedule without an ordered clause, with collapse == 1 and
   with a continue block is expanded inline (chunked or unchunked);
   every other loop is expanded through the generic GOMP_loop_*
   start/next runtime entry points.  */
static void
expand_omp_for (omp_region_p region)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;
  bool inline_static;

  /* One omp_for_data_loop per collapsed loop level, on the stack.  */
  loops = (struct omp_for_data_loop *)
    alloca (gimple_omp_for_collapse (last_stmt (region->entry))
            * sizeof (struct omp_for_data_loop));
  extract_omp_for_data (last_stmt (region->entry), &fd, loops);
  region->sched_kind = fd.sched_kind;

  /* FIXME: streaming regions currently ignore the requested schedule
     and are always expanded with the runtime schedule kind.  */
  if (is_streaming_region (region))
    {
      fd.sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
      region->sched_kind = fd.sched_kind;
    }

  /* The edges leaving the entry (and continue) block are ordinary
     edges from here on.  */
  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }

  inline_static = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
                   && !fd.have_ordered
                   && fd.collapse == 1
                   && region->cont != NULL);

  if (!inline_static)
    {
      int fn_index, start_ix, next_ix;

      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);

      /* Offset of the wanted builtin pair from the STATIC variant:
         the schedule kind (runtime maps to 3), plus 4 when the loop
         has an ordered clause.  */
      if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
        fn_index = 3;
      else
        fn_index = fd.sched_kind;
      fn_index += fd.have_ordered * 4;

      start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
      next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;

      /* Unsigned long long iterators use the ULL family of builtins.  */
      if (fd.iter_type == long_long_unsigned_type_node)
        {
          start_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_START
                      - BUILT_IN_GOMP_LOOP_STATIC_START;
          next_ix += BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
                     - BUILT_IN_GOMP_LOOP_STATIC_NEXT;
        }

      expand_omp_for_generic (region, &fd,
                              (enum built_in_function) start_ix,
                              (enum built_in_function) next_ix);
    }
  else if (fd.chunk_size == NULL)
    expand_omp_for_static_nochunk (region, &fd);
  else
    expand_omp_for_static_chunk (region, &fd);

  update_ssa (TODO_update_ssa_only_virtuals);
}
/* Expand code for an OpenMP sections directive. In pseudo code, we generate
v = GOMP_sections_start (n);
L0:
switch (v)
{
case 0:
goto L2;
case 1:
section 1;
goto L1;
case 2:
...
case n:
...
default:
abort ();
}
L1:
v = GOMP_sections_next ();
goto L0;
L2:
reduction;
If this is a combined parallel sections, replace the call to
GOMP_sections_start with call to GOMP_sections_next.

REGION is the GIMPLE_OMP_SECTIONS region; its inner regions are the
individual GIMPLE_OMP_SECTION regions (plus, optionally, a
GIMPLE_OMP_ATOMIC_LOAD region, which is skipped). When REGION has no
continue block the exit is unreachable and no "case 0", no
GOMP_sections_next call and no GOMP_sections_end call are emitted. */
static void
expand_omp_sections (omp_region_p region)
{
tree t, u, vin = NULL, vmain, vnext, l2;
VEC (tree,heap) *label_vec;
unsigned len;
basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
gimple_stmt_iterator si, switch_si;
gimple sections_stmt, stmt, cont;
edge_iterator ei;
edge e;
omp_region_p inner;
unsigned i, casei;
/* A region without a continue block must not have an exit block either. */
bool exit_reachable = region->cont != NULL;
gcc_assert (exit_reachable == (region->exit != NULL));
entry_bb = region->entry;
l0_bb = single_succ (entry_bb);
l1_bb = region->cont;
l2_bb = region->exit;
/* Determine L2, the label that "case 0" jumps to, and create the block
that will hold the default (trap) case. */
if (exit_reachable)
{
if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
l2 = gimple_block_label (l2_bb);
else
{
/* This can happen if there are reductions. */
len = EDGE_COUNT (l0_bb->succs);
gcc_assert (len > 0);
/* Try the last successor of L0_BB first; otherwise take the first
successor that does not end in a GIMPLE_OMP_SECTION. */
e = EDGE_SUCC (l0_bb, len - 1);
si = gsi_last_bb (e->dest);
l2 = NULL_TREE;
if (gsi_end_p (si)
|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
l2 = gimple_block_label (e->dest);
else
FOR_EACH_EDGE (e, ei, l0_bb->succs)
{
si = gsi_last_bb (e->dest);
if (gsi_end_p (si)
|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
{
l2 = gimple_block_label (e->dest);
break;
}
}
}
default_bb = create_empty_bb (l1_bb->prev_bb);
}
else
{
/* No exit: the default block doubles as the L2 target. */
default_bb = create_empty_bb (l0_bb);
l2 = gimple_block_label (default_bb);
}
/* We will build a switch() with enough cases for all the
GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
and a default case to abort if something goes wrong. */
len = EDGE_COUNT (l0_bb->succs);
/* Use VEC_quick_push on label_vec throughout, since we know the size
in advance. */
label_vec = VEC_alloc (tree, heap, len);
/* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
GIMPLE_OMP_SECTIONS statement. */
si = gsi_last_bb (entry_bb);
sections_stmt = gsi_stmt (si);
gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
vin = gimple_omp_sections_control (sections_stmt);
if (!is_combined_parallel (region))
{
/* If we are not inside a combined parallel+sections region,
call GOMP_sections_start. */
/* The argument is the successor count of L0_BB, minus one when the exit
is reachable. */
t = build_int_cst (unsigned_type_node,
exit_reachable ? len - 1 : len);
u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
stmt = gimple_build_call (u, 1, t);
}
else
{
/* Otherwise, call GOMP_sections_next. */
u = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
stmt = gimple_build_call (u, 0);
}
/* The call result is stored in the control variable VIN. */
gimple_call_set_lhs (stmt, vin);
gsi_insert_after (&si, stmt, GSI_SAME_STMT);
gsi_remove (&si, true);
/* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
L0_BB. */
switch_si = gsi_last_bb (l0_bb);
gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
/* VMAIN is the value switched on; VNEXT receives the result of
GOMP_sections_next in L1_BB. Without an exit there is no
GIMPLE_OMP_CONTINUE, so the switch uses VIN directly. */
if (exit_reachable)
{
cont = last_stmt (l1_bb);
gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
vmain = gimple_omp_continue_control_use (cont);
vnext = gimple_omp_continue_control_def (cont);
}
else
{
vmain = vin;
vnext = NULL_TREE;
}
i = 0;
/* "case 0: goto L2;" is only emitted when the exit is reachable. */
if (exit_reachable)
{
t = build3 (CASE_LABEL_EXPR, void_type_node,
build_int_cst (unsigned_type_node, 0), NULL, l2);
VEC_quick_push (tree, label_vec, t);
i++;
}
/* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
/* I counts the labels pushed so far; CASEI is the case value given to the
next section (starting at 1). */
for (inner = region->inner, casei = 1;
inner;
inner = inner->next, i++, casei++)
{
basic_block s_entry_bb, s_exit_bb;
/* Skip optional reduction region. */
if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
{
/* Undo the increments performed by the loop header. */
--i;
--casei;
continue;
}
s_entry_bb = inner->entry;
s_exit_bb = inner->exit;
t = gimple_block_label (s_entry_bb);
u = build_int_cst (unsigned_type_node, casei);
u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
VEC_quick_push (tree, label_vec, u);
/* Drop the GIMPLE_OMP_SECTION marker at the end of the section's entry
block. */
si = gsi_last_bb (s_entry_bb);
gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
gsi_remove (&si, true);
single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
/* A section without an exit block has no GIMPLE_OMP_RETURN to remove. */
if (s_exit_bb == NULL)
continue;
si = gsi_last_bb (s_exit_bb);
gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
gsi_remove (&si, true);
single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
}
/* Error handling code goes in DEFAULT_BB. */
t = gimple_block_label (default_bb);
u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
make_edge (l0_bb, default_bb, 0);
/* Build the switch from the default label U and the collected case labels,
and put it in place of the GIMPLE_OMP_SECTIONS_SWITCH. */
stmt = gimple_build_switch_vec (vmain, u, label_vec);
gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
gsi_remove (&switch_si, true);
VEC_free (tree, heap, label_vec);
/* The default case traps. */
si = gsi_start_bb (default_bb);
stmt = gimple_build_call (built_in_decls[BUILT_IN_TRAP], 0);
gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
if (exit_reachable)
{
/* Code to get the next section goes in L1_BB. */
si = gsi_last_bb (l1_bb);
gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
stmt = gimple_build_call (built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT], 0);
gimple_call_set_lhs (stmt, vnext);
gsi_insert_after (&si, stmt, GSI_SAME_STMT);
gsi_remove (&si, true);
single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
/* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
si = gsi_last_bb (l2_bb);
/* A nowait return selects the NOWAIT variant of the end call. */
if (gimple_omp_return_nowait_p (gsi_stmt (si)))
t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT];
else
t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END];
stmt = gimple_build_call (t, 0);
gsi_insert_after (&si, stmt, GSI_SAME_STMT);
gsi_remove (&si, true);
}
/* DEFAULT_BB is only reached through the edge added from L0_BB above. */
set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}
/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.

   REGION is the GIMPLE_OMP_SINGLE region.  Its entry marker is removed
   and its GIMPLE_OMP_RETURN is replaced by a barrier unless the return
   is marked nowait; a copyprivate clause forces the barrier even then.

   For a streaming region an INIT block and a FIN block are carved out
   of the CFG first and handed to handle_nested_streaming_tasks;
   REGION->exit is updated to the new exit block in that case.  */
static void
expand_omp_single (omp_region_p region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;
  bool need_barrier = false;

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (is_streaming_region (region))
    {
      basic_block cond_bb, init_bb, fin_bb;
      edge e;

      /* By construction the successor BB of entry is the
         conditional.  */
      cond_bb = single_succ (entry_bb);

      /* INIT_BB is a fresh block on the edge returned by
         get_true_entry_from_cond (presumably the edge taken when the
         single body is entered -- confirm against that helper).  */
      e = get_true_entry_from_cond (cond_bb);
      init_bb = split_edge (e);

      /* Split the exit block after its labels.  The second half
         becomes the region's exit; the edge from COND_BB that used to
         reach the first half is redirected to it, and FIN_BB is
         created on the edge between the two halves.  */
      e = split_block_after_labels (exit_bb);
      region->exit = exit_bb = e->dest;
      redirect_edge_and_branch_force (find_edge (cond_bb, e->src), e->dest);
      fin_bb = split_edge (e);

      handle_nested_streaming_tasks (region, init_bb, fin_bb);
    }

  /* Validate the entry marker before looking at its clauses.  */
  si = gsi_last_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);

  /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
     be removed.  We need to ensure that the thread that entered the single
     does not exit before the data is copied out by the other threads.  */
  if (find_omp_clause (gimple_omp_single_clauses (gsi_stmt (si)),
                       OMP_CLAUSE_COPYPRIVATE))
    need_barrier = true;

  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or with nothing.  */
  si = gsi_last_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)) || need_barrier)
    force_gimple_operand_gsi (&si, build_omp_barrier (), false, NULL_TREE,
                              false, GSI_SAME_STMT);
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}
/* Generic expansion for the OpenMP synchronization directives (single,
   master, ordered and critical are accepted).  Nothing is generated
   here: the directive marker that ends REGION's entry block is removed
   and, when REGION has an exit block, so is the GIMPLE_OMP_RETURN that
   ends it.  The outgoing edge of each affected block becomes a plain
   fallthru edge.  */
static void
expand_omp_synch (omp_region_p region)
{
  basic_block first_bb = region->entry;
  basic_block last_bb = region->exit;
  gimple_stmt_iterator gsi = gsi_last_bb (first_bb);
  enum gimple_code entry_code = gimple_code (gsi_stmt (gsi));

  gcc_assert (entry_code == GIMPLE_OMP_SINGLE
              || entry_code == GIMPLE_OMP_MASTER
              || entry_code == GIMPLE_OMP_ORDERED
              || entry_code == GIMPLE_OMP_CRITICAL);
  gsi_remove (&gsi, true);
  single_succ_edge (first_bb)->flags = EDGE_FALLTHRU;

  /* Regions without an exit block have no return marker to drop.  */
  if (!last_bb)
    return;

  gsi = gsi_last_bb (last_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_remove (&gsi, true);
  single_succ_edge (last_bb)->flags = EDGE_FALLTHRU;
}
/* A subroutine of expand_omp_atomic.  Try to implement the atomic
   update as a single __sync_fetch_and_OP builtin call.

   LOAD_BB ends in the GIMPLE_OMP_ATOMIC_LOAD; its single successor is
   taken to be the store block.  ADDR is the address being updated,
   LOADED_VAL the value produced by the load and STORED_VAL the value
   consumed by the store.  INDEX is log2 of the size of the data type
   and selects the sized variant of the builtin.

   Returns true when the call was emitted and the load, the update
   statement and the store were removed.  Returns false, leaving the
   code untouched, when the statements do not have the expected form or
   the target has no instruction for the operation.  */
static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
                            tree addr, tree loaded_val,
                            tree stored_val, int index)
{
  enum built_in_function fetch_base;
  enum insn_code *insn_table;
  enum tree_code op;
  tree fn_decl, fn_type, operand, call_expr;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator iter;
  gimple update;
  location_t where;

  /* The only shape recognized is

       load_bb:
         GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

       store_bb:
         val = tmp OP something;  (or: something OP tmp)
         GIMPLE_OMP_STORE (val)

     ???FIXME: Allow a more flexible sequence.
     Perhaps use data flow to pick the statements.  */
  iter = gsi_after_labels (store_bb);
  update = gsi_stmt (iter);
  where = gimple_location (update);
  if (!is_gimple_assign (update))
    return false;

  /* The assignment must be followed directly by the atomic store and
     must compute the stored value.  */
  gsi_next (&iter);
  if (gimple_code (gsi_stmt (iter)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  if (!operand_equal_p (gimple_assign_lhs (update), stored_val, 0))
    return false;

  /* Map the operation onto a fetch-and-op builtin family and onto the
     table used to test for target support.
     NOTE(review): subtraction is tested against sync_add_optab, like
     addition -- confirm this is intentional.  */
  op = gimple_assign_rhs_code (update);
  switch (op)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      fetch_base = BUILT_IN_FETCH_AND_ADD_N;
      insn_table = sync_add_optab;
      break;

    case MINUS_EXPR:
      fetch_base = BUILT_IN_FETCH_AND_SUB_N;
      insn_table = sync_add_optab;
      break;

    case BIT_AND_EXPR:
      fetch_base = BUILT_IN_FETCH_AND_AND_N;
      insn_table = sync_and_optab;
      break;

    case BIT_IOR_EXPR:
      fetch_base = BUILT_IN_FETCH_AND_OR_N;
      insn_table = sync_ior_optab;
      break;

    case BIT_XOR_EXPR:
      fetch_base = BUILT_IN_FETCH_AND_XOR_N;
      insn_table = sync_xor_optab;
      break;

    default:
      return false;
    }

  /* One operand has to be the loaded value; the other one becomes the
     argument of the builtin.  The loaded value may only appear as the
     second operand when the operation is commutative.  */
  if (operand_equal_p (gimple_assign_rhs1 (update), loaded_val, 0))
    operand = gimple_assign_rhs2 (update);
  else if (commutative_tree_code (op)
           && operand_equal_p (gimple_assign_rhs2 (update), loaded_val, 0))
    operand = gimple_assign_rhs1 (update);
  else
    return false;

  /* Pick the sized builtin and make sure the target supports its mode.  */
  fn_decl = built_in_decls[fetch_base + index + 1];
  fn_type = TREE_TYPE (TREE_TYPE (fn_decl));
  if (insn_table[TYPE_MODE (fn_type)] == CODE_FOR_nothing)
    return false;

  /* Emit the call in front of the atomic load, discarding its value,
     then remove the load.  */
  iter = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (iter)) == GIMPLE_OMP_ATOMIC_LOAD);
  call_expr = build_call_expr_loc (where, fn_decl, 2, addr,
                                   fold_convert_loc (where, fn_type,
                                                     operand));
  call_expr = fold_convert_loc (where, void_type_node, call_expr);
  force_gimple_operand_gsi (&iter, call_expr, true, NULL_TREE, true,
                            GSI_SAME_STMT);
  gsi_remove (&iter, true);

  /* Remove the atomic store, then the statement that is now last in
     the store block.  */
  iter = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (iter)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&iter, true);
  iter = gsi_last_bb (store_bb);
  gsi_remove (&iter, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as
   a compare-and-swap loop:

      oldval = *addr;
      repeat:
        newval = rhs;	 // with oldval replacing *addr in rhs
        prev = oldval;
        oldval = __sync_val_compare_and_swap (addr, prev, newval);
        if (oldval != prev)
          goto repeat;

   LOAD_BB ends in the GIMPLE_OMP_ATOMIC_LOAD and its single successor
   becomes the loop header; STORE_BB ends in the
   GIMPLE_OMP_ATOMIC_STORE.  ADDR is the address being updated,
   LOADED_VAL the value produced by the load and STORED_VAL the value
   consumed by the store.  INDEX is log2 of the size of the data type,
   and thus usable to find the index of the builtin decl.

   Returns false, without changing anything, when the target has no
   compare-and-swap instruction for the required mode; returns true
   once the loop has been built.  */
static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
                            tree addr, tree loaded_val, tree stored_val,
                            int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple phi, stmt;
  edge e;

  /* TYPE is the type of the object being updated; ITYPE is the return
     type of the sized compare-and-swap builtin.  */
  cmpxchg = built_in_decls[BUILT_IN_VAL_COMPARE_AND_SWAP_N + index + 1];
  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (sync_compare_and_swap[TYPE_MODE (itype)] == CODE_FOR_nothing)
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      /* IADDR is ADDR converted to a pointer to ITYPE.  */
      iaddr = create_tmp_var (build_pointer_type_for_mode (itype, ptr_mode,
                                                           true), NULL);
      iaddr_val
        = force_gimple_operand_gsi (&si,
                                    fold_convert (TREE_TYPE (iaddr), addr),
                                    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype, NULL);
      if (gimple_in_ssa_p (cfun))
        {
          add_referenced_var (iaddr);
          add_referenced_var (loadedi);
          loadedi = make_ssa_name (loadedi, NULL);
        }
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  initial = force_gimple_operand_gsi (&si, build_fold_indirect_ref (iaddr),
                                      true, NULL_TREE, true, GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  In SSA form this is a
     PHI in the loop header whose argument on the entry edge is the
     initial load; the back-edge argument is filled in further down.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SSA_NAME_DEF_STMT (loadedi) = phi;
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
               initial);
    }
  else
    gsi_insert_before (&si,
                       gimple_build_assign (loadedi, initial),
                       GSI_SAME_STMT);

  /* When an integral view is in use, LOADED_VAL is recomputed from
     LOADEDI at the start of the loop header.  */
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
        {
          x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
          stmt = gimple_build_assign (loaded_val, x);
          gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
        }
      else
        {
          x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
          force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
                                    true, GSI_SAME_STMT);
        }
    }
  gsi_remove (&si, true);

  si = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  /* STOREDI is the value to store, viewed as ITYPE when needed.  */
  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi =
      force_gimple_operand_gsi (&si,
                                build1 (VIEW_CONVERT_EXPR, itype,
                                        stored_val), true, NULL_TREE, true,
                                GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
                                          fold_convert (TREE_TYPE (loadedi),
                                                        new_storedi),
                                          true, NULL_TREE,
                                          true, GSI_SAME_STMT);

  /* OLD_VALI is the value the compare&swap expected to find.  In SSA
     form LOADEDI still holds it; otherwise it is saved in a temporary
     before LOADEDI is overwritten with the compare&swap result.  */
  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi), NULL);
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  stmt = gimple_build_cond_empty
           (build2 (NE_EXPR, boolean_type_node,
                    new_storedi, old_vali));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  The existing successor edge is taken when the values
     match; a new edge back to the loop header is taken otherwise.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.

   LOADED_VAL and ADDR are the operands of the GIMPLE_OMP_ATOMIC_LOAD
   that ends LOAD_BB.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE that ends STORE_BB.

   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) is replaced with

      GOMP_atomic_start ();
      loaded_val = *addr;

   and GIMPLE_OMP_ATOMIC_STORE (stored_val) is replaced with

      *addr = stored_val;
      GOMP_atomic_end ();

   This expansion always succeeds and returns true.  */
static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
                         tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator gsi;
  gimple assign;
  tree call;

  /* Lock, then perform the load as a plain dereference.  */
  gsi = gsi_last_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  call = build_function_call_expr (UNKNOWN_LOCATION,
                                   built_in_decls[BUILT_IN_GOMP_ATOMIC_START],
                                   0);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true,
                            GSI_SAME_STMT);

  assign = gimple_build_assign (loaded_val, build_fold_indirect_ref (addr));
  gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Perform the store as a plain dereference, then unlock.  ADDR was
     already used above, so this use gets its own unshared copy.  */
  gsi = gsi_last_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);

  assign = gimple_build_assign (build_fold_indirect_ref (unshare_expr (addr)),
                                stored_val);
  gsi_insert_before (&gsi, assign, GSI_SAME_STMT);

  call = build_function_call_expr (UNKNOWN_LOCATION,
                                   built_in_decls[BUILT_IN_GOMP_ATOMIC_END],
                                   0);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true,
                            GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}
/* Expand a GIMPLE_OMP_ATOMIC region.  REGION is the atomic region built
   by build_omp_regions_1: its entry block ends in the
   GIMPLE_OMP_ATOMIC_LOAD and its exit block in the matching
   GIMPLE_OMP_ATOMIC_STORE.

   Three strategies are tried in turn:
     1. expand_omp_atomic_fetch_op, a single fetch-and-op builtin;
     2. expand_omp_atomic_pipeline, a compare-and-swap loop;
     3. expand_omp_atomic_mutex, which wraps the operation in a mutex
        and is the ultimate fallback.

   The first two are only attempted when the size of the type is a
   power of two with log2 between 0 and 4 and the type is aligned to at
   least its size.  */
static void
expand_omp_atomic (omp_region_p region)
{
  basic_block load_bb = region->entry;
  basic_block store_bb = region->exit;
  gimple load = last_stmt (load_bb);
  gimple store = last_stmt (store_bb);
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  INDEX is log2
     of the size in bytes, or negative when that size is not a power
     of two.  */
  index = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
  index = exact_log2 (index);

  /* __sync builtins require strict data alignment.  */
  if (index >= 0 && index <= 4
      && exact_log2 (TYPE_ALIGN_UNIT (type)) >= index)
    {
      bool int_or_ptr = INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type);

      /* When possible, use specialized atomic update functions.  */
      if (int_or_ptr
          && store_bb == single_succ (load_bb)
          && expand_omp_atomic_fetch_op (load_bb, addr,
                                         loaded_val, stored_val, index))
        return;

      /* If we don't have specialized __sync builtins, try and implement
         as a compare and swap loop.  */
      if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
                                      loaded_val, stored_val, index))
        return;
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
/* Expand the parallel region tree rooted at REGION, together with all
   of REGION's following siblings.  Expansion is depth-first: the inner
   regions of a region are expanded before the region itself, so that
   constructs that get outlined into a new function (e.g.,
   GIMPLE_OMP_PARALLEL) no longer have internal dependencies in their
   body by the time they are expanded.

   While a region is being expanded, input_location is set to the
   location of the directive ending its entry block, if it has one; the
   previous value is restored afterwards.  */
static void
expand_omp (omp_region_p region)
{
  for (; region; region = region->next)
    {
      location_t outer_location;
      gimple directive;

      /* First, determine whether this is a combined parallel+workshare
         region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);

      /* Innermost regions first.  */
      if (region->inner)
        expand_omp (region->inner);

      outer_location = input_location;
      directive = last_stmt (region->entry);
      if (gimple_has_location (directive))
        input_location = gimple_location (directive);

      switch (region->type)
        {
        case GIMPLE_OMP_PARALLEL:
        case GIMPLE_OMP_TASK:
          expand_omp_taskreg (region);
          break;

        case GIMPLE_OMP_FOR:
          expand_omp_for (region);
          break;

        case GIMPLE_OMP_SECTIONS:
          expand_omp_sections (region);
          break;

        case GIMPLE_OMP_SECTION:
          /* Individual omp sections are handled together with their
             parent GIMPLE_OMP_SECTIONS region.  */
          break;

        case GIMPLE_OMP_SINGLE:
          expand_omp_single (region);
          break;

        case GIMPLE_OMP_MASTER:
        case GIMPLE_OMP_ORDERED:
        case GIMPLE_OMP_CRITICAL:
          expand_omp_synch (region);
          break;

        case GIMPLE_OMP_ATOMIC_LOAD:
          expand_omp_atomic (region);
          break;

        default:
          gcc_unreachable ();
        }

      input_location = outer_location;
    }
}
/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB, or NULL when BB is
   outside every region.  If SINGLE_TREE is true, scanning stops as soon
   as one complete region tree has been built; otherwise a whole forest
   of OMP constructs may be built.  */
static void
build_omp_regions_1 (basic_block bb, omp_region_p parent,
                     bool single_tree)
{
  gimple_stmt_iterator last = gsi_last_bb (bb);
  basic_block child;

  /* Only the last statement of a block can open, continue or close a
     region.  */
  if (!gsi_end_p (last) && is_gimple_omp (gsi_stmt (last)))
    {
      enum gimple_code code = gimple_code (gsi_stmt (last));

      switch (code)
        {
        case GIMPLE_OMP_RETURN:
          /* The return point out of region PARENT.  Mark BB as its exit
             point and continue in the immediately enclosing region.  */
          gcc_assert (parent);
          parent->exit = bb;
          parent = parent->outer;
          break;

        case GIMPLE_OMP_ATOMIC_STORE:
          /* GIMPLE_OMP_ATOMIC_STORE is analogous to GIMPLE_OMP_RETURN,
             but matches with GIMPLE_OMP_ATOMIC_LOAD.  */
          gcc_assert (parent);
          gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
          parent->exit = bb;
          parent = parent->outer;
          break;

        case GIMPLE_OMP_CONTINUE:
          /* Record the continue block of the current region.  */
          gcc_assert (parent);
          parent->cont = bb;
          break;

        case GIMPLE_OMP_SECTIONS_SWITCH:
          /* GIMPLE_OMP_SECTIONS_SWITCH is part of GIMPLE_OMP_SECTIONS,
             and we do nothing for it.  */
          break;

        default:
          /* Any other directive opens a new region nested in PARENT.  */
          parent = new_omp_region (bb, code, parent);
          break;
        }
    }

  /* In single-tree mode we are done once we are back outside of every
     region.  */
  if (single_tree && !parent)
    return;

  for (child = first_dom_son (CDI_DOMINATORS, bb);
       child;
       child = next_dom_son (CDI_DOMINATORS, child))
    build_omp_regions_1 (child, parent, single_tree);
}
/* Build the single tree of OMP regions whose outermost directive is
   reached from block ROOT.  The tree is stored in root_omp_region,
   which must be empty on entry and is checked to be non-empty on
   exit.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (!root_omp_region);

  build_omp_regions_1 (root, NULL, true);

  gcc_assert (root_omp_region);
}
/* Expand the OMP construct that starts in block HEAD, including every
   construct nested in it.  The region tree is built for this construct
   only, optionally dumped, expanded, and released again.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);

  if (dump_file != NULL && (dump_flags & TDF_DETAILS) != 0)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  free_omp_regions ();
}
/* Compute dominance information and scan the whole CFG, starting at the
   function's entry block, to build the forest of OMP regions.  Nothing
   is returned: build_omp_regions_1 registers the regions it creates,
   and root_omp_region must be empty when this is called.  */

static void
build_omp_regions (void)
{
  gcc_assert (!root_omp_region);

  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
}
/* Walk the sibling list starting at REGION, visiting nested regions
   first, and collect the streaming information of each region:

   - For a task with INPUT or OUTPUT clauses, build its streamization
     info, register the task with the outermost enclosing parallel
     region, and create one view per clause, attaching it both to the
     stream and to the task (read views for INPUT, write views for
     OUTPUT, plus a firstprivate view for a promoted FIRSTPRIVATE
     clause).
   - For every streaming task, decide how many instances it uses
     (TASK->num_instances) from its own NUM_THREADS clause or from the
     enclosing regions.
   - For an outermost streaming parallel region, copy the task_decl
     recorded for its statement.  */

static void
retrieve_streaming_info (omp_region_p region)
{
  streamization_info_p task, outermost_parallel_sinfo;
  gimple task_stmt;
  tree c;

  while (region)
    {
      /* These describe the region currently being visited.  Reset them
         for every region so that a value left over from a previous
         sibling (or an indeterminate one on the first iteration) can
         never be mistaken for this region's information.  */
      task = NULL;
      outermost_parallel_sinfo = NULL;

      if (region->inner)
        retrieve_streaming_info (region->inner);

      switch (region->type)
        {
        case GIMPLE_OMP_TASK:
          task_stmt = gsi_stmt (gsi_last_bb (region->entry));

          /* Build streamization info data structures for all nesting
             regions of a streaming task up to the outermost enclosing
             parallel region.  */
          if (find_omp_clause (gimple_omp_task_clauses (task_stmt),
                               OMP_CLAUSE_INPUT)
              || find_omp_clause (gimple_omp_task_clauses (task_stmt),
                                  OMP_CLAUSE_OUTPUT))
            {
              build_streamization_info (region);
            }

          if (is_streaming_region (region))
            {
              omp_stmt_p omp_stmt;

              task = get_streamization_info (region);
              outermost_parallel_sinfo
                = get_outermost_parallel_streamization_info (region);
              VEC_safe_push (task_p, heap, outermost_parallel_sinfo->tasks,
                             task);

              omp_stmt = lookup_omp_stmt (task->stmt);
#if 0 /*zzzz*/
              task->activation_counter_p = omp_stmt->activation_counter_p;
              task->termination_flag_p = omp_stmt->termination_flag_p;
#endif
              task->task_decl = omp_stmt->task_decl;
            }

          for (c = gimple_omp_task_clauses (task_stmt); c;
               c = OMP_CLAUSE_CHAIN (c))
            {
              enum omp_clause_code code = OMP_CLAUSE_CODE (c);

              switch (code)
                {
                case OMP_CLAUSE_INPUT:
                  if (OMP_CLAUSE_FIRSTPRIVATE_INPUT (c) != NULL_TREE)
                    {
                      stream_p stream
                        = lookup_stream (OMP_CLAUSE_STREAM_ID (c));
                      view_p view = build_view (stream, NULL_TREE);

                      gcc_assert (task && outermost_parallel_sinfo);

                      /* FIXME: Limit to one consumer for now.  We
                         will either split the stream or decide we
                         can have a single stream depending on
                         dominators later.  */

                      /* This is a promoted FIRSTPRIVATE clause.  */
                      VEC_safe_push (view_p, heap, stream->fpviews, view);
                      VEC_safe_push (view_p, heap,
                                     outermost_parallel_sinfo->fpviews, view);
                      VEC_safe_push (view_p, heap, task->fpviews, view);
                      view->sinfo = outermost_parallel_sinfo;
                      view->view = OMP_CLAUSE_FIRSTPRIVATE_INPUT (c);
                    }
                  /* Fallthru */

                case OMP_CLAUSE_OUTPUT:
                  {
                    /* Build the taskgraph.  */
                    stream_p stream
                      = lookup_stream (OMP_CLAUSE_STREAM_ID (c));
                    view_p view = build_view (stream, c);

                    /* Both are dereferenced below; they are only set
                       when this region is a streaming region.  */
                    gcc_assert (task && outermost_parallel_sinfo);

                    /* Ensure streams only belong to one region.  */
                    if (stream->outermost_parallel_sinfo)
                      gcc_assert (stream->outermost_parallel_sinfo
                                  == outermost_parallel_sinfo);
                    else
                      {
                        stream->outermost_parallel_sinfo
                          = outermost_parallel_sinfo;
                        VEC_safe_push (stream_p, heap,
                                       outermost_parallel_sinfo->streams,
                                       stream);
                      }

                    view->sinfo = task;

                    if (code == OMP_CLAUSE_INPUT)
                      {
                        view->type = READ_VIEW;
                        VEC_safe_push (view_p, heap, stream->rviews, view);
                        VEC_safe_push (view_p, heap, task->rviews, view);
                      }
                    else
                      {
                        view->type = WRITE_VIEW;
                        VEC_safe_push (view_p, heap, stream->wviews, view);
                        VEC_safe_push (view_p, heap, task->wviews, view);
                      }
                  }
                  break;

                default:
                  break;
                }
            }

          /* Decide on the number of instances this task should be
             using and whether it can be allowed to
             self-replicate.  */
          if (is_streaming_region (region))
            {
              omp_region_p outer = region->outer;
              tree num_thr_c
                = find_omp_clause (gimple_omp_task_clauses (task_stmt),
                                   OMP_CLAUSE_NUM_THREADS);

              task = get_streamization_info (region);
              task->num_instances = NULL_TREE;
              task->can_replicate = boolean_false_node;

              if (num_thr_c)
                task->num_instances = OMP_CLAUSE_NUM_THREADS_EXPR (num_thr_c);

              /* Without an explicit clause on the task, walk outwards
                 until an enclosing region settles the count.  */
              while (outer && task->num_instances == NULL_TREE)
                {
                  switch (outer->type)
                    {
                    case GIMPLE_OMP_PARALLEL:
                      {
                        streamization_info_p outer_sinfo
                          = get_streamization_info (outer);
                        tree clauses
                          = gimple_omp_parallel_clauses (outer_sinfo->stmt);
                        tree outer_num_thr_c
                          = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);

                        if (outer_num_thr_c)
                          task->num_instances
                            = OMP_CLAUSE_NUM_THREADS_EXPR (outer_num_thr_c);
                        else
                          {
                            /* Not supported now.  A call to the
                               BUILT_IN_OMP_GET_NUM_THREADS builtin
                               should be issued in the proper context
                               (outermost parallel), but that may just
                               give the wrong number.  We only support
                               the case where num_threads is set for
                               now.  */
                            gcc_assert (0);
                          }
                      }
                      break;

                    case GIMPLE_OMP_SINGLE:
                    case GIMPLE_OMP_TASK:
                    case GIMPLE_OMP_SECTION:
                      task->num_instances = integer_one_node;
                      break;

                    default:
                      break;
                    }
                  outer = outer->outer;
                }
              gcc_assert (task->num_instances);
            }
          break;

        case GIMPLE_OMP_PARALLEL:
          {
            if (is_streaming_region (region))
              {
                omp_stmt_p omp_stmt;
                streamization_info_p parallel_region
                  = get_streamization_info (region);

                outermost_parallel_sinfo
                  = get_outermost_parallel_streamization_info (region);

                /* Only the outermost parallel region gets the task
                   declaration recorded for its statement.  */
                if (parallel_region == outermost_parallel_sinfo)
                  {
                    omp_stmt = lookup_omp_stmt (parallel_region->stmt);
                    parallel_region->task_decl = omp_stmt->task_decl;
                  }
              }
          }
          break;

        case GIMPLE_OMP_SINGLE:
        case GIMPLE_OMP_FOR:
          break;

        default:
          break;
        }

      region = region->next;
    }
}
/* Wrap the single-entry/single-exit body delimited by ENTRY_BB and
   EXIT_BB in a counted loop:

     act_iv = 0;
     while (act_iv < AVL_WORK)
       {
         body;
         act_iv = act_iv + 1;
         advance the buffer pointer of every view of TASK by its
         burst size;
       }

   AVL_WORK is the trip count; when it is NULL_TREE the constant
   AGGREGATION_FACTOR is used instead.  ENTRY_BB must have a single
   predecessor and EXIT_BB a single successor.  */

static void
build_task_chunk_loop (basic_block entry_bb, basic_block exit_bb,
                       tree avl_work, streamization_info_p task)
{
  basic_block loop_init_bb, loop_header_bb, loop_iv_bb, body_entry_bb;
  gimple_stmt_iterator gsi;
  edge latch_edge, e;
  gimple stmt;
  view_p view;
  int i;
  tree iv_type, act_iv;

  if (!avl_work)
    avl_work = build_int_cst (size_type_node, AGGREGATION_FACTOR);

  /* Give the induction variable the type of the bound it is compared
     against.  Callers pass either nothing (size_type_node constant) or
     a temporary of a different integer type; a fixed size_type_node
     counter would make the comparison operands disagree on targets
     where the two types have different widths.  */
  iv_type = TREE_TYPE (avl_work);
  act_iv = create_tmp_var (iv_type, "act_iv");

  latch_edge = single_succ_edge (exit_bb);
  body_entry_bb = entry_bb;

  /* Carve out a header block and, before it, an init block on the edge
     that enters the body.  */
  loop_header_bb = split_edge (single_pred_edge (entry_bb));
  e = single_pred_edge (loop_header_bb);
  loop_init_bb = split_edge (e);

  stmt = gimple_build_assign (act_iv, build_int_cst (iv_type, 0));
  gsi = gsi_last_bb (loop_init_bb);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  /* The header either enters the body or leaves the loop to the block
     that followed EXIT_BB.  */
  remove_edge (single_succ_edge (loop_header_bb));
  make_edge (loop_header_bb, body_entry_bb, EDGE_TRUE_VALUE);
  make_edge (loop_header_bb, single_succ (exit_bb), EDGE_FALSE_VALUE);

  gsi = gsi_last_bb (loop_header_bb);
  stmt = gimple_build_cond (LT_EXPR, act_iv, avl_work, NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  /* Per-iteration bookkeeping lives in its own block on the latch.  */
  loop_iv_bb = split_edge (latch_edge);
  latch_edge = single_succ_edge (loop_iv_bb);

  gsi = gsi_last_bb (loop_iv_bb);
  stmt = gimple_build_assign_with_ops (PLUS_EXPR, act_iv, act_iv,
                                       build_int_cst (iv_type, 1));
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = 0; VEC_iterate (view_p, task->rviews, i, view); ++i)
    {
      stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
                                           view->buffer_pointer,
                                           view->buffer_pointer,
                                           view->burst_size);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    }

  for (i = 0; VEC_iterate (view_p, task->wviews, i, view); ++i)
    {
      stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
                                           view->buffer_pointer,
                                           view->buffer_pointer,
                                           view->burst_size);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    }

  /* Close the loop.  */
  redirect_edge_and_branch (latch_edge, loop_header_bb);
}
/* Build the outer control loop of streaming task TASK around the body
   delimited by TASK->sese_body_entry and TASK->sese_body_exit:

     header:   avl_work = GOMP_STREAM_GET_AVAILABLE_WORK
                            (task_decl, &act_idx_start);
               if (avl_work > 0) goto acquire; else leave the loop;
     acquire:  act_idx_end = act_idx_start + avl_work;
               one UPDATE call per read view and one STALL call per
               write view, each storing its result in the view's
               buffer pointer;
     body
     release:  one RELEASE call per read view and one COMMIT call per
               write view;
               goto header;

   On return TASK->sese_body_entry is the edge entering the (empty)
   block placed before the header, TASK->sese_body_exit is the edge
   leaving the loop from the header, and TASK->act_idx_start and
   TASK->avl_work hold the temporaries created here.  */

static void
build_task_control_loop (streamization_info_p task)
{
  basic_block loop_init_bb, loop_header_bb;
  basic_block loop_acquire_bb, loop_release_bb;
  gimple_stmt_iterator gsi;
  edge latch_edge;
  gimple stmt;
  view_p view;
  int i;
  tree fn, act_idx_end_reg, act_idx_start_reg;

  /* The start index is passed by address to the runtime, so it has to
     be addressable.  */
  task->act_idx_start = create_tmp_var (long_long_unsigned_type_node,
                                        "act_idx_start");
  TREE_ADDRESSABLE (task->act_idx_start) = 1;
  if (gimple_in_ssa_p (cfun))
    add_referenced_var (task->act_idx_start);

  task->avl_work = create_tmp_var (long_long_unsigned_type_node,
                                   "avl_work");

  latch_edge = task->sese_body_exit;

  /* Split the entry edge three times; walking backwards from the body
     this yields the acquire block, the header and an init block.  */
  loop_acquire_bb = split_edge (task->sese_body_entry);
  task->sese_body_entry = single_pred_edge (loop_acquire_bb);
  loop_header_bb = split_edge (task->sese_body_entry);
  task->sese_body_entry = single_pred_edge (loop_header_bb);
  loop_init_bb = split_edge (task->sese_body_entry);
  task->sese_body_entry = single_pred_edge (loop_init_bb);

  /* Header: ask the runtime how much work is available.  */
  gsi = gsi_last_bb (loop_header_bb);
  fn = built_in_decls[BUILT_IN_GOMP_STREAM_GET_AVAILABLE_WORK];
  stmt = gimple_build_call (fn, 2, lookup_var (task->task_decl, task->stmt),
                            build_fold_addr_expr (task->act_idx_start));
  gimple_call_set_lhs (stmt, task->avl_work);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  remove_edge (single_succ_edge (loop_header_bb));
  make_edge (loop_header_bb, loop_acquire_bb, EDGE_TRUE_VALUE);
  task->sese_body_exit =
    make_edge (loop_header_bb, task->sese_body_exit->dest, EDGE_FALSE_VALUE);

  /* Build the zero in the type of avl_work so that both comparison
     operands have the same type on every target.  */
  stmt = gimple_build_cond (GT_EXPR, task->avl_work,
                            build_int_cst (TREE_TYPE (task->avl_work), 0),
                            NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  loop_release_bb = split_edge (latch_edge);
  latch_edge = single_succ_edge (loop_release_bb);

  /* Acquire block: copy the start index out of the addressable
     variable and compute the end index of this chunk.  */
  gsi = gsi_last_bb (loop_acquire_bb);
  act_idx_start_reg = create_tmp_var (long_long_unsigned_type_node,
                                      "act_idx_start_reg");
  stmt = gimple_build_assign (act_idx_start_reg, task->act_idx_start);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  act_idx_end_reg = create_tmp_var (long_long_unsigned_type_node,
                                    "act_idx_end_reg");
  stmt = gimple_build_assign_with_ops (PLUS_EXPR, act_idx_end_reg,
                                       act_idx_start_reg,
                                       task->avl_work);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  /* Read views: UPDATE before the body, RELEASE after it.  */
  for (i = 0; VEC_iterate (view_p, task->rviews, i, view); ++i)
    {
      gsi = gsi_last_bb (loop_acquire_bb);
      fn = built_in_decls[BUILT_IN_GOMP_STREAM_UPDATE];
      stmt = gimple_build_call (fn, 3, view->view, act_idx_start_reg,
                                act_idx_end_reg);
      gimple_call_set_lhs (stmt, view->buffer_pointer);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_last_bb (loop_release_bb);
      fn = built_in_decls[BUILT_IN_GOMP_STREAM_RELEASE];
      stmt = gimple_build_call (fn, 2, view->view, act_idx_end_reg);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    }

  /* Write views: STALL before the body, COMMIT after it.  */
  for (i = 0; VEC_iterate (view_p, task->wviews, i, view); ++i)
    {
      gsi = gsi_last_bb (loop_acquire_bb);
      fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL];
      stmt = gimple_build_call (fn, 3, view->view, act_idx_start_reg,
                                act_idx_end_reg);
      gimple_call_set_lhs (stmt, view->buffer_pointer);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_last_bb (loop_release_bb);
      fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT];
      stmt = gimple_build_call (fn, 2, view->view, act_idx_end_reg);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
    }

  /* Close the loop.  */
  redirect_edge_and_branch (latch_edge, loop_header_bb);
}
/* Turn two consecutive copies of a task body into the two arms of a
   conditional.  On entry the first copy (ENTRY1..EXIT1) falls through
   into the second one (ENTRY2..EXIT2).  A new block is inserted before
   ENTRY1 that branches to ENTRY1 when TASK->avl_work equals
   AGGREGATION_FACTOR and to ENTRY2 otherwise; EXIT1 is rewired to jump
   to the block that follows EXIT2.  */

static void
select_task_chunk_loop (streamization_info_p task,
                        basic_block entry1, basic_block exit1,
                        basic_block entry2, basic_block exit2)
{
  basic_block join_bb = single_succ (exit2);
  basic_block cond_bb = split_edge (single_pred_edge (entry1));
  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
  gimple stmt;

  gcc_assert (single_succ (exit1) == entry2);

  /* Drop the fallthrough edges cond_bb->entry1 and exit1->entry2, then
     rebuild the diamond.  */
  remove_edge (single_pred_edge (entry1));
  remove_edge (single_pred_edge (entry2));
  make_edge (cond_bb, entry1, EDGE_TRUE_VALUE);
  make_edge (cond_bb, entry2, EDGE_FALSE_VALUE);
  make_edge (exit1, join_bb, EDGE_FALLTHRU);

  /* Build the constant in the type of avl_work so that both comparison
     operands have the same type on every target.  */
  stmt = gimple_build_cond (EQ_EXPR, task->avl_work,
                            build_int_cst (TREE_TYPE (task->avl_work),
                                           AGGREGATION_FACTOR),
                            NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
static void
prepare_streaming_context (omp_region_p region)
{
while (region)
{
if (is_streaming_region (region))
switch (region->type)
{
case GIMPLE_OMP_PARALLEL:
if (get_streamization_info (region) == get_outermost_parallel_streamization_info (region))
{
/* Build a SINGLE region for initialization at the
beginning and another one at the end for
finalization. */
basic_block condition_bb, body_entry_bb, body_exit_bb;
basic_block initialization_bb, finalization_bb, join_bb;
basic_block copy_in_bb;
edge entry_edge, exit_edge, tmp_edge;
tree tmp, fn;
streamization_info_p sinfo = get_streamization_info (region);
gimple_stmt_iterator gsi, src, dest;
gimple stmt;
int i, j;
stream_p stream;
tree task_decl = lookup_var (sinfo->task_decl, sinfo->stmt);
view_p view;
entry_edge = single_succ_edge (region->entry);
exit_edge = split_block_after_labels (region->exit);
region->exit = exit_edge->dest;
body_entry_bb = entry_edge->dest;
body_exit_bb = exit_edge->src;
/* Move all copy-in statements to the right place. */
copy_in_bb = split_edge (entry_edge);
entry_edge = single_succ_edge (copy_in_bb);
dest = gsi_last_bb (copy_in_bb);
src = gsi_start_bb (entry_edge->dest);
while (!gsi_end_p (src))
if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN)
gsi_move_after (&src, &dest);
else
gsi_next (&src);
/* Add this task instance to the task node. */
gsi = gsi_last_bb (copy_in_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_TASK_ADD_INSTANCE];
stmt = gimple_build_call (fn, 1, task_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Build initialization region. */
condition_bb = split_edge (entry_edge);
entry_edge = single_succ_edge (condition_bb);
initialization_bb = split_edge (entry_edge);
entry_edge = single_succ_edge (initialization_bb);
join_bb = split_edge (entry_edge);
remove_edge (find_edge (condition_bb, initialization_bb));
make_edge (condition_bb, initialization_bb,
EDGE_TRUE_VALUE);
make_edge (condition_bb, join_bb, EDGE_FALSE_VALUE);
gsi = gsi_last_bb (condition_bb);
tmp = create_tmp_var (boolean_type_node, "initialization_region");
stmt = gimple_build_call (built_in_decls[BUILT_IN_GOMP_SINGLE_START], 0);
gimple_call_set_lhs (stmt, tmp);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
stmt = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
NULL_TREE, NULL_TREE);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
sinfo->initialization_bb = initialization_bb;
sinfo->sese_body_entry = single_succ_edge (join_bb);
sinfo->post_initialization_bb = split_edge (single_succ_edge (sinfo->initialization_bb));
/* Build finalization region. */
condition_bb = split_edge (exit_edge);
exit_edge = single_succ_edge (condition_bb);
finalization_bb = split_edge (exit_edge);
remove_edge (find_edge (condition_bb, finalization_bb));
make_edge (condition_bb, finalization_bb,
EDGE_TRUE_VALUE);
make_edge (condition_bb, region->exit, EDGE_FALSE_VALUE);
gsi = gsi_last_bb (condition_bb);
tmp = create_tmp_var (boolean_type_node, "finalization_region");
stmt = gimple_build_call (built_in_decls[BUILT_IN_GOMP_SINGLE_START], 0);
gimple_call_set_lhs (stmt, tmp);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
stmt = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
NULL_TREE, NULL_TREE);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
sinfo->finalization_bb = finalization_bb;
sinfo->sese_body_exit = single_pred_edge (condition_bb);
/* FIXME-perf: Force a barrier just before finalizing ... */
gsi = gsi_start_bb (condition_bb);
fn = built_in_decls[BUILT_IN_GOMP_BARRIER];
stmt = gimple_build_call (fn, 0);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
/* Issue stream creation and connection calls for all
streams being used within the taskgraph bound to this
parallel region. */
for (i = 0; VEC_iterate (stream_p, sinfo->streams, i, stream); ++i)
{
tree type_size = TYPE_SIZE_UNIT (stream->element_type);
tree horizon_size = build_int_cst (size_type_node,
HORIZON);
gsi = gsi_last_bb (sinfo->initialization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_STREAM];
stmt = gimple_build_call (fn, 2, type_size, horizon_size);
gimple_call_set_lhs (stmt, stream->stream);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* FIXME: this needs to take into account
replication as well ... we may want to move this
in the task call itself if we want dynamic values
as well. */
for (j = 0; VEC_iterate (view_p, stream->rviews, j, view); ++j)
{
fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS];
stmt = gimple_build_call (fn, 4, stream->stream,
view->sinfo->num_instances,
integer_zero_node,
boolean_false_node);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
for (j = 0; VEC_iterate (view_p, stream->wviews, j, view); ++j)
{
fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS];
stmt = gimple_build_call (fn, 4, stream->stream,
integer_zero_node,
view->sinfo->num_instances,
boolean_false_node);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Connect any firstprivate write views on this
stream. As these are the master views, they only
get one instance. */
for (j = 0; VEC_iterate (view_p, stream->fpviews, j, view); ++j)
{
tree view_decl = lookup_var (view->view, sinfo->stmt);
/* Only one firstprivate view per stream possible. */
gcc_assert (j == 0);
/* If there are firstprivate views, there should be no write views. */
gcc_assert (VEC_empty (view_p, stream->wviews));
fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS];
stmt = gimple_build_call (fn, 4, stream->stream,
integer_zero_node,
integer_one_node,
boolean_false_node);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW];
stmt = gimple_build_call (fn, 3, task_decl, stream->stream, view_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Set the final count flag for the number of
expected views. */
gsi = gsi_last_bb (sinfo->post_initialization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS];
stmt = gimple_build_call (fn, 4, stream->stream,
integer_zero_node,
integer_zero_node,
boolean_true_node);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Issue wait until connected call in the parallel
region. We add a separate BB after the
initialization one to ensure tasks are created before
this call. */
gsi = gsi_last_bb (sinfo->post_initialization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_WAIT_UNTIL_CONNECTED];
stmt = gimple_build_call (fn, 1, task_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
gsi = gsi_start_bb (single_succ (sinfo->post_initialization_bb));
fn = built_in_decls[BUILT_IN_GOMP_BARRIER];
stmt = gimple_build_call (fn, 0);
gsi_insert_before (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Issue GOMP_stream_init call. */
gsi = gsi_last_bb (split_edge (single_pred_edge (sinfo->initialization_bb)));
fn = built_in_decls[BUILT_IN_GOMP_STREAM_INIT];
stmt = gimple_build_call (fn, 0);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Issue termination call to task exit. */
gsi = gsi_last_bb (sinfo->finalization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_TASK_EXIT];
stmt = gimple_build_call (fn, 1, task_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_EXIT];
stmt = gimple_build_call (fn, 0);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
gsi = gsi_start_bb (region->exit);
fn = built_in_decls[BUILT_IN_GOMP_BARRIER];
stmt = gimple_build_call (fn, 0);
gsi_insert_before (&gsi, stmt, GSI_CONTINUE_LINKING);
}
break;
case GIMPLE_OMP_TASK:
{
streamization_info_p init_sinfo =
get_outermost_parallel_streamization_info (region);
streamization_info_p task = get_streamization_info (region);
gimple_stmt_iterator src, dest;
gimple_stmt_iterator gsi;
gimple stmt;
view_p view;
int i;
tree fn, tmp;
tree task_decl = lookup_var (task->task_decl, task->stmt);
if (task->is_pre_task)
{
gcc_assert (VEC_length (view_p, task->wviews) == 1);
gcc_assert (VEC_length (view_p, task->rviews) == 0);
view = VEC_index (view_p, task->wviews, 0);
dest = gsi_last_bb (init_sinfo->initialization_bb);
src = gsi_start_bb (task->region->entry);
while (!gsi_end_p (src))
if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN)
{
gimple src_stmt = gsi_stmt (src);
tree rhs, base_var, parallel_var;
gimple copy_region_stmt = get_streamization_info (region->outer)->stmt;
/* FIXME ... */
if (!gimple_assign_single_p (src_stmt))
continue;
rhs = gimple_assign_rhs1 (src_stmt);
base_var = reverse_lookup_var (rhs, copy_region_stmt);
if (base_var)
{
parallel_var = lookup_var (base_var, init_sinfo->stmt);
gimple_assign_set_rhs1 (src_stmt, parallel_var);
}
gsi_move_after (&src, &dest);
gimple_set_block (src_stmt, NULL_TREE);
}
else
gsi_next (&src);
/* Find entry and exit edges for the task's body. */
task->sese_body_entry = single_succ_edge (task->region->entry);
gsi = gsi_last_bb (task->region->exit);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
gsi_prev (&gsi);
stmt = (gsi_end_p (gsi)) ? NULL : gsi_stmt (gsi);
task->sese_body_exit = split_block (task->region->exit, stmt);
task->region->exit = task->sese_body_exit->dest;
/* Build initialization block. */
task->initialization_bb = split_edge (task->sese_body_entry);
task->sese_body_entry = single_succ_edge (task->initialization_bb);
/* Before we can afford to loopify the task body, we
bring all COPY_IN statements to the initialization
block. */
src = gsi_start_bb (task->sese_body_entry->dest);
dest = gsi_start_bb (task->initialization_bb);
while (!gsi_end_p (src))
if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN)
gsi_move_before (&src, &dest);
//gsi_remove (&src, true);
else
gsi_next (&src);
/* Get an access pointer in the stream (we assume enough
space is available) and convert the view_access. We
don't synchronize for now as this should happen in
single threaded code before the other producer or any
consumer can start. */
gsi = gsi_last_bb (task->initialization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_PRE];
stmt = gimple_build_call (fn, 2, view->stream->stream, get_view_burst_size (view, &gsi));
gimple_call_set_lhs (stmt, view->buffer_pointer);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Replace all accesses to the view's variable with
accesses to the stream buffer. */
convert_view_to_var (view, task->sese_body_entry->dest, task->sese_body_exit->src);
break;
}
/* Issue runtime calls in the initialization and
finalization areas of the outermost enclosing
parallel region. */
gsi = gsi_last_bb (init_sinfo->initialization_bb);
/* yyyy */
#if 0
/* Create one instance of the task data structure. */
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_TASK];
stmt = gimple_build_call (fn, 2, lookup_var (task->activation_counter_p, init_sinfo->stmt),
lookup_var (task->termination_flag_p, init_sinfo->stmt));
gimple_call_set_lhs (stmt, lookup_var (task->task_decl, init_sinfo->stmt));
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
#endif
/* As the tasks will only be created once, we need to
hoist the task creation calls as well as all the
necessary sender-side copy-in statements in the
outermost enclosing parallel region's initialization
BB. We also need to remap the variables according to
the new context (for activation counters, termination
flags or any other artificial firstprivate clauses
that we introduced). */
dest = gsi_last_bb (init_sinfo->initialization_bb);
src = gsi_start_bb (task->region->entry);
while (!gsi_end_p (src))
if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN)
{
gimple src_stmt = gsi_stmt (src);
tree rhs, base_var, parallel_var;
gimple copy_region_stmt = get_streamization_info (region->outer)->stmt;
/* FIXME ... */
if (!gimple_assign_single_p (src_stmt))
continue;
rhs = gimple_assign_rhs1 (src_stmt);
base_var = reverse_lookup_var (rhs, copy_region_stmt);
if (base_var)
{
parallel_var = lookup_var (base_var, init_sinfo->stmt);
gimple_assign_set_rhs1 (src_stmt, parallel_var);
}
gsi_move_after (&src, &dest);
gimple_set_block (src_stmt, NULL_TREE);
}
else
gsi_next (&src);
/* Set termination flag in finalization_bb. */
gsi = gsi_start_bb (init_sinfo->finalization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_SET_TASK_TERMINATION_FLAG];
stmt = gimple_build_call (fn, 1, lookup_var (task->task_decl, init_sinfo->stmt));
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
/* Find entry and exit edges for the task's body. */
task->sese_body_entry = single_succ_edge (task->region->entry);
gsi = gsi_last_bb (task->region->exit);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
gsi_prev (&gsi);
stmt = (gsi_end_p (gsi)) ? NULL : gsi_stmt (gsi);
task->sese_body_exit = split_block (task->region->exit, stmt);
task->region->exit = task->sese_body_exit->dest;
/* Build initialization and finalization blocks. */
task->initialization_bb = split_edge (task->sese_body_entry);
task->sese_body_entry = single_succ_edge (task->initialization_bb);
task->finalization_bb = split_edge (task->sese_body_exit);
task->sese_body_exit = single_pred_edge (task->finalization_bb);
/* Before we can afford to loopify the task body, we
bring all COPY_IN statements to the initialization
block. */
src = gsi_start_bb (task->sese_body_entry->dest);
dest = gsi_start_bb (task->initialization_bb);
while (!gsi_end_p (src))
if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN)
gsi_move_before (&src, &dest);
else
gsi_next (&src);
/* Add this task instance to the task node. */
gsi = gsi_last_bb (task->initialization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_TASK_ADD_INSTANCE];
stmt = gimple_build_call (fn, 1, task_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Issue the proper initialization and finalization
calls for the task and its views. */
gsi = gsi_last_bb (task->initialization_bb);
for (i = 0; VEC_iterate (view_p, task->rviews, i, view); ++i)
{
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW];
stmt = gimple_build_call (fn, 2, view->view_size, get_view_burst_size (view, &gsi));
gimple_call_set_lhs (stmt, view->view);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW];
stmt = gimple_build_call (fn, 3, task_decl, view->stream->stream, view->view);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
for (i = 0; VEC_iterate (view_p, task->wviews, i, view); ++i)
{
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW];
stmt = gimple_build_call (fn, 2, view->view_size, get_view_burst_size (view, &gsi));
gimple_call_set_lhs (stmt, view->view);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW];
stmt = gimple_build_call (fn, 3, task_decl, view->stream->stream, view->view);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
}
/* Must be last in the initialization_bb. */
fn = built_in_decls[BUILT_IN_GOMP_STREAM_WAIT_UNTIL_CONNECTED];
stmt = gimple_build_call (fn, 1, task_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
gsi = gsi_last_bb (task->finalization_bb);
fn = built_in_decls[BUILT_IN_GOMP_STREAM_TASK_EXIT];
stmt = gimple_build_call (fn, 1, task_decl);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
/* Build local (to an aggregated iteration)
initialization and finalization blocks. These are
part of the task body. */
task->local_initialization_bb = split_edge (task->sese_body_entry);
task->sese_body_entry = single_pred_edge (task->local_initialization_bb);
task->local_finalization_bb = split_edge (task->sese_body_exit);
task->sese_body_exit = single_succ_edge (task->local_finalization_bb);
/* Issue register load instructions for input views. */
gsi = gsi_last_bb (task->local_initialization_bb);
for (i = 0; VEC_iterate (view_p, task->rviews, i, view); ++i)
convert_view_to_var (view, task->local_initialization_bb, task->local_finalization_bb);
/* Issue register store instructions for output views. */
gsi = gsi_last_bb (task->local_finalization_bb);
for (i = 0; VEC_iterate (view_p, task->wviews, i, view); ++i)
convert_view_to_var (view, task->local_initialization_bb, task->local_finalization_bb);
/* Duplicate the task's body, then loopify each version,
one as a static loop, the other dynamic. We start
from and entry and an exit edge. */
{
basic_block body_entry_1, body_exit_1;
basic_block body_entry_2, body_exit_2;
edge entry, exit;
VEC (basic_block, heap) *bbs = NULL;
basic_block bb, after, new_bb, dom_bb, tmp_bb;
int i;
edge e;
body_entry_1 = task->sese_body_entry->dest;
body_exit_1 = task->sese_body_exit->src;
/* Keep track of this block as we will redirect the
edge. */
tmp_bb = task->sese_body_exit->dest;
initialize_original_copy_tables ();
/* Find all blocks we wish to duplicate, then
duplicate them all. */
VEC_safe_push (basic_block, heap, bbs, body_entry_1);
gather_blocks_in_sese_region (body_entry_1, body_exit_1, &bbs);
after = body_exit_1;
for (i = 0; VEC_iterate (basic_block, bbs, i, bb); ++i)
{
after = duplicate_block (bb, NULL, after);
bb->flags |= BB_DUPLICATED;
}
/* Set dominators. */
for (i = 0; VEC_iterate (basic_block, bbs, i, bb); ++i)
{
new_bb = get_bb_copy (bb);
dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
if (dom_bb->flags & BB_DUPLICATED)
{
dom_bb = get_bb_copy (dom_bb);
set_immediate_dominator (CDI_DOMINATORS, new_bb, dom_bb);
}
}
/* Redirect edges. */
for (i = 0; VEC_iterate (basic_block, bbs, i, bb); ++i)
{
edge_iterator ei;
new_bb = get_bb_copy (bb);
FOR_EACH_EDGE (e, ei, new_bb->succs)
{
if (!(e->dest->flags & BB_DUPLICATED))
continue;
redirect_edge_and_branch_force (e, get_bb_copy (e->dest));
}
}
body_entry_2 = get_bb_copy (body_entry_1);
body_exit_2 = get_bb_copy (body_exit_1);
redirect_edge_and_branch_force (task->sese_body_exit,
body_entry_2);
set_immediate_dominator (CDI_DOMINATORS, body_entry_2,
body_exit_1);
set_immediate_dominator (CDI_DOMINATORS, tmp_bb,
body_exit_2);
task->sese_body_exit = single_succ_edge (body_exit_2);
/* Clear information about duplicates. */
for (i = 0; VEC_iterate (basic_block, bbs, i, bb); ++i)
bb->flags &= ~BB_DUPLICATED;
free_original_copy_tables ();
/* Get available work, build stall/update and
commit/release calls and choose execution loop. */
build_task_control_loop (task);
select_task_chunk_loop (task, body_entry_1, body_exit_1,
body_entry_2, body_exit_2);
/* Build loops. */
build_task_chunk_loop (body_entry_1, body_exit_1, NULL_TREE, task);
build_task_chunk_loop (body_entry_2, body_exit_2, task->avl_work, task);
}
}
break;
default:
break;
}
prepare_streaming_context (region->inner);
region = region->next;
}
}
/* Unlink REGION from the list of regions nested in its parent and
   release it with free_omp_region_1.  REGION must have an enclosing
   region.  If REGION is not found among its parent's children, nothing
   is done.  */

static void
delete_omp_region (omp_region_p region)
{
  omp_region_p outer = region->outer;
  omp_region_p *link;

  /* We assume outer exists.  */
  gcc_assert (outer);

  /* LINK points at whichever pointer currently leads to the sibling
     being examined (OUTER->inner for the first child, the previous
     sibling's NEXT field otherwise), so the first child and later
     children are unlinked and freed by the same code.  */
  for (link = &outer->inner; *link; link = &(*link)->next)
    if (*link == region)
      {
        *link = region->next;
        free_omp_region_1 (region);
        return;
      }
}
/* Detect and mark "pre" tasks in REGION and its following siblings.

   For every streaming region:
     1. collect the streams that have exactly two write views (at most two
        are supported, which is asserted);
     2. for each such stream, pick the write view whose producer task has
        no OMP_CLAUSE_INPUT clause, record that task as a "pre" task and
        drop the view from the stream's write views;
     3. remove every "pre" task from the streamization info's task list
        and set its is_pre_task flag.

   Non-streaming regions are only descended into; the recursion stops at
   the first streaming region on each path.  */

static void
expand_pre_operators (omp_region_p region)
{
  while (region)
    {
      if (is_streaming_region (region))
        {
          streamization_info_p sinfo = get_streamization_info (region);
          task_p task;
          stream_p stream;
          view_p view;
          int i, j;
          VEC (stream_p,heap) *pre_streams = NULL;
          VEC (task_p,heap) *pre_tasks = NULL;

          gcc_assert (sinfo
                      == get_outermost_parallel_streamization_info (region));

          /* Find streams with "pre" operators.  */
          for (i = 0; VEC_iterate (stream_p, sinfo->streams, i, stream); ++i)
            {
              /* If more than one distinct write view is present, this
                 stream has a "pre" operator (FIXME: for now).  */
              gcc_assert (VEC_length (view_p, stream->wviews) <= 2);

              if (VEC_length (view_p, stream->wviews) == 2)
                VEC_safe_push (stream_p, heap, pre_streams, stream);
            }

          /* Determine which producer task is "pre"-initializing data
             in the stream.  */
          for (i = 0; VEC_iterate (stream_p, pre_streams, i, stream); ++i)
            {
              for (j = 0; VEC_iterate (view_p, stream->wviews, j, view); ++j)
                {
                  task_p producer = view->sinfo;

                  /* We assume that the pre_task is not part of a
                     pipeline: there should be no "input" clauses (for
                     now not even promoted firstprivate clauses), but
                     we assume the "other" task will have at least
                     one.  */
                  if (!find_omp_clause (gimple_omp_task_clauses (producer->stmt),
                                        OMP_CLAUSE_INPUT))
                    {
                      VEC_safe_push (task_p, heap, pre_tasks, producer);
                      VEC_unordered_remove (view_p, stream->wviews, j);
                      /* The vector was just modified; stop iterating it.  */
                      break;
                    }
                }
            }

          /* Inline the pre_task's body (don't expand) and issue
             proper runtime calls.  */
          for (i = 0; VEC_iterate (task_p, pre_tasks, i, task); ++i)
            {
              task_p r_task;

              /* Remove the task from the list of tasks.  The index only
                 advances when nothing was removed, because an ordered
                 removal shifts the next element into slot J.  */
              for (j = 0; VEC_iterate (task_p, sinfo->tasks, j, r_task);)
                {
                  if (r_task == task)
                    VEC_ordered_remove (task_p, sinfo->tasks, j);
                  else
                    j++;
                }

              /* FIXME: the task directive, the corresponding omp_return
                 and the task's omp_region are currently left in place;
                 the task is only flagged here.  */
              task->is_pre_task = true;
            }

          /* The work lists are local to this region; release them so
             they are not leaked for every streaming region visited.  */
          VEC_free (stream_p, heap, pre_streams);
          VEC_free (task_p, heap, pre_tasks);
        }

      /* Only traverse up to the outermost enclosing parallel
         regions.  */
      if (!is_streaming_region (region) && region->inner)
        expand_pre_operators (region->inner);

      region = region->next;
    }
}
/* Run the stream-related preparation steps on the region tree rooted at
REGION, in this order: retrieve_streaming_info, expand_pre_operators,
prepare_streaming_context. Called from execute_expand_omp before
expand_omp. */
static void
prepare_stream_expansion (omp_region_p region)
{
retrieve_streaming_info (region);
expand_pre_operators (region);
prepare_streaming_context (region);
}
/* Main entry point for expanding OMP-GIMPLE into runtime calls.

   Builds the OMP region tree; when one exists it is optionally dumped,
   exit barriers are removed, the streaming preparation runs, the regions
   are expanded, the CFG is cleaned up and the region tree is freed.
   Always returns 0.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (root_omp_region)
    {
      if (dump_file)
        {
          fprintf (dump_file, "\nOMP region tree\n\n");
          dump_omp_region (dump_file, root_omp_region, 0);
          fprintf (dump_file, "\n");
        }

      remove_exit_barriers (root_omp_region);
      prepare_stream_expansion (root_omp_region);
      expand_omp (root_omp_region);

      cleanup_tree_cfg ();
      free_omp_regions ();
    }

  return 0;
}
/* OMP expansion -- the default pass, run before creation of SSA form. */
static bool
gate_expand_omp (void)
{
return (flag_openmp != 0 && errorcount == 0);
}
/* Pass descriptor for the OMP expansion pass ("ompexp"): gated by
gate_expand_omp, executed by execute_expand_omp, requires
PROP_gimple_any and dumps the function when it finishes. */
struct gimple_opt_pass pass_expand_omp =
{
{
GIMPLE_PASS,
"ompexp", /* name */
gate_expand_omp, /* gate */
execute_expand_omp, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_NONE, /* tv_id */
PROP_gimple_any, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func /* todo_flags_finish */
}
};
/* Routines to lower OpenMP directives into OMP-GIMPLE. */
/* Lower the OpenMP sections directive in the current statement in GSI_P.
CTX is the enclosing OMP context for the current statement.
The directive is replaced by a GIMPLE_BIND whose body is, in order:
the input-clause code, the sections statement itself, a sections-switch
statement, a nested bind holding every lowered section, an OMP continue
on the control variable, the reduction code, the destructor list and a
final OMP return whose flag reflects the presence of 'nowait'. */
static void
lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
tree block, control;
gimple_stmt_iterator tgsi;
unsigned i, len;
gimple stmt, new_stmt, bind, t;
gimple_seq ilist, dlist, olist, new_body, body;
struct gimplify_ctx gctx;
stmt = gsi_stmt (*gsi_p);
push_gimplify_context (&gctx);
dlist = NULL;
ilist = NULL;
lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
&ilist, &dlist, ctx);
/* Count the statements in the sections body so the last one can be
recognized below. */
tgsi = gsi_start (gimple_omp_body (stmt));
for (len = 0; !gsi_end_p (tgsi); len++, gsi_next (&tgsi))
continue;
/* Flatten every section into BODY: the section statement, its lowered
body, and a (non-nowait) OMP return. */
tgsi = gsi_start (gimple_omp_body (stmt));
body = NULL;
for (i = 0; i < len; i++, gsi_next (&tgsi))
{
omp_context *sctx;
gimple sec_start;
sec_start = gsi_stmt (tgsi);
sctx = maybe_lookup_ctx (sec_start);
gcc_assert (sctx);
gimple_seq_add_stmt (&body, sec_start);
lower_omp (gimple_omp_body (sec_start), sctx);
gimple_seq_add_seq (&body, gimple_omp_body (sec_start));
gimple_omp_set_body (sec_start, NULL);
/* Only the last section receives the lastprivate code and is marked
as the last one. */
if (i == len - 1)
{
gimple_seq l = NULL;
lower_lastprivate_clauses (gimple_omp_sections_clauses (stmt), NULL,
&l, ctx);
gimple_seq_add_seq (&body, l);
gimple_omp_section_set_last (sec_start);
}
gimple_seq_add_stmt (&body, gimple_build_omp_return (false));
}
/* Inner bind: holds the flattened sections. */
block = make_node (BLOCK);
bind = gimple_build_bind (NULL, body, block);
olist = NULL;
lower_reduction_clauses (gimple_omp_sections_clauses (stmt), &olist, ctx);
/* Outer bind: this is the statement that replaces the directive. */
block = make_node (BLOCK);
new_stmt = gimple_build_bind (NULL, NULL, block);
pop_gimplify_context (new_stmt);
gimple_bind_append_vars (new_stmt, ctx->block_vars);
BLOCK_VARS (block) = gimple_bind_vars (bind);
if (BLOCK_VARS (block))
TREE_USED (block) = 1;
/* Sequence all the fragments into the outer bind's body. */
new_body = NULL;
gimple_seq_add_seq (&new_body, ilist);
gimple_seq_add_stmt (&new_body, stmt);
gimple_seq_add_stmt (&new_body, gimple_build_omp_sections_switch ());
gimple_seq_add_stmt (&new_body, bind);
/* The ".section" temporary is both operands of the OMP continue and is
recorded as the control variable of the sections statement. */
control = create_tmp_var (unsigned_type_node, ".section");
t = gimple_build_omp_continue (control, control);
gimple_omp_sections_set_control (stmt, control);
gimple_seq_add_stmt (&new_body, t);
gimple_seq_add_seq (&new_body, olist);
gimple_seq_add_seq (&new_body, dlist);
new_body = maybe_catch_exception (new_body);
t = gimple_build_omp_return
(!!find_omp_clause (gimple_omp_sections_clauses (stmt),
OMP_CLAUSE_NOWAIT));
gimple_seq_add_stmt (&new_body, t);
gimple_bind_set_body (new_stmt, new_body);
gimple_omp_set_body (stmt, NULL);
gsi_replace (gsi_p, new_stmt, true);
}
/* A subroutine of lower_omp_single.  Expand the simple form of
   a GIMPLE_OMP_SINGLE, i.e. one without a copyprivate clause:

	if (GOMP_single_start ())
	  BODY;
	[ GOMP_barrier (); ]	-> unless 'nowait' is present.

   The call, the test, the body and the two labels are appended to *PRE_P;
   nothing for the barrier is emitted here.

   FIXME.  It may be better to delay expanding the logic of this until
   pass_expand_omp.  The expanded logic may make the job more difficult
   to a synchronization analysis pass.  */

static void
lower_omp_single_simple (gimple single_stmt, gimple_seq *pre_p)
{
  location_t loc = gimple_location (single_stmt);
  tree body_label = create_artificial_label (loc);
  tree skip_label = create_artificial_label (loc);
  tree start_fn = built_in_decls[BUILT_IN_GOMP_SINGLE_START];
  tree result, truth;
  gimple start_call, branch;

  /* result = GOMP_single_start ();  */
  result = create_tmp_var (TREE_TYPE (TREE_TYPE (start_fn)), NULL);
  start_call = gimple_build_call (start_fn, 0);
  gimple_call_set_lhs (start_call, result);
  gimple_seq_add_stmt (pre_p, start_call);

  /* if (result == true) goto body_label; else goto skip_label;  */
  truth = fold_convert_loc (loc, TREE_TYPE (result), boolean_true_node);
  branch = gimple_build_cond (EQ_EXPR, result, truth,
                              body_label, skip_label);
  gimple_seq_add_stmt (pre_p, branch);

  /* body_label: BODY; skip_label:  */
  gimple_seq_add_stmt (pre_p, gimple_build_label (body_label));
  gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
  gimple_seq_add_stmt (pre_p, gimple_build_label (skip_label));
}
/* A subroutine of lower_omp_single. Expand the simple form of
a GIMPLE_OMP_SINGLE, with a copyprivate clause:
#pragma omp single copyprivate (a, b, c)
Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
{
if ((copyout_p = GOMP_single_copy_start ()) == NULL)
{
BODY;
copyout.a = a;
copyout.b = b;
copyout.c = c;
GOMP_single_copy_end (&copyout);
}
else
{
a = copyout_p->a;
b = copyout_p->b;
c = copyout_p->c;
}
GOMP_barrier ();
}
The generated statements are appended to *PRE_P. CTX supplies the
record type and receives the sender/receiver temporaries created here.
No barrier call is emitted by this function itself.
FIXME. It may be better to delay expanding the logic of this until
pass_expand_omp. The expanded logic may make the job more difficult
to a synchronization analysis pass. */
static void
lower_omp_single_copy (gimple single_stmt, gimple_seq *pre_p, omp_context *ctx)
{
tree ptr_type, t, l0, l1, l2;
gimple_seq copyin_seq;
location_t loc = gimple_location (single_stmt);
/* ".omp_copy_o" is the record that is filled in and passed out;
".omp_copy_i" is the pointer returned by GOMP_single_copy_start. */
ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
ptr_type = build_pointer_type (ctx->record_type);
ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
/* L0: executes the body and copies out; L1: copies in; L2: join point. */
l0 = create_artificial_label (loc);
l1 = create_artificial_label (loc);
l2 = create_artificial_label (loc);
t = build_call_expr_loc (loc, built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START], 0);
t = fold_convert_loc (loc, ptr_type, t);
gimplify_assign (ctx->receiver_decl, t, pre_p);
/* A null receiver selects the L0 path, anything else the L1 path. */
t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
build_int_cst (ptr_type, 0));
t = build3 (COND_EXPR, void_type_node, t,
build_and_jump (&l0), build_and_jump (&l1));
gimplify_and_add (t, pre_p);
gimple_seq_add_stmt (pre_p, gimple_build_label (l0));
gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
/* The copy-out code goes straight into *PRE_P (after the body); the
copy-in code is collected separately and emitted after L1. */
copyin_seq = NULL;
lower_copyprivate_clauses (gimple_omp_single_clauses (single_stmt), pre_p,
&copyin_seq, ctx);
t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
t = build_call_expr_loc (loc, built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END],
1, t);
gimplify_and_add (t, pre_p);
t = build_and_jump (&l2);
gimplify_and_add (t, pre_p);
gimple_seq_add_stmt (pre_p, gimple_build_label (l1));
gimple_seq_add_seq (pre_p, copyin_seq);
gimple_seq_add_stmt (pre_p, gimple_build_label (l2));
}
/* Expand code for an OpenMP single directive.

   The GIMPLE_OMP_SINGLE in *GSI_P is replaced by a GIMPLE_BIND whose body
   is, in order: the code produced by lower_rec_input_clauses, the
   directive itself, the lowered body as wrapped by lower_omp_single_copy
   (when CTX has a record type) or lower_omp_single_simple (otherwise),
   the destructor list, and an OMP return whose flag reflects the presence
   of a 'nowait' clause.  CTX is the OMP context of the directive.  */

static void
lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
  tree block;
  gimple t, bind, single_stmt = gsi_stmt (*gsi_p);
  gimple_seq bind_body, dlist;
  struct gimplify_ctx gctx;

  push_gimplify_context (&gctx);

  /* Start both sequences out empty, exactly as lower_omp_sections and
     lower_omp_for do, instead of handing an indeterminate DLIST to
     lower_rec_input_clauses.  */
  bind_body = NULL;
  dlist = NULL;
  lower_rec_input_clauses (gimple_omp_single_clauses (single_stmt),
                           &bind_body, &dlist, ctx);
  lower_omp (gimple_omp_body (single_stmt), ctx);

  gimple_seq_add_stmt (&bind_body, single_stmt);

  if (ctx->record_type)
    lower_omp_single_copy (single_stmt, &bind_body, ctx);
  else
    lower_omp_single_simple (single_stmt, &bind_body);

  /* The body now lives in BIND_BODY; detach it from the directive.  */
  gimple_omp_set_body (single_stmt, NULL);

  gimple_seq_add_seq (&bind_body, dlist);

  bind_body = maybe_catch_exception (bind_body);

  t = gimple_build_omp_return
        (!!find_omp_clause (gimple_omp_single_clauses (single_stmt),
                            OMP_CLAUSE_NOWAIT));
  gimple_seq_add_stmt (&bind_body, t);

  block = make_node (BLOCK);
  bind = gimple_build_bind (NULL, bind_body, block);

  pop_gimplify_context (bind);

  gimple_bind_append_vars (bind, ctx->block_vars);
  BLOCK_VARS (block) = ctx->block_vars;
  gsi_replace (gsi_p, bind, true);
  if (BLOCK_VARS (block))
    TREE_USED (block) = 1;
}
/* Expand code for an OpenMP master directive.

   The directive in *GSI_P is replaced by a GIMPLE_BIND containing the
   directive, a test of omp_get_thread_num () against zero that jumps to
   a label past the body when it fails, the lowered body, that label, and
   a nowait OMP return.  CTX is the OMP context of the directive.  */

static void
lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
  gimple master_stmt = gsi_stmt (*gsi_p);
  location_t loc = gimple_location (master_stmt);
  tree skip_label = NULL;
  tree scope, thread_num, is_master, skip_jump, guard;
  gimple wrapper;
  gimple_seq guard_seq = NULL;
  gimple_seq master_body;
  struct gimplify_ctx gctx;

  push_gimplify_context (&gctx);

  scope = make_node (BLOCK);
  wrapper = gimple_build_bind (NULL, gimple_seq_alloc_with_stmt (master_stmt),
                               scope);

  /* if (omp_get_thread_num () == 0) <fall through>; else goto skip_label;  */
  thread_num
    = build_call_expr_loc (loc, built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
  is_master = build2 (EQ_EXPR, boolean_type_node, thread_num,
                      integer_zero_node);
  skip_jump = build_and_jump (&skip_label);
  guard = build3 (COND_EXPR, void_type_node, is_master, NULL, skip_jump);
  gimplify_and_add (guard, &guard_seq);
  gimple_bind_add_seq (wrapper, guard_seq);

  /* Lower the body, then move it from the directive into the bind.  */
  lower_omp (gimple_omp_body (master_stmt), ctx);
  master_body = maybe_catch_exception (gimple_omp_body (master_stmt));
  gimple_omp_set_body (master_stmt, master_body);
  gimple_bind_add_seq (wrapper, master_body);
  gimple_omp_set_body (master_stmt, NULL);

  gimple_bind_add_stmt (wrapper, gimple_build_label (skip_label));
  gimple_bind_add_stmt (wrapper, gimple_build_omp_return (true));

  pop_gimplify_context (wrapper);

  gimple_bind_append_vars (wrapper, ctx->block_vars);
  BLOCK_VARS (scope) = ctx->block_vars;
  gsi_replace (gsi_p, wrapper, true);
}
/* Expand code for an OpenMP ordered directive.

   The directive in *GSI_P is replaced by a GIMPLE_BIND containing the
   directive, a call to the GOMP_ordered_start builtin, the lowered body,
   a call to the GOMP_ordered_end builtin and a nowait OMP return.  CTX is
   the OMP context of the directive.  */

static void
lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
  gimple ordered_stmt = gsi_stmt (*gsi_p);
  gimple wrapper, runtime_call;
  gimple_seq ordered_body;
  tree scope;
  struct gimplify_ctx gctx;

  push_gimplify_context (&gctx);

  scope = make_node (BLOCK);
  wrapper = gimple_build_bind (NULL, gimple_seq_alloc_with_stmt (ordered_stmt),
                               scope);

  runtime_call
    = gimple_build_call (built_in_decls[BUILT_IN_GOMP_ORDERED_START], 0);
  gimple_bind_add_stmt (wrapper, runtime_call);

  /* Lower the body, then move it from the directive into the bind.  */
  lower_omp (gimple_omp_body (ordered_stmt), ctx);
  ordered_body = maybe_catch_exception (gimple_omp_body (ordered_stmt));
  gimple_omp_set_body (ordered_stmt, ordered_body);
  gimple_bind_add_seq (wrapper, ordered_body);
  gimple_omp_set_body (ordered_stmt, NULL);

  runtime_call
    = gimple_build_call (built_in_decls[BUILT_IN_GOMP_ORDERED_END], 0);
  gimple_bind_add_stmt (wrapper, runtime_call);

  gimple_bind_add_stmt (wrapper, gimple_build_omp_return (true));

  pop_gimplify_context (wrapper);

  gimple_bind_append_vars (wrapper, ctx->block_vars);
  BLOCK_VARS (scope) = gimple_bind_vars (wrapper);
  gsi_replace (gsi_p, wrapper, true);
}
/* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
substitution of a couple of function calls. But in the NAMED case,
requires that languages coordinate a symbol name. It is therefore
best put here in common code. */
/* Maps the name of a named critical section to the variable created for
it; filled lazily by lower_omp_critical so each name gets one variable. */
static GTY((param1_is (tree), param2_is (tree)))
splay_tree critical_name_mutexes;
/* Lower the GIMPLE_OMP_CRITICAL statement in *GSI_P. The statement is
replaced by a GIMPLE_BIND containing the directive, a "lock" call, the
lowered body, an "unlock" call and a nowait OMP return. A named
critical section uses the GOMP_critical_name_start/end builtins with the
address of a per-name variable; an unnamed one uses the argument-less
GOMP_critical_start/end builtins. CTX is the OMP context of the
directive. */
static void
lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
tree block;
tree name, lock, unlock;
gimple stmt = gsi_stmt (*gsi_p), bind;
location_t loc = gimple_location (stmt);
gimple_seq tbody;
struct gimplify_ctx gctx;
name = gimple_omp_critical_name (stmt);
if (name)
{
tree decl;
splay_tree_node n;
/* The name -> variable table is created on first use. */
if (!critical_name_mutexes)
critical_name_mutexes
= splay_tree_new_ggc (splay_tree_compare_pointers);
n = splay_tree_lookup (critical_name_mutexes, (splay_tree_key) name);
if (n == NULL)
{
char *new_str;
/* First use of this name: create a public, static, common variable
called ".gomp_critical_user_<name>" and remember it. */
decl = create_tmp_var_raw (ptr_type_node, NULL);
new_str = ACONCAT ((".gomp_critical_user_",
IDENTIFIER_POINTER (name), NULL));
DECL_NAME (decl) = get_identifier (new_str);
TREE_PUBLIC (decl) = 1;
TREE_STATIC (decl) = 1;
DECL_COMMON (decl) = 1;
DECL_ARTIFICIAL (decl) = 1;
DECL_IGNORED_P (decl) = 1;
varpool_finalize_decl (decl);
splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
(splay_tree_value) decl);
}
else
decl = (tree) n->value;
/* Both runtime calls receive the address of the per-name variable. */
lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
lock = build_call_expr_loc (loc, lock, 1, build_fold_addr_expr_loc (loc, decl));
unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
unlock = build_call_expr_loc (loc, unlock, 1,
build_fold_addr_expr_loc (loc, decl));
}
else
{
lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
lock = build_call_expr_loc (loc, lock, 0);
unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
unlock = build_call_expr_loc (loc, unlock, 0);
}
push_gimplify_context (&gctx);
block = make_node (BLOCK);
bind = gimple_build_bind (NULL, gimple_seq_alloc_with_stmt (stmt), block);
/* Gimplify the lock call onto the end of the bind's body. */
tbody = gimple_bind_body (bind);
gimplify_and_add (lock, &tbody);
gimple_bind_set_body (bind, tbody);
/* Lower the body, then move it from the directive into the bind. */
lower_omp (gimple_omp_body (stmt), ctx);
gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
gimple_bind_add_seq (bind, gimple_omp_body (stmt));
gimple_omp_set_body (stmt, NULL);
/* Gimplify the unlock call onto the end of the bind's body. */
tbody = gimple_bind_body (bind);
gimplify_and_add (unlock, &tbody);
gimple_bind_set_body (bind, tbody);
gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
pop_gimplify_context (bind);
gimple_bind_append_vars (bind, ctx->block_vars);
BLOCK_VARS (block) = gimple_bind_vars (bind);
gsi_replace (gsi_p, bind, true);
}
/* A subroutine of lower_omp_for.  Generate code to emit the predicate
   for a lastprivate clause.  Given a loop control predicate of (V
   cond N2), the clause is gated on (!(V cond N2)), or on (V == N2) when
   the step is known to be 1 or -1.  The lowered form is placed in front
   of what *DLIST already holds, and the iterator initialization is
   appended to *BODY_P.  Nothing is changed when the clauses lower to an
   empty sequence.  FD describes the loop and CTX is its OMP context.  */

static void
lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
                           gimple_seq *dlist, struct omp_context *ctx)
{
  enum tree_code exit_code;
  tree exit_test, init_value;
  gimple_seq guarded = NULL;

  /* Negate the loop condition.  */
  if (fd->loop.cond_code == LT_EXPR)
    exit_code = GE_EXPR;
  else
    exit_code = LE_EXPR;

  /* When possible, use a strict equality expression.  This can let VRP
     type optimizations deduce the value and remove a copy.  */
  if (host_integerp (fd->loop.step, 0))
    {
      HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->loop.step);

      if (step == 1 || step == -1)
        exit_code = EQ_EXPR;
    }

  exit_test = build2 (exit_code, boolean_type_node, fd->loop.v, fd->loop.n2);
  lower_lastprivate_clauses (gimple_omp_for_clauses (fd->for_stmt),
                             exit_test, &guarded, ctx);

  if (gimple_seq_empty_p (guarded))
    return;

  /* The lastprivate code runs before the destructors already queued.  */
  gimple_seq_add_seq (&guarded, *dlist);
  *dlist = guarded;

  /* Optimize: v = 0; is usually cheaper than v = some_other_constant.  */
  if (exit_code == EQ_EXPR
      && host_integerp (fd->loop.n2, 0)
      && !integer_zerop (fd->loop.n2))
    init_value = build_int_cst (TREE_TYPE (fd->loop.v), 0);
  else
    init_value = fd->loop.n1;

  /* Initialize the iterator variable, so that threads that don't execute
     any iterations don't execute the lastprivate clauses by accident.  */
  gimplify_assign (fd->loop.v, init_value, body_p);
}
/* Lower code for an OpenMP loop directive.
The GIMPLE_OMP_FOR in *GSI_P is replaced by a GIMPLE_BIND whose body is,
in order: the input-clause code, the pre-body, temporaries for the
header expressions, the lastprivate iterator initialization, the loop
statement, its lowered body, an OMP continue on the loop variable, the
reduction code, the destructor list and an OMP return. CTX is the OMP
context of the directive. */
static void
lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
tree *rhs_p, block;
struct omp_for_data fd;
gimple stmt = gsi_stmt (*gsi_p), new_stmt;
gimple_seq omp_for_body, body, dlist;
size_t i;
struct gimplify_ctx gctx;
push_gimplify_context (&gctx);
lower_omp (gimple_omp_for_pre_body (stmt), ctx);
lower_omp (gimple_omp_body (stmt), ctx);
block = make_node (BLOCK);
new_stmt = gimple_build_bind (NULL, NULL, block);
/* Move declaration of temporaries in the loop body before we make
it go away. */
omp_for_body = gimple_omp_body (stmt);
if (!gimple_seq_empty_p (omp_for_body)
&& gimple_code (gimple_seq_first_stmt (omp_for_body)) == GIMPLE_BIND)
{
tree vars = gimple_bind_vars (gimple_seq_first_stmt (omp_for_body));
gimple_bind_append_vars (new_stmt, vars);
}
/* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. */
dlist = NULL;
body = NULL;
lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx);
gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
/* Lower the header expressions. At this point, we can assume that
the header is of the form:
#pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
We just need to make sure that VAL1, VAL2 and VAL3 are lowered
using the .omp_data_s mapping, if needed. */
for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
{
/* Initial value. */
rhs_p = gimple_omp_for_initial_ptr (stmt, i);
if (!is_gimple_min_invariant (*rhs_p))
*rhs_p = get_formal_tmp_var (*rhs_p, &body);
/* Final value. */
rhs_p = gimple_omp_for_final_ptr (stmt, i);
if (!is_gimple_min_invariant (*rhs_p))
*rhs_p = get_formal_tmp_var (*rhs_p, &body);
/* Second operand of the increment expression. */
rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1);
if (!is_gimple_min_invariant (*rhs_p))
*rhs_p = get_formal_tmp_var (*rhs_p, &body);
}
/* Once lowered, extract the bounds and clauses. */
extract_omp_for_data (stmt, &fd, NULL);
lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
gimple_seq_add_stmt (&body, stmt);
gimple_seq_add_seq (&body, gimple_omp_body (stmt));
gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
fd.loop.v));
/* After the loop, add exit clauses. */
lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx);
gimple_seq_add_seq (&body, dlist);
body = maybe_catch_exception (body);
/* Region exit marker goes at the end of the loop body. */
gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
pop_gimplify_context (new_stmt);
gimple_bind_append_vars (new_stmt, ctx->block_vars);
BLOCK_VARS (block) = gimple_bind_vars (new_stmt);
if (BLOCK_VARS (block))
TREE_USED (block) = 1;
gimple_bind_set_body (new_stmt, body);
/* The body and pre-body now live in NEW_STMT; detach them from STMT. */
gimple_omp_set_body (stmt, NULL);
gimple_omp_for_set_pre_body (stmt, NULL);
gsi_replace (gsi_p, new_stmt, true);
}
/* Callback for walk_gimple_seq.  Check whether the walked sequence
   consists of exactly one GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS.

   WI->info points to an int that starts at 0.  It becomes 1 when the
   first such statement is seen and -1 as soon as a second one, or any
   other statement not covered by WALK_SUBSTMTS, is encountered.  Always
   returns NULL.  */

static tree
check_combined_parallel (gimple_stmt_iterator *gsi_p,
                         bool *handled_ops_p,
                         struct walk_stmt_info *wi)
{
  int *state = (int *) wi->info;

  *handled_ops_p = true;

  switch (gimple_code (gsi_stmt (*gsi_p)))
    {
    WALK_SUBSTMTS;

    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SECTIONS:
      if (*state == 0)
        *state = 1;
      else
        *state = -1;
      break;

    default:
      *state = -1;
      break;
    }

  return NULL;
}
/* Context used while building a task copy function: the tree-inline
copy state plus the OMP context of the task being lowered. */
struct omp_taskcopy_context
{
/* This field must be at the beginning, as we do "inheritance": Some
callback functions for tree-inline.c (e.g., task_copyfn_copy_decl)
receive a copy_body_data pointer that is cast back to an
omp_taskcopy_context pointer. */
copy_body_data cb;
/* OMP context of the task whose copy function is being created. */
omp_context *ctx;
};
/* copy_decl callback used while remapping the task copy function's
   record types.  CB is really the leading member of an
   omp_taskcopy_context.  A VAR that has an entry in the task context's
   sfield_map is replaced by a fresh temporary of the same type; any
   other VAR is returned unchanged.  */

static tree
task_copyfn_copy_decl (tree var, copy_body_data *cb)
{
  struct omp_taskcopy_context *tcctx = (struct omp_taskcopy_context *) cb;
  splay_tree_node entry
    = splay_tree_lookup (tcctx->ctx->sfield_map, (splay_tree_key) var);

  if (entry == NULL)
    return var;

  return create_tmp_var (TREE_TYPE (var), NULL);
}
/* Build and return a new RECORD_TYPE that mirrors ORIG_TYPE for use in
the task copy function described by TCCTX. The new type gets a
TYPE_DECL with the same name as ORIG_TYPE's; every field is copied, its
type remapped through TCCTX->cb and its size, unit size and offset trees
walked with copy_tree_body_r. Each original field is mapped to its copy
in TCCTX->cb.decl_map. */
static tree
task_copyfn_remap_type (struct omp_taskcopy_context *tcctx, tree orig_type)
{
tree name, new_fields = NULL, type, f;
type = lang_hooks.types.make_type (RECORD_TYPE);
name = DECL_NAME (TYPE_NAME (orig_type));
name = build_decl (gimple_location (tcctx->ctx->stmt),
TYPE_DECL, name, type);
TYPE_NAME (type) = name;
for (f = TYPE_FIELDS (orig_type); f ; f = TREE_CHAIN (f))
{
tree new_f = copy_node (f);
DECL_CONTEXT (new_f) = type;
TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &tcctx->cb);
/* Fields are pushed on the front of the list, i.e. collected in
reverse; the list is put back in order after the loop. */
TREE_CHAIN (new_f) = new_fields;
walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &tcctx->cb, NULL);
walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r, &tcctx->cb, NULL);
walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
&tcctx->cb, NULL);
new_fields = new_f;
/* Remember old field -> new field for later lookups. */
*pointer_map_insert (tcctx->cb.decl_map, f) = new_f;
}
TYPE_FIELDS (type) = nreverse (new_fields);
layout_type (type);
return type;
}
/* Create task copyfn.

   Build the body of the copy function attached to TASK_STMT.  The
   function's first argument is made a pointer to CTX's record type (the
   destination) and its second argument a pointer to CTX's srecord type
   (the source); the generated statements copy, or copy-construct, fields
   from the source record into the destination record according to the
   task's clauses.  When either record type has a variably modified
   field, both affected types are first remapped into the copy function
   and the remapped fields are used instead.

   current_function_decl is switched to the copy function while it is
   populated and is set to CTX's source function on exit.  */

static void
create_task_copyfn (gimple task_stmt, omp_context *ctx)
{
  struct function *child_cfun;
  tree child_fn, t, c, src, dst, f, sf, arg, sarg, decl;
  tree record_type, srecord_type, bind, list;
  bool record_needs_remap = false, srecord_needs_remap = false;
  splay_tree_node n;
  struct omp_taskcopy_context tcctx;
  struct gimplify_ctx gctx;
  location_t loc = gimple_location (task_stmt);

  child_fn = gimple_omp_task_copy_fn (task_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);
  gcc_assert (child_cfun->cfg == NULL);
  child_cfun->dont_save_pending_sizes_p = 1;
  DECL_SAVED_TREE (child_fn) = alloc_stmt_list ();

  /* Reset DECL_CONTEXT on function arguments.  */
  for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
    DECL_CONTEXT (t) = child_fn;

  /* Populate the function.  */
  push_gimplify_context (&gctx);
  current_function_decl = child_fn;

  bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
  TREE_SIDE_EFFECTS (bind) = 1;
  list = NULL;
  DECL_SAVED_TREE (child_fn) = bind;
  DECL_SOURCE_LOCATION (child_fn) = gimple_location (task_stmt);

  /* Remap src and dst argument types if needed.  */
  record_type = ctx->record_type;
  srecord_type = ctx->srecord_type;
  for (f = TYPE_FIELDS (record_type); f ; f = TREE_CHAIN (f))
    if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
      {
        record_needs_remap = true;
        break;
      }
  for (f = TYPE_FIELDS (srecord_type); f ; f = TREE_CHAIN (f))
    if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
      {
        srecord_needs_remap = true;
        break;
      }

  if (record_needs_remap || srecord_needs_remap)
    {
      memset (&tcctx, '\0', sizeof (tcctx));
      tcctx.cb.src_fn = ctx->cb.src_fn;
      tcctx.cb.dst_fn = child_fn;
      tcctx.cb.src_node = cgraph_node (tcctx.cb.src_fn);
      tcctx.cb.dst_node = tcctx.cb.src_node;
      tcctx.cb.src_cfun = ctx->cb.src_cfun;
      tcctx.cb.copy_decl = task_copyfn_copy_decl;
      tcctx.cb.eh_lp_nr = 0;
      tcctx.cb.transform_call_graph_edges = CB_CGE_MOVE;
      tcctx.cb.decl_map = pointer_map_create ();
      tcctx.ctx = ctx;

      if (record_needs_remap)
        record_type = task_copyfn_remap_type (&tcctx, record_type);
      if (srecord_needs_remap)
        srecord_type = task_copyfn_remap_type (&tcctx, srecord_type);
    }
  else
    /* A null decl_map is the "no remapping" marker tested below.  */
    tcctx.cb.decl_map = NULL;

  push_cfun (child_cfun);

  arg = DECL_ARGUMENTS (child_fn);
  TREE_TYPE (arg) = build_pointer_type (record_type);
  sarg = TREE_CHAIN (arg);
  TREE_TYPE (sarg) = build_pointer_type (srecord_type);

  /* First pass: initialize temporaries used in record_type and srecord_type
     sizes and field offsets.  */
  if (tcctx.cb.decl_map)
    for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
          && OMP_CLAUSE_VIEW_VAR_KIND (c) != OMP_CLAUSE_VIEW_VAR_DISCARD)
        {
          tree *p;

          decl = OMP_CLAUSE_DECL (c);
          p = (tree *) pointer_map_contains (tcctx.cb.decl_map, decl);
          if (p == NULL)
            continue;
          n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
          sf = (tree) n->value;
          sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
          src = build_fold_indirect_ref_loc (loc, sarg);
          src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
          t = build2 (MODIFY_EXPR, TREE_TYPE (*p), *p, src);
          append_to_statement_list (t, &list);
        }

  /* Second pass: copy shared var pointers and copy construct non-VLA
     firstprivate vars.  */
  for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
    switch (OMP_CLAUSE_CODE (c))
      {
      case OMP_CLAUSE_SHARED:
        decl = OMP_CLAUSE_DECL (c);
        n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
        if (n == NULL)
          break;
        f = (tree) n->value;
        if (tcctx.cb.decl_map)
          f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
        n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
        sf = (tree) n->value;
        if (tcctx.cb.decl_map)
          sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
        src = build_fold_indirect_ref_loc (loc, sarg);
        src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
        dst = build_fold_indirect_ref_loc (loc, arg);
        dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
        t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
        append_to_statement_list (t, &list);
        break;

      case OMP_CLAUSE_FIRSTPRIVATE:
        if (OMP_CLAUSE_VIEW_VAR_KIND (c) == OMP_CLAUSE_VIEW_VAR_DISCARD)
          break;
        decl = OMP_CLAUSE_DECL (c);
        /* Variable-sized firstprivates are handled in the last pass.  */
        if (is_variable_sized (decl))
          break;
        n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
        if (n == NULL)
          break;
        f = (tree) n->value;
        if (tcctx.cb.decl_map)
          f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
        n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
        if (n != NULL)
          {
            sf = (tree) n->value;
            if (tcctx.cb.decl_map)
              sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
            src = build_fold_indirect_ref_loc (loc, sarg);
            src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
            if (use_pointer_for_field (decl, NULL) || is_reference (decl))
              src = build_fold_indirect_ref_loc (loc, src);
          }
        else
          src = decl;
        dst = build_fold_indirect_ref_loc (loc, arg);
        dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
        t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
        append_to_statement_list (t, &list);
        break;

      case OMP_CLAUSE_INPUT:
      case OMP_CLAUSE_OUTPUT:
        {
          /* FIXME: add copy-constructor for view/burst.  */
          stream_p str = lookup_stream (OMP_CLAUSE_DECL (c));

          /* Both lookups are dereferenced right away; fail with a
             diagnosable assertion rather than a null dereference.  */
          gcc_assert (str);
          n = splay_tree_lookup (ctx->field_map,
                                 (splay_tree_key) str->stream);
          gcc_assert (n);
          f = (tree) n->value;
          if (tcctx.cb.decl_map)
            f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
          n = splay_tree_lookup (ctx->sfield_map,
                                 (splay_tree_key) str->stream);
          /* Carry the task's location on the generated references, as
             every other clause kind in this function does.  */
          if (n != NULL)
            {
              sf = (tree) n->value;
              if (tcctx.cb.decl_map)
                sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
              src = build_fold_indirect_ref_loc (loc, sarg);
              src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
              if (use_pointer_for_field (str->stream, NULL)
                  || is_reference (str->stream))
                src = build_fold_indirect_ref_loc (loc, src);
            }
          else
            src = str->stream;
          dst = build_fold_indirect_ref_loc (loc, arg);
          dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
          t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
          append_to_statement_list (t, &list);
        }
        break;

      case OMP_CLAUSE_PRIVATE:
        if (! OMP_CLAUSE_PRIVATE_OUTER_REF (c))
          break;
        decl = OMP_CLAUSE_DECL (c);
        n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
        f = (tree) n->value;
        if (tcctx.cb.decl_map)
          f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
        n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
        if (n != NULL)
          {
            sf = (tree) n->value;
            if (tcctx.cb.decl_map)
              sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
            src = build_fold_indirect_ref_loc (loc, sarg);
            src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
            if (use_pointer_for_field (decl, NULL))
              src = build_fold_indirect_ref_loc (loc, src);
          }
        else
          src = decl;
        dst = build_fold_indirect_ref_loc (loc, arg);
        dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
        t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
        append_to_statement_list (t, &list);
        break;

      default:
        break;
      }

  /* Last pass: handle VLA firstprivates.  */
  if (tcctx.cb.decl_map)
    for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
        {
          tree ind, ptr, df;

          decl = OMP_CLAUSE_DECL (c);
          if (!is_variable_sized (decl)
              || OMP_CLAUSE_VIEW_VAR_KIND (c) == OMP_CLAUSE_VIEW_VAR_DISCARD)
            continue;
          n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
          if (n == NULL)
            continue;
          f = (tree) n->value;
          f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);

          /* The variable is expected to be accessed as *PTR_DECL; the
             pointer decl is what the source/destination records key on.  */
          gcc_assert (DECL_HAS_VALUE_EXPR_P (decl));
          ind = DECL_VALUE_EXPR (decl);
          gcc_assert (TREE_CODE (ind) == INDIRECT_REF);
          gcc_assert (DECL_P (TREE_OPERAND (ind, 0)));
          n = splay_tree_lookup (ctx->sfield_map,
                                 (splay_tree_key) TREE_OPERAND (ind, 0));
          sf = (tree) n->value;
          sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
          src = build_fold_indirect_ref_loc (loc, sarg);
          src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
          src = build_fold_indirect_ref_loc (loc, src);
          dst = build_fold_indirect_ref_loc (loc, arg);
          dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
          t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
          append_to_statement_list (t, &list);

          /* Point the destination's pointer field at the copied data.  */
          n = splay_tree_lookup (ctx->field_map,
                                 (splay_tree_key) TREE_OPERAND (ind, 0));
          df = (tree) n->value;
          df = *(tree *) pointer_map_contains (tcctx.cb.decl_map, df);
          ptr = build_fold_indirect_ref_loc (loc, arg);
          ptr = build3 (COMPONENT_REF, TREE_TYPE (df), ptr, df, NULL);
          t = build2 (MODIFY_EXPR, TREE_TYPE (ptr), ptr,
                      build_fold_addr_expr_loc (loc, dst));
          append_to_statement_list (t, &list);
        }

  t = build1 (RETURN_EXPR, void_type_node, NULL);
  append_to_statement_list (t, &list);

  if (tcctx.cb.decl_map)
    pointer_map_destroy (tcctx.cb.decl_map);
  pop_gimplify_context (NULL);
  BIND_EXPR_BODY (bind) = list;
  pop_cfun ();
  current_function_decl = ctx->cb.src_fn;
}
/* Lower the OpenMP parallel or task directive that GSI_P points at.
   CTX is the context recorded for that directive; it is dereferenced
   unconditionally and therefore must not be NULL.

   The statement at GSI_P is replaced by a GIMPLE_BIND that wraps the
   directive.  The directive's new body is, in order: the assignment
   of the sender record's address to CTX->receiver_decl (only when CTX
   has a record type), the statements from lower_rec_input_clauses,
   the lowered original body, the output-side statements, and an
   OMP return.  When lower_send_clauses / lower_send_shared_vars
   produce anything, that bind is additionally wrapped in an outer
   bind with their input statements before it and their output
   statements after it.

   Statements in the input lists that carry no location tag yet are
   tagged SL_COPY_IN, those in the output lists SL_COPY_OUT.  */
static void
lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
  gimple stmt = gsi_stmt (*gsi_p);
  location_t loc = gimple_location (stmt);
  tree clauses = gimple_omp_taskreg_clauses (stmt);
  gimple par_bind = gimple_seq_first_stmt (gimple_omp_body (stmt));
  gimple_seq par_body = gimple_bind_body (par_bind);
  tree child_fn = ctx->cb.dst_fn;
  bool is_parallel = gimple_code (stmt) == GIMPLE_OMP_PARALLEL;
  gimple_seq par_ilist = NULL, par_olist = NULL;
  gimple_seq ilist = NULL, olist = NULL;
  gimple_seq new_body = NULL;
  gimple bind;
  struct gimplify_ctx gctx;
  gimple_stmt_iterator gsi;

  /* A parallel not yet flagged as combined is flagged now if the
     check_combined_parallel walk over its body leaves the counter at
     exactly one.  */
  if (is_parallel && !gimple_omp_parallel_combined_p (stmt))
    {
      int ws_num = 0;
      struct walk_stmt_info wi;

      memset (&wi, 0, sizeof (wi));
      wi.info = &ws_num;
      wi.val_only = true;
      walk_gimple_seq (par_body, check_combined_parallel, NULL, &wi);
      if (ws_num == 1)
        gimple_omp_parallel_set_combined_p (stmt, true);
    }

  /* A separate sender record type means a task copy function is
     required.  */
  if (ctx->srecord_type)
    create_task_copyfn (stmt, ctx);

  push_gimplify_context (&gctx);

  /* Receiver side: clause handling that runs inside the region.  */
  lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
  if (par_ilist)
    {
      gsi = gsi_start (par_ilist);
      while (!gsi_end_p (gsi))
        {
          set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_IN);
          gsi_next (&gsi);
        }
    }
  if (par_olist)
    {
      gsi = gsi_start (par_olist);
      while (!gsi_end_p (gsi))
        {
          set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_OUT);
          gsi_next (&gsi);
        }
    }

  lower_omp (par_body, ctx);
  if (is_parallel)
    lower_reduction_clauses (clauses, &par_olist, ctx);

  /* Declare in the child function both the variables created by
     mapping and those declared in the scope of the parallel body.  */
  record_vars_into (ctx->block_vars, child_fn);
  record_vars_into (gimple_bind_vars (par_bind), child_fn);

  if (ctx->record_type)
    {
      /* Prefer the dedicated sender record type when there is one.  */
      tree sender_type = ctx->srecord_type;

      if (!sender_type)
        sender_type = ctx->record_type;
      ctx->sender_decl = create_tmp_var (sender_type, ".omp_data_o");
      TREE_ADDRESSABLE (ctx->sender_decl) = 1;
      gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
    }

  /* Sender side: statements that surround the directive.  */
  lower_send_clauses (clauses, &ilist, &olist, ctx);
  lower_send_shared_vars (&ilist, &olist, ctx);
  if (ilist)
    {
      gsi = gsi_start (ilist);
      while (!gsi_end_p (gsi))
        {
          set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_IN);
          gsi_next (&gsi);
        }
    }
  if (olist)
    {
      gsi = gsi_start (olist);
      while (!gsi_end_p (gsi))
        {
          set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_OUT);
          gsi_next (&gsi);
        }
    }

  /* With every fragment expanded, string them together to form the
     new body of the directive.  */
  if (ctx->record_type)
    {
      tree sender_addr = build_fold_addr_expr_loc (loc, ctx->sender_decl);
      gimple recv_init;

      /* fixup_child_record_type might have changed receiver_decl's
         type, hence the conversion.  */
      sender_addr = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl),
                                      sender_addr);
      recv_init = gimple_build_assign (ctx->receiver_decl, sender_addr);
      set_stmt_loc (recv_init, SL_COPY_IN);
      gimple_seq_add_stmt (&new_body, recv_init);
    }

  gimple_seq_add_seq (&new_body, par_ilist);
  gimple_seq_add_seq (&new_body, par_body);
  gimple_seq_add_seq (&new_body, par_olist);
  new_body = maybe_catch_exception (new_body);
  gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
  gimple_omp_set_body (stmt, new_body);

  /* The directive inherits the block of the bind it used to contain.  */
  bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
  gimple_bind_add_stmt (bind, stmt);

  if (ilist || olist)
    {
      /* ILIST becomes: sender inputs, the directive, sender outputs.  */
      gimple_seq_add_stmt (&ilist, bind);
      gimple_seq_add_seq (&ilist, olist);
      bind = gimple_build_bind (NULL, ilist, NULL);
    }

  gsi_replace (gsi_p, bind, true);

  pop_gimplify_context (NULL);
}
/* walk_tree callback used by lower_omp_1.  Returns the node at *TP
   when the operand containing it has to be regimplified, NULL_TREE
   otherwise.  DATA is NULL when lower_omp_1 runs inside an OpenMP
   context; a non-NULL DATA means it runs outside of any context but
   with task_shared_vars set.  *WALK_SUBTREES is cleared for types and
   decls so the walk does not descend into them.  */
static tree
lower_omp_regimplify_p (tree *tp, int *walk_subtrees,
                        void *data)
{
  tree node = *tp;
  bool in_omp_ctx = (data == NULL);

  /* Inside a context, every variable carrying a DECL_VALUE_EXPR
     forces regimplification.  */
  if (in_omp_ctx
      && TREE_CODE (node) == VAR_DECL
      && DECL_HAS_VALUE_EXPR_P (node))
    return node;

  /* So does any decl recorded in the task_shared_vars bitmap,
     whatever the context.  */
  if (task_shared_vars
      && DECL_P (node)
      && bitmap_bit_p (task_shared_vars, DECL_UID (node)))
    return node;

  /* Privatizing a global variable can leave TREE_CONSTANT on an
     ADDR_EXPR stale, so recompute it.  */
  if (in_omp_ctx && TREE_CODE (node) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (node);

  *walk_subtrees = !(TYPE_P (node) || DECL_P (node));
  return NULL_TREE;
}
/* Lower the single statement at GSI_P.  CTX is the enclosing OpenMP
   context, or NULL when the statement is outside of any OpenMP
   construct.

   OpenMP directives are dispatched to their dedicated lowering
   routine, using the context recorded for the directive itself.
   Container statements (bind, try, catch, EH filter) are recursed
   into with the current CTX.  Any other statement gets its operands
   regimplified when lower_omp_regimplify_p asks for it; that check is
   only made when CTX is non-NULL or task_shared_vars is set.

   input_location is updated to the statement's location as a side
   effect when the statement has one.  */
static void
lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
  gimple stmt = gsi_stmt (*gsi_p);
  struct walk_stmt_info wi;

  if (gimple_has_location (stmt))
    input_location = gimple_location (stmt);

  /* WI is only ever passed on when CTX is NULL, and in that case the
     guards below additionally require task_shared_vars, so this
     conditional initialization covers every use.  */
  if (task_shared_vars)
    memset (&wi, '\0', sizeof (wi));

  /* If we have issued syntax errors, avoid doing any heavy lifting.
     Just replace the OpenMP directives with a NOP to avoid
     confusing RTL expansion.  */
  if (errorcount && is_gimple_omp (stmt))
    {
      gsi_replace (gsi_p, gimple_build_nop (), true);
      return;
    }

  switch (gimple_code (stmt))
    {
    case GIMPLE_COND:
      /* Both sides of the condition are checked; DATA is NULL inside
         a context and &WI outside of one.  */
      if ((ctx || task_shared_vars)
          && (walk_tree (gimple_cond_lhs_ptr (stmt), lower_omp_regimplify_p,
                         ctx ? NULL : &wi, NULL)
              || walk_tree (gimple_cond_rhs_ptr (stmt), lower_omp_regimplify_p,
                            ctx ? NULL : &wi, NULL)))
        gimple_regimplify_operands (stmt, gsi_p);
      break;
    case GIMPLE_CATCH:
      lower_omp (gimple_catch_handler (stmt), ctx);
      break;
    case GIMPLE_EH_FILTER:
      lower_omp (gimple_eh_filter_failure (stmt), ctx);
      break;
    case GIMPLE_TRY:
      lower_omp (gimple_try_eval (stmt), ctx);
      lower_omp (gimple_try_cleanup (stmt), ctx);
      break;
    case GIMPLE_BIND:
      lower_omp (gimple_bind_body (stmt), ctx);
      break;
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
      ctx = maybe_lookup_ctx (stmt);
      /* lower_omp_taskreg dereferences CTX unconditionally; fail with
         an assertion, like every other directive below, rather than
         with a NULL dereference.  */
      gcc_assert (ctx);
      lower_omp_taskreg (gsi_p, ctx);
      break;
    case GIMPLE_OMP_FOR:
      ctx = maybe_lookup_ctx (stmt);
      gcc_assert (ctx);
      lower_omp_for (gsi_p, ctx);
      break;
    case GIMPLE_OMP_SECTIONS:
      ctx = maybe_lookup_ctx (stmt);
      gcc_assert (ctx);
      lower_omp_sections (gsi_p, ctx);
      break;
    case GIMPLE_OMP_SINGLE:
      ctx = maybe_lookup_ctx (stmt);
      gcc_assert (ctx);
      lower_omp_single (gsi_p, ctx);
      break;
    case GIMPLE_OMP_MASTER:
      ctx = maybe_lookup_ctx (stmt);
      gcc_assert (ctx);
      lower_omp_master (gsi_p, ctx);
      break;
    case GIMPLE_OMP_ORDERED:
      ctx = maybe_lookup_ctx (stmt);
      gcc_assert (ctx);
      lower_omp_ordered (gsi_p, ctx);
      break;
    case GIMPLE_OMP_CRITICAL:
      ctx = maybe_lookup_ctx (stmt);
      gcc_assert (ctx);
      lower_omp_critical (gsi_p, ctx);
      break;
    case GIMPLE_OMP_ATOMIC_LOAD:
      if ((ctx || task_shared_vars)
          && walk_tree (gimple_omp_atomic_load_rhs_ptr (stmt),
                        lower_omp_regimplify_p, ctx ? NULL : &wi, NULL))
        gimple_regimplify_operands (stmt, gsi_p);
      break;
    default:
      /* Any other statement: check all of its operands.  */
      if ((ctx || task_shared_vars)
          && walk_gimple_op (stmt, lower_omp_regimplify_p,
                             ctx ? NULL : &wi))
        gimple_regimplify_operands (stmt, gsi_p);
      break;
    }
}
/* Lower every statement of BODY in sequence order via lower_omp_1.
   CTX is the enclosing OpenMP context, or NULL outside of any
   construct.  lower_omp_1 overwrites input_location, so the value in
   effect on entry is put back before returning.  */
static void
lower_omp (gimple_seq body, omp_context *ctx)
{
  location_t entry_location = input_location;
  gimple_stmt_iterator it;

  it = gsi_start (body);
  while (!gsi_end_p (it))
    {
      lower_omp_1 (&it, ctx);
      gsi_next (&it);
    }

  input_location = entry_location;
}
/* Main entry point of the "omplower" pass, run on
   current_function_decl.  Scans the function body to build the
   OpenMP contexts, lowers the body if any context was created, then
   releases the context table and the task_shared_vars bitmap.
   Always returns 0.  */
static unsigned int
execute_lower_omp (void)
{
  gimple_seq fn_body;

  /* The pass has no gate so that PROP_gimple_lomp is always provided,
     but without -fopenmp there is no work to do.  */
  if (!flag_openmp)
    return 0;

  all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
                                 delete_omp_context);

  /* The statement-location table is created once and kept.  */
  if (htab_stmt_loc == NULL)
    htab_stmt_loc = htab_create_ggc (10, hash_stmt_loc,
                                     eq_stmt_loc, NULL);

  fn_body = gimple_body (current_function_decl);
  scan_omp (fn_body, NULL);
  gcc_assert (taskreg_nesting_level == 0);

  /* Lower only when scanning recorded at least one context.  */
  if (all_contexts->root != NULL)
    {
      struct gimplify_ctx gctx;

      if (task_shared_vars)
        push_gimplify_context (&gctx);
      lower_omp (fn_body, NULL);
      if (task_shared_vars)
        pop_gimplify_context (NULL);
    }

  if (all_contexts != NULL)
    {
      splay_tree_delete (all_contexts);
      all_contexts = NULL;
    }
  BITMAP_FREE (task_shared_vars);

  return 0;
}
/* Pass descriptor for "omplower".  The gate is NULL, so
   execute_lower_omp is invoked unconditionally and decides itself
   whether there is anything to do.  The pass requires PROP_gimple_any
   and provides PROP_gimple_lomp; the function is dumped when it
   finishes (TODO_dump_func).  */
struct gimple_opt_pass pass_lower_omp =
{
{
GIMPLE_PASS,
"omplower", /* name */
NULL, /* gate */
execute_lower_omp, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_NONE, /* tv_id */
PROP_gimple_any, /* properties_required */
PROP_gimple_lomp, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func /* todo_flags_finish */
}
};
/* The following is a utility to diagnose OpenMP structured block violations.
It is not part of the "omplower" pass, as that's invoked too late. It
should be invoked by the respective front ends after gimplification. */
/* Maps each label to the OpenMP construct statement that directly
   encloses it (NULL when the label is outside of any construct).
   Filled by diagnose_sb_1, queried by diagnose_sb_2, and only valid
   while diagnose_omp_structured_block_errors is running.  */
static splay_tree all_labels;
/* Compare the context of a branch, BRANCH_CTX, with the context of
   its destination label, LABEL_CTX.  When they differ, issue an
   error, replace the statement at GSI_P with a NOP and return true.
   Return false, without touching anything, when they match.  */
static bool
diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
               gimple branch_ctx, gimple label_ctx)
{
  /* Branch and destination share the same context: nothing wrong.  */
  if (branch_ctx == label_ctx)
    return false;

  /*
     Historical note: diagnose_sb_[12] used to keep track of the
     label's entire context, so that it could be traversed here in
     order to pick the correct "exit" or "enter" wording for the
     structured block violation.

     That context was a list built with tree_cons, which has no easy
     counterpart in gimple tuples.  It seems like far too much work
     for issuing exit/enter error messages.  If someone really misses
     the distinct error message... patches welcome.
   */

#if 0
  /* Try to avoid confusing the user by producing an error message
     with correct "exit" or "enter" verbiage.  We prefer "exit"
     unless we can show that LABEL_CTX is nested within BRANCH_CTX.  */
  if (branch_ctx == NULL)
    exit_p = false;
  else
    {
      while (label_ctx)
        {
          if (TREE_VALUE (label_ctx) == branch_ctx)
            {
              exit_p = false;
              break;
            }
          label_ctx = TREE_CHAIN (label_ctx);
        }
    }

  if (exit_p)
    error ("invalid exit from OpenMP structured block");
  else
    error ("invalid entry to OpenMP structured block");
#endif

  if (branch_ctx != NULL)
    /* The branch sits inside some construct; without the full
       nesting information stay vague about the direction.  */
    error ("invalid branch to/from an OpenMP structured block");
  else
    /* The branch is outside of every construct while the label is
       not, so this is plainly an invalid entry.  */
    error ("invalid entry to OpenMP structured block");

  /* Drop the offending branch.  */
  gsi_replace (gsi_p, gimple_build_nop (), false);
  return true;
}
/* Pass 1 of the structured block diagnostics, used as the statement
   callback of walk_gimple_seq.  WI->info holds the OpenMP construct
   currently being walked (NULL at the outermost level).  Every label
   encountered is recorded in ALL_LABELS together with that
   construct.  Always returns NULL_TREE so the walk continues.  */
static tree
diagnose_sb_1 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
               struct walk_stmt_info *wi)
{
  gimple stmt = gsi_stmt (*gsi_p);
  gimple outer_ctx = (gimple) wi->info;

  *handled_ops_p = true;

  switch (gimple_code (stmt))
    {
    WALK_SUBSTMTS;

    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_ORDERED:
    case GIMPLE_OMP_CRITICAL:
      /* While inside, the construct itself is the (minimal) context;
         the enclosing one is reinstated afterwards.  */
      wi->info = stmt;
      walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
      wi->info = outer_ctx;
      break;

    case GIMPLE_OMP_FOR:
      wi->info = stmt;
      /* The pre-body is walked in addition to the body.  No need to
         walk gimple_omp_for_{index,initial,final}: they are all
         DECLs.  */
      walk_gimple_seq (gimple_omp_for_pre_body (stmt),
                       diagnose_sb_1, NULL, wi);
      walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
      wi->info = outer_ctx;
      break;

    case GIMPLE_LABEL:
      /* Remember which construct this label belongs to.  */
      splay_tree_insert (all_labels,
                         (splay_tree_key) gimple_label_label (stmt),
                         (splay_tree_value) outer_ctx);
      break;

    default:
      break;
    }

  return NULL_TREE;
}
/* Pass 2 of the structured block diagnostics, used as the statement
   callback of walk_gimple_seq.  WI->info holds the OpenMP construct
   currently being walked (NULL at the outermost level).  For every
   conditional, goto, switch and return, the context of the branch is
   compared, through diagnose_sb_0, with the context ALL_LABELS
   recorded for the destination label.  Always returns NULL_TREE so
   the walk continues.  */
static tree
diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
               struct walk_stmt_info *wi)
{
  gimple stmt = gsi_stmt (*gsi_p);
  gimple context = (gimple) wi->info;
  splay_tree_node n;

  *handled_ops_p = true;

  switch (gimple_code (stmt))
    {
    WALK_SUBSTMTS;

    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_ORDERED:
    case GIMPLE_OMP_CRITICAL:
      /* The construct is the context of everything in its body.  */
      wi->info = stmt;
      walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_2, NULL, wi);
      wi->info = context;
      break;

    case GIMPLE_OMP_FOR:
      wi->info = stmt;
      /* No need to walk gimple_omp_for_{index,initial,final}: they
         are all DECLs.  */
      walk_gimple_seq (gimple_omp_for_pre_body (stmt),
                       diagnose_sb_2, NULL, wi);
      walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_2, NULL, wi);
      wi->info = context;
      break;

    case GIMPLE_COND:
      {
        tree lab;
        gimple lab_ctx;

        /* Check the true destination, then the false one.  A label
           with no recorded entry counts as being outside of any
           construct.  */
        lab = gimple_cond_true_label (stmt);
        if (lab)
          {
            n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
            lab_ctx = n ? (gimple) n->value : NULL;
            diagnose_sb_0 (gsi_p, context, lab_ctx);
          }

        lab = gimple_cond_false_label (stmt);
        if (lab)
          {
            n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
            lab_ctx = n ? (gimple) n->value : NULL;
            diagnose_sb_0 (gsi_p, context, lab_ctx);
          }
      }
      break;

    case GIMPLE_GOTO:
      {
        tree lab = gimple_goto_dest (stmt);

        /* Only destinations that are plain labels are checked.  */
        if (TREE_CODE (lab) == LABEL_DECL)
          {
            gimple lab_ctx;

            n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
            lab_ctx = n ? (gimple) n->value : NULL;
            diagnose_sb_0 (gsi_p, context, lab_ctx);
          }
      }
      break;

    case GIMPLE_SWITCH:
      {
        unsigned int idx;

        /* Stop at the first offending case label: diagnose_sb_0 has
           replaced the switch by then.  Labels without a recorded
           entry are skipped.  */
        for (idx = 0; idx < gimple_switch_num_labels (stmt); idx++)
          {
            tree lab = CASE_LABEL (gimple_switch_label (stmt, idx));

            n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
            if (n == NULL)
              continue;
            if (diagnose_sb_0 (gsi_p, context, (gimple) n->value))
              break;
          }
      }
      break;

    case GIMPLE_RETURN:
      /* A return is checked against the "no construct" context.  */
      diagnose_sb_0 (gsi_p, context, NULL);
      break;

    default:
      break;
    }

  return NULL_TREE;
}
/* Diagnose branches into or out of OpenMP structured blocks in the
   body of current_function_decl.  The body is walked twice: first
   with diagnose_sb_1 to record every label's context in ALL_LABELS,
   then with diagnose_sb_2 to check each branch against it.
   ALL_LABELS only lives for the duration of this call.  Always
   returns 0.  */
static unsigned int
diagnose_omp_structured_block_errors (void)
{
  gimple_seq fn_body = gimple_body (current_function_decl);
  struct walk_stmt_info wi;

  all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);

  /* Pass 1: collect the labels.  */
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq (fn_body, diagnose_sb_1, NULL, &wi);

  /* Pass 2: check the branches, starting from a clean walk state.  */
  memset (&wi, 0, sizeof (wi));
  wi.want_locations = true;
  walk_gimple_seq (fn_body, diagnose_sb_2, NULL, &wi);

  splay_tree_delete (all_labels);
  all_labels = NULL;

  return 0;
}
/* Gate of the "*diagnose_omp_blocks" pass: run it only when
   flag_openmp is nonzero.  */
static bool
gate_diagnose_omp_blocks (void)
{
  if (flag_openmp == 0)
    return false;

  return true;
}
/* Pass descriptor for "*diagnose_omp_blocks".  Gated by
   gate_diagnose_omp_blocks and executed by
   diagnose_omp_structured_block_errors.  The pass requires
   PROP_gimple_any and neither provides nor destroys any property.  */
struct gimple_opt_pass pass_diagnose_omp_blocks =
{
{
GIMPLE_PASS,
"*diagnose_omp_blocks", /* name */
gate_diagnose_omp_blocks, /* gate */
diagnose_omp_structured_block_errors, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_NONE, /* tv_id */
PROP_gimple_any, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
}
};
#include "gt-omp-low.h"