diff --git a/gcc/ChangeLog.omp-stream b/gcc/ChangeLog.omp-stream new file mode 100644 index 000000000..1f149a85d --- /dev/null +++ b/gcc/ChangeLog.omp-stream @@ -0,0 +1,206 @@ +2010-10-04 Cupertino Miranda + + * omp-low.c: Debugging. + (convert_view_var_array): Perform a type cast for the case where the + address of an ARRAY_REF is taken. Cast is done to the expected type + of the current node. + (expand_view_traverser_callback): Corrected code for cases where an + assign statement has two memory operands. + +2010-10-04 Cupertino Miranda + + * omp-low.c: Debugging and cleanup. + (_lookup_stream): Removed stream type checking to start treating every + stream var in the same fashion. + (TRAVERSE_VIEWS_IN_REGION): Check htab before doing the traversal. + (traverse_views_in_region_and_subs): Check region->view_pointers before + traversing it. + (compute_data_position): Rename variables. Enabled to pass NULL index + to make it further reusable. + (convert_view_var_array_struct): Added assign_to_var_if_has_parent. + (convert_view_var_array): Updated way to perform conversion. Conversion + is only executed for ARRAY elements. Created checks to validate if + recursive callers should do the conversion or simply ignore. The + remaining expansions are performed by the caller function. + (expand_view_traverser_callback_data): Removed unused nr_elements. + (expand_view_traverser_callback): Moved sync_index increment to after + commit / release. Created condition to either call + convert_view_var_array (for an ARRAY element, substituting it by direct + buffer accesses) or perform the expansion in this function by adding + either a read / write to the buffer after / before an update / commit + respectively. + (register_views_traverser_callback): Change initial assign to + sync_index to view_horizon instead of 0. + (expand_task_streaming_extensions): Removed assign to nr_elements, + removed from the structure. Added gsi_inserts for write_stmts and + read_stmts elements.
+ (stream_create_calls): Changed created variables from size_type_node to + long_long_unsigned_type_node. + (expand_push_and_pop_stubs): Removed unused nr_elements local + variable. + (expand_omp_single): Removed a condition that was wrongly protecting some + code that was producing bad builtin calls when master had no shared + stream with child tasks. + (retrieve_streams_info): Changed var from size_type_node to + long_long_unsigned_type_node. + +2010-09-28 Antoniu Pop + + * omp-low.c: Debugging and cleanup. + (retrieve_streams_info): Removed unguarded call to + OMP_CLAUSE_STREAM_ID. This macro can only be called on + OMP_CLAUSE_INPUT and OMP_CLAUSE_OUTPUT. + (list_bbs_in_sese): Renamed variable NEW to avoid compilation + warnings and potential surprises. + (lookup_stream, compute_data_position): Changed type of + BUFFER_MASK to be consistent. As we use unsigned_long_long for + indexes and horizons, this should be used everywhere we interact + with stream indexes. + (convert_view_var_array): Handle cases where the use of a + dereference is not acceptable in gimple. This includes among + others, binary RHS in assignment, non-register LHS, statements + with no memory operands. + +2010-07-27 Cupertino Miranda + + * omp-low.c: Updated code to new streaming syntax. + (regions_streamization_info): Added outer_check, outer_first, + sese_entry, sese_exit. + (stream, _lookup_stream): Added buffer_type, buffer_mask, + element_number. Removed element_size and data. Initializations within + _lookup_stream. Added logic to identify type of elements in stream + depending on whether stream var is simple type or an array. Should be changed + later. + (view): Added sync_index, buffer_pointer, view_var, burst, view_horizon, + expanded_push_pop and info. + (prepare_inner_context_for_streaming): Adapted to support the new + expansion, correctly updating elements in regions_streamization_info. + (compute_data_position): Added function to simplify expansion to buffer + positions by index.
+ (create_data_pointer): Removed. + (is_same_expression): Added function to identify similar tree node + expressions. Used to compare with operands used in code to substitute + the view_var by a direct buffer access. + (convert_view_var_array_struct, convert_view_var_array): Structure and + function traverser callback to convert the view_vars into direct buffer + accesses. Uses is_same_expression function to compare tree expressions. + (list_bbs_in_sese): Function to create a list of BBS inside of a SESE + region. We use it to identify which BBS we should traverse to + substitute all view_var references by buffer accesses. + (convert_view_var_array_traverser): Function to traverse SESE + and substitute all the references of view_var by the respective buffer + pointer. It is a callback function but is not used as such. + (expand_view_traverser_callback): Changed how the expansion is + performed. Mainly, created different index variables, view_vars are + converted here by calls to convert_view_var_array_traverser instead of + the previous copy or memcpy that was done before. + (register_views_traverser_callback): Initialize newly introduced index + vars. + (expand_task_streaming_extensions): Adapted to reflect new syntax / + expansion. + (expand_push_and_pop_stubs): Updated to reflect new expansion. More + specifically, create stall / release statements before single regions. + (expand_omp_single): Setup SESE BBS nodes in streamization_info, both + task and single regions. + (retrieve_streams_info): Initialize new view structure elements. + + * gimplify.c: Updated set default clause for view var. + (gimplify_scan_omp_clauses): Added view var as local when a new + syntax input / output clause is used. + + * c-parser.c: Added support for new input / output clauses. + (c_parser_omp_stream_identifier, c_parser_omp_stream_clause): New + parsing functions. + (c_parser_omp_clause_input, c_parser_omp_clause_output): Updated.
+ +2010-07-22 Cupertino Miranda + + * omp-low.c: Clearly commented code. + Renamed stream_task to regions_streamization_info. + Renamed create_synchronization to expand_view_traverser_callback. + (stream, _lookup_stream): Added access_type, element_size, element_type + to stream structure. Initialized those in _lookup_stream. + (prepare_inner_context_for_streaming): Updated comments. Removed + repeated split_edge call for outer_latch. Removed call to remove edge. + (create_data_pointer): Changed way to obtain stream elements size. + (expand_view_traverser_callback): In case elements being accessed in + view are typed ARRAY_DECL then the data in it is fully copied to and + from the buffer with a MEMCPY built in. + (lower_copyprivate_clauses): Removed call to build_fold_addr_expr_loc. + Removed a couple of redundant calls to cleanup_tree_cfg. + +2010-07-20 Antoniu Pop + + * omp-low.c (create_task_copyfn): Added cases for OMP_CLAUSE_INPUT + and OMP_CLAUSE_OUTPUT to create the proper copy-in code for the + marshalling function. + +2010-07-09 Cupertino Miranda + + * omp-low.c (clause_to_var_struct, hash_clause_to_var, + eq_clause_to_var, lookup_var_for_clause): New structure, + hash table and the accessor functions. + +2010-07-06 Cupertino Miranda + + * omp-low.c (stream_task, get_streamization_info_for_region): + Structure and creator/accessor functions for streamization + information used throughout code expansion. + (stream, htab_stream, hash_stream, eq_stream, lookup_stream, + _lookup_stream): New structure, hash table and the accessor + functions. + (view, hash_view, eq_view, alloc_view, _lookup_view, + lookup_view, maybe_lookup_view): New structure, hash table + and the accessor functions. + (stmt_location, stmt_loc, htab_stmt_loc, hash_stmt_loc, + eq_stmt_loc, get_stmt_loc, lookup_stmt_loc, set_stmt_loc, + set_stmt_loc_if_none): New structure, hash table and the + accessor functions.
+ (TRAVERSE_VIEWS_IN_REGION_AND_SUBS, TRAVERSE_VIEWS_IN_REGION, + GET_VIEW_FOR_STREAM_WITH_POINTER): New macros + (scan_sharing_clauses, lower_rec_input_clauses, expand_omp_taskreg, + lower_send_clauses, expand_omp_single, execute_expand_omp, + lower_omp_taskreg, execute_lower_omp): + Implement streamization of INPUT/OUTPUT clauses. + + * c-parser.c (c_parser_omp_clause_name, c_parser_omp_all_clauses, + OMP_TASK_CLAUSE_MASK): Added OMP_CLAUSE_INPUT and + OMP_CLAUSE_OUTPUT cases. + (c_parser_omp_clause_input, c_parser_omp_clause_output): New. + + * tree-flow.h (omp_region): Added streamization_init_bb and + streamization_exit_bb fields. + * c-pragma.h (pragma_omp_clause): Add OMP_CLAUSE_INPUT + OMP_CLAUSE_OUTPUT clauses. + * c-typeck.c (c_finish_omp_clauses): Added OMP_CLAUSE_INPUT and + OMP_CLAUSE_OUTPUT cases. + * gimplify.c (gimplify_scan_omp_clauses, + gimplify_adjust_omp_clauses): Added OMP_CLAUSE_INPUT and + OMP_CLAUSE_OUTPUT cases. + * tree.c (omp_clause_num_ops, omp_clause_code_name, walk_tree_1): + Add and handle OMP_CLAUSE_INPUT and OMP_CLAUSE_OUTPUT cases. + * tree.h (omp_clause_code): Add OMP_CLAUSE_INPUT + OMP_CLAUSE_OUTPUT clauses. + * tree-pretty-print.c (dump_omp_clause): Added OMP_CLAUSE_INPUT + and OMP_CLAUSE_OUTPUT cases. + +2010-07-01 Antoniu Pop + + * builtin-types.def (BT_FN_ULL_PTR_ULL, BT_FN_VOID_PTR_ULL) + (BT_FN_PTR_SIZE_ULL_PTR, BT_FN_VOID_PTR_INT_INT_INT): New builtin + types for the GOMP stream runtime. + * omp-builtins.def (BUILT_IN_GOMP_STREAM_CREATE_STREAM) + (BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW) + (BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW) + (BUILT_IN_GOMP_STREAM_CREATE_TASK) + (BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS) + (BUILT_IN_GOMP_STREAM_CONNECT_VIEW) + (BUILT_IN_GOMP_STREAM_WAIT_UNTIL_CONNECTED) + (BUILT_IN_GOMP_STREAM_UPDATE, BUILT_IN_GOMP_STREAM_STALL) + (BUILT_IN_GOMP_STREAM_RELEASE, BUILT_IN_GOMP_STREAM_COMMIT) + (BUILT_IN_GOMP_STREAM_TASK_EXIT): New builtin functions of the + GOMP stream runtime. 
+ +2010-06-30 Cupertino Miranda + + * Branch from mainline (r158338). diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index 4676dd764..75ee40dbb 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -218,6 +218,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_ULONG, BT_ULONG, BT_ULONG) DEF_FUNCTION_TYPE_1 (BT_FN_ULONGLONG_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG) DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32) DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64) +DEF_FUNCTION_TYPE_1 (BT_FN_PTR_INT, BT_PTR, BT_INT) DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR) @@ -313,6 +314,10 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_LONGPTR, BT_BOOL, BT_PTR_LONG, BT_PTR_LONG) DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR, BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) +DEF_FUNCTION_TYPE_2 (BT_FN_ULL_PTR_ULL, + BT_ULONGLONG, BT_PTR, BT_ULONGLONG) +DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_ULL, + BT_VOID, BT_PTR, BT_ULONGLONG) DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR) @@ -381,6 +386,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT) DEF_FUNCTION_TYPE_3 (BT_FN_PTR_CONST_PTR_INT_SIZE, BT_PTR, BT_CONST_PTR, BT_INT, BT_SIZE) +DEF_FUNCTION_TYPE_3 (BT_FN_PTR_SIZE_ULL_PTR, + BT_PTR, BT_SIZE, BT_ULONGLONG, BT_PTR) DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR, BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR) @@ -400,6 +407,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR, BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_INT_INT_INT, + BT_VOID, BT_PTR, BT_INT, BT_INT, BT_INT) DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG, BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING, diff --git a/gcc/c-parser.c b/gcc/c-parser.c index e5440d47c..da2f30184 100644 --- a/gcc/c-parser.c +++ 
b/gcc/c-parser.c @@ -7156,6 +7156,10 @@ c_parser_omp_clause_name (c_parser *parser) if (!strcmp ("firstprivate", p)) result = PRAGMA_OMP_CLAUSE_FIRSTPRIVATE; break; + case 'i': + if (!strcmp ("input", p)) + result = PRAGMA_OMP_CLAUSE_INPUT; + break; case 'l': if (!strcmp ("lastprivate", p)) result = PRAGMA_OMP_CLAUSE_LASTPRIVATE; @@ -7169,6 +7173,8 @@ c_parser_omp_clause_name (c_parser *parser) case 'o': if (!strcmp ("ordered", p)) result = PRAGMA_OMP_CLAUSE_ORDERED; + else if (!strcmp ("output", p)) + result = PRAGMA_OMP_CLAUSE_OUTPUT; break; case 'p': if (!strcmp ("private", p)) @@ -7267,6 +7273,157 @@ c_parser_omp_variable_list (c_parser *parser, return list; } +/* Parse one stream or view reference: identifier or identifier [ expr ]. + On success store the decl in *ID and the subscript (NULL_TREE if none) + in *SUB. Return false on error; LOC is the enclosing clause location. */ +static bool +c_parser_omp_stream_identifier (c_parser *parser, location_t loc, + tree *id, tree *sub) +{ + if (c_parser_next_token_is_not (parser, CPP_NAME) + || c_parser_peek_token (parser)->id_kind != C_ID_ID) + { + c_parser_error (parser, "expected stream or view identifier"); + return false; + } + + *id = lookup_name (c_parser_peek_token (parser)->value); + if (*id == NULL_TREE) + { + inform (loc, "OpenMP stream and view identifiers must" + " be declared before use in streaming clauses."); + undeclared_variable (c_parser_peek_token (parser)->location, + c_parser_peek_token (parser)->value); + return false; + } + + if (*id == error_mark_node) + return false; + + c_parser_consume_token (parser); + + /* If this is an array reference. 
*/ + if (c_parser_next_token_is (parser, CPP_OPEN_SQUARE)) + { + c_parser_consume_token (parser); + *sub = c_parser_expression (parser).value; + c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, + "expected %<]%>"); + } + else + *sub = NULL_TREE; + + if (c_parser_next_token_is (parser, CPP_OPEN_SQUARE)) + { + c_parser_error (parser, "single dimension arrays" + " supported only in streaming clauses"); + return false; + } + + /* Next, we must either connect a view or have another stream use + separated by a comma, or the closing parenthesis. This test + prevents all other syntaxes that will be supported later (like + dot or deref ...) */ + if (c_parser_next_token_is_not (parser, CPP_LSHIFT) + && c_parser_next_token_is_not (parser, CPP_RSHIFT) + && c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN) + && c_parser_next_token_is_not (parser, CPP_COMMA)) + { + c_parser_error (parser, "wrong syntax on streaming clause"); + return false; + } + + return true; +} + +static tree +c_parser_omp_stream_clause (c_parser *parser, + enum omp_clause_code kind, + tree list) +{ + /* The clause's location. */ + location_t clause_loc = c_parser_peek_token (parser)->location; + + gcc_assert (kind == OMP_CLAUSE_INPUT || kind == OMP_CLAUSE_OUTPUT); + + if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>")) + return list; + + /* Every stream clause must start with either a stream identifier or + a view identifier. 
*/ + while (true) + { + tree stream_id, stream_idx, view_id, view_idx; + tree omp_clause; + + if (!c_parser_omp_stream_identifier (parser, clause_loc, + &stream_id, &stream_idx)) + break; + + omp_clause = build_omp_clause (clause_loc, kind); + OMP_CLAUSE_STREAM_ID (omp_clause) = stream_id; + OMP_CLAUSE_STREAM_SUB (omp_clause) = stream_idx; + + + if (c_parser_next_token_is (parser, CPP_LSHIFT) + || c_parser_next_token_is (parser, CPP_RSHIFT)) + { + bool lshift_stream_operator_p = + c_parser_next_token_is (parser, CPP_LSHIFT); + + c_parser_consume_token (parser); + + if (!c_parser_omp_stream_identifier (parser, clause_loc, + &view_id, &view_idx)) + break; + + /* If the clause is reversed ("view << >> stream" instead of + "stream << >> view"), swap the roles. */ + if ((kind == OMP_CLAUSE_INPUT && lshift_stream_operator_p) + || (kind == OMP_CLAUSE_OUTPUT && !lshift_stream_operator_p)) + { + OMP_CLAUSE_STREAM_ID (omp_clause) = view_id; + view_id = stream_id; + OMP_CLAUSE_STREAM_SUB (omp_clause) = view_idx; + view_idx = stream_idx; + } + + OMP_CLAUSE_VIEW_ID (omp_clause) = view_id; + OMP_CLAUSE_BURST_SIZE (omp_clause) = view_idx; + } + else + { + OMP_CLAUSE_VIEW_ID (omp_clause) = NULL_TREE; + OMP_CLAUSE_BURST_SIZE (omp_clause) = NULL_TREE; + } + + OMP_CLAUSE_CHAIN (omp_clause) = list; + list = omp_clause; + + // TODO: TINO Check this + //if(OMP_CLAUSE_VIEW_ID (omp_clause) != NULL_TREE) + //{ + // tree new_omp_clause = build_omp_clause (clause_loc, OMP_CLAUSE_PRIVATE); + // OMP_CLAUSE_DECL (new_omp_clause) = OMP_CLAUSE_VIEW_ID (omp_clause); + // OMP_CLAUSE_CHAIN (new_omp_clause) = list; + // list = new_omp_clause; + //} + + if (c_parser_next_token_is (parser, CPP_COMMA)) + c_parser_consume_token (parser); + else if (c_parser_next_token_is (parser, CPP_CLOSE_PAREN)) + break; + else + { + c_parser_error (parser, "expected %<,%> or %<)%>"); + break; + } + } + + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); + return list; +} + /* Similarly, but expect 
leading and trailing parenthesis. This is a very common case for omp clauses. */ @@ -7426,6 +7583,15 @@ c_parser_omp_clause_if (c_parser *parser, tree list) return list; } +/* OpenMP stream extension: + input ( stream-list ) */ + +static tree +c_parser_omp_clause_input (c_parser *parser, tree list) +{ + return c_parser_omp_stream_clause (parser, OMP_CLAUSE_INPUT, list); +} + /* OpenMP 2.5: lastprivate ( variable-list ) */ @@ -7512,6 +7678,15 @@ c_parser_omp_clause_ordered (c_parser *parser, tree list) return c; } +/* OpenMP stream extension: + output ( stream-list ) */ + +static tree +c_parser_omp_clause_output (c_parser *parser, tree list) +{ + return c_parser_omp_stream_clause (parser, OMP_CLAUSE_OUTPUT, list); +} + /* OpenMP 2.5: private ( variable-list ) */ @@ -7756,6 +7931,10 @@ c_parser_omp_all_clauses (c_parser *parser, unsigned int mask, clauses = c_parser_omp_clause_if (parser, clauses); c_name = "if"; break; + case PRAGMA_OMP_CLAUSE_INPUT: + clauses = c_parser_omp_clause_input (parser, clauses); + c_name = "input"; + break; case PRAGMA_OMP_CLAUSE_LASTPRIVATE: clauses = c_parser_omp_clause_lastprivate (parser, clauses); c_name = "lastprivate"; @@ -7772,6 +7951,10 @@ c_parser_omp_all_clauses (c_parser *parser, unsigned int mask, clauses = c_parser_omp_clause_ordered (parser, clauses); c_name = "ordered"; break; + case PRAGMA_OMP_CLAUSE_OUTPUT: + clauses = c_parser_omp_clause_output (parser, clauses); + c_name = "output"; + break; case PRAGMA_OMP_CLAUSE_PRIVATE: clauses = c_parser_omp_clause_private (parser, clauses); c_name = "private"; @@ -8583,7 +8766,9 @@ c_parser_omp_single (location_t loc, c_parser *parser) | (1u << PRAGMA_OMP_CLAUSE_DEFAULT) \ | (1u << PRAGMA_OMP_CLAUSE_PRIVATE) \ | (1u << PRAGMA_OMP_CLAUSE_FIRSTPRIVATE) \ - | (1u << PRAGMA_OMP_CLAUSE_SHARED)) + | (1u << PRAGMA_OMP_CLAUSE_SHARED) \ + | (1u << PRAGMA_OMP_CLAUSE_INPUT) \ + | (1u << PRAGMA_OMP_CLAUSE_OUTPUT)) static tree c_parser_omp_task (location_t loc, c_parser *parser) diff --git
a/gcc/c-pragma.h b/gcc/c-pragma.h index eab23db6c..f8ee01fba 100644 --- a/gcc/c-pragma.h +++ b/gcc/c-pragma.h @@ -62,10 +62,12 @@ typedef enum pragma_omp_clause { PRAGMA_OMP_CLAUSE_DEFAULT, PRAGMA_OMP_CLAUSE_FIRSTPRIVATE, PRAGMA_OMP_CLAUSE_IF, + PRAGMA_OMP_CLAUSE_INPUT, PRAGMA_OMP_CLAUSE_LASTPRIVATE, PRAGMA_OMP_CLAUSE_NOWAIT, PRAGMA_OMP_CLAUSE_NUM_THREADS, PRAGMA_OMP_CLAUSE_ORDERED, + PRAGMA_OMP_CLAUSE_OUTPUT, PRAGMA_OMP_CLAUSE_PRIVATE, PRAGMA_OMP_CLAUSE_REDUCTION, PRAGMA_OMP_CLAUSE_SCHEDULE, diff --git a/gcc/c-typeck.c b/gcc/c-typeck.c index 1bac4fdd8..a3b98f4ee 100644 --- a/gcc/c-typeck.c +++ b/gcc/c-typeck.c @@ -10272,6 +10272,20 @@ c_finish_omp_clauses (tree clauses) bitmap_set_bit (&lastprivate_head, DECL_UID (t)); break; + case OMP_CLAUSE_INPUT: + name = "input"; + t = OMP_CLAUSE_DECL (c); + need_complete = true; + need_implicitly_determined = true; + break; + + case OMP_CLAUSE_OUTPUT: + name = "output"; + t = OMP_CLAUSE_DECL (c); + need_complete = true; + need_implicitly_determined = true; + break; + case OMP_CLAUSE_IF: case OMP_CLAUSE_NUM_THREADS: case OMP_CLAUSE_SCHEDULE: diff --git a/gcc/gimplify.c b/gcc/gimplify.c index f6266e10c..7b72bdd4f 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -5734,6 +5734,15 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, flags = GOVD_REDUCTION | GOVD_SEEN | GOVD_EXPLICIT; check_non_private = "reduction"; goto do_add; + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: + { + tree view = OMP_CLAUSE_VIEW_ID (c); + if(view != NULL_TREE) + omp_add_variable (ctx, view, GOVD_LOCAL | GOVD_SEEN); + } + flags = GOVD_PRIVATE | GOVD_EXPLICIT; + goto do_add; do_add: decl = OMP_CLAUSE_DECL (c); @@ -5959,6 +5968,8 @@ gimplify_adjust_omp_clauses (tree *list_p) = (n->value & GOVD_FIRSTPRIVATE) != 0; break; + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: case OMP_CLAUSE_REDUCTION: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_COPYPRIVATE: diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 5fd4f9aea..39747942b 
100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -206,3 +206,40 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, "GOMP_single_copy_start", BT_FN_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end", BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) + +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_STREAM, + "GOMP_stream_create_stream", + BT_FN_PTR_SIZE_ULL_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW, + "GOMP_stream_create_read_view", + BT_FN_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW, + "GOMP_stream_create_write_view", + BT_FN_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_TASK, + "GOMP_stream_create_task", + BT_FN_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS, + "GOMP_stream_add_expected_views", + BT_FN_VOID_PTR_INT_INT_INT, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CONNECT_VIEW, + "GOMP_stream_connect_view", + BT_FN_VOID_PTR_PTR_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_WAIT_UNTIL_CONNECTED, + "GOMP_stream_wait_until_connected", + BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_UPDATE, "GOMP_stream_update", + BT_FN_ULL_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_STALL, "GOMP_stream_stall", + BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_RELEASE, "GOMP_stream_release", + BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_COMMIT, "GOMP_stream_commit", + BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_TASK_EXIT, "GOMP_stream_task_exit", + BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) + +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_PUSH_STUB, "GOMP_stream_push_stub", + BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_POP_STUB, "GOMP_stream_pop_stub", + BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LIST) diff 
--git a/gcc/omp-low.c b/gcc/omp-low.c index cc36cb51b..f9779e53e 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -20,7 +20,39 @@ for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see -. */ +. + +Streamization expansion to Erbium: + +This version contains the expansion for both task and single regions +It is not currently able to perform data-parallelism but only pipeline +parallelism. + +Implementation: + +During lowering pass variables that represent streams / views are +identified and registered for the purpose. +Also for the non SESE regions (such as single regions) PUSH / POP stub builtins +are created before and after any SESE streaming region. +At this stage a hashtable of streams is initialized and elements are +introduced to it. + +In the expansion pass, we first perform a traversal through all the +regions, identifying every streamization task and creating a connectivity graph +between all of the tasks. +For each of the tasks, we identify the type of view (read / writer) and add the +view to the respective stream by inserting it in a hashtable within the stream +structure. + +Once this recognition is performed, All the regions are traversed, starting on +the child tasks and going to the parent ones. +Each of the expansion function for the specific type of region was patched to +perform the streamization branch. + +Supported types are currently: + - Single region + - Task region +*/ #include "config.h" #include "system.h" @@ -46,6 +78,1372 @@ along with GCC; see the file COPYING3. If not see #include "optabs.h" #include "cfgloop.h" +/* Map a clause to the related VAR_DECL */ +typedef struct clause_to_var_struct +{ + tree clause; + + tree var; +} *clause_to_var; + +/* Map clauses to the respective variables. */ +htab_t htab_clause_to_var; + +/* Compute a hash function for clause_to_var. 
*/ +static hashval_t +hash_clause_to_var (const void *elt) +{ + return htab_hash_pointer (((const struct clause_to_var_struct *) elt)->clause); +} + +/* Compares clause_to_var elements E1 and E2. */ +static int +eq_clause_to_var (const void *e1, const void *e2) +{ + const struct clause_to_var_struct *elt1 = (const struct clause_to_var_struct *) e1; + const struct clause_to_var_struct *elt2 = (const struct clause_to_var_struct *) e2; + + return elt1->clause == elt2->clause; +} + +/* Find a variable for a input / output clause. */ +static tree * +lookup_var_for_clause (tree clause) +{ + struct clause_to_var_struct **slot, tmp; + tmp.clause = clause; + + if (!htab_clause_to_var) + htab_clause_to_var = htab_create_ggc (10, hash_clause_to_var, + eq_clause_to_var, NULL); + + slot = (clause_to_var *) htab_find_slot (htab_clause_to_var, &tmp, INSERT); + + if (!*slot) + { + (*slot) = GGC_CNEW (struct clause_to_var_struct); + (*slot)->clause = clause; + } + + return &((*slot)->var); +} + +/* Structure holding pointers to all relevant basic blocks for + streamization within a omp_region. */ +typedef struct regions_streamization_info_struct +{ + /* The task pragma statement. */ + struct omp_region *task_region; + + /* omp pragma statement */ + gimple stmt; + + /* Same as OMP region */ + basic_block entry_bb; + basic_block exit_bb; + + /* Init, Clean and Final BBS. + * init_bb = init vars, views allocate and connect + * TODO clean_bb = currently not in use. + * final_bb = GOMP_stream_task_exit */ + basic_block init_bb; + basic_block clean_bb; + basic_block final_bb; + + /* To insert GOMP_wait_until_connected */ + basic_block init_latch_bb; + + /* BBS for synchronization loop */ + basic_block outer_head; + basic_block outer_check; + basic_block outer_first; + basic_block outer_latch; + basic_block outer_last; + basic_block outer_exit; + + /* BBS representing the non generated task body. 
*/ + basic_block sese_entry; + basic_block sese_exit; + + /* VAR_DECLs to be used for condition */ + tree outer_loop_cond; + tree inner_loop_cond; + + /* VAR_DECL to be used to define the runtime task. + * Should exist a single one per region, and thats why it is defined in this + * data structure. */ + tree task_decl; + +} *regions_streamization_info; + +/* Helper function to return regions_streamization_info_struct for a + * region + * Allocates and initializes it if called for the first time for a specific + * region. */ +static regions_streamization_info +get_streamisation_info_for_region (struct omp_region *region) +{ + if(!region->streamization_info) + { + regions_streamization_info task = GGC_CNEW (struct regions_streamization_info_struct); + region->streamization_info = task; + + task->task_region = region; + task->entry_bb = region->entry; + task->exit_bb = region->exit; + task->stmt = last_stmt (region->entry); + + task->task_decl = NULL_TREE; + } + + return (regions_streamization_info) region->streamization_info; +} + +/* Enum to identify type of variable in clause */ +enum stream_access_type { + STREAM_ACCESS_TYPE_DIRECT = 0, /* sinple type variable */ + STREAM_ACCESS_TYPE_REFERENCE, /* unsized variable, is passed as a pointer */ + STREAM_ACCESS_TYPE_ARRAY /* sized array, will be copied to the buffer */ +}; + +/* This structure maps a stream to the variable it privatizes. */ +typedef struct stream_struct +{ + /* The variable this stream privatizes. */ + tree var; + + /* Store the actual read/write variables. */ + htab_t *views; + + /* Counters for number of views and type */ + int nr_consumers; + int nr_producers; + + /* The stream decl */ + tree stream; + + /* Buffer related tree nodes. 
*/ + tree buffer_type; + tree buffer_mask; + + /* Set to access var by ref */ + enum stream_access_type access_type; + + tree element_type; + + /* Used for traversals */ + bool visit; + +} *stream; + +#define TYPE_SIZE_HAS_INT(TYPE) \ + TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TYPE)) + +/* Map all streamized variables to their respective streams. */ +htab_t htab_stream; + +/* Compute a hash function for stream. */ +static hashval_t +hash_stream (const void *elt) +{ + return htab_hash_pointer (((const struct stream_struct *) elt)->var); +} + +/* Compares stream elements E1 and E2. */ +static int +eq_stream (const void *e1, const void *e2) +{ + const struct stream_struct *elt1 = (const struct stream_struct *) e1; + const struct stream_struct *elt2 = (const struct stream_struct *) e2; + + return elt1->var == elt2->var; +} + +/* TODO: Remove prototype by moving code */ +struct omp_context; +static bool +use_pointer_for_field (tree, struct omp_context *); + +/* Find the STREAM mapping for VAR in HTAB_STREAM. With INSERT, create it + if missing; otherwise return NULL when VAR has no stream. */ +#define lookup_stream(VAR) \ + _lookup_stream (VAR, true) +static stream +_lookup_stream (tree var, bool insert) +{ + struct stream_struct **slot, tmp; + tmp.var = var; + + if (!htab_stream) + htab_stream = htab_create_ggc (10, hash_stream, + eq_stream, NULL); + + if(insert) + slot = (stream *) htab_find_slot (htab_stream, &tmp, INSERT); + else + { + slot = (stream *) htab_find_slot (htab_stream, &tmp, NO_INSERT); + if(slot != NULL) + return (*slot); + else + return NULL; + } + + if (!*slot) + { + (*slot) = GGC_CNEW (struct stream_struct); + (*slot)->var = var; + (*slot)->stream = create_tmp_var_raw (ptr_type_node, "gomp_stream"); + + (*slot)->element_type = TREE_TYPE (var); + (*slot)->buffer_type = build_pointer_type ((*slot)->element_type); + + (*slot)->nr_producers = 0; + (*slot)->nr_consumers = 0; + (*slot)->views = (htab_t *) xmalloc (sizeof(htab_t)); + *(*slot)->views = NULL; + + /* We need to ensure the type we use is consistent. 
*/ +#define HORIZON 4096 + (*slot)->buffer_mask = build_int_cst (long_long_unsigned_type_node, HORIZON - 1); + } + + return (*slot); +} + +/* Structure representing a VIEW */ +typedef struct view_struct +{ + /* Pointer to associate view to any type of object. + * Currently only associated with omp_region. */ + void *pointer; + + /* Stream to which this view connects to */ + struct stream_struct *stream; + + /* VAR_DECL that this view is associated to. */ + tree var_decl; + /* Clause that it represents */ + tree clause; + + /* VAR_DECLS created for code generation */ + tree view; + tree index; + tree sync_index; + + tree data; + tree buffer_pointer; + + /* Decl that defines the view var. + * In case old syntax view_var is equal to stream->var */ + tree view_var; + tree burst; + tree view_horizon; + + /* Flag type of view (read / write) */ + bool is_producer; + + /* Traversal flag */ + bool visit; + + /* TODO: Remove this after PLDI deadline. I mean solve associated bug. ;-) */ + bool expanded_push_pop; + + /* Pointer used for traversals */ + void *info; +} *view; + +/* Hash fnction for VIEW */ +static hashval_t +hash_view (const void *elt) +{ + return htab_hash_pointer (((const struct view_struct *) elt)->pointer); +} + +/* Compares VIEW elements E1 and E2. 
*/ +static int +eq_view (const void *e1, const void *e2) +{ + const struct view_struct *elt1 = (const struct view_struct *) e1; + const struct view_struct *elt2 = (const struct view_struct *) e2; + + return elt1->pointer == elt2->pointer; +} + +/* Allocate a zeroed view keyed by POINTER. */ +static view +alloc_view (void *pointer) +{ + view ret = GGC_CNEW (struct view_struct); + + ret->pointer = pointer; + return ret; +} + +#define GET_VIEW_FOR_STREAM_WITH_POINTER(STREAM, POINTER) \ + _lookup_view (POINTER, (STREAM)->views, false) + +#define lookup_view(POINTER, HASH) \ + _lookup_view (POINTER, HASH, true) + +#define maybe_lookup_view(POINTER, HASH) \ + _lookup_view (POINTER, HASH, false) + +/* Find the VIEW for POINTER in *HASH; when !INSERT, NULL if absent. */ +static view +_lookup_view (void *pointer, htab_t *hash, bool insert) +{ + struct view_struct **slot, tmp; + + if (!*hash) + *hash = htab_create_ggc (10, hash_view, + eq_view, NULL); + + tmp.pointer = pointer; + if(insert == true) + slot = (view *) htab_find_slot (*hash, &tmp, INSERT); + else + slot = (view *) htab_find_slot (*hash, &tmp, NO_INSERT); + + if(insert == false && slot == NULL) + return NULL; + + if (!*slot) + (*slot) = alloc_view(pointer); + + return (*slot); +} + +/* Traverse the VIEWs of REGION, if any. Expands to an if statement. */ +#define TRAVERSE_VIEWS_IN_REGION(REGION, CALLBACK, DATA) \ + if ((REGION)->view_pointers != NULL) \ + htab_traverse ((REGION)->view_pointers, CALLBACK, (void *) (DATA)); + +/* Helper function to clear visit boolean in all stream and view structures. */ +static int +clear_visit_on_views_and_streams (void **slot, void *data ATTRIBUTE_UNUSED) +{ + view v = (view) *slot; + + v->visit = false; + v->stream->visit = false; + return 1; +} + +/* Traversal function to visit VIEW structures related to REGION and child + * regions. 
*/ +static void +traverse_views_in_region_and_subs (struct omp_region *region, htab_trav cb, void *data, bool visit_neighbour) +{ + if(region->inner != NULL) + traverse_views_in_region_and_subs (region->inner, cb, data, true); + + if(visit_neighbour == true && region->next != NULL) + { + traverse_views_in_region_and_subs (region->next, cb, data, true); + } + + if(region->view_pointers != NULL) + htab_traverse (region->view_pointers, cb, data); +} + +/* Macro clear visit and traverse VIEW structures related to REGION and child + * regions. */ +#define TRAVERSE_VIEWS_IN_REGION_AND_SUBS(REGION, CALLBACK, DATA) \ + traverse_views_in_region_and_subs (REGION, clear_visit_on_views_and_streams, NULL, false); \ + traverse_views_in_region_and_subs (REGION, CALLBACK, (void *) DATA, false); + +/* Find VIEW if view relates to a REGION */ +static view +lookup_view_in_region (struct omp_region *region, view v) +{ + void **slot; + void *ptr = (void *) v; + + if (!region->view_pointers) + region->view_pointers = htab_create_ggc (10, htab_hash_pointer, htab_eq_pointer, NULL); + + slot = (void **) htab_find_slot (region->view_pointers, &v, INSERT); + gcc_assert (slot); + + if(!*slot) + { + (*slot) = GGC_CNEW (void *); + (*slot) = ptr; + } + + return (view) (*slot); +} + +/* Location types for statement re-ordering when generating code for + streamization during OMP expansion. */ +enum stmt_location +{ + SL_NONE, + SL_INIT, + SL_COPY_IN, + SL_COPY_OUT, + SL_PUSH, + SL_HEAD, + SL_DEREF, + SL_POP, + SL_DONT_MOVE +}; + +/* This structure maps a STMT to the LOCation it needs to be moved + during streamization. */ + +typedef struct stmt_loc +{ + /* The statement. */ + gimple stmt; + + /* The location. */ + enum stmt_location loc; +} *stmt_loc; + +/* Map the statements generated for streamization to their respective + locations. */ + +htab_t htab_stmt_loc; + +/* Compute a hash function for STMT_LOC. 
*/ + +static hashval_t +hash_stmt_loc (const void *elt) +{ + return htab_hash_pointer (((const struct stmt_loc *) elt)->stmt); +} + +/* Compares STMT_LOC elements E1 and E2. */ + +static int +eq_stmt_loc (const void *e1, const void *e2) +{ + const struct stmt_loc *elt1 = (const struct stmt_loc *) e1; + const struct stmt_loc *elt2 = (const struct stmt_loc *) e2; + + return elt1->stmt == elt2->stmt; +} + + +/* Returns the STMT_LOCATION corresponding to STMT. If it has not yet + been set, returns SL_NONE. */ + +static enum stmt_location +get_stmt_loc (gimple stmt) +{ + struct stmt_loc **slot, tmp; + + tmp.stmt = stmt; + slot = (stmt_loc *) htab_find_slot (htab_stmt_loc, &tmp, NO_INSERT); + + if (!slot || !*slot) + return SL_NONE; + + return (*slot)->loc; +} + +/* Find a STMT_LOC mapping for STMT or create one if missing in + HTAB_STMT_LOC. */ + +static stmt_loc +lookup_stmt_loc (gimple stmt) +{ + struct stmt_loc **slot, tmp; + + tmp.stmt = stmt; + slot = (stmt_loc *) htab_find_slot (htab_stmt_loc, &tmp, INSERT); + gcc_assert (slot); + + if (!*slot) + { + (*slot) = GGC_CNEW (struct stmt_loc); + (*slot)->stmt = stmt; + (*slot)->loc = SL_NONE; + } + + return (*slot); +} + +/* Set the STMT_LOCATION of STMT to LOC. If no location information + is found, install it in the map. */ + +static void +set_stmt_loc (gimple stmt, enum stmt_location loc) +{ + stmt_loc sl = lookup_stmt_loc (stmt); + sl->loc = loc; +} + +/* Same as SET_STMT_LOC but do not modify the location if it is set + (!= SL_NONE). */ + +static void +set_stmt_loc_if_none (gimple stmt, enum stmt_location loc) +{ + stmt_loc sl = lookup_stmt_loc (stmt); + if (sl->loc == SL_NONE) + sl->loc = loc; +} + +/* This function will ensure that the outer context of the task is + ready for streamization (in particular that all the placeholder + blocks are in place for the code to be generated). + + We need to ensure that we get, for stream creation and + initialization, a BB that is in a OMP SINGLE region. 
The original + pattern is: + + : + ... + #pragma omp single + + : + if (__builtin_GOMP_single_start () == 1) + goto ; + else + goto ; + + : + ... + + : + ... + #pragma omp return +*/ + +static void +prepare_outer_context_for_streaming (regions_streamization_info task) +{ + gimple_stmt_iterator src, dst; + struct omp_region *outer = task->task_region->outer; + regions_streamization_info outer_task; + + /* Restrict streamization to tasks within an OMP_SINGLE + region. For now ... */ + gcc_assert + (outer && (gimple_code (last_stmt (outer->entry)) == GIMPLE_OMP_SINGLE)); + + outer_task = get_streamisation_info_for_region (outer); + + if (!outer_task->init_bb) + { + /* FIXME-stream: check this and adjust w/o MN. */ + edge e = EDGE_SUCC (single_succ (outer->entry), 0); + outer_task->init_bb = split_edge (e); + } + + /* As the tasks will only be created once, we need to hoist + the task creation calls as well as all the necessary + sender-side copy-in statements before the start of the + SINGLE body. This obviously means that it is + unacceptable to have sharing clauses on streamized tasks + that bear on variables with reaching definitions within + the OMP SINGLE region. Only statements tagged SL_COPY_IN + are moved; they end up at the end of the outer INIT_BB. */ + src = gsi_start_bb (task->entry_bb); + dst = gsi_last_bb (outer_task->init_bb); + while (!gsi_end_p (src)) + if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN) + gsi_move_after (&src, &dst); + else + gsi_next (&src); +} + +/* This function organizes the code and placeholders within the task's + body in preparation for the expansion. It records in TASK the blocks + it creates: init_bb, outer_head, outer_check, outer_first, sese_entry, + sese_exit, outer_last, outer_latch, outer_exit and final_bb, and + creates the outer_loop_cond / inner_loop_cond variables. */ +static void +prepare_inner_context_for_streaming (regions_streamization_info task) +{ + gimple_stmt_iterator gsi, src, dst; + gimple stmt; + edge e; + + /* + This code reorganizes the CFG to the extreme case of streamization expansion. + Further passes and GCC optimizations will reduce its complexity + if it was too extreme for the expansion needed. + + From the original structure: + + : + ... + #pragma omp task ... + + : + ... + + : + ... 
+ #pragma omp return + + We re-arrange the CFG to look and support the following code: + + : + ... + #pragma omp task ... + + : + // copy-in of stream pointers and FIRSTPRIVATE/SHARED variables + // Initialize all the views + index = 0; + + : + sync_index = index + burst_size; + update_index = update (sync_index); + stall(sync_index); + + // OUTER_CHECK_LOOP + if (update_index != sync_index) + goto OUTER_LAST_BB; + + + // Setup data from input clauses + + + + + + + + + // Setup data from output clauses + + + index += 1; // += burst; + goto INNER_HEAD_BB; // REMOVE + + + commit (index) + release (index) + + + goto OUTER_HEAD_BB; + // if (update_index == sync_index) + // goto OUTER_HEAD_BB; + + : + // Call task exit + + : + #pragma omp return + */ + /* Collect info of the processing region and disconnect from CFG: + BODY_ENTER_BB is the successor of ENTRY_BB and BODY_EXIT_BB is what + remains of EXIT_BB once the GIMPLE_OMP_RETURN has been split off. */ + basic_block body_enter_bb = single_succ (task->entry_bb); + basic_block body_exit_bb; + + gsi = gsi_last_bb (task->exit_bb); + gcc_assert (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + + gsi_prev(&gsi); + stmt = (gsi_end_p (gsi)) ? 
NULL : gsi_stmt (gsi); + + e = split_block (task->exit_bb, stmt); + body_exit_bb = e->src; + task->exit_bb = e->dest; + remove_edge (e); + + /* Create all the BBs needed for the CFG change. Each block is split off + the single successor edge of the previously created one. */ + task->init_bb = split_edge (single_succ_edge (task->entry_bb)); + + task->outer_head = split_edge (single_succ_edge (task->init_bb)); + + task->outer_check = split_edge (single_succ_edge (task->outer_head)); + task->outer_first = split_edge (single_succ_edge (task->outer_check)); + + task->sese_entry = split_edge (single_succ_edge (task->outer_first)); + task->sese_exit = split_edge (single_succ_edge (task->sese_entry)); + + task->outer_last = split_edge (single_succ_edge (task->sese_exit)); + task->outer_latch = split_edge (single_succ_edge (task->outer_last)); + + task->outer_exit = split_edge (single_succ_edge (task->outer_latch)); + + /* Separate the return call from any other statements in the + EXIT_BB as well as from labels. */ + gsi = gsi_last_bb (task->exit_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gsi_prev (&gsi); + stmt = (gsi_end_p (gsi)) ? 
NULL : gsi_stmt (gsi); + e = split_block (task->exit_bb, stmt); + task->exit_bb = e->dest; + task->final_bb = e->src; + + /* Loopify both regions: the latch branches back to OUTER_HEAD and + OUTER_EXIT leads to FINAL_BB. */ + redirect_edge_and_branch (single_succ_edge(task->outer_latch), task->outer_head); + redirect_edge_and_branch (single_succ_edge(task->outer_exit), task->final_bb); + + /* Create condition for both loop headers */ + task->outer_loop_cond = create_tmp_var (boolean_type_node, "outer_loop_cond"); + task->inner_loop_cond = create_tmp_var (boolean_type_node, "inner_loop_cond"); + + gsi = gsi_start_bb (task->init_bb); + stmt = gimple_build_assign (task->outer_loop_cond, boolean_false_node); + gsi_insert_before (&gsi, stmt, GSI_CONTINUE_LINKING); + + /* Update edges for outer loop check and add condition: OUTER_CHECK + goes to OUTER_FIRST when OUTER_LOOP_COND is true, else to OUTER_EXIT. */ + remove_edge (single_succ_edge (task->outer_check)); + make_edge (task->outer_check, task->outer_first, + EDGE_TRUE_VALUE); + make_edge (task->outer_check, task->outer_exit, EDGE_FALSE_VALUE); + + stmt = gimple_build_cond (EQ_EXPR, task->outer_loop_cond, boolean_true_node, + NULL_TREE, NULL_TREE); + gsi = gsi_last_bb (task->outer_check); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + set_immediate_dominator (CDI_DOMINATORS, + task->outer_exit, task->outer_check); + + /* Move stream initialization (statements tagged SL_COPY_IN) to init BB */ + src = gsi_start_bb (body_enter_bb); + dst = gsi_last_bb (task->init_bb); + while (!gsi_end_p (src)) + if (get_stmt_loc (gsi_stmt (src)) == SL_COPY_IN) + gsi_move_after (&src, &dst); + else + gsi_next (&src); + + /* Correct edges to body part: the body now sits between SESE_ENTRY and + SESE_EXIT. */ + remove_edge (single_succ_edge (task->sese_entry)); + make_edge (task->sese_entry, body_enter_bb, EDGE_FALLTHRU); + make_edge (body_exit_bb, task->sese_exit, EDGE_FALLTHRU); +} +/* Append to ILIST the statements computing + ASSIGN_NODE = DATA_DECL + (INDEX & HORIZON_MASK) * TYPE_SIZE_HAS_INT (ELEM_TYPE). + The masking is skipped when HORIZON_MASK is NULL_TREE and the offset is + 0 when INDEX is NULL_TREE. + NOTE(review): a NULL_TREE INDEX together with a non-NULL HORIZON_MASK + would build a BIT_AND_EXPR on a NULL operand. */ +static void +compute_data_position (tree assign_node, tree data_decl, tree index, tree elem_type, tree horizon_mask, gimple_seq *ilist) +{ + gimple stmt; + tree bounded_index, final_index; + + if (index != NULL_TREE) + index = fold_convert (long_long_unsigned_type_node, index); + + /* 
Find absolute index */ + if(horizon_mask != NULL_TREE) + { + bounded_index = create_tmp_var (long_long_unsigned_type_node, "bounded_index"); + stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, bounded_index, index, horizon_mask); + gimple_seq_add_stmt (ilist, stmt); + } + else + bounded_index = index; + + if(index != NULL_TREE) + { + /* Scale the index by the element size */ + final_index = create_tmp_var (long_long_unsigned_type_node, "final_index"); + stmt = gimple_build_assign_with_ops (MULT_EXPR, final_index, bounded_index, + build_int_cst (long_long_unsigned_type_node, TYPE_SIZE_HAS_INT (elem_type))); + gimple_seq_add_stmt (ilist, stmt); + } + else + final_index = build_int_cst (long_long_unsigned_type_node, 0); + + /* Compute the final address into ASSIGN_NODE. + NOTE(review): the offset is built with long_long_unsigned_type_node; + confirm this is acceptable as a POINTER_PLUS_EXPR offset type. */ + stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, assign_node, data_decl, final_index); + gimple_seq_add_stmt (ilist, stmt); +} +/* Return true when NODE_A and NODE_B are the same node, or when they have + the same tree code, at least one operand, the same TREE_TYPE and + operands that are pairwise the same expression (checked recursively). + Operand-less nodes therefore only match by identity. */ +static bool +is_same_expression (tree node_a, tree node_b) +{ + if(node_a == node_b) + return true; + else if(TREE_CODE(node_a) == TREE_CODE(node_b) && TREE_OPERAND_LENGTH (node_a) > 0) + { + bool result = true; + int i; + + if(TREE_TYPE (node_a) != TREE_TYPE (node_b)) + return false; + + for(i = 0; i < TREE_OPERAND_LENGTH (node_a); i++) + { + result = is_same_expression (TREE_OPERAND (node_a, i), TREE_OPERAND (node_b, i)); + + if(result == false) + return false; + } + return true; + } + return false; +} +/* State shared with convert_view_var_array through the INFO field of the + view being expanded. convert_view_var_array appends to ILIST and OLIST + and sets REMOVE_STMT when the walked statement has been replaced by a + memcpy call. ASSIGN_TO_VAR_IF_HAS_PARENT asks an enclosing INDIRECT_REF + or COMPONENT_REF to copy the rewritten operand into a temporary. */ +struct convert_view_var_array_struct { + gimple_seq ilist; + gimple_seq olist; + bool remove_stmt; + bool assign_to_var_if_has_parent; +}; +/* Operand callback: DATA is a walk_stmt_info whose INFO field is the view + being expanded. Replaces references to the view's VIEW_VAR in *TP by + accesses to the stream buffer, recursing by hand through ARRAY_REF, + ADDR_EXPR, INDIRECT_REF and COMPONENT_REF. WI->changed is set when a + substitution took place. Always returns NULL_TREE. */ +static tree +convert_view_var_array (tree *tp, int *walk_subtrees, + void *data) +{ + tree t = *tp; + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + + view v = (view) wi->info; + struct convert_view_var_array_struct *info = (struct convert_view_var_array_struct *) v->info; + + tree index; + bool convert = false; + bool to_remove = false; + tree buffer_pointer; + gimple stmt; + + /* If current_element 
matches the view_var */ + if(is_same_expression (t, v->view_var)) + { + index = build_int_cst (long_long_unsigned_type_node, 0); + convert = true; + } + /* In case a view_var is inside an ARRAY_REF */ + else if(TREE_CODE(t) == ARRAY_REF) + { + t = TREE_OPERAND (t, 0); + + wi->changed = false; + convert_view_var_array (&t, walk_subtrees, data); + if(wi->changed) + { + if(TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + TREE_OPERAND (*tp, 0) = t; + else + { + tree local_index = TREE_OPERAND (*tp, 1); + + tree tmp = create_tmp_var (build_pointer_type (TREE_TYPE (t)), "tmp_element1"); + compute_data_position (tmp, build_fold_addr_expr (t), local_index, TREE_TYPE (t), NULL_TREE, &info->ilist); + + *tp = build_fold_indirect_ref (tmp); + } + + info->assign_to_var_if_has_parent = true; + } + } + /* In case view_var is inside an ADDR_EXPR */ + else if(TREE_CODE (t) == ADDR_EXPR) + { + t = TREE_OPERAND (t, 0); + + wi->changed = false; + convert_view_var_array (&t, walk_subtrees, data); + + if(wi->changed) + { + if(TREE_CODE (t) == ARRAY_REF) + { + tree tmp; + tree local_index = TREE_OPERAND (t, 1); + tree array = TREE_OPERAND (t, 0); + tree array_pointer = build_fold_addr_expr (array); + + tree array_pointer_cast = create_tmp_var (TREE_TYPE (*tp), "cast_array"); + stmt = gimple_build_assign_with_ops (NOP_EXPR, array_pointer_cast, array_pointer, NULL_TREE); + gimple_seq_add_stmt (&info->ilist, stmt); + + tmp = create_tmp_var (TREE_TYPE (*tp), "tmp_element"); + compute_data_position (tmp, array_pointer_cast, local_index, TREE_TYPE (TREE_TYPE (array)), NULL_TREE, &info->ilist); + t = tmp; + } + else + t = build_fold_addr_expr (t); + + *tp = t; + } + } + /* In case view_var is inside an INDIRECT_REF */ + else if(TREE_CODE(t) == INDIRECT_REF) + { + t = TREE_OPERAND (t, 0); + + wi->changed = false; + convert_view_var_array (&t, walk_subtrees, data); + + if(wi->changed) + { + if(info->assign_to_var_if_has_parent == true) + { + tree tmp; + t = build_fold_indirect_ref (t); + + tmp = 
create_tmp_var (TREE_TYPE (t), "var_copy"); + stmt = gimple_build_assign (tmp, t); + gimple_seq_add_stmt (&info->ilist, stmt); + + t = tmp; + info->assign_to_var_if_has_parent = false; + } + + *tp = t; + } + } + /* In case view_var is inside a COMPONENT_REF (->) */ + else if(TREE_CODE(t) == COMPONENT_REF) + { + t = TREE_OPERAND (*tp, 0); + + wi->changed = false; + convert_view_var_array (&t, walk_subtrees, data); + + if(wi->changed) + { + if(info->assign_to_var_if_has_parent == true) + { + tree tmp = create_tmp_var (TREE_TYPE (t), "var_copy"); + stmt = gimple_build_assign (tmp, t); + gimple_seq_add_stmt (&info->ilist, stmt); + + t = tmp; + info->assign_to_var_if_has_parent = false; + } + + TREE_OPERAND (*tp, 0) = t; + } + } + + if(convert == true) + { + /* Compute right buffer position for index */ + buffer_pointer = create_tmp_var (v->stream->buffer_type, "data_pointer_elem"); + compute_data_position (buffer_pointer, v->buffer_pointer, index, v->stream->element_type, NULL_TREE, &info->ilist); + + /* Perform memcpy + TODO: Understand why the assignments between the buffers of type ARRAY + does not implicitly create a memcpy. A real memcpy in code, between + ARRAYs is parsed to it. + The call is queued on OLIST and the original statement is flagged + for removal. */ + if(TREE_CODE (TREE_TYPE (*tp)) == ARRAY_TYPE && wi->is_lhs == true) + { + tree fn; + tree rhs = gimple_assign_rhs_to_tree (gsi_stmt (wi->gsi)); + + if(TREE_CODE (TREE_TYPE (rhs)) == ARRAY_TYPE) + rhs = build_fold_addr_expr (rhs); + + fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMCPY]); + stmt = gimple_build_call (fn, 3, buffer_pointer, rhs, + build_int_cst (size_type_node, TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (t))))); + + gimple_seq_add_stmt (&info->olist, stmt); + + info->remove_stmt = true; + to_remove = true; + } + else + { + /* Do a direct substitution betwen the view_var and the buffer element. + * If view_var is a simple variable this code is not executed. 
*/ + *tp = build_fold_indirect_ref (buffer_pointer); + } + + *walk_subtrees = false; + wi->changed = true; + } + + return NULL_TREE; +} + +typedef struct pointer_list { + void *pointer; + struct pointer_list *next; +} pointer_list_t, *pointer_list_p; + + +static void list_bbs_in_sese (edge a, edge b, pointer_list_p *list) +{ + pointer_list_p tmp; + basic_block bb_a = a->dest; + edge_iterator eit; + edge e; + pointer_list_p new_p; + + if(a == b) + return; + + new_p = (pointer_list_p) xmalloc(sizeof(pointer_list_t)); + new_p->pointer = (void *) bb_a; + new_p->next = NULL; + + if(*list == NULL) + *list = new_p; + else + { + tmp = *list; + while (tmp->next != NULL) + { + if(tmp->pointer == bb_a) + return; + + tmp = tmp->next; + } + + tmp->next = new_p; + } + + FOR_EACH_EDGE (e, eit, bb_a->succs) + list_bbs_in_sese (e, b, list); +} + +/* TODO: Redefine this to something better */ +#define FOR_ALL_BB_IN_SESE(LIST, EDGE_A, EDGE_B) \ + list_bbs_in_sese(EDGE_A, EDGE_B, &LIST); \ + for(; LIST != NULL; LIST = LIST->next) + +static int +convert_view_var_array_traverser (view v) +{ + basic_block bb; + struct omp_region *region = (struct omp_region *) v->pointer; + regions_streamization_info task; + pointer_list_p list = NULL; + + if(v->view_var == NULL_TREE) + return 1; + + task = get_streamisation_info_for_region (region); + + FOR_ALL_BB_IN_SESE (list, single_succ_edge (task->entry_bb), single_succ_edge (task->exit_bb)) + { + gimple_stmt_iterator gsi; + struct walk_stmt_info wi; + + bb = (basic_block) list->pointer; + + memset (&wi, 0, sizeof (wi)); + wi.info = v; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + struct convert_view_var_array_struct info = { NULL, NULL, false, false}; + + v->info = &info; + + if(gimple_code (gsi_stmt (gsi)) == GIMPLE_CALL && + (TREE_OPERAND (gimple_call_fn (gsi_stmt (gsi)), 0) == built_in_decls[BUILT_IN_GOMP_STREAM_PUSH_STUB] || + TREE_OPERAND (gimple_call_fn (gsi_stmt (gsi)), 0) == 
built_in_decls[BUILT_IN_GOMP_STREAM_PUSH_STUB])) + continue; + + walk_gimple_stmt (&gsi, NULL, convert_view_var_array, &wi); + gsi_insert_seq_before (&gsi, info.ilist, GSI_SAME_STMT); + gsi_insert_seq_after (&gsi, info.olist, GSI_SAME_STMT); + + if(info.remove_stmt == true) + gsi_remove(&gsi, true); + + } + } + return 1; +} + +/* Structure to be passed in second parameter of + * expand_view_traverser_callback. */ +struct expand_view_traverser_callback_data { + /* Synchorinization stmt_lists */ + gimple_seq stall_stmts; + gimple_seq update_stmts; + gimple_seq commit_stmts; + gimple_seq release_stmts; + /* Data access stmt_lists */ + gimple_seq read_stmts; + gimple_seq write_stmts; + /* Condition stmt_lists */ + gimple_seq inner_cond_stmt; + gimple_seq outer_cond_stmt; + /* VAR_DECLs already define for conditions in persistent task. */ + tree outer_loop_cond; + tree inner_loop_cond; +}; + +#define CREATE_SYNC_DEFAULT_DATA \ + { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL_TREE, NULL_TREE }; + +/* Create all the necessary callbacks and data accesses for the view. + * It adds the buitins to several clean stmt_list passed in second parameter. + * The caller might decide to use or discard to add these stmt lists to the code + * or even where to add it. */ +static int +expand_view_traverser_callback (void **slot, void *data) +{ + tree update_result, fn; + view v = (view) *slot; + struct expand_view_traverser_callback_data *sdata = (struct expand_view_traverser_callback_data *) data; + gimple stmt; + gimple_seq *current_list; + tree var = v->var_decl; + + current_list = (v->is_producer) ? &sdata->stall_stmts : &sdata->update_stmts; + + /* Creation of UPDATE or STALL call depending type of view */ + fn = (v->is_producer) ? 
built_in_decls[BUILT_IN_GOMP_STREAM_STALL] : built_in_decls[BUILT_IN_GOMP_STREAM_UPDATE]; + stmt = gimple_build_call (fn, 2, v->view, v->sync_index); + + if(!v->is_producer) + { + update_result = create_tmp_var (long_long_unsigned_type_node, "update_index"); + gimple_call_set_lhs (stmt, update_result); + } + gimple_seq_add_stmt (current_list, stmt); + + v->buffer_pointer = create_tmp_var (v->stream->buffer_type, "data_pointer_mask"); + compute_data_position (v->buffer_pointer, v->data, v->index, v->stream->element_type, v->stream->buffer_mask, current_list); + /* NOTE(review): VAR is assigned here but not read again in this + function. */ + if(v->clause != NULL_TREE) + var = *lookup_var_for_clause (v->clause); + + /* Create condition for both the burst and cycle loops. For a consumer + view, AND into OUTER_LOOP_COND whether the update call returned the + requested SYNC_INDEX. */ + if(!v->is_producer) + { + if(sdata->outer_loop_cond != NULL_TREE) + { + tree tmp_var = create_tmp_var (boolean_type_node, "cond"); + + stmt = gimple_build_assign_with_ops (EQ_EXPR, + tmp_var, update_result, v->sync_index); + gimple_seq_add_stmt (&sdata->outer_cond_stmt, stmt); + + stmt = gimple_build_assign_with_ops (TRUTH_AND_EXPR, + sdata->outer_loop_cond, sdata->outer_loop_cond, tmp_var); + gimple_seq_add_stmt (&sdata->outer_cond_stmt, stmt); + } + } + + /* Convert all the view variable by the buffer. + * Kind of OCC expansion. Array views are rewritten in place in the task + * body; other views get an explicit copy to / from the buffer. */ + if(TREE_CODE (TREE_TYPE (v->view_var)) == ARRAY_TYPE) + { + convert_view_var_array_traverser(v); + } + else + { + tree buffer_pointer = create_tmp_var (v->stream->buffer_type, "data_pointer_elem"); + tree view_var = (is_gimple_reg (v->view_var)) ? + v->view_var : create_tmp_var (TREE_TYPE (v->view_var), "reg_var"); + + current_list = (v->is_producer) ? 
&sdata->write_stmts : &sdata->read_stmts; + compute_data_position (buffer_pointer, v->buffer_pointer, NULL_TREE, v->stream->element_type, NULL_TREE, current_list); + /* NOTE(review): v->view_var has already been dereferenced by TREE_TYPE + above, so this assert comes too late to guard that use. */ + gcc_assert(v->view_var); + + if(v->is_producer) + { + if (!is_gimple_reg (v->view_var)) + { + stmt = gimple_build_assign (view_var, v->view_var); + gimple_seq_add_stmt (current_list, stmt); + } + stmt = gimple_build_assign (build_fold_indirect_ref (buffer_pointer), view_var); + gimple_seq_add_stmt (current_list, stmt); + } + else + { + stmt = gimple_build_assign (view_var, build_fold_indirect_ref (buffer_pointer)); + gimple_seq_add_stmt (current_list, stmt); + if (!is_gimple_reg (v->view_var)) + { + stmt = gimple_build_assign (v->view_var, view_var); + gimple_seq_add_stmt (current_list, stmt); + } + } + } + + current_list = (v->is_producer) ? &sdata->commit_stmts : &sdata->release_stmts; + + /* Increment view index by the burst */ + stmt = gimple_build_assign_with_ops (PLUS_EXPR, v->index, v->index, v->burst); + gimple_seq_add_stmt (current_list, stmt); + set_stmt_loc (stmt, SL_DEREF); + + /* Perform either commit or release depending on the type of view */ + fn = (v->is_producer) ? 
built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT] : built_in_decls[BUILT_IN_GOMP_STREAM_RELEASE]; + stmt = gimple_build_call (fn, 2, v->view, v->index); + gimple_seq_add_stmt (current_list, stmt); + + /* The sync index is advanced after the commit / release call. */ + stmt = gimple_build_assign_with_ops (PLUS_EXPR, v->sync_index, v->sync_index, v->burst); + gimple_seq_add_stmt (current_list, stmt); + + return 1; +} + +/* Traverser callback to perform: + * 1st - view creation + * 2nd - task creation (in case still not declared) + * 3rd - view connection + * 4th - dereference buffer pointer from the stream + * *SLOT is the view; DATA is unused. Always returns 1. + * */ +static int +register_views_traverser_callback (void **slot, void *data ATTRIBUTE_UNUSED) +{ + view v = (view) *slot; + struct omp_region *region = (struct omp_region *) v->pointer; + tree fn, type, tmp_var; + gimple stmt; + gimple_stmt_iterator gsi_entry, gsi_init, gsi_final; + edge e; + regions_streamization_info task = get_streamisation_info_for_region (region); + + /* Create streamization BB if it doesn't exist */ + if (!task->init_bb) + { + e = EDGE_SUCC (single_succ (region->entry), 0); + task->init_bb = split_edge (e); + } + + gsi_entry = gsi_start_bb (task->init_bb); + gsi_init = gsi_last_bb (task->init_bb); + gsi_final = gsi_start_bb (task->final_bb); + + /* Create GOMP_stream_create_(write/read)_view call */ + if(v->is_producer) + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW]; + else + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW]; + + stmt = gimple_build_call (fn, 0); + gimple_call_set_lhs (stmt, v->view); + gsi_insert_after (&gsi_init, stmt, GSI_NEW_STMT); + + /* If still not created, create task decl for the present OMP region. + The matching task exit call goes to FINAL_BB. */ + if(task->task_decl == NULL_TREE) + { + task->task_decl = create_tmp_var (build_pointer_type (void_type_node), "task"); + + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_TASK]; + stmt = gimple_build_call (fn, 0); + gimple_call_set_lhs (stmt, task->task_decl); + gsi_insert_after (&gsi_init, stmt, GSI_NEW_STMT); + + fn = 
built_in_decls[BUILT_IN_GOMP_STREAM_TASK_EXIT]; + stmt = gimple_build_call (fn, 1, task->task_decl); + gsi_insert_after (&gsi_final, stmt, GSI_NEW_STMT); + } + + /* Create connect view */ + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; + stmt = gimple_build_call (fn, 3, task->task_decl, v->stream->stream, v->view); + gsi_insert_after (&gsi_init, stmt, GSI_NEW_STMT); + + /* Initialize index (to 0) and sync_index (to the view horizon) vars */ + stmt = gimple_build_assign (v->index, build_int_cst (long_long_unsigned_type_node, 0)); + gsi_insert_before (&gsi_entry, stmt, GSI_SAME_STMT); + + stmt = gimple_build_assign_with_ops (NOP_EXPR, v->sync_index, v->view_horizon, NULL_TREE); + gsi_insert_before (&gsi_entry, stmt, GSI_SAME_STMT); + + /* Dereference the buffer from the stream decl and store it in v->data */ + type = build_pointer_type (v->stream->buffer_type); + tmp_var = create_tmp_var (type, "cast_stream"); + + stmt = gimple_build_assign (tmp_var, fold_convert (type, v->stream->stream)); + gsi_insert_after (&gsi_init, stmt, GSI_CONTINUE_LINKING); + + stmt = gimple_build_assign (v->data, build_fold_indirect_ref (tmp_var)); + gsi_insert_after (&gsi_init, stmt, GSI_CONTINUE_LINKING); + + return 1; +} + +/* Create call to GOMP_wait_until_connected for a specific omp_region. + * It creates a new BB to add this call to avoid having the task creation + * happening afterwards, which ends in deadlock. 
*/ +static void +create_wait_until_connected_call (struct omp_region *region) +{ + gimple stmt; + tree fn; + gimple_stmt_iterator gsi_init; + edge e; + regions_streamization_info task = get_streamisation_info_for_region (region); + + /* The call lives in its own block, split off the edge leaving INIT_BB. */ + if(!task->init_latch_bb) + { + e = single_succ_edge (task->init_bb); + task->init_latch_bb = split_edge (e); + } + + gsi_init = gsi_last_bb (task->init_latch_bb); + + fn = built_in_decls[BUILT_IN_GOMP_STREAM_WAIT_UNTIL_CONNECTED]; + stmt = gimple_build_call (fn, 1, task->task_decl); + gsi_insert_after (&gsi_init, stmt, GSI_NEW_STMT); +} + + +/* If this is a streamized task, move the task creation call to the + beginning of the enclosing OMP region, generate the stream creation + call and add a loop on end_of_stream around the body of the task + function. + A task is streamized when it carries an INPUT or OUTPUT clause. + On success REGION's entry and exit blocks are updated. + Return whether this task was streamized. +*/ +static bool +expand_task_streaming_extensions (struct omp_region *region) +{ + gimple_stmt_iterator gsi; + bool result = false; + regions_streamization_info task; + tree clauses; + gimple stmt; + + if (region->type != GIMPLE_OMP_TASK) + return false; + + clauses = gimple_omp_task_clauses (last_stmt (region->entry)); + + /* Case this is a streamization task */ + if (find_omp_clause (clauses, OMP_CLAUSE_INPUT) + || find_omp_clause (clauses, OMP_CLAUSE_OUTPUT)) + { + /* Initialize expand_view_traverser_callback_data with empty stmt_lists. */ + struct expand_view_traverser_callback_data win_data = CREATE_SYNC_DEFAULT_DATA; + + task = get_streamisation_info_for_region (region); + + prepare_outer_context_for_streaming (task); + prepare_inner_context_for_streaming (task); + + /* Traverse all the views for the regions and for each perform runtime + * view and task creation calls. */ + + TRAVERSE_VIEWS_IN_REGION (region, register_views_traverser_callback, task); + create_wait_until_connected_call (region); + + /* Setup parameters for view expansion. 
+ * Traverse all the views for the region and for each perform its + * calls to synchronization builtins, termination conditions and data + * access. */ + win_data.outer_loop_cond = task->outer_loop_cond; + win_data.inner_loop_cond = task->inner_loop_cond; + TRAVERSE_VIEWS_IN_REGION (region, expand_view_traverser_callback, &win_data); + + /* Insert stmt lists, returned from expand_view_traverser_callback + * view traversals, in the right basic blocks. */ + + /* - Synchronization calls: writes, commits and releases go to + * OUTER_LAST; stalls, updates and reads go to OUTER_HEAD. */ + gsi = gsi_last_bb (task->outer_last); + gsi_insert_seq_after (&gsi, win_data.write_stmts, GSI_CONTINUE_LINKING); + gsi_insert_seq_after (&gsi, win_data.commit_stmts, GSI_CONTINUE_LINKING); + gsi_insert_seq_after (&gsi, win_data.release_stmts, GSI_CONTINUE_LINKING); + gsi = gsi_start_bb (task->outer_head); + gsi_insert_seq_after (&gsi, win_data.stall_stmts, GSI_CONTINUE_LINKING); + gsi = gsi_last_bb (task->outer_head); + gsi_insert_seq_after (&gsi, win_data.update_stmts, GSI_CONTINUE_LINKING); + gsi_insert_seq_after (&gsi, win_data.read_stmts, GSI_CONTINUE_LINKING); + + /* - Condition stmts lists: OUTER_LOOP_COND is reset to true before the + * per-view conditions are ANDed into it. */ + gsi = gsi_last_bb (task->outer_head); + + stmt = gimple_build_assign (win_data.outer_loop_cond, boolean_true_node); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + gsi_insert_seq_after (&gsi, win_data.outer_cond_stmt, GSI_CONTINUE_LINKING); + + region->entry = task->entry_bb; + region->exit = task->exit_bb; + result = true; + } + + return result; +} /* Lowering of OpenMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -1396,6 +2794,36 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE); goto do_private; + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: + { + /* The variable itself should be made private in the task's + context. FIXME_stream: add varible-sized or ptr + cases. 
*/ + + tree *new_var; + stream vs; + decl = OMP_CLAUSE_DECL (c); + + vs = lookup_stream (decl); + + install_var_field (vs->stream, false, 3, ctx); + install_var_local (vs->stream, ctx); + + install_var_local (decl, ctx); + new_var = lookup_var_for_clause (c); + *new_var = lookup_decl (decl, ctx); + + if(OMP_CLAUSE_VIEW_ID (c) != NULL) + { + install_var_local (OMP_CLAUSE_VIEW_ID (c), ctx); + OMP_CLAUSE_VIEW_ID (c) = lookup_decl (OMP_CLAUSE_VIEW_ID (c), ctx); + } + else + OMP_CLAUSE_VIEW_ID (c) = *new_var; + } + break; + case OMP_CLAUSE_LASTPRIVATE: /* Let the corresponding firstprivate clause create the variable. */ @@ -1494,6 +2922,10 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) fixup_remapped_decl (decl, ctx, false); break; + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: + break; + case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_DEFAULT: @@ -2255,6 +3687,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, continue; } case OMP_CLAUSE_FIRSTPRIVATE: + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_REDUCTION: break; @@ -2440,6 +3874,17 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, goto do_dtor; break; + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: + { + stream vs = lookup_stream (var); + + x = build_outer_var_ref (vs->stream, ctx); + x = lang_hooks.decls.omp_clause_copy_ctor (c, vs->stream, x); + gimplify_and_add (x, ilist); + } + break; + case OMP_CLAUSE_COPYIN: by_ref = use_pointer_for_field (var, NULL); x = build_receiver_ref (var, by_ref, ctx); @@ -2762,6 +4207,8 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_LASTPRIVATE: case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: break; default: continue; @@ -2785,6 +4232,42 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, do_in = true; break; + case 
OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: + { + gimple stmt; + tree fn; + stream vs = lookup_stream (val); + + var = lookup_decl_in_outer_ctx (var, ctx); + if(DECL_HAS_VALUE_EXPR_P (var)) + { + var = DECL_VALUE_EXPR (var); + + if(TREE_CODE(var) == INDIRECT_REF) + var = TREE_OPERAND (var, 0); + } + + fn = (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INPUT) ? + built_in_decls [BUILT_IN_GOMP_STREAM_PUSH_STUB] : + built_in_decls [BUILT_IN_GOMP_STREAM_POP_STUB]; + stmt = gimple_build_call (fn, 2, vs->var, var); + + if(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INPUT) + gimple_seq_add_stmt (ilist, stmt); + else + gimple_seq_add_stmt (olist, stmt); + + set_stmt_loc (stmt, SL_DONT_MOVE); + + val = vs->stream; + var = vs->stream; + by_ref = false; + do_in = true; + do_out = false; + } + break; + case OMP_CLAUSE_LASTPRIVATE: if (by_ref || is_reference (val)) { @@ -3342,6 +4825,7 @@ expand_omp_taskreg (struct omp_region *region) gimple_stmt_iterator gsi; gimple entry_stmt, stmt; edge e; + bool streaming_flag = false; entry_stmt = last_stmt (region->entry); child_fn = gimple_omp_taskreg_child_fn (entry_stmt); @@ -3458,6 +4942,13 @@ expand_omp_taskreg (struct omp_region *region) } } + streaming_flag = expand_task_streaming_extensions (region); + + /* We need to update those as the streamization might have moved + them around. */ + entry_bb = region->entry; + exit_bb = region->exit; + /* Declare local variables needed in CHILD_CFUN. */ block = DECL_INITIAL (child_fn); BLOCK_VARS (block) = list2chain (child_cfun->local_decls); @@ -3558,6 +5049,20 @@ expand_omp_taskreg (struct omp_region *region) pop_cfun (); } + /* This allows to hoist the GOMP_task calls at the beginning of the + outer region for tasks that have been streamized (they live + through all the iterations). 
*/
+  if (streaming_flag && region->outer->streamization_info)
+    {
+      regions_streamization_info task = (regions_streamization_info) region->outer->streamization_info;
+      if(!task->init_latch_bb)
+        {
+          e = single_succ_edge (task->init_bb);
+          task->init_latch_bb = split_edge (e);
+        }
+      new_bb = task->init_latch_bb;
+    }
+
   /* Emit a library call to launch the children threads.  */
   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
     expand_parallel_call (region, new_bb, entry_stmt, ws_args);
@@ -4845,6 +6350,195 @@ expand_omp_sections (struct omp_region *region)
   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
 }
 
+/* Callback for htab_traverse, run on the views of an OMP SINGLE region
+   and of its sub-regions.  *SLOT is the view being visited and DATA is
+   the region the streams are tied to (for now, streaming tasks are
+   tied to an OMP SINGLE region).  The first time a stream is reached
+   through one of its views, emit at the start of the initialization
+   block of the region the call that creates the stream, followed by
+   the call that declares how many read and write views will connect to
+   it, and make sure the region has a finalization block.  No
+   destruction or end-of-stream call is issued here.  Always return 1
+   so that the traversal continues.  */
+
+static int
+stream_create_calls (void **slot, void *data)
+{
+  gimple_stmt_iterator gsi;
+  gimple stmt;
+  tree fn, type_size, horizon_size, num_readers, num_writers;
+  regions_streamization_info task;
+  edge e;
+  view v = (view) *slot;
+  stream vs = v->stream;
+  struct omp_region *region = (struct omp_region *) data;
+
+  /* A stream can be reached through several views: create it only once.  */
+  if (vs->visit == true)
+    return 1;
+
+  vs->visit = true;
+
+  type_size = build_int_cst (long_long_unsigned_type_node, TYPE_SIZE_HAS_INT (vs->element_type));
+  horizon_size = build_int_cst (long_long_unsigned_type_node, HORIZON);
+  num_readers = build_int_cst (unsigned_type_node, vs->nr_consumers);
+  num_writers = build_int_cst (unsigned_type_node, vs->nr_producers);
+
+  task = get_streamisation_info_for_region (region);
+
+  if (!task->init_bb)
+    {
+      e = EDGE_SUCC (single_succ (task->entry_bb), 0);
+      task->init_bb = split_edge (e);
+    }
+
+  /* Generate stream creation/initialization call.  */
+  fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_STREAM];
+  stmt = gimple_build_call (fn, 3, type_size, horizon_size,
+                            build_int_cst(build_pointer_type (void_type_node), 0));
+  gimple_call_set_lhs (stmt, vs->stream);
+  gsi = gsi_start_bb (task->init_bb);
+  gsi_insert_before (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+  fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS];
+  stmt = gimple_build_call (fn, 4, vs->stream, num_readers, num_writers, boolean_true_node);
+
+  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
+
+  if (!task->final_bb)
+    {
+      if (EDGE_COUNT (task->exit_bb->preds) >= 2)
+        {
+          /* We need to ensure that we cover all exit paths from the
+             single section with the code from the
+             streamization_exit_bb.  The exit from the single section
+             switch must not execute that code.  */
+          edge skip_single_sec_edge;
+
+          e = split_block_after_labels (task->exit_bb);
+          task->final_bb = split_edge (e);
+          task->clean_bb =
+            split_edge (single_succ_edge (task->final_bb));
+
+          task->exit_bb = single_succ (task->clean_bb);
+          skip_single_sec_edge = find_edge (single_succ (task->entry_bb),
+                                            e->src);
+          /* Perform the redirection outside of gcc_assert: the asserted
+             expression is not guaranteed to be evaluated when assertion
+             checking is disabled.  */
+          skip_single_sec_edge
+            = redirect_edge_and_branch (skip_single_sec_edge, task->exit_bb);
+          gcc_assert (skip_single_sec_edge);
+        }
+      else
+        {
+          /* If the streamization body (of the OpenMP single
+             construct) does not have an exit (e.g., infinite loop),
+             then we may end up discarding the finalization statements.
+             Do not link their BB.  */
+          task->final_bb = alloc_block ();
+          task->final_bb->il.gimple = GGC_CNEW (struct gimple_bb_info);
+          set_bb_seq (task->final_bb, gimple_seq_alloc ());
+        }
+    }
+
+  return 1;
+}
+
+/* Expand the calls to the stream push and pop stub builtins made by the
+   current function.  For each such call whose stream (first argument)
+   has a view in REGION that was not expanded yet, record the call's
+   second argument as the view variable, generate the synchronization
+   sequences of the view and insert them around the call: for a
+   producer view, the write, commit and stall statements after the
+   call, with copies of the stall and write statements in the SESE
+   entry block of REGION; for a consumer view, the release and update
+   statements before the call and the read statements after it, with a
+   copy of the release statements in the SESE exit block of REGION.
+   Every stub call of the current function is removed afterwards.  */
+
+static void
+expand_push_and_pop_stubs (struct omp_region *region)
+{
+  int i;
+  const int builtin[2] = {BUILT_IN_GOMP_STREAM_PUSH_STUB, BUILT_IN_GOMP_STREAM_POP_STUB};
+
+  for (i = 0; i < 2; i++)
+    {
+      struct cgraph_node *node = cgraph_node (built_in_decls[builtin[i]]);
+      struct cgraph_edge *e;
+
+      if(node)
+        for (e = node->callers; e; e = e->next_caller)
+          {
+            struct cgraph_node *caller = e->caller;
+
+            if(caller->decl == current_function_decl)
+              {
+                gimple_stmt_iterator gsi, gsi1;
+                gimple stmt = e->call_stmt;
+
+                /* TODO: This is wrong! We agreed to perform the proper expansion
+                   after the deadline of PLDI. For now it is good enough to perform
+                   this since we will assume streamed variable never changes between
+                   task pragmas inside of a master task. */
+                tree stream_var;
+                stream s;
+                view v;
+
+                stream_var = gimple_call_arg (stmt, 0);
+                s = lookup_stream (stream_var);
+                v = maybe_lookup_view (region, s->views);
+
+                if(v != NULL && v->expanded_push_pop == false)
+                  {
+                    regions_streamization_info task;
+                    struct expand_view_traverser_callback_data data = CREATE_SYNC_DEFAULT_DATA;
+
+                    gsi = gsi_for_stmt (e->call_stmt);
+                    task = get_streamisation_info_for_region (region);
+
+                    v->view_var = gimple_call_arg (stmt, 1);
+
+                    expand_view_traverser_callback ((void **) &v, &data);
+
+                    if(v->is_producer)
+                      {
+                        gimple_seq copy = copy_gimple_seq_and_replace_locals (data.stall_stmts);
+                        gimple_seq copy1 = copy_gimple_seq_and_replace_locals (data.write_stmts);
+
+                        gsi = gsi_for_stmt (stmt);
+                        gsi_insert_seq_after (&gsi, data.write_stmts, GSI_CONTINUE_LINKING);
+                        gsi_insert_seq_after (&gsi, data.commit_stmts, GSI_CONTINUE_LINKING);
+                        gsi_insert_seq_after (&gsi, data.stall_stmts, GSI_CONTINUE_LINKING);
+
+                        /* TODO: TINO CHECK THIS might not be a single succ node.  */
+                        gsi1 = gsi_start_bb (task->sese_entry);
+                        gsi_insert_seq_after (&gsi1, copy, GSI_CONTINUE_LINKING);
+                        gsi_insert_seq_after (&gsi1, copy1, GSI_CONTINUE_LINKING);
+                      }
+                    else
+                      {
+                        gimple_seq copy = copy_gimple_seq_and_replace_locals (data.release_stmts);
+
+                        gsi_insert_seq_before (&gsi, data.release_stmts, GSI_SAME_STMT);
+                        gsi_insert_seq_before (&gsi, data.update_stmts, GSI_CONTINUE_LINKING);
+                        gsi = gsi_for_stmt (stmt);
+                        gsi_insert_seq_after (&gsi, data.read_stmts, GSI_CONTINUE_LINKING);
+
+                        gsi1 = gsi_start_bb (task->sese_exit);
+                        gsi_insert_seq_after (&gsi1, copy, GSI_CONTINUE_LINKING);
+                      }
+
+                    v->expanded_push_pop = true;
+
+                  }
+                gsi = gsi_for_stmt (e->call_stmt);
+                gsi_remove (&gsi, true);
+              }
+          }
+    }
+}
 
 /* Expand code for an OpenMP single directive.  We've already expanded
    much of the code, here we simply place the GOMP_barrier call.  */
@@ -4854,6 +6548,8 @@ expand_omp_single (struct omp_region *region)
 {
   basic_block entry_bb, exit_bb;
   gimple_stmt_iterator si;
+  regions_streamization_info task;
+  edge e;
   bool need_barrier = false;
 
   entry_bb = region->entry;
@@ -4866,10 +6562,34 @@ expand_omp_single (struct omp_region *region)
   if (find_omp_clause (gimple_omp_single_clauses (gsi_stmt (si)),
                        OMP_CLAUSE_COPYPRIVATE))
     need_barrier = true;
+  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
   gsi_remove (&si, true);
   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
 
+  /* Issue any necessary stream creation calls.
*/
+  TRAVERSE_VIEWS_IN_REGION_AND_SUBS (region, stream_create_calls, (void *) region);
+
+  if(region->view_pointers != NULL)
+    {
+      TRAVERSE_VIEWS_IN_REGION (region, register_views_traverser_callback, (void *) region);
+      create_wait_until_connected_call (region);
+    }
+
+  task = get_streamisation_info_for_region (region);
+
+  task->sese_entry = split_edge (single_succ_edge (task->init_latch_bb));
+  e = split_block_after_labels (task->final_bb);
+  task->sese_exit = e->src;
+  task->final_bb = e->dest;
+
+  expand_push_and_pop_stubs (region);
+
+  region->entry = task->entry_bb;
+  region->exit = task->exit_bb;
+
+  exit_bb = region->exit;
+
   si = gsi_last_bb (exit_bb);
   if (!gimple_omp_return_nowait_p (gsi_stmt (si)) || need_barrier)
     force_gimple_operand_gsi (&si, build_omp_barrier (), false, NULL_TREE,
@@ -5479,6 +7178,153 @@ build_omp_regions (void)
   build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
 }
 
+/* Stack of the streams named by the INPUT and OUTPUT clauses of the tasks
+   scanned so far; PREV is the entry that was pushed before this one.  */
+struct stream_list {
+  struct stream_struct *stream;
+  struct stream_list *prev;
+};
+
+/* Set up the parts of view V of stream STR that task views and the views
+   of a single region have in common: the temporaries named view, index,
+   sync_index and data, and the horizon.  The horizon is the size of the
+   type of VIEW_VAR divided by the size of its element type when
+   VIEW_VAR is an array, and 1 otherwise.  */
+
+static void
+retrieve_streams_init_view (view v, stream str, tree view_var)
+{
+  tree var_type;
+
+  v->stream = str;
+  v->view = create_tmp_var (build_pointer_type (void_type_node), "view");
+  v->index = create_tmp_var (long_long_unsigned_type_node, "index");
+  v->sync_index = create_tmp_var (long_long_unsigned_type_node, "sync_index");
+  v->data = create_tmp_var (str->buffer_type, "data");
+
+  var_type = TREE_TYPE (view_var);
+  if (TREE_CODE (var_type) == ARRAY_TYPE)
+    v->view_horizon
+      = build_int_cst (long_long_unsigned_type_node,
+                       TYPE_SIZE_HAS_INT (var_type)
+                       / TYPE_SIZE_HAS_INT (TREE_TYPE (var_type)));
+  else
+    v->view_horizon = build_int_cst (long_long_unsigned_type_node, 1);
+}
+
+/* Walk REGION and its following siblings, handling the inner regions of
+   each one first, and create the stream views used by the streaming
+   expansion.  For a task region, every INPUT or OUTPUT clause gets a
+   view of its stream in that region: an INPUT clause counts as a
+   consumer of the stream and an OUTPUT clause as a producer, and the
+   stream is pushed on a list of pending streams.  For a single region,
+   the pending streams are popped, and each stream left without any
+   producer or without any consumer gets a view in the single region
+   that takes the missing role.  */
+
+static void
+retrieve_streams_info (struct omp_region *region)
+{
+  /* Streams pushed by the tasks scanned so far and not yet popped by a
+     single region; static, so the recursive calls share it.  */
+  static struct stream_list *list = NULL;
+
+  for (; region; region = region->next)
+    {
+      location_t saved_location;
+
+      if (region->inner)
+        retrieve_streams_info (region->inner);
+
+      saved_location = input_location;
+      if (gimple_has_location (last_stmt (region->entry)))
+        input_location = gimple_location (last_stmt (region->entry));
+
+      if (region->type == GIMPLE_OMP_TASK)
+        {
+          gimple task_stmt = gsi_stmt (gsi_last_bb (region->entry));
+          tree c;
+
+          for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
+            {
+              enum omp_clause_code code = OMP_CLAUSE_CODE (c);
+              struct stream_list *new_elem;
+              tree view_var;
+              stream str;
+              view v;
+
+              if (code != OMP_CLAUSE_INPUT && code != OMP_CLAUSE_OUTPUT)
+                continue;
+
+              str = lookup_stream (OMP_CLAUSE_STREAM_ID (c));
+              v = lookup_view (region, str->views);
+              view_var = OMP_CLAUSE_VIEW_ID (c);
+
+              retrieve_streams_init_view (v, str, view_var);
+              v->clause = c;
+              v->view_var = view_var;
+
+              /* Without an explicit burst size, use the whole horizon.  */
+              v->burst = OMP_CLAUSE_BURST_SIZE (c);
+              if (v->burst == NULL_TREE)
+                v->burst = v->view_horizon;
+
+              v->var_decl = str->var;
+
+              v->is_producer = (code == OMP_CLAUSE_OUTPUT);
+              if (v->is_producer)
+                str->nr_producers++;
+              else
+                str->nr_consumers++;
+
+              lookup_view_in_region (region, v);
+
+              new_elem = (struct stream_list *) xmalloc (sizeof (struct stream_list));
+              new_elem->stream = str;
+              new_elem->prev = list;
+              list = new_elem;
+            }
+        }
+      else if (region->type == GIMPLE_OMP_SINGLE)
+        {
+          while (list != NULL)
+            {
+              struct stream_list *to_remove = list;
+              stream str = to_remove->stream;
+
+              if (str->nr_producers == 0 || str->nr_consumers == 0)
+                {
+                  view v = lookup_view (region, str->views);
+
+                  retrieve_streams_init_view (v, str, str->var);
+                  v->clause = NULL_TREE;
+                  v->expanded_push_pop = false;
+                  v->burst = v->view_horizon;
+                  v->view_var = NULL_TREE;
+
+                  if (str->nr_producers == 0)
+                    {
+                      v->is_producer = true;
+                      str->nr_producers++;
+                    }
+                  if (str->nr_consumers == 0)
+                    {
+                      v->is_producer = false;
+                      str->nr_consumers++;
+                    }
+
+                  lookup_view_in_region (region, v);
+                }
+
+              list = to_remove->prev;
+              free (to_remove);
+            }
+        }
+
+      input_location = saved_location;
+    }
+}
+
 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
 
 static unsigned int
@@ -5498,6 +7344,8 @@ execute_expand_omp (void)
 
   remove_exit_barriers (root_omp_region);
 
+  retrieve_streams_info (root_omp_region);
+
   expand_omp (root_omp_region);
 
   cleanup_tree_cfg ();
@@ -6339,6 +8187,36 @@ create_task_copyfn (gimple task_stmt, omp_context *ctx)
           t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
           append_to_statement_list (t, &list);
           break;
+
+        case OMP_CLAUSE_INPUT:
+        case OMP_CLAUSE_OUTPUT:
+          {
+            stream str = lookup_stream (OMP_CLAUSE_DECL (c));
+            n = splay_tree_lookup (ctx->field_map, (splay_tree_key) str->stream);
+            f = (tree) n->value;
+            if (tcctx.cb.decl_map)
+              f = *(tree *) pointer_map_contains (tcctx.cb.decl_map, f);
+            n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) str->stream);
+            if (n != NULL)
+              {
+                sf = (tree) n->value;
+                if (tcctx.cb.decl_map)
+                  sf = *(tree *) pointer_map_contains (tcctx.cb.decl_map, sf);
+                src = build_fold_indirect_ref (sarg);
+                src = build3 (COMPONENT_REF, TREE_TYPE (sf), src, sf, NULL);
+                if (use_pointer_for_field (str->stream, NULL)
+                    || is_reference (str->stream))
+                  src = build_fold_indirect_ref (src);
+              }
+            else
+              src = str->stream;
+            dst = build_fold_indirect_ref (arg);
+            dst = build3 (COMPONENT_REF, TREE_TYPE (f), dst, f, NULL);
+            t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
+            append_to_statement_list (t, &list);
+          }
+          break;
+
         case OMP_CLAUSE_PRIVATE:
           if (!
OMP_CLAUSE_PRIVATE_OUTER_REF (c)) break; @@ -6434,6 +8309,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq par_body, olist, ilist, par_olist, par_ilist, new_body; struct gimplify_ctx gctx; location_t loc = gimple_location (stmt); + gimple_stmt_iterator gsi; clauses = gimple_omp_taskreg_clauses (stmt); par_bind = gimple_seq_first_stmt (gimple_omp_body (stmt)); @@ -6460,6 +8336,17 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) par_olist = NULL; par_ilist = NULL; lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx); + + if (gimple_code (stmt) == GIMPLE_OMP_TASK) + { + if (par_ilist) + for (gsi = gsi_start (par_ilist); !gsi_end_p (gsi); gsi_next (&gsi)) + set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_IN); + if (par_olist) + for (gsi = gsi_start (par_olist); !gsi_end_p (gsi); gsi_next (&gsi)) + set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_OUT); + } + lower_omp (par_body, ctx); if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL) lower_reduction_clauses (clauses, &par_olist, ctx); @@ -6483,6 +8370,16 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) lower_send_clauses (clauses, &ilist, &olist, ctx); lower_send_shared_vars (&ilist, &olist, ctx); + if (gimple_code (stmt) == GIMPLE_OMP_TASK) + { + if (ilist) + for (gsi = gsi_start (ilist); !gsi_end_p (gsi); gsi_next (&gsi)) + set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_IN); + if (olist) + for (gsi = gsi_start (olist); !gsi_end_p (gsi); gsi_next (&gsi)) + set_stmt_loc_if_none (gsi_stmt (gsi), SL_COPY_OUT); + } + /* Once all the expansions are done, sequence all the different fragments inside gimple_omp_body. */ @@ -6490,11 +8387,13 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (ctx->record_type) { + gimple st; t = build_fold_addr_expr_loc (loc, ctx->sender_decl); /* fixup_child_record_type might have changed receiver_decl's type. 
*/ t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t); - gimple_seq_add_stmt (&new_body, - gimple_build_assign (ctx->receiver_decl, t)); + st = gimple_build_assign (ctx->receiver_decl, t); + set_stmt_loc (st, SL_COPY_IN); + gimple_seq_add_stmt (&new_body, st); } gimple_seq_add_seq (&new_body, par_ilist); @@ -6665,6 +8564,10 @@ execute_lower_omp (void) all_contexts = splay_tree_new (splay_tree_compare_pointers, 0, delete_omp_context); + if (!htab_stmt_loc) + htab_stmt_loc = htab_create_ggc (10, hash_stmt_loc, + eq_stmt_loc, NULL); + body = gimple_body (current_function_decl); scan_omp (body, NULL); gcc_assert (taskreg_nesting_level == 0); diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index 67fb70fbc..b4759cbfa 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -406,6 +406,12 @@ struct omp_region /* True if this is a combined parallel+workshare region. */ bool is_combined_parallel; + + /* Streams accessed by this region */ + htab_t view_pointers; + + /* Streamization data */ + void *streamization_info; }; extern struct omp_region *root_omp_region; diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 331d93d61..c14e3e060 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -307,6 +307,12 @@ dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags) case OMP_CLAUSE_LASTPRIVATE: name = "lastprivate"; goto print_remap; + case OMP_CLAUSE_INPUT: + name = "input"; + goto print_remap; + case OMP_CLAUSE_OUTPUT: + name = "output"; + goto print_remap; case OMP_CLAUSE_COPYIN: name = "copyin"; goto print_remap; diff --git a/gcc/tree.c b/gcc/tree.c index c45e807b4..956dd7939 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -233,6 +233,8 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_SHARED */ 1, /* OMP_CLAUSE_FIRSTPRIVATE */ 2, /* OMP_CLAUSE_LASTPRIVATE */ + 4, /* OMP_CLAUSE_INPUT */ + 4, /* OMP_CLAUSE_OUTPUT */ 4, /* OMP_CLAUSE_REDUCTION */ 1, /* OMP_CLAUSE_COPYIN */ 1, /* OMP_CLAUSE_COPYPRIVATE */ @@ 
-243,7 +245,7 @@ unsigned const char omp_clause_num_ops[] = 0, /* OMP_CLAUSE_ORDERED */ 0, /* OMP_CLAUSE_DEFAULT */ 3, /* OMP_CLAUSE_COLLAPSE */ - 0 /* OMP_CLAUSE_UNTIED */ + 0, /* OMP_CLAUSE_UNTIED */ }; const char * const omp_clause_code_name[] = @@ -253,6 +255,8 @@ const char * const omp_clause_code_name[] = "shared", "firstprivate", "lastprivate", + "input", + "output", "reduction", "copyin", "copyprivate", @@ -10215,6 +10219,8 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE_PRIVATE: case OMP_CLAUSE_SHARED: case OMP_CLAUSE_FIRSTPRIVATE: + case OMP_CLAUSE_INPUT: + case OMP_CLAUSE_OUTPUT: case OMP_CLAUSE_COPYIN: case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_IF: diff --git a/gcc/tree.h b/gcc/tree.h index 7eed68fc1..6c98c3a73 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -295,6 +295,12 @@ enum omp_clause_code /* OpenMP clause: lastprivate (variable_list). */ OMP_CLAUSE_LASTPRIVATE, + /* OpenMP streaming extension clause: input. */ + OMP_CLAUSE_INPUT, + + /* OpenMP streaming extension clause: output. */ + OMP_CLAUSE_OUTPUT, + /* OpenMP clause: reduction (operator:variable_list). OMP_CLAUSE_REDUCTION_CODE: The tree_code of the operator. Operand 1: OMP_CLAUSE_REDUCTION_INIT: Stmt-list to initialize the var. 
@@ -1760,6 +1766,24 @@ extern void protected_set_expr_location (tree, location_t); OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ OMP_CLAUSE_PRIVATE, \ OMP_CLAUSE_COPYPRIVATE), 0) + +#define OMP_CLAUSE_STREAM_ID(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ + OMP_CLAUSE_INPUT, \ + OMP_CLAUSE_OUTPUT), 0) +#define OMP_CLAUSE_STREAM_SUB(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ + OMP_CLAUSE_INPUT, \ + OMP_CLAUSE_OUTPUT), 1) +#define OMP_CLAUSE_VIEW_ID(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ + OMP_CLAUSE_INPUT, \ + OMP_CLAUSE_OUTPUT), 2) +#define OMP_CLAUSE_BURST_SIZE(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE), \ + OMP_CLAUSE_INPUT, \ + OMP_CLAUSE_OUTPUT), 3) + #define OMP_CLAUSE_HAS_LOCATION(NODE) \ ((OMP_CLAUSE_CHECK (NODE))->omp_clause.locus != UNKNOWN_LOCATION) #define OMP_CLAUSE_LOCATION(NODE) (OMP_CLAUSE_CHECK (NODE))->omp_clause.locus diff --git a/libgomp/ChangeLog.omp-stream b/libgomp/ChangeLog.omp-stream new file mode 100644 index 000000000..970faaca8 --- /dev/null +++ b/libgomp/ChangeLog.omp-stream @@ -0,0 +1,38 @@ +2010-10-05 Antoniu Pop + + * stream.c (gomp_stream_add_view_to_list): Added missing sizeof + call inside gomp_realloc. + +2010-07-10 Antoniu Pop + + * stream.c (GOMP_stream_connect_view): Update the + nr_registered_views field in the view_handle when connecting + views. + +2010-07-05 Antoniu Pop + + * stream.c (gomp_stream_unregister_view): Set the EOS_P flag when + the last producer exits and commit up to the highest committed index. + (gomp_stream_compute_lower_max): New. + +2010-07-05 Antoniu Pop + + * stream.c (GOMP_stream_create_stream): corrected initialization + values. + +2010-07-01 Antoniu Pop + + * stream.h: New. + * stream.c: New. + * Makefile.am (libgomp_la_SOURCES): Added stream.c + * Makefile.in: Regenerated. 
+ * libgomp.h: Include stream.h + * libgomp_g.h (GOMP_stream_create_stream) + (GOMP_stream_create_read_view, GOMP_stream_create_write_view) + (GOMP_stream_create_task, GOMP_stream_add_expected_views) + (GOMP_stream_connect_view, GOMP_stream_wait_until_connected) + (GOMP_stream_update, GOMP_stream_stall, GOMP_stream_release) + (GOMP_stream_commit, GOMP_stream_task_exit): Declared builtins + from stream.c + * libgomp.map: Export GOMP_stream_*. + * omp.h.in: Added GOMP_stream_* to the OpenMP public interface. diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 3786bee4c..d3caa723c 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -34,7 +34,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c + time.c fortran.c affinity.c stream.c nodist_noinst_HEADERS = libgomp_f.h nodist_libsubinclude_HEADERS = omp.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index c27d7a4c7..d54128536 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -95,7 +95,7 @@ am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ parallel.lo sections.lo single.lo task.lo team.lo work.lo \ lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ - fortran.lo affinity.lo + fortran.lo affinity.lo stream.lo libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../depcomp @@ -306,7 +306,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c + time.c fortran.c 
affinity.c stream.c nodist_noinst_HEADERS = libgomp_f.h nodist_libsubinclude_HEADERS = omp.h @@ -463,6 +463,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stream.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@ diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 1d21cc09c..cbfff2e62 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -48,6 +48,7 @@ #include "mutex.h" #include "bar.h" #include "ptrlock.h" +#include "stream.h" /* This structure contains the data to control one work-sharing construct, diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index e6c12fa00..bdf5ee15a 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -167,4 +167,17 @@ GOMP_2.0 { GOMP_loop_ull_runtime_start; GOMP_loop_ull_static_next; GOMP_loop_ull_static_start; + GOMP_stream_create_stream; + GOMP_stream_create_read_view; + GOMP_stream_create_write_view; + GOMP_stream_create_task; + GOMP_stream_add_expected_views; + GOMP_stream_connect_view; + GOMP_stream_wait_until_connected; + GOMP_stream_update; + GOMP_stream_stall; + GOMP_stream_release; + GOMP_stream_commit; + GOMP_stream_task_exit; } GOMP_1.0; + diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 18f69bc1e..243f83050 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -179,4 +179,21 @@ extern bool GOMP_single_start (void); extern void *GOMP_single_copy_start (void); extern void GOMP_single_copy_end (void *); +/* stream.c */ +extern void *GOMP_stream_create_stream (size_t, unsigned long long, char *); +extern void *GOMP_stream_create_read_view (void); +extern void 
*GOMP_stream_create_write_view (void); +extern void *GOMP_stream_create_task (void); +extern void GOMP_stream_add_expected_views (void *, int, int, int); +extern void GOMP_stream_connect_view (void *, void *, void *); +extern void GOMP_stream_wait_until_connected (void *); +extern unsigned long long GOMP_stream_update (void *, + const unsigned long long); +extern void GOMP_stream_stall (void *, const unsigned long long); +extern void GOMP_stream_release (void *, const unsigned long long); +extern void GOMP_stream_commit (void *, const unsigned long long); +extern void GOMP_stream_task_exit (void *); + + + #endif /* LIBGOMP_G_H */ diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 0198b8fd7..c51d144c6 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -102,4 +102,29 @@ int omp_get_active_level (void) __GOMP_NOTHROW; } #endif +#define GOMP_STREAM_ACCESS(VIEW, INDEX, TYPE) \ + (**((TYPE ***) (VIEW))) \ + [(unsigned long long)(INDEX) & \ + (*((unsigned long long *)((*((char ***) (VIEW))) + 1)))] + +extern void *GOMP_stream_create_stream (size_t, unsigned long long, char *) + __GOMP_NOTHROW; +extern void *GOMP_stream_create_read_view (void) __GOMP_NOTHROW; +extern void *GOMP_stream_create_write_view (void) __GOMP_NOTHROW; +extern void *GOMP_stream_create_task (void) __GOMP_NOTHROW; +extern void GOMP_stream_add_expected_views (void *, int, int, int) + __GOMP_NOTHROW; +extern void GOMP_stream_connect_view (void *, void *, void *) __GOMP_NOTHROW; +extern void GOMP_stream_wait_until_connected (void *) __GOMP_NOTHROW; +extern unsigned long long GOMP_stream_update (void *, + const unsigned long long) + __GOMP_NOTHROW; +extern void GOMP_stream_stall (void *, const unsigned long long) + __GOMP_NOTHROW; +extern void GOMP_stream_release (void *, const unsigned long long) + __GOMP_NOTHROW; +extern void GOMP_stream_commit (void *, const unsigned long long) + __GOMP_NOTHROW; +extern void GOMP_stream_task_exit (void *) __GOMP_NOTHROW; + #endif /* OMP_H */ diff --git 
a/libgomp/stream.c b/libgomp/stream.c new file mode 100644 index 000000000..50b684dca --- /dev/null +++ b/libgomp/stream.c @@ -0,0 +1,562 @@ +/* Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by Antoniu Pop . + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + more details. + + You should have received a copy of the GNU Lesser General Public License + along with libgomp; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* As a special exception, if you link this library with other files, some + of which are compiled with GCC, to produce an executable, this library + does not by itself cause the resulting executable to be covered by the + GNU General Public License. This exception does not however invalidate + any other reasons why the executable file might be covered by the GNU + General Public License. */ + +/* This implements the stream communication layer for libGOMP. */ + +#include +#include +#include + +#include "wait.h" +#include "sem.h" +#include "mutex.h" +#include "libgomp.h" + + +/* Data structures creation and pipeline initialization. */ + +/* Allocate and initialize a GOMP_STREAM for data elements of size + ELEMENT_SIZE using a circular buffer of STREAM_BUFFER_SIZE such + elements. Returns a pointer to the newly allocated stream. The + user may provide a pointer to pre-allocated memory to be used as + BUFFER for the stream. 
*/
+
+void *
+GOMP_stream_create_stream (size_t element_size,
+                           unsigned long long stream_buffer_size,
+                           char *buffer)
+{
+  gomp_stream_p stream = (gomp_stream_p) gomp_malloc (sizeof (gomp_stream_t));
+
+  /* Initialize and allocate the data buffer.  We force the
+     buffer_size to be a power of 2 for efficient modulo computation
+     of the indices in the circular buffer.  */
+  stream->element_size = element_size;
+  stream->buffer_size = 1;
+  while (stream->buffer_size < stream_buffer_size)
+    {
+      stream->buffer_size <<= 1;
+
+      /* The next power of 2 is not representable: the size wrapped
+         around to 0 and the loop would otherwise never terminate.  */
+      if (stream->buffer_size == 0)
+        gomp_fatal ("GOMP_stream: requested buffer size is too large.");
+    }
+  stream->buffer_mask = stream->buffer_size - 1;
+
+  /* In case the user provided a pre-allocated buffer, we need to
+     ensure it is properly sized.  */
+  if (buffer != NULL)
+    {
+      if (stream->buffer_size != stream_buffer_size)
+        gomp_fatal ("GOMP_stream: provided buffer size is not power of 2.");
+
+      stream->buffer = buffer;
+    }
+  else
+    {
+      /* Make sure the byte size of the buffer fits in a size_t before
+         allocating it.  */
+      if (element_size != 0
+          && stream->buffer_size > (size_t) -1 / element_size)
+        gomp_fatal ("GOMP_stream: requested buffer size is too large.");
+
+      stream->buffer =
+        (void *) gomp_malloc (stream->element_size * stream->buffer_size);
+    }
+
+  stream->expected_ready_p = false;
+  stream->connected_p = false;
+  stream->eos_p = false;
+
+  /* Initialize the view_handles.  */
+  stream->read_views.current_min = stream->buffer_size;
+  stream->read_views.current_max = 0;
+  stream->read_views.view_list.views = NULL;
+  stream->read_views.view_list.nr_views = 0;
+  stream->read_views.view_list.size = 0;
+  stream->read_views.nr_expected_views = 0;
+  stream->read_views.nr_registered_views = 0;
+  stream->read_views.nr_unregistered_views = 0;
+  gomp_mutex_init (&stream->read_views.connect_view_mutex);
+
+  stream->write_views.current_min = 0;
+  stream->write_views.current_max = stream->buffer_size;
+  stream->write_views.view_list.views = NULL;
+  stream->write_views.view_list.nr_views = 0;
+  stream->write_views.view_list.size = 0;
+  stream->write_views.nr_expected_views = 0;
+  stream->write_views.nr_registered_views = 0;
+  stream->write_views.nr_unregistered_views = 0;
+  gomp_mutex_init (&stream->write_views.connect_view_mutex);
+
+  /* The stream mutex is only used when the atomic builtins are not
+     available.  */
+#ifndef HAVE_SYNC_BUILTINS
+  gomp_mutex_init (&stream->stream_mutex);
+#endif
+
+  return stream;
+}
+
+/* Allocate and initialize a generic GOMP_STREAM_VIEW that can be
+   connected to any stream to give either read or write access
+   depending on its TYPE.  The view starts with null indices and is
+   not attached to any stream.  Returns a pointer to the newly
+   allocated view.  */
+
+static inline void *
+gomp_stream_create_view (int type)
+{
+  gomp_stream_view_p view =
+    (gomp_stream_view_p) gomp_malloc (sizeof(gomp_stream_view_t));
+
+  view->lower_index = 0;
+  view->upper_index = 0;
+  view->stream = NULL;
+  view->end_p = false;
+  view->type = type;
+  view->local_min_value = 0;
+
+  return view;
+}
+
+/* Wrapper for creating a READ view.  */
+
+void *
+GOMP_stream_create_read_view (void)
+{
+  return gomp_stream_create_view (READ_VIEW);
+}
+
+/* Wrapper for creating a WRITE view.  */
+
+void *
+GOMP_stream_create_write_view (void)
+{
+  return gomp_stream_create_view (WRITE_VIEW);
+}
+
+/* Allocate and initialize a GOMP_STREAM_TASK data structure.
*/ + +void * +GOMP_stream_create_task (void) +{ + gomp_stream_task_p task = + (gomp_stream_task_p) gomp_malloc (sizeof(gomp_stream_task_t)); + + task->read_view_list.views = NULL; + task->read_view_list.nr_views = 0; + task->read_view_list.size = 0; + + task->write_view_list.views = NULL; + task->write_view_list.nr_views = 0; + task->write_view_list.size = 0; + + return task; +} + +/* Declare additional READ_VIEWS and WRITE_VIEWS expected views on + stream S. When possible, the thread that creates the streaming + tasks should declare, for each stream, the number of read/write + views that will connect to a stream before the streaming tasks are + started. If this function is called on a stream, there will be no + further checks for the number of tasks partaking in the + initialization synchronization. */ + +void +GOMP_stream_add_expected_views (void *s, int read_views, int write_views, + int final) +{ + gomp_stream_p stream = (gomp_stream_p) s; + + if (stream->expected_ready_p) + gomp_fatal + ("GOMP_stream: attempting to modify a final number of expected views."); + stream->expected_ready_p = final; + +#ifdef HAVE_SYNC_BUILTINS + __sync_fetch_and_add (&stream->read_views.nr_expected_views, read_views); + __sync_fetch_and_add (&stream->write_views.nr_expected_views, write_views); +#else + gomp_mutex_lock (&stream->stream_mutex); + stream->read_views.nr_expected_views += read_views; + stream->write_views.nr_expected_views += write_views; + gomp_mutex_unlock (&stream->stream_mutex); +#endif +} + +/* Add VIEW to the VIEW_LIST. We actually use an array as this list + is only modified in the initialization phase and we never remove + any items from it. */ + +static inline void +gomp_stream_add_view_to_list (gomp_stream_view_p view, + gomp_stream_view_list_p view_list) +{ + /* Allocate memory when needed. 
*/ + if (view_list->views == NULL || view_list->nr_views == view_list->size) + { + if (view_list->size == 0) + view_list->size = 4; + if (view_list->nr_views == view_list->size) + view_list->size <<= 1; + + view_list->views = + (gomp_stream_view_p *) gomp_realloc (view_list->views, + view_list->size * sizeof (gomp_stream_view_p)); + } + + view_list->views[view_list->nr_views] = view; + view_list->nr_views += 1; +} + +/* Connect a VIEW to a STREAM and also to the TASK which will use it. + This effectively builds the runtime task graph. */ + +void +GOMP_stream_connect_view (void *t, void *s, void *v) +{ + gomp_stream_task_p task = (gomp_stream_task_p) t; + gomp_stream_p stream = (gomp_stream_p) s; + gomp_stream_view_p view = (gomp_stream_view_p) v; + + gomp_stream_view_handle_p vh = (view->type == READ_VIEW) ? + &stream->read_views : &stream->write_views; + gomp_stream_view_list_p stream_list = &vh->view_list; + gomp_stream_view_list_p task_list = (view->type == READ_VIEW) ? + &task->read_view_list : &task->write_view_list; + + view->stream = stream; + + /* A read view's lower index is shifted by the buffer_size as the + stream is initially empty. This is equivalent to releasing the + original buffer_size elements. A write view will start with + buffer_size free space. */ + if (view->type == READ_VIEW) + view->lower_index = stream->buffer_size; + else + view->local_min_value = stream->buffer_size; + + /* Register the view with the TASK to which it belongs. This + operation is local to the task, so there is no need to + synchronize. */ + gomp_stream_add_view_to_list (view, task_list); + + /* Connect the view to the stream. This must be done atomically as + this data structure is shared with the other producer/consumer + tasks. 
*/ + gomp_mutex_lock (&vh->connect_view_mutex); + gomp_stream_add_view_to_list (view, stream_list); + gomp_mutex_unlock (&vh->connect_view_mutex); + __sync_fetch_and_add (&vh->nr_registered_views, 1); +} + +/* Check whether all the expected views on STREAM have already + connected. */ + +static inline bool +gomp_stream_check_connected (gomp_stream_p stream) +{ + if (!stream->expected_ready_p) + return false; + + if (stream->connected_p) + return true; + + if ((stream->read_views.view_list.nr_views + == stream->read_views.nr_expected_views) + && (stream->write_views.view_list.nr_views + == stream->write_views.nr_expected_views)) + { + stream->connected_p = true; + return true; + } + return false; +} + +/* Wait until all the streams to which TASK connects are ready and + connected to all producer/consumer tasks. */ + +void +GOMP_stream_wait_until_connected (void *t) +{ + gomp_stream_task_p task = (gomp_stream_task_p) t; + int num_read_views = task->read_view_list.nr_views; + int num_write_views = task->write_view_list.nr_views; + int i; + bool done; + + do + { + done = true; + + for (i = 0; i < num_read_views; ++i) + if (!gomp_stream_check_connected (task->read_view_list.views[i]->stream)) + done = false; + + for (i = 0; i < num_write_views; ++i) + if (!gomp_stream_check_connected (task->write_view_list.views[i]->stream)) + done = false; + } + while (!done); +} + +/* Stream communication/synchronization. */ + +/* Compute the minimum of the LOWER_INDEX fields of all views in the + LIST of views. This is used during the termination phase to give + access to the readers up to the highest committed index. This is + only useful when producers, not too far apart in indices they + committed, forget to commit up to the last index that should appear + in the stream. 
*/
+
+static inline unsigned long long
+gomp_stream_compute_lower_max (gomp_stream_view_list_p list)
+{
+  gomp_stream_view_p *slots = list->views;
+  unsigned long long highest = 0;
+  int pos;
+
+  /* An empty list yields 0.  */
+  for (pos = 0; pos < list->nr_views; pos++)
+    {
+      const unsigned long long lower = slots[pos]->lower_index;
+
+      highest = (lower > highest) ? lower : highest;
+    }
+
+  return highest;
+}
+
+/* Return the smallest LOWER_INDEX found among the views in LIST, or
+   GOMP_STREAM_MAX_INDEX when LIST holds no view.  On a LIST of read
+   views the result is the highest index released by every reader
+   (i.e. elements all consumers have already discarded), hence the
+   highest index a write view may acquire for writing.  On a LIST of
+   write views it is the highest index committed by every producer,
+   hence the highest index available for reading.  */
+
+static inline unsigned long long
+gomp_stream_compute_lower_min (gomp_stream_view_list_p list)
+{
+  gomp_stream_view_p *slots = list->views;
+  unsigned long long lowest = GOMP_STREAM_MAX_INDEX;
+  int pos;
+
+  for (pos = 0; pos < list->nr_views; pos++)
+    {
+      const unsigned long long lower = slots[pos]->lower_index;
+
+      lowest = (lower < lowest) ? lower : lowest;
+    }
+
+  return lowest;
+}
+
+/* Compute the minimum of the UPPER_INDEX fields of all views in the
+   LIST of views.  Similar to the above, but this is only a hint on
+   the resources that another producer (resp. consumer) has already
+   acquired.  If a producer (resp. consumer) has successfully acquired
+   an index with GOMP_stream_stall (resp. GOMP_stream_update) for
+   writing (resp. reading), then all other producers (resp. consumers)
+   on the same stream can access up to the same index without further
+   verification.
*/
+
+static inline unsigned long long
+gomp_stream_compute_upper_min (gomp_stream_view_list_p list)
+{
+  unsigned long long local_min = GOMP_STREAM_MAX_INDEX;
+  gomp_stream_view_p *views = list->views;
+  int i;
+
+  /* An empty list yields GOMP_STREAM_MAX_INDEX.  */
+  for (i = 0; i < list->nr_views; ++i)
+    if (views[i]->upper_index < local_min)
+      local_min = views[i]->upper_index;
+
+  return local_min;
+}
+
+/* Wait until the producers (resp. consumers) on this stream have
+   committed (resp. released) up to the INDEX position in the stream.
+   When that happens, the consumer (resp. producer) connected to the
+   stream through VIEW is allowed to access the elements up to
+   INDEX.  VH is the handle of the *opposite* side: the write views
+   when VIEW reads, the read views when VIEW writes.  The wait also
+   ends when the stream's EOS_P flag is raised.  */
+
+static inline void
+gomp_stream_wait_release (gomp_stream_view_p view,
+                          gomp_stream_view_handle_p vh,
+                          const unsigned long long index)
+{
+  /* Test whether someone already got a hold of a bigger index
+     yet.  */
+
+  if (view->local_min_value < index)
+    {
+      while (vh->current_min < index && !view->stream->eos_p)
+        {
+          unsigned long long local_min =
+            gomp_stream_compute_lower_min (&vh->view_list);
+
+          if (vh->current_min == local_min)
+            {
+              /* Nothing moved: back off before polling again.  The
+                 "pause" instruction only exists on x86; elsewhere
+                 fall back to a pure compiler barrier so the file
+                 still assembles and the polled fields are re-read.  */
+#if defined (__i386__) || defined (__x86_64__)
+              __asm volatile ("pause" : : : "memory");
+#else
+              __asm volatile ("" : : : "memory");
+#endif
+            }
+          else
+            vh->current_min = local_min;
+        }
+      /* Cache the bound in the view to skip the shared handle next
+         time.  */
+      view->local_min_value = vh->current_min;
+    }
+}
+
+
+/* Request read access for the view V to the stream up to INDEX.  In
+   case the producers have finished and there is not enough data, the
+   returned value is the highest index to which the view is allowed to
+   access the stream; otherwise INDEX is returned.  */
+
+unsigned long long
+GOMP_stream_update (void *v, const unsigned long long index)
+{
+  gomp_stream_view_p view = (gomp_stream_view_p) v;
+  view->upper_index = index;
+
+  /* In case another consumer has received permission to read up to a
+     yet higher index, then there is no need to check for this one.  */
+  if (index > view->stream->read_views.current_max)
+    {
+      gomp_stream_wait_release (view, &view->stream->write_views, index);
+
+      /* If the producers have finished producing for this stream, we
+         need to ensure we do not give read permission to the view
+         past the highest fully committed index (committed by all
+         producers).  */
+      if (view->stream->eos_p)
+        {
+          view->stream->write_views.current_min =
+            gomp_stream_compute_lower_min (&view->stream->write_views.view_list);
+
+          if (index > view->stream->write_views.current_min)
+            {
+              return view->stream->write_views.current_min;
+            }
+        }
+      view->stream->read_views.current_max = index;
+    }
+  return index;
+}
+
+/* Request write access for the view V to the stream up to INDEX.
+   Blocks (busy-waits) until the readers have released enough of the
+   buffer, or until end of stream.  */
+
+void
+GOMP_stream_stall (void *v, const unsigned long long index)
+{
+  gomp_stream_view_p view = (gomp_stream_view_p) v;
+  view->upper_index = index;
+
+  if (index > view->stream->write_views.current_max)
+    {
+      gomp_stream_wait_release (view, &view->stream->read_views, index);
+      view->stream->write_views.current_max = index;
+    }
+}
+
+/* Relinquish read access for the view V to the stream up to
+   INDEX.  The stored bound is INDEX shifted by the buffer size (see
+   GOMP_stream_connect_view), saturated at GOMP_STREAM_MAX_INDEX.  */
+
+void
+GOMP_stream_release (void *v, const unsigned long long index)
+{
+  gomp_stream_view_p view = (gomp_stream_view_p) v;
+  const unsigned long long size = view->stream->buffer_size;
+
+  /* Saturate instead of wrapping: unsigned addition wraps silently,
+     so releasing GOMP_STREAM_MAX_INDEX used to store BUFFER_SIZE - 1
+     and made an exiting reader hold back the minimum over all read
+     views instead of getting out of its way.  */
+  if (index > GOMP_STREAM_MAX_INDEX - size)
+    view->lower_index = GOMP_STREAM_MAX_INDEX;
+  else
+    view->lower_index = index + size;
+}
+
+/* Relinquish write access for the view V to the stream up to
+   INDEX.  */
+
+void
+GOMP_stream_commit (void *v, const unsigned long long index)
+{
+  gomp_stream_view_p view = (gomp_stream_view_p) v;
+  view->lower_index = index;
+}
+
+/* Finalization and destruction of the streaming data structures.  */
+
+/* Disconnects VIEW from the stream to which it is connected and free
+   the stream if it was the last task to disconnect.  */
+
+static inline void
+gomp_stream_unregister_view (gomp_stream_view_p view)
+{
+  gomp_stream_p stream = view->stream;
+  gomp_stream_view_handle_p vh =
+    (view->type == READ_VIEW) ? &stream->read_views : &stream->write_views;
+  int handle_unregistered;
+  int total_registered;
+
+  /* Make sure that when multiple views access a stream, the finished
+     views do not hinder the others in the min computation.  This is
+     done before any counter is touched: once the stream-wide counter
+     below has been incremented, another thread may free VIEW.  */
+  if (view->type == READ_VIEW)
+    GOMP_stream_release (view, GOMP_STREAM_MAX_INDEX);
+
+  handle_unregistered = __sync_add_and_fetch (&(vh->nr_unregistered_views), 1);
+
+  /* The last producer exiting will set the eos_p flag and allow the
+     consumers to read up to the highest committed index.  Using the
+     value returned by the atomic increment means exactly one
+     producer takes this branch.  */
+  if (view->type != READ_VIEW
+      && handle_unregistered == vh->nr_registered_views)
+    {
+      stream->eos_p = true;
+      vh->current_min = gomp_stream_compute_lower_max (&vh->view_list);
+    }
+
+  /* Read the registration totals while the stream is still guaranteed
+     to be alive, i.e. before this view is counted as gone.  */
+  total_registered = (stream->read_views.nr_registered_views
+                      + stream->write_views.nr_registered_views);
+
+  /* If all known views are accounted for, this is the last one
+     unregistering.  It frees the memory allocated for the stream as
+     well as all the views on this stream.  Any other thread must not
+     touch STREAM or VIEW past this increment.  */
+  if (__sync_add_and_fetch (&(stream->unregistered_views), 1)
+      == total_registered)
+    {
+      gomp_stream_view_list_p read_view_list = &stream->read_views.view_list;
+      gomp_stream_view_list_p write_view_list = &stream->write_views.view_list;
+      int i;
+
+      for (i = 0; i < read_view_list->nr_views; ++i)
+        free (read_view_list->views[i]);
+
+      for (i = 0; i < write_view_list->nr_views; ++i)
+        free (write_view_list->views[i]);
+
+      free (stream->buffer);
+      free (read_view_list->views);
+      free (write_view_list->views);
+      free (stream);
+    }
+}
+
+/* Invoked before terminating a stream TASK, this disconnects all the
+   views and for all streams for which it is the last one to
+   disconnect from, it frees up all data structures.
*/ + +void +GOMP_stream_task_exit (void *t) +{ + gomp_stream_task_p task = (gomp_stream_task_p) t; + int num_read_views = task->read_view_list.nr_views; + int num_write_views = task->write_view_list.nr_views; + int i; + + for (i = 0; i < num_read_views; ++i) + gomp_stream_unregister_view (task->read_view_list.views[i]); + + for (i = 0; i < num_write_views; ++i) + gomp_stream_unregister_view (task->write_view_list.views[i]); + + free (task->read_view_list.views); + free (task->write_view_list.views); + free (task); +} + + + + + diff --git a/libgomp/stream.h b/libgomp/stream.h new file mode 100644 index 000000000..701184d9e --- /dev/null +++ b/libgomp/stream.h @@ -0,0 +1,210 @@ +/* Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by Antoniu Pop . + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + more details. + + You should have received a copy of the GNU Lesser General Public License + along with libgomp; see the file COPYING.LIB. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* As a special exception, if you link this library with other files, some + of which are compiled with GCC, to produce an executable, this library + does not by itself cause the resulting executable to be covered by the + GNU General Public License. This exception does not however invalidate + any other reasons why the executable file might be covered by the GNU + General Public License. 
*/
+
+/* This implements the stream communication layer for libGOMP.  */
+
+#ifndef GOMP_STREAM_H
+#define GOMP_STREAM_H 1
+
+/* ULLONG_MAX (used by GOMP_STREAM_MAX_INDEX) and bool.  The type
+   gomp_mutex_t used below is not declared here; presumably the
+   includer provides it through the libgomp internal headers --
+   TODO confirm.  */
+#include <limits.h>
+#include <stdbool.h>
+
+/* Define the type and maximum value for the indices used within the
+   stream.  The indices are strictly monotonically increasing
+   integers, so we need a type that does not wrap around too fast.  An
+   implementation of the wrap-around is necessary for systems where
+   this poses a problem.  */
+/* typedef unsigned long long int gomp_stream_index_t; */
+#define GOMP_STREAM_MAX_INDEX ULLONG_MAX
+
+/*#define LAZY_SYNCH
+  #define GUARDED_WAKE*/
+
+/*
+typedef enum {
+  GOMP_STREAM_INITIALIZED = 1,
+  GOMP_STREAM_ALLOCATED = 2,
+  GOMP_STREAM_PRODUCED = 4,
+  GOMP_STREAM_CONSUMED = 8,
+  GOMP_STREAM_ZOMBIE = 16,
+  GOMP_STREAM_STALL = 32
+} gomp_stream_state;
+*/
+
+/* Direction of a view: READ_VIEW for consumers, WRITE_VIEW for
+   producers.  */
+
+typedef enum
+{
+  READ_VIEW,
+  WRITE_VIEW
+} gomp_stream_view_type_t;
+
+struct gomp_stream;
+struct gomp_stream_task;
+
+
+/* GOMP_STREAM_VIEW data structure.  Producer and consumer tasks
+   connect to a GOMP_STREAM using views.  */
+
+typedef struct gomp_stream_view
+{
+  /* The stream accessed through this view.  WARNING: this field needs
+     to be first.  */
+  struct gomp_stream *stream;
+
+  /* The alignment directives are needed to ensure these
+     high-potential false-sharing fields are on their own cache
+     lines.  */
+  /* Lower and upper bounds accessible in the stream through this
+     view.  */
+  unsigned long long lower_index __attribute__((aligned (64)));
+  unsigned long long upper_index __attribute__((aligned (64)));
+
+  /* The task using this view.  */
+  struct gomp_stream_task *task __attribute__((aligned (64)));
+
+  /* Setting this flag means the process accessing the stream through
+     this view relinquishes his access to the stream (generally when
+     terminating).  */
+  bool end_p;
+
+  /* Type of this view (read or write).  */
+  gomp_stream_view_type_t type;
+
+  /* In order to avoid excessive accesses to the global minimum index
+     (released or consumed), which strongly impacts cache traffic, use
+     this duplicated field for an additional level of caching and only
+     update when needed.  */
+  unsigned long long local_min_value;
+
+} gomp_stream_view_t, *gomp_stream_view_p;
+
+/* List of GOMP_STREAM_VIEWs.  As this list is only modified in the
+   initialization phase and we never remove items, we'll use an
+   array.  NR_VIEWS is the number of entries in use and SIZE the
+   number of entries allocated in VIEWS.  */
+
+typedef struct gomp_stream_view_list
+{
+  gomp_stream_view_p *views;
+  int nr_views;
+  int size;
+} gomp_stream_view_list_t, *gomp_stream_view_list_p;
+
+
+/* GOMP_STREAM_VIEW_HANDLE data structure.  This allows the tasks
+   interacting through the GOMP_STREAM to which this handle is
+   attached to keep updated information global to all similar views on
+   this stream.  */
+
+typedef struct gomp_stream_view_handle
+{
+  /* The alignment directives are needed to ensure these
+     high-potential false-sharing fields are on their own cache
+     lines.  */
+  /* Latest computed value of the min released index and max acquired
+     index values across views.  */
+  volatile unsigned long long current_min __attribute__((aligned (64)));
+  volatile unsigned long long current_max __attribute__((aligned (64)));
+
+  /* Bookkeeping for view connections.  */
+  gomp_stream_view_list_t view_list __attribute__((aligned (64)));
+  int nr_expected_views;
+  int nr_registered_views;
+  int nr_unregistered_views;
+
+  /* Enforce atomic connection of the views to the stream.  */
+  gomp_mutex_t connect_view_mutex;
+
+} gomp_stream_view_handle_t, *gomp_stream_view_handle_p;
+
+
+/* GOMP_STREAM data structure.  */
+
+typedef struct gomp_stream
+{
+  /* WARNING: the first two fields (BUFFER and BUFFER_MASK) need to
+     remain in their respective positions.  */
+
+  /* The pointer to the buffer, as well as the subsequent buffer
+     description, should be on a single mostly read cache line.  The
+     EOS_P flag is only set at the very end of the use of this
+     stream.  */
+  /* Circular buffer containing the data communicated through this
+     stream.  */
+  char *buffer __attribute__((aligned (64)));
+
+  /* Size of the buffer and the bitmask used for modulo computation
+     for the wrap-around.  The size is expressed in basic elements for
+     this stream.  The size in bytes of the buffer is
+     BUFFER_SIZE * ELEMENT_SIZE.  */
+  unsigned long long buffer_mask;
+  unsigned long long buffer_size;
+  unsigned long long element_size;
+
+  /* True once all the tasks that should be expected to connect to
+     this stream have been declared.  */
+  bool expected_ready_p;
+
+  /* True once all the tasks expected to connect to this stream have
+     done so.  */
+  bool connected_p;
+
+  /* End of stream: true when all producers have finished committing
+     all the data and are terminating.  */
+  bool eos_p;
+
+  /* Handles for read and write views on this stream.  */
+  gomp_stream_view_handle_t read_views;
+  gomp_stream_view_handle_t write_views;
+
+  /* Barrier used both for waiting for all views to connect to the
+     stream and to find the last view disconnecting (and who therefore
+     frees this stream).  */
+  /*gomp_barrier_t view_handling_barrier;*/
+
+  /* Counter of the number of total unregistered views, both read and
+     write, used to determine the last task disconnecting its view.
+     The last task will also free the data structures.  */
+  int unregistered_views;
+
+#ifndef HAVE_SYNC_BUILTINS
+  /* We may need a lock for atomicity if no atomic operations are
+     available.  */
+  gomp_mutex_t stream_mutex;
+#endif
+} gomp_stream_t, *gomp_stream_p;
+
+
+/* GOMP_STREAM_TASK data structure.  Runtime node in the task
+   graph.  */
+
+typedef struct gomp_stream_task
+{
+  /* Lists of views on streams used by this task.  */
+  gomp_stream_view_list_t read_view_list;
+  gomp_stream_view_list_t write_view_list;
+
+} gomp_stream_task_t, *gomp_stream_task_p;
+
+#endif /* GOMP_STREAM_H */