From 57ff7e16b63d5739564ea99f09291fb22da820c8 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Thu, 1 Sep 2011 16:23:55 +0200 Subject: [PATCH] Replace SPSC stream by BatchQueue streams --- gcc/omp-builtins.def | 22 +++ gcc/omp-low.c | 322 +++++++++++++++++++++++++++++++++-------- libgomp/libgomp.map | 9 ++ libgomp/omp.h.in | 16 +++ libgomp/stream.c | 331 ++++++++++++++++++++++++++++++++++++++++--- libgomp/stream.h | 39 ++++- 6 files changed, 661 insertions(+), 78 deletions(-) diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 175099686..f2755c6c6 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -210,12 +210,21 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end", DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_STREAM, "GOMP_stream_create_stream", BT_FN_PTR_SIZE_SIZE, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_CREATE_STREAM, + "GOMP_batchQ_create_stream", + BT_FN_PTR_SIZE_SIZE, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW, "GOMP_stream_create_read_view", BT_FN_PTR_SIZE_SIZE, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_CREATE_READ_VIEW, + "GOMP_batchQ_create_read_view", + BT_FN_PTR_SIZE_SIZE, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW, "GOMP_stream_create_write_view", BT_FN_PTR_SIZE_SIZE, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_CREATE_WRITE_VIEW, + "GOMP_batchQ_create_write_view", + BT_FN_PTR_SIZE_SIZE, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CREATE_TASK, "GOMP_stream_create_task", BT_FN_PTR, ATTR_NOTHROW_LIST) @@ -234,6 +243,9 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS, DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_CONNECT_VIEW, "GOMP_stream_connect_view", BT_FN_VOID_PTR_PTR_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_CONNECT_VIEW, + "GOMP_batchQ_connect_view", + BT_FN_VOID_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN 
(BUILT_IN_GOMP_STREAM_WAIT_UNTIL_CONNECTED, "GOMP_stream_wait_until_connected", BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) @@ -242,12 +254,20 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_GET_AVAILABLE_WORK, BT_FN_ULL_PTR_ULL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_UPDATE, "GOMP_stream_update", BT_FN_PTR_PTR_ULL_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_UPDATE, "GOMP_batchQ_update", + BT_FN_PTR_PTR_ULL_ULL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_STALL, "GOMP_stream_stall", BT_FN_PTR_PTR_ULL_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_STALL, "GOMP_batchQ_stall", + BT_FN_PTR_PTR_ULL_ULL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_RELEASE, "GOMP_stream_release", BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_RELEASE, "GOMP_batchQ_release", + BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_COMMIT, "GOMP_stream_commit", BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_COMMIT, "GOMP_batchQ_commit", + BT_FN_VOID_PTR_ULL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_TASK_EXIT, "GOMP_stream_task_exit", BT_FN_VOID_PTR, ATTR_NOTHROW_LIST) @@ -275,6 +295,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_EXIT, "GOMP_stream_exit", BT_FN_VOID, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_PRE, "GOMP_stream_pre", BT_FN_PTR_PTR_ULL, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_BATCHQ_PRE, "GOMP_batchQ_pre", + BT_FN_PTR_PTR_ULL, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_STREAM_PUSH_STUB, "GOMP_stream_push_stub", diff --git a/gcc/omp-low.c b/gcc/omp-low.c index fe774f8ca..f6f4e3280 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -78,6 +78,8 @@ Supported types are currently: #include "optabs.h" #include "cfgloop.h" +#define BATCHQ_ENABLED + /* Return the entry edge of a conditional block (true branch). 
*/ static edge @@ -434,6 +436,11 @@ typedef struct stream /* Used for traversals */ bool visit; +#ifdef BATCHQ_ENABLED + /* Used for BatchQ */ + bool use_batchQ; +#endif + } stream_t; #define TYPE_SIZE_HAS_INT(TYPE) \ @@ -4792,7 +4799,38 @@ expand_steaming_taskreg_in_outer_context (omp_region_p region, /* Create firstprivate views and pass the pointers as well. */ for (i = 0; VEC_iterate (view_p, sinfo->fpviews, i, view); ++i) { - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW]; +#ifdef BATCHQ_ENABLED + int j, nb_associated_views = 0; + view_p aview; + + for (j = 0; VEC_iterate (view_p, view->stream->rviews, j, aview); ++j) + { + if (INTEGER_CST_CHECK(aview->sinfo->num_instances)) + nb_associated_views += TREE_INT_CST_LOW(aview->sinfo->num_instances); + else + { + /* Ensure we use generic functions */ + nb_associated_views ++; + break; + } + } + /* Pretty sure this for loop is useless */ + for (j = 0; VEC_iterate (view_p, view->stream->wviews, j, aview); ++j) + { + if (INTEGER_CST_CHECK(aview->sinfo->num_instances)) + nb_associated_views += TREE_INT_CST_LOW(aview->sinfo->num_instances); + else + { + /* Ensure we use generic functions */ + nb_associated_views ++; + break; + } + } + if (nb_associated_views == 1) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CREATE_WRITE_VIEW]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW]; stmt = gimple_build_call (fn, 2, view->view_size, get_view_burst_size (view, &gsi)); gimple_call_set_lhs (stmt, view->view); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); @@ -4926,12 +4964,22 @@ expand_steaming_taskreg_in_outer_context (omp_region_p region, /* COMMIT */ gsi = gsi_last_bb (commit_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_COMMIT]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT]; stmt = gimple_build_call (fn, 2, view_decl, task->local_activation_index); 
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); /* STALL */ - fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_STALL]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL]; stmt = gimple_build_call (fn, 3, view_decl, task->local_activation_index, task->local_activation_index_next); gimple_call_set_lhs (stmt, view->buffer_pointer); @@ -5057,7 +5105,12 @@ handle_nested_streaming_tasks (omp_region_p region, /* STALL */ gsi = gsi_last_bb (initialization_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_STALL]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL]; stmt = gimple_build_call (fn, 3, view_decl, task->local_activation_index, task->local_activation_index_next); gimple_call_set_lhs (stmt, view->buffer_pointer); @@ -5065,7 +5118,12 @@ handle_nested_streaming_tasks (omp_region_p region, /* COMMIT */ gsi = gsi_last_bb (finalization_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_COMMIT]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT]; stmt = gimple_build_call (fn, 2, view_decl, task->local_activation_index); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); } @@ -7633,26 +7691,46 @@ build_task_control_loop (streamization_info_p task) for (i = 0; VEC_iterate (view_p, task->rviews, i, view); ++i) { gsi = gsi_last_bb (loop_acquire_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_UPDATE]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_UPDATE]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_UPDATE]; stmt = gimple_build_call (fn, 3, view->view, act_idx_start_reg, act_idx_end_reg); gimple_call_set_lhs (stmt, view->buffer_pointer); gsi_insert_after (&gsi, 
stmt, GSI_CONTINUE_LINKING); gsi = gsi_last_bb (loop_release_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_RELEASE]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_RELEASE]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_RELEASE]; stmt = gimple_build_call (fn, 2, view->view, act_idx_end_reg); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); } for (i = 0; VEC_iterate (view_p, task->wviews, i, view); ++i) { gsi = gsi_last_bb (loop_acquire_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_STALL]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_STALL]; stmt = gimple_build_call (fn, 3, view->view, act_idx_start_reg, act_idx_end_reg); gimple_call_set_lhs (stmt, view->buffer_pointer); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); gsi = gsi_last_bb (loop_release_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_COMMIT]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_COMMIT]; stmt = gimple_build_call (fn, 2, view->view, act_idx_end_reg); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); } @@ -7789,72 +7867,175 @@ prepare_streaming_context (omp_region_p region) parallel region. */ for (i = 0; VEC_iterate (stream_p, sinfo->streams, i, stream); ++i) { - tree type_size = TYPE_SIZE_UNIT (stream->element_type); - tree horizon_size = build_int_cst (size_type_node, - HORIZON); +#ifdef BATCHQ_ENABLED + int nb_views = 0; - gsi = gsi_last_bb (sinfo->initialization_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_STREAM]; - stmt = gimple_build_call (fn, 2, type_size, horizon_size); - gimple_call_set_lhs (stmt, stream->stream); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - /* FIXME: this needs to take into account - replication as well ... 
we may want to move this - in the task call itself if we want dynamic values - as well. */ + stream->use_batchQ = 0; for (j = 0; VEC_iterate (view_p, stream->rviews, j, view); ++j) { - fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; - stmt = gimple_build_call (fn, 4, stream->stream, - view->sinfo->num_instances, - integer_zero_node, - boolean_false_node); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + if (INTEGER_CST_CHECK(view->sinfo->num_instances)) + nb_views += TREE_INT_CST_LOW(view->sinfo->num_instances); + else + { + /* Ensure we use generic functions */ + nb_views += 2; + break; + } } for (j = 0; VEC_iterate (view_p, stream->wviews, j, view); ++j) { - fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; - stmt = gimple_build_call (fn, 4, stream->stream, - integer_zero_node, - view->sinfo->num_instances, - boolean_false_node); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + if (INTEGER_CST_CHECK(view->sinfo->num_instances)) + nb_views += TREE_INT_CST_LOW(view->sinfo->num_instances); + else + { + /* Ensure we use generic functions */ + nb_views += 2; + break; + } } - /* Connect any firstprivate write views on this stream. As these are the master views, they only get one instance. */ for (j = 0; VEC_iterate (view_p, stream->fpviews, j, view); ++j) { - tree view_decl = lookup_var (view->view, sinfo->stmt); - /* Only one firstprivate view per stream possible. */ gcc_assert (j == 0); /* If there are firstprivate views, there should be no write views. 
*/ gcc_assert (VEC_empty (view_p, stream->wviews)); + nb_views ++; + } + if (nb_views == 2) + stream->use_batchQ = 1; +#endif + + tree type_size = TYPE_SIZE_UNIT (stream->element_type); + tree horizon_size; +#ifdef BATCHQ_ENABLED + tree read_burst_size, write_burst_size, burst_size; + view_p rview, wview; + +#endif + + gsi = gsi_last_bb (sinfo->initialization_bb); +#ifdef BATCHQ_ENABLED + if (stream->use_batchQ) + { + horizon_size = create_tmp_var (size_type_node, + "horizon_size"); + burst_size = create_tmp_var (size_type_node, + "burst_size"); + rview = VEC_index(view_p, stream->rviews, 0); + read_burst_size = get_view_burst_size (rview, &gsi); + if (VEC_empty (view_p, stream->wviews)) + wview = VEC_index(view_p, stream->fpviews, 0); + else + wview = VEC_index(view_p, stream->wviews, 0); + write_burst_size = get_view_burst_size (wview, &gsi); + stmt = gimple_build_assign_with_ops (MAX_EXPR, + burst_size, + read_burst_size, + write_burst_size); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + /* MAX(read_burst_size, write_burst_size) / type_size */ + stmt = gimple_build_assign_with_ops (CEIL_DIV_EXPR, + horizon_size, + burst_size, + type_size); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CREATE_STREAM]; + } + else +#endif + { + horizon_size = build_int_cst (size_type_node, + HORIZON); + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_STREAM]; + } + stmt = gimple_build_call (fn, 2, type_size, horizon_size); + gimple_call_set_lhs (stmt, stream->stream); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + /* FIXME: this needs to take into account + replication as well ... we may want to move this + in the task call itself if we want dynamic values + as well. */ +#ifdef BATCHQ_ENABLED + if (stream->use_batchQ) + { + /* Connect any firstprivate write views on this + stream. As these are the master views, they only + get one instance. 
*/ + for (j = 0; VEC_iterate (view_p, stream->fpviews, j, view); ++j) + { + tree view_decl = lookup_var (view->view, sinfo->stmt); + + /* Only one firstprivate view per stream possible. */ + gcc_assert (j == 0); + /* If there are firstprivate views, there should be no write views. */ + gcc_assert (VEC_empty (view_p, stream->wviews)); + + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CONNECT_VIEW]; + stmt = gimple_build_call (fn, 3, task_decl, stream->stream, view_decl); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } + } + else +#endif + { + for (j = 0; VEC_iterate (view_p, stream->rviews, j, view); ++j) + { + fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; + stmt = gimple_build_call (fn, 4, stream->stream, + view->sinfo->num_instances, + integer_zero_node, + boolean_false_node); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } + for (j = 0; VEC_iterate (view_p, stream->wviews, j, view); ++j) + { + fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; + stmt = gimple_build_call (fn, 4, stream->stream, + integer_zero_node, + view->sinfo->num_instances, + boolean_false_node); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } + + /* Connect any firstprivate write views on this + stream. As these are the master views, they only + get one instance. */ + for (j = 0; VEC_iterate (view_p, stream->fpviews, j, view); ++j) + { + tree view_decl = lookup_var (view->view, sinfo->stmt); + + /* Only one firstprivate view per stream possible. */ + gcc_assert (j == 0); + /* If there are firstprivate views, there should be no write views. 
*/ + gcc_assert (VEC_empty (view_p, stream->wviews)); + + fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; + stmt = gimple_build_call (fn, 4, stream->stream, + integer_zero_node, + integer_one_node, + boolean_false_node); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; + stmt = gimple_build_call (fn, 3, task_decl, stream->stream, view_decl); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } + + /* Set the final count flag for the number of + expected views. */ + gsi = gsi_last_bb (sinfo->post_initialization_bb); fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; stmt = gimple_build_call (fn, 4, stream->stream, integer_zero_node, - integer_one_node, - boolean_false_node); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; - stmt = gimple_build_call (fn, 3, task_decl, stream->stream, view_decl); + integer_zero_node, + boolean_true_node); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); } - - /* Set the final count flag for the number of - expected views. */ - gsi = gsi_last_bb (sinfo->post_initialization_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_ADD_EXPECTED_VIEWS]; - stmt = gimple_build_call (fn, 4, stream->stream, - integer_zero_node, - integer_zero_node, - boolean_true_node); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); } /* Issue wait until connected call in the parallel @@ -7972,7 +8153,12 @@ prepare_streaming_context (omp_region_p region) single threaded code before the other producer or any consumer can start. 
*/ gsi = gsi_last_bb (task->initialization_bb); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_PRE]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_PRE]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_PRE]; stmt = gimple_build_call (fn, 2, view->stream->stream, get_view_burst_size (view, &gsi)); gimple_call_set_lhs (stmt, view->buffer_pointer); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); @@ -8078,12 +8264,22 @@ prepare_streaming_context (omp_region_p region) gsi = gsi_last_bb (task->initialization_bb); for (i = 0; VEC_iterate (view_p, task->rviews, i, view); ++i) { - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CREATE_READ_VIEW]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_READ_VIEW]; stmt = gimple_build_call (fn, 2, view->view_size, get_view_burst_size (view, &gsi)); gimple_call_set_lhs (stmt, view->view); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CONNECT_VIEW]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; stmt = gimple_build_call (fn, 3, task_decl, view->stream->stream, view->view); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); @@ -8091,12 +8287,22 @@ prepare_streaming_context (omp_region_p region) for (i = 0; VEC_iterate (view_p, task->wviews, i, view); ++i) { - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CREATE_WRITE_VIEW]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CREATE_WRITE_VIEW]; stmt = gimple_build_call (fn, 2, view->view_size, get_view_burst_size (view, &gsi)); gimple_call_set_lhs (stmt, view->view); gsi_insert_after (&gsi, stmt, 
GSI_CONTINUE_LINKING); - fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; +#ifdef BATCHQ_ENABLED + if (view->stream->use_batchQ) + fn = built_in_decls[BUILT_IN_GOMP_BATCHQ_CONNECT_VIEW]; + else +#endif + fn = built_in_decls[BUILT_IN_GOMP_STREAM_CONNECT_VIEW]; stmt = gimple_build_call (fn, 3, task_decl, view->stream->stream, view->view); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); } diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index ad27ae269..a4e6e71c4 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -168,20 +168,28 @@ GOMP_2.0 { GOMP_loop_ull_static_next; GOMP_loop_ull_static_start; GOMP_stream_create_stream; + GOMP_batchQ_create_stream; GOMP_stream_create_read_view; + GOMP_batchQ_create_read_view; GOMP_stream_create_write_view; + GOMP_batchQ_create_write_view; GOMP_stream_create_task; GOMP_stream_get_task_activation_counter; GOMP_stream_set_task_termination_flag; GOMP_stream_task_add_instance; GOMP_stream_add_expected_views; GOMP_stream_connect_view; + GOMP_batchQ_connect_view; GOMP_stream_wait_until_connected; GOMP_stream_get_available_work; GOMP_stream_update; + GOMP_batchQ_update; GOMP_stream_stall; + GOMP_batchQ_stall; GOMP_stream_release; + GOMP_batchQ_release; GOMP_stream_commit; + GOMP_batchQ_commit; GOMP_stream_task_exit; GOMP_stream_create_control_stream; GOMP_stream_control_stream_set_eos; @@ -191,5 +199,6 @@ GOMP_2.0 { GOMP_stream_init; GOMP_stream_exit; GOMP_stream_pre; + GOMP_batchQ_pre; } GOMP_1.0; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index bb6fca481..70a9cb097 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -109,12 +109,17 @@ int omp_get_active_level (void) __GOMP_NOTHROW; extern void *GOMP_stream_create_stream (size_t, size_t) __GOMP_NOTHROW; +extern void *GOMP_batchQ_create_stream (size_t, size_t) + __GOMP_NOTHROW; extern void *GOMP_stream_create_read_view (size_t, size_t) __GOMP_NOTHROW; +extern void *GOMP_batchQ_create_read_view (size_t, size_t) __GOMP_NOTHROW; extern void 
*GOMP_stream_create_write_view (size_t, size_t) __GOMP_NOTHROW; +extern void *GOMP_batchQ_create_write_view (size_t, size_t) __GOMP_NOTHROW; extern void *GOMP_stream_create_task (void) __GOMP_NOTHROW; extern void GOMP_stream_add_expected_views (void *, int, int, int) __GOMP_NOTHROW; extern void GOMP_stream_connect_view (void *, void *, void *) __GOMP_NOTHROW; +extern void GOMP_batchQ_connect_view (void *, void *, void *) __GOMP_NOTHROW; extern void GOMP_stream_wait_until_connected (void *) __GOMP_NOTHROW; extern unsigned long long GOMP_stream_get_available_work (void *, unsigned long long *) @@ -123,13 +128,24 @@ extern void *GOMP_stream_update (void *, const unsigned long long, const unsigned long long) __GOMP_NOTHROW; +extern void *GOMP_batchQ_update (void *, + const unsigned long long, + const unsigned long long) + __GOMP_NOTHROW; extern void *GOMP_stream_stall (void *, const unsigned long long, const unsigned long long) __GOMP_NOTHROW; +extern void *GOMP_batchQ_stall (void *, const unsigned long long, + const unsigned long long) + __GOMP_NOTHROW; extern void GOMP_stream_release (void *, const unsigned long long) __GOMP_NOTHROW; +extern void GOMP_batchQ_release (void *, const unsigned long long) + __GOMP_NOTHROW; extern void GOMP_stream_commit (void *, const unsigned long long) __GOMP_NOTHROW; +extern void GOMP_batchQ_commit (void *, const unsigned long long) + __GOMP_NOTHROW; extern void GOMP_stream_task_exit (void *) __GOMP_NOTHROW; #endif /* OMP_H */ diff --git a/libgomp/stream.c b/libgomp/stream.c index 5760b415a..196b15789 100644 --- a/libgomp/stream.c +++ b/libgomp/stream.c @@ -30,6 +30,10 @@ #include #include #include +#include +#include +#include +#include #include "wait.h" #include "sem.h" @@ -38,14 +42,22 @@ #define AGGREGATION_FACTOR 32 -//#define debug_log_init(S, V1, V2) printf (S, V1, V2); fflush (stdout) -#define debug_log_init(S, V1, V2) - -//#define debug_log_init3(S, V1, V2, V3) printf (S, V1, V2, V3); fflush (stdout) -#define 
debug_log_init3(S, V1, V2, V3) - -//#define debug_log(S, V1, V2) printf (S, V1, V2); fflush (stdout) -#define debug_log(S, V1, V2) +//#define OMP_STREAM_DEBUG +#ifdef OMP_STREAM_DEBUG + #define debug_log_init(S, V1, V2) printf (S, V1, V2); fflush (stdout) + #define debug_log_init3(S, V1, V2, V3) printf (S, V1, V2, V3); fflush (stdout) + #define debug_log(S, V1, V2) printf (S, V1, V2); fflush (stdout) + #define debug_log3(S, V1, V2, V3) printf (S, V1, V2, V3); fflush (stdout) + #define debug_log4(S, V1, V2, V3, V4) printf (S, V1, V2, V3, V4); fflush (stdout) + #define debug_log5(S, V1, V2, V3, V4, V5) printf (S, V1, V2, V3, V4, V5); fflush (stdout) +#else + #define debug_log_init(S, V1, V2) + #define debug_log_init3(S, V1, V2, V3) + #define debug_log(S, V1, V2) + #define debug_log3(S, V1, V2, V3) + #define debug_log4(S, V1, V2, V3, V4) + #define debug_log5(S, V1, V2, V3, V4, V5) +#endif gomp_barrier_t gomp_stream_tasks_wait_until_connected_barrier; gomp_barrier_t gomp_stream_tasks_exit_barrier; @@ -126,6 +138,89 @@ GOMP_stream_create_stream (size_t element_size, size_t buffer_size) return stream; } +void * +GOMP_batchQ_create_stream (size_t element_size, size_t buffer_size) +{ +#define BASE_SHM_NAME "/channel" +#define ROUNDUP(size) ((size + page_size) & ~(page_size - 1)) + int ret, shm_fd; + static int chan_idx = 0; + char shm_name[NAME_MAX]; + struct gomp_batchQ *stream; + size_t buffer_realsize, bq_size; + unsigned int page_size; + + buffer_size *= element_size * AGGREGATION_FACTOR; + buffer_realsize = 1; + while(buffer_realsize < buffer_size) + buffer_realsize <<= 1; + + /* Beginning of BatchQueue code */ + stream = NULL; + ret = sysconf(_SC_PAGESIZE); + if (ret == -1) + { + perror("BatchQueue init sysconf"); + return NULL; + } + page_size = ret; + bq_size = ROUNDUP(sizeof(gomp_batchQ_t) - 1 + buffer_realsize * 2); + ret = snprintf(shm_name, NAME_MAX, BASE_SHM_NAME"%d\n", chan_idx); + if (ret < 0) + { + fprintf(stderr, "BatchQueue init snprintf failed\n"); + 
return NULL; + } + else if (ret >= NAME_MAX) + { + fprintf(stderr, "Too many streams created: impossible to "); + fprintf(stderr, "create a stream named "BASE_SHM_NAME); + fprintf(stderr, "%d\n", chan_idx); + return NULL; + } + shm_fd = shm_open(shm_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (shm_fd == -1) + { + perror("BatchQueue init shm_open failed"); + return NULL; + } + ret = ftruncate(shm_fd, bq_size); + if (ret == -1) + { + perror("BatchQueue init ftruncate failed"); + goto close_file; + } + stream = mmap(NULL, bq_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, shm_fd, 0); + if (stream == MAP_FAILED) + { + perror("Batchqueue init mmap failed"); + stream = NULL; + goto close_file; + } + + stream->state = 0; + + stream->sender_ptr = stream->buf; + stream->receiver_ptr = stream->buf; + + stream->buf_start1 = stream->buf; + stream->buf_start2 = stream->buf + buffer_realsize; + stream->sender_buf = stream->buf_start1; + stream->receiver_buf = stream->buf_start1; + + stream->buffer_size = buffer_realsize; + stream->buffer_mask = buffer_realsize - 1; + + stream->pre_shift = 0; + + +close_file: + shm_unlink(shm_name); + return stream; +#undef BASE_SHM_NAME +#undef ROUNDUP +} + /* Allocate and initialize a generic GOMP_STREAM_VIEW that can be connected to any stream to give either read or write access depending on its TYPE. Returns a pointer to the newly allocated @@ -151,6 +246,22 @@ gomp_stream_create_view (int type, size_t view_size, size_t burst_size) return view; } +static inline void * +gomp_batchQ_create_view (int type, size_t view_size, size_t burst_size) +{ + gomp_batchQ_view_p view = + (gomp_batchQ_view_p) gomp_malloc (sizeof(gomp_batchQ_view_t)); + + view->stream = NULL; + view->type = type | BATCHQ_VIEW; + view->view_size = view_size; + view->burst_size = burst_size; + view->pxxk_size = view_size - burst_size; + view->termination_flag = false; + + return view; +} + /* Wrapper for creating a READ view . 
*/ void * @@ -160,6 +271,13 @@ GOMP_stream_create_read_view (size_t view_size, size_t burst_size) return gomp_stream_create_view (READ_VIEW, view_size, burst_size); } +void * +GOMP_batchQ_create_read_view (size_t view_size, size_t burst_size) +{ + debug_log_init ("GOMP_stream_create_read_view %zu %zu\n", view_size, burst_size); + return gomp_batchQ_create_view(READ_VIEW, view_size, burst_size); +} + /* Wrapper for creating a WRITE view. */ void * @@ -169,6 +287,13 @@ GOMP_stream_create_write_view (size_t view_size, size_t burst_size) return gomp_stream_create_view (WRITE_VIEW, view_size, burst_size); } +void * +GOMP_batchQ_create_write_view (size_t view_size, size_t burst_size) +{ + debug_log_init ("GOMP_stream_create_write_view %zu %zu\n", view_size, burst_size); + return gomp_batchQ_create_view(WRITE_VIEW, view_size, burst_size); +} + /* Allocate and initialize a GOMP_STREAM_TASK data structure. */ void * @@ -182,11 +307,19 @@ GOMP_stream_create_task () task->read_view_list.nr_views = 0; task->read_view_list.size = 0; gomp_mutex_init (&task->read_view_list.connect_view_mutex); + task->batchQ_read_view_list.views = NULL; + task->batchQ_read_view_list.nr_views = 0; + task->batchQ_read_view_list.size = 0; + gomp_mutex_init (&task->batchQ_read_view_list.connect_view_mutex); task->write_view_list.views = NULL; task->write_view_list.nr_views = 0; task->write_view_list.size = 0; gomp_mutex_init (&task->write_view_list.connect_view_mutex); + task->batchQ_write_view_list.views = NULL; + task->batchQ_write_view_list.nr_views = 0; + task->batchQ_write_view_list.size = 0; + gomp_mutex_init (&task->batchQ_write_view_list.connect_view_mutex); task->activation_counter = 0; task->termination_flag = false; @@ -210,8 +343,22 @@ GOMP_stream_get_task_activation_counter (void *t) void GOMP_stream_set_task_termination_flag (void *t) { + int i; + gomp_stream_view_list_p task_list; gomp_stream_task_p task = (gomp_stream_task_p) t; + int num_batchQ_read_views = 
task->batchQ_read_view_list.nr_views; + int num_batchQ_write_views = task->batchQ_write_view_list.nr_views; + task_list = &task->batchQ_read_view_list; + gomp_mutex_lock (&task_list->connect_view_mutex); + for (i = 0; i < num_batchQ_read_views; ++i) + ((gomp_batchQ_view_p) task_list->views[i])->termination_flag = true; + gomp_mutex_unlock (&task_list->connect_view_mutex); + task_list = &task->batchQ_write_view_list; + gomp_mutex_lock (&task_list->connect_view_mutex); + for (i = 0; i < num_batchQ_write_views; ++i) + ((gomp_batchQ_view_p) task_list->views[i])->termination_flag = true; + gomp_mutex_unlock (&task_list->connect_view_mutex); task->termination_flag = true; } @@ -290,10 +437,10 @@ GOMP_stream_connect_view (void *t, void *s, void *v) gomp_stream_p stream = (gomp_stream_p) s; gomp_stream_view_p view = (gomp_stream_view_p) v; - gomp_stream_view_handle_p vh = (view->type == READ_VIEW) ? + gomp_stream_view_handle_p vh = ((view->type & VIEW_TYPE_MASK) == READ_VIEW) ? &stream->read_views : &stream->write_views; gomp_stream_view_list_p stream_list = &vh->view_list; - gomp_stream_view_list_p task_list = (view->type == READ_VIEW) ? + gomp_stream_view_list_p task_list = ((view->type & VIEW_TYPE_MASK) == READ_VIEW) ? &task->read_view_list : &task->write_view_list; view->stream = stream; @@ -302,7 +449,7 @@ GOMP_stream_connect_view (void *t, void *s, void *v) stream is initially empty. This is equivalent to releasing the original buffer_size elements. A write view will start with buffer_size free space. 
*/ - if (view->type == READ_VIEW) + if ((view->type & VIEW_TYPE_MASK) == READ_VIEW) view->lower_index = stream->buffer_size; else view->local_min_value = stream->buffer_size; @@ -321,6 +468,32 @@ GOMP_stream_connect_view (void *t, void *s, void *v) gomp_stream_add_view_to_list (view, stream_list); gomp_mutex_unlock (&vh->view_list.connect_view_mutex); __sync_fetch_and_add (&vh->nr_registered_views, 1); + debug_log5 ("GOMP_stream_connect_view %p %lu %p %s %p\n", stream, + pthread_self(), view, ((view->type & VIEW_TYPE_MASK) == READ_VIEW) ? "In" : "Out", + task); +} + +void +GOMP_batchQ_connect_view (void *t, void *s, void *v) +{ + gomp_stream_task_p task = (gomp_stream_task_p) t; + gomp_batchQ_p stream = (gomp_batchQ_p) s; + gomp_batchQ_view_p view = (gomp_batchQ_view_p) v; + + gomp_stream_view_list_p task_list = ((view->type & VIEW_TYPE_MASK) == READ_VIEW) ? + &task->batchQ_read_view_list : &task->batchQ_write_view_list; + + /* Register the view with the TASK to which it belongs. This + operation is local to the task, so there is no need to + synchronize. */ + gomp_mutex_lock (&task_list->connect_view_mutex); + gomp_stream_add_view_to_list ((gomp_stream_view_p) view, task_list); + gomp_mutex_unlock (&task_list->connect_view_mutex); + + view->stream = stream; + debug_log5 ("GOMP_batchQ_connect_view %p %lu %p %s %p\n", stream, + pthread_self(), view, ((view->type & VIEW_TYPE_MASK) == READ_VIEW) ? 
"In" : "Out", + task); } /* Check whether all the expected views on STREAM have already @@ -362,8 +535,12 @@ GOMP_stream_wait_until_connected (void *t) gomp_mutex_lock (&task->read_view_list.connect_view_mutex); for (i = 0; i < task->read_view_list.nr_views; ++i) - if (!gomp_stream_check_connected (task->read_view_list.views[i]->stream)) - done = false; + { + if (task->read_view_list.views[i]->type & BATCHQ_VIEW) + continue; + if (!gomp_stream_check_connected (task->read_view_list.views[i]->stream)) + done = false; + } gomp_mutex_unlock (&task->read_view_list.connect_view_mutex); gomp_mutex_lock (&task->write_view_list.connect_view_mutex); @@ -490,7 +667,7 @@ GOMP_stream_update (void *v, const unsigned long long act_start, gomp_stream_p stream = view->stream; void *buffer_pointer; - debug_log ("GOMP_stream_update [in] %llu %llu\n", act_start, act_end); + debug_log4 ("GOMP_stream_update [in] %p %zd %llu %llu\n", stream, view->burst_size, act_start, act_end); /* This update requests access to the buffer in [low_idx,up_idx[. We will release up to low_idx-1 and acquire up to up_idx-1. */ @@ -532,11 +709,33 @@ GOMP_stream_update (void *v, const unsigned long long act_start, guaranteed access to all the requested data. */ buffer_pointer = stream->buffer + low_idx_loc; - debug_log ("GOMP_stream_update [out] %llu %llu\n", act_start, act_end); + debug_log4 ("GOMP_stream_update [out] %p %zd %llu %llu\n", stream, view->burst_size, act_start, act_end); return buffer_pointer; } +void * +GOMP_batchQ_update (void *v, const unsigned long long act_start, + const unsigned long long act_end) +{ + unsigned long long low_idx, up_idx; + gomp_batchQ_view_p view = (gomp_batchQ_view_p) v; + gomp_batchQ_p stream = view->stream; + + /* This update requests access to the buffer in [low_idx,up_idx[. + We will release up to low_idx-1 and acquire up to up_idx-1. 
*/ + low_idx = act_start * view->burst_size; + up_idx = act_end * view->burst_size + view->pxxk_size - 1; + + if (up_idx - low_idx + 1 != stream->buffer_size) + if (!view->termination_flag) + gomp_fatal ("GOMP_batchQ: update requested access to more than buffer_size data."); + + while (!stream->state); + + return (void *) stream->receiver_ptr; +} + /* Request write access for the view V to the stream up to INDEX. */ void * @@ -548,7 +747,7 @@ GOMP_stream_stall (void *v, const unsigned long long act_start, gomp_stream_p stream = view->stream; void *buffer_pointer; - debug_log ("GOMP_stream_stall [in] %llu %llu\n", act_start, act_end); + debug_log4 ("GOMP_stream_stall [in] %p %zd %llu %llu\n", stream, view->burst_size, act_start, act_end); /* This update requests access to the buffer in [low_idx,up_idx[. We will release up to low_idx-1 and acquire up to up_idx-1. */ @@ -574,11 +773,31 @@ GOMP_stream_stall (void *v, const unsigned long long act_start, buffer_pointer = stream->buffer + (low_idx & stream->buffer_mask); - debug_log ("GOMP_stream_stall [out] %llu %llu\n", act_start, act_end); + debug_log4 ("GOMP_stream_stall [out] %p %zd %llu %llu\n", stream, view->burst_size, act_start, act_end); return buffer_pointer; } +void * +GOMP_batchQ_stall (void *v, const unsigned long long act_start, + const unsigned long long act_end) +{ + unsigned long long low_idx, up_idx; + gomp_batchQ_view_p view = (gomp_batchQ_view_p) v; + gomp_batchQ_p stream = view->stream; + + /* This update requests access to the buffer in [low_idx,up_idx[. + We will release up to low_idx-1 and acquire up to up_idx-1. 
*/ + low_idx = act_start * view->burst_size + stream->pre_shift; + up_idx = act_end * view->burst_size + view->pxxk_size + stream->pre_shift - 1; + + if (up_idx - low_idx + 1 != stream->buffer_size) + if (!view->termination_flag) + gomp_fatal ("GOMP_stream: stall requested access to an amount of data different that buffer_size."); + + return (void *) stream->sender_ptr; +} + /* Relinquish read access for the view V to the stream up to INDEX. */ @@ -590,6 +809,25 @@ GOMP_stream_release (void *v, const unsigned long long act_idx) debug_log ("GOMP_stream_release %llu %llu\n", act_idx, act_idx); } +void +GOMP_batchQ_release (void *v, const unsigned long long act_idx) +{ + gomp_batchQ_view_p view = (gomp_batchQ_view_p) v; + gomp_batchQ_p stream = view->stream; + + stream->state = 0; + if (stream->receiver_buf == stream->buf_start1) + { + stream->receiver_ptr = stream->buf_start2; + stream->receiver_buf = stream->buf_start2; + } + else + { + stream->receiver_ptr = stream->buf_start1; + stream->receiver_buf = stream->buf_start1; + } +} + /* Relinquish write access for the view V to the stream up to INDEX. */ @@ -622,6 +860,26 @@ GOMP_stream_commit (void *v, const unsigned long long act_idx) debug_log ("GOMP_stream_commit %llu %llu\n", act_idx, act_idx); } +void +GOMP_batchQ_commit (void *v, const unsigned long long act_idx) +{ + gomp_batchQ_view_p view = (gomp_batchQ_view_p) v; + gomp_batchQ_p stream = view->stream; + + while (stream->state); + stream->state = 1; + if (stream->sender_buf == stream->buf_start1) + { + stream->sender_ptr = stream->buf_start2; + stream->sender_buf = stream->buf_start2; + } + else + { + stream->sender_ptr = stream->buf_start1; + stream->sender_buf = stream->buf_start1; + } +} + /* Finalization and destruction of the streaming data structures. 
*/ /* Disconnects VIEW from the stream to which it is connected and free @@ -632,7 +890,7 @@ gomp_stream_unregister_view (gomp_stream_view_p view) { gomp_stream_p stream = view->stream; gomp_stream_view_handle_p vh = - (view->type == READ_VIEW) ? &stream->read_views : &stream->write_views; + ((view->type & VIEW_TYPE_MASK) == READ_VIEW) ? &stream->read_views : &stream->write_views; int unregistered_views; __sync_fetch_and_add (&(vh->nr_unregistered_views), 1); @@ -641,7 +899,7 @@ gomp_stream_unregister_view (gomp_stream_view_p view) /* Make sure that when multiple views access a stream, the finished views do not hinder the others in the min computation. */ - if (view->type == READ_VIEW) + if ((view->type & VIEW_TYPE_MASK) == READ_VIEW) GOMP_stream_release (view, GOMP_STREAM_MAX_INDEX); /* The last producer exiting will set the eos_p flag and allow the consumers to read up to the highest committed index. */ @@ -674,6 +932,12 @@ gomp_stream_unregister_view (gomp_stream_view_p view) } } +/* TODO */ +static inline void +gomp_batchQ_unregister_view (gomp_stream_view_p view) +{ +} + /* Invoked before terminating a stream TASK, this disconnects all the views and for all streams for which it is the last one to disconnect from, it frees up all data structures. 
*/ @@ -683,7 +947,9 @@ GOMP_stream_task_exit (void *t) { gomp_stream_task_p task = (gomp_stream_task_p) t; int num_read_views = task->read_view_list.nr_views; + int num_batchQ_read_views = task->batchQ_read_view_list.nr_views; int num_write_views = task->write_view_list.nr_views; + int num_batchQ_write_views = task->batchQ_write_view_list.nr_views; int i, res; debug_log_init ("GOMP_stream_task_exit %zu %zu\n", (size_t) t, (size_t) t); @@ -694,11 +960,19 @@ GOMP_stream_task_exit (void *t) for (i = 0; i < num_read_views; ++i) gomp_stream_unregister_view (task->read_view_list.views[i]); + for (i = 0; i < num_batchQ_read_views; ++i) + gomp_batchQ_unregister_view (task->batchQ_read_view_list.views[i]); + for (i = 0; i < num_write_views; ++i) gomp_stream_unregister_view (task->write_view_list.views[i]); + for (i = 0; i < num_batchQ_write_views; ++i) + gomp_batchQ_unregister_view (task->batchQ_write_view_list.views[i]); + free (task->read_view_list.views); + free (task->batchQ_read_view_list.views); free (task->write_view_list.views); + free (task->batchQ_write_view_list.views); free (task); } } @@ -748,6 +1022,8 @@ GOMP_stream_get_available_work (void *t, unsigned long long *start_idx) return AGGREGATION_FACTOR; } +/* TODO? GOMP_batchQ_get_available_work */ + /* Initialize streaming in this region. */ @@ -760,6 +1036,8 @@ GOMP_stream_init () gomp_barrier_init (&gomp_stream_tasks_exit_barrier, gomp_stream_tasks_count); } +/* No GOMP_batchQ_init */ + /* Wait until all streaming threads complete. */ void @@ -768,6 +1046,8 @@ GOMP_stream_exit () gomp_barrier_wait (&gomp_stream_tasks_exit_barrier); } +/* No GOMP_batchQ_exit */ + /* Request SIZE bytes for a PRE operator on stream S. Return a pointer where data should be stored. 
*/ void * @@ -783,6 +1063,19 @@ GOMP_stream_pre (void *s, const unsigned long long size) return stream->buffer; } +void * +GOMP_batchQ_pre (void *s, const unsigned long long size) +{ + gomp_batchQ_p stream = (gomp_batchQ_p) s; + + debug_log_init ("GOMP_stream_pre %zu \t %llu\n", (size_t) s, size); + + gomp_fatal ("GOMP_batchQ_pre not supported now: missing code in GOMP_batchQ_commit and GOMP_batchQ_stall\n"); + stream->pre_shift = size; + + return (void *) stream->sender_ptr; +} + /* This function is a pthread_create entry point for streaming tasks. */ diff --git a/libgomp/stream.h b/libgomp/stream.h index dd75994ec..f901d41fc 100644 --- a/libgomp/stream.h +++ b/libgomp/stream.h @@ -56,10 +56,13 @@ typedef enum { typedef enum { - READ_VIEW, + READ_VIEW = 0, WRITE_VIEW } gomp_stream_view_type_t; +#define VIEW_TYPE_MASK 1 +#define BATCHQ_VIEW 2 + struct gomp_stream; struct gomp_stream_task; @@ -106,6 +109,19 @@ typedef struct gomp_stream_view } gomp_stream_view_t, *gomp_stream_view_p; +struct gomp_batchQ; + +typedef struct gomp_batchQ_view +{ + /* Type of this view (read or write). */ + gomp_stream_view_type_t type; + size_t view_size; + size_t burst_size; + size_t pxxk_size; + struct gomp_batchQ *stream; + volatile bool termination_flag; +} gomp_batchQ_view_t, *gomp_batchQ_view_p; + /* List of GOMP_STREAM_VIEWs. As this list is only modified in the initialization phase and we never remove items, we'll use an array. */ @@ -202,6 +218,25 @@ typedef struct gomp_stream #endif } gomp_stream_t, *gomp_stream_p; +#define CACHE_LINE_SIZE 64 +typedef struct gomp_batchQ +{ + struct { + char volatile *buf_start1 __attribute__ ((aligned (CACHE_LINE_SIZE))); + char volatile *buf_start2; + size_t buffer_size; + size_t buffer_mask; + size_t pre_shift; + volatile unsigned int state:1; + + char volatile *sender_ptr __attribute__ ((aligned (CACHE_LINE_SIZE))); + char volatile *sender_buf; /* Which buffer is sender using? 
*/ + char volatile *receiver_ptr __attribute__ ((aligned (CACHE_LINE_SIZE))); + char volatile *receiver_buf; /* Which buffer is receiver using? */ + }; + /* 0 would not conform to C99 section 6.7.5.2 §1 */ + volatile char buf[1] __attribute__ ((aligned (CACHE_LINE_SIZE))); +} gomp_batchQ_t, *gomp_batchQ_p; /* GOMP_STREAM_TASK data structure. Runtime node in the task graph. */ @@ -210,7 +245,9 @@ typedef struct gomp_stream_task { /* Lists of views on streams used by this task. */ gomp_stream_view_list_t read_view_list; + gomp_stream_view_list_t batchQ_read_view_list; gomp_stream_view_list_t write_view_list; + gomp_stream_view_list_t batchQ_write_view_list; /* The following are used directly in the generated code and should only be read here. A memory fence is guaranteed before the