From 075683d57463d9251d483badd944e1a60e15192f Mon Sep 17 00:00:00 2001 From: Henry Jin Date: Mon, 7 Nov 2022 17:21:02 -0800 Subject: [PATCH] more uses_allocators update --- History.tex | 4 ++-- devices/sources/declare_target.7.c | 6 +++--- devices/sources/declare_target.7.f90 | 4 ++-- memory_model/allocators.tex | 26 ++++++++++++++------------ memory_model/sources/allocators.6.c | 19 ++++++++++++------- memory_model/sources/allocators.6.f90 | 27 ++++++++++++++++++--------- 6 files changed, 51 insertions(+), 35 deletions(-) diff --git a/History.tex b/History.tex index 8de34d9..ccf19ae 100644 --- a/History.tex +++ b/History.tex @@ -18,14 +18,14 @@ \item Added the following examples for the 5.2 features: \begin{itemize} \item \scode{uses_allocators} clause for the use of allocators in - \code{target} regions (\specref{sec:allocators}) + \code{target} regions (\specref{sec:allocators}) \end{itemize} \item Added the following examples for the 5.1 features: \begin{itemize} \item The \scode{inoutset} dependence type (\specref{subsec:task_concurrent_depend}) \item Atomic compare and capture (\specref{sec:cas}) \end{itemize} -\item Added other examples: +\item Added the following examples for the 5.0 features: \begin{itemize} \item \code{declare}~\code{target} directive with \scode{device_type(nohost)} clause (\specref{subsec:declare_target_device_type}) diff --git a/devices/sources/declare_target.7.c b/devices/sources/declare_target.7.c index 4e9d9ea..6742201 100644 --- a/devices/sources/declare_target.7.c +++ b/devices/sources/declare_target.7.c @@ -14,7 +14,7 @@ void foo_onhost(); #pragma omp declare variant(foo_onhost) match(device={kind(host)}) void foo(){ - // device specific computation + //device specific computation } void foo_onhost(){ @@ -24,8 +24,8 @@ void foo_onhost(){ int main(){ #pragma omp target teams { - foo(); // calls foo() on target device - // or foo_onhost() in case of host fallback + foo(); //calls foo() on target device or + //foo_onhost() in case of host fallback } return 0; diff --git a/devices/sources/declare_target.7.f90 b/devices/sources/declare_target.7.f90 index ff9eccd..e2ced12 100644 --- a/devices/sources/declare_target.7.f90 +++ b/devices/sources/declare_target.7.f90 @@ -22,8 +22,8 @@ program main use subs !$omp target - call foo ! calls foo() on device - ! or foo_onhost() in case of host fallback + call foo !calls foo() on device or + !foo_onhost() in case of host fallback !$omp end target end program diff --git a/memory_model/allocators.tex b/memory_model/allocators.tex index 579bab5..3f91577 100644 --- a/memory_model/allocators.tex +++ b/memory_model/allocators.tex @@ -156,21 +156,18 @@ In CASE 2, user-defined traits are specified in the \splc{cgroup_traits} variabl An allocator is initialized for the \scode{target} region in the \scode{uses_allocators} clause, and the traits specified in \splc{cgroup_traits} are included by the \scode{traits} modifier. -As shown above, the \scode{uses_allocators} clause creates a new allocator for the -\scode{target} region, and uses only traits specified in the clause with a modifier. In CASE 3, the \splc{cgroup_alloc} variable is initialized on the host with traits -and a memory space. However, these are ignored by the \scode{uses_allocators} clause, -because a new allocator is initialized, and has no traits specified within the clause. +and a memory space. However, these are ignored by the \scode{uses_allocators} clause +and a new allocator for the \scode{target} region is initialized with default traits. \cexample[5.2]{allocators}{5} \ffreeexample[5.2]{allocators}{5} -The following example shows how to make an allocator, defined on the host, available in a \scode{target} region. +\index{dynamic_allocators clause@\scode{dynamic_allocators} clause} +\index{clauses!dynamic_allocators@\scode{dynamic_allocators}} -When the \scode{requires} directive is specified with a \scode{dynamic_allocators} -clause, allocators initialized on the host can be used in a \scode{target} region -without specifying a \scode{uses_allocators} clause. This applies to predefined -allocators and user-defined allocators. +The following example shows how to make an allocator available in a \scode{target} region +without specifying a \scode{uses_allocators} clause. In CASE 1, the predefined \scode{omp_cgroup_mem_alloc} allocator is used in the \scode{target} region as in CASE 1 of the previous example, but without specifying a \scode{uses_allocators} clause. @@ -179,9 +176,14 @@ This is accomplished by specifying the \scode{requires} directive with a restrictions on allocator usage in \scode{target} regions. CASE 2 also uses the \scode{dynamic_allocators} clause to remove allocator -restrictions in the \scode{target} region. Here, an allocator initialized -on the host is used for target array allocations of an \scode{allocate} clause. - +restrictions in \scode{target} regions. Here, an allocator is initialized +by calling the \scode{omp_init_allocator} routine in the \code{target} region. +The allocator is then used for the allocations of array \plc{xbuf} in +an \scode{allocate} clause of the \code{target}~\code{teams} construct +for each team and destroyed after its use. +The use of separate \code{target} regions is needed here since +no statement is allowed between a \code{target} directive and +its nested \code{teams} construct. \cexample[5.2]{allocators}{6} \ffreeexample[5.2]{allocators}{6} diff --git a/memory_model/sources/allocators.6.c b/memory_model/sources/allocators.6.c index fb1b8fb..4fe1df5 100644 --- a/memory_model/sources/allocators.6.c +++ b/memory_model/sources/allocators.6.c @@ -13,14 +13,14 @@ int calc(int i, int j) { return i*j;} #pragma omp declare target(calc) - int main() { #define N 256 int sum; int xbuf[N]; - omp_allocator_handle_t cgroup_alloc; + static omp_allocator_handle_t cgroup_alloc; + #pragma omp declare target(cgroup_alloc) const omp_alloctrait_t cgroup_traits[1] = {{omp_atk_access, omp_atv_cgroup}}; @@ -50,11 +50,14 @@ int main() for (int i = 0; i < N; i++) { xbuf[i] = 0; } - cgroup_alloc = omp_init_allocator( - omp_default_mem_space, 1, cgroup_traits); + // initializes the allocator in target region + #pragma omp target + cgroup_alloc = omp_init_allocator( + omp_default_mem_space, 1, cgroup_traits); - // WARNING: cgroup_alloc is in undefined state on target device! - #pragma omp target teams reduction(+:xbuf) thread_limit(N) \ + // uses the initialized allocator + #pragma omp target + #pragma omp teams reduction(+:xbuf) thread_limit(N) \ allocate(cgroup_alloc:xbuf) num_teams(4) { #pragma omp parallel for @@ -63,7 +66,9 @@ int main() } } - omp_destroy_allocator(cgroup_alloc); + // destroys the allocator after its use + #pragma omp target + omp_destroy_allocator(cgroup_alloc); sum = 0; #pragma omp parallel for reduction(+:sum) diff --git a/memory_model/sources/allocators.6.f90 b/memory_model/sources/allocators.6.f90 index ae74189..d85bdf2 100644 --- a/memory_model/sources/allocators.6.f90 +++ b/memory_model/sources/allocators.6.f90 @@ -25,8 +25,9 @@ program main !$omp requires dynamic_allocators - integer( omp_allocator_handle_kind ) :: cgroup_alloc - type(omp_alloctrait),parameter :: cgroup_traits(1)= & + integer(omp_allocator_handle_kind),save :: cgroup_alloc + !$omp declare target(cgroup_alloc) + type(omp_alloctrait),parameter :: cgroup_traits(1)= & [omp_alloctrait(omp_atk_access,omp_atv_cgroup)] !*** CASE 1: ***! @@ -55,21 +56,29 @@ program main do i=1,N; xbuf(i)=0; end do - cgroup_alloc = omp_init_allocator(omp_default_mem_space, 1, & - cgroup_traits) + !! initializes allocator in the target region + !$omp target + cgroup_alloc = omp_init_allocator(omp_default_mem_space, 1, & + cgroup_traits) + !$omp end target - !! WARNING: cgroup_alloc is in undefined state on target device! - !$omp target teams reduction(+:xbuf) thread_limit(N) & - !$omp& allocate(cgroup_alloc:xbuf) num_teams(4) + !! uses the initialized allocator + !$omp target + !$omp teams reduction(+:xbuf) thread_limit(N) & + !$omp& allocate(cgroup_alloc:xbuf) num_teams(4) !$omp parallel do do i = 1,N xbuf(i) = xbuf(i) + calc(i,omp_get_team_num()) enddo - !$omp end target teams + !$omp end teams + !$omp end target - call omp_destroy_allocator(cgroup_alloc) + !! destroys the allocator after its use + !$omp target + call omp_destroy_allocator(cgroup_alloc) + !$omp end target sum = 0 !$omp parallel do reduction(+:sum)