more uses_allocators update

This commit is contained in:
Henry Jin 2022-11-07 17:21:02 -08:00
parent 08859e6029
commit 075683d574
6 changed files with 51 additions and 35 deletions

View File

@ -18,14 +18,14 @@
\item Added the following examples for the 5.2 features:
\begin{itemize}
\item \scode{uses_allocators} clause for the use of allocators in
\code{target} regions (\specref{sec:allocators})
\code{target} regions (\specref{sec:allocators})
\end{itemize}
\item Added the following examples for the 5.1 features:
\begin{itemize}
\item The \scode{inoutset} dependence type (\specref{subsec:task_concurrent_depend})
\item Atomic compare and capture (\specref{sec:cas})
\end{itemize}
\item Added other examples:
\item Added the following examples for the 5.0 features:
\begin{itemize}
\item \code{declare}~\code{target} directive with \scode{device_type(nohost)}
clause (\specref{subsec:declare_target_device_type})

View File

@ -14,7 +14,7 @@ void foo_onhost();
#pragma omp declare variant(foo_onhost) match(device={kind(host)})
void foo(){
// device specific computation
//device specific computation
}
void foo_onhost(){
@ -24,8 +24,8 @@ void foo_onhost(){
int main(){
#pragma omp target teams
{
foo(); // calls foo() on target device
// or foo_onhost() in case of host fallback
foo(); //calls foo() on target device or
//foo_onhost() in case of host fallback
}
return 0;

View File

@ -22,8 +22,8 @@ program main
use subs
!$omp target
call foo ! calls foo() on device
! or foo_onhost() in case of host fallback
call foo !calls foo() on device or
!foo_onhost() in case of host fallback
!$omp end target
end program

View File

@ -156,21 +156,18 @@ In CASE 2, user-defined traits are specified in the \splc{cgroup_traits} variabl
An allocator is initialized for the \scode{target} region in the \scode{uses_allocators} clause,
and the traits specified in \splc{cgroup_traits} are included by the \scode{traits} modifier.
As shown above, the \scode{uses_allocators} clause creates a new allocator for the
\scode{target} region, and uses only traits specified in the clause with a modifier.
In CASE 3, the \splc{cgroup_alloc} variable is initialized on the host with traits
and a memory space. However, these are ignored by the \scode{uses_allocators} clause,
because a new allocator is initialized, and has no traits specified within the clause.
and a memory space. However, these are ignored by the \scode{uses_allocators} clause,
and a new allocator for the \scode{target} region is initialized with default traits.
\cexample[5.2]{allocators}{5}
\ffreeexample[5.2]{allocators}{5}
The following example shows how to make an allocator, defined on the host, available in a \scode{target} region.
\index{dynamic_allocators clause@\scode{dynamic_allocators} clause}
\index{clauses!dynamic_allocators@\scode{dynamic_allocators}}
When the \scode{requires} directive is specified with a \scode{dynamic_allocators}
clause, allocators initialized on the host can be used in a \scode{target} region
without specifying a \scode{uses_allocators} clause. This applies to predefined
allocators and user-defined allocators.
The following example shows how to make an allocator available in a \scode{target} region
without specifying a \scode{uses_allocators} clause.
In CASE 1, the predefined \scode{omp_cgroup_mem_alloc} allocator is used in the \scode{target}
region as in CASE 1 of the previous example, but without specifying a \scode{uses_allocators} clause.
@ -179,9 +176,14 @@ This is accomplished by specifying the \scode{requires} directive with a
restrictions on allocator usage in \scode{target} regions.
CASE 2 also uses the \scode{dynamic_allocators} clause to remove allocator
restrictions in the \scode{target} region. Here, an allocator initialized
on the host is used for target array allocations of an \scode{allocate} clause.
restrictions in \scode{target} regions. Here, an allocator is initialized
by calling the \scode{omp_init_allocator} routine in a \code{target} region.
The allocator is then used for the allocations of array \plc{xbuf} for each team
in an \scode{allocate} clause of the \code{teams} construct nested in a separate
\code{target} region, and is destroyed in another \code{target} region after its use.
The use of separate \code{target} regions is needed here since
no statement is allowed between a \code{target} directive and
its nested \code{teams} construct.
\cexample[5.2]{allocators}{6}
\ffreeexample[5.2]{allocators}{6}

View File

@ -13,14 +13,14 @@
int calc(int i, int j) { return i*j;}
#pragma omp declare target(calc)
int main()
{
#define N 256
int sum;
int xbuf[N];
omp_allocator_handle_t cgroup_alloc;
static omp_allocator_handle_t cgroup_alloc;
#pragma omp declare target(cgroup_alloc)
const omp_alloctrait_t cgroup_traits[1] =
{{omp_atk_access, omp_atv_cgroup}};
@ -50,11 +50,14 @@ int main()
for (int i = 0; i < N; i++) { xbuf[i] = 0; }
cgroup_alloc = omp_init_allocator(
omp_default_mem_space, 1, cgroup_traits);
// initializes the allocator in target region
#pragma omp target
cgroup_alloc = omp_init_allocator(
omp_default_mem_space, 1, cgroup_traits);
// WARNING: cgroup_alloc is in undefined state on target device!
#pragma omp target teams reduction(+:xbuf) thread_limit(N) \
// uses the initialized allocator
#pragma omp target
#pragma omp teams reduction(+:xbuf) thread_limit(N) \
allocate(cgroup_alloc:xbuf) num_teams(4)
{
#pragma omp parallel for
@ -63,7 +66,9 @@ int main()
}
}
omp_destroy_allocator(cgroup_alloc);
// destroys the allocator after its use
#pragma omp target
omp_destroy_allocator(cgroup_alloc);
sum = 0;
#pragma omp parallel for reduction(+:sum)

View File

@ -25,8 +25,9 @@ program main
!$omp requires dynamic_allocators
integer( omp_allocator_handle_kind ) :: cgroup_alloc
type(omp_alloctrait),parameter :: cgroup_traits(1)= &
integer(omp_allocator_handle_kind),save :: cgroup_alloc
!$omp declare target(cgroup_alloc)
type(omp_alloctrait),parameter :: cgroup_traits(1)= &
[omp_alloctrait(omp_atk_access,omp_atv_cgroup)]
!*** CASE 1: ***!
@ -55,21 +56,29 @@ program main
do i=1,N; xbuf(i)=0; end do
cgroup_alloc = omp_init_allocator(omp_default_mem_space, 1, &
cgroup_traits)
!! initializes allocator in the target region
!$omp target
cgroup_alloc = omp_init_allocator(omp_default_mem_space, 1, &
cgroup_traits)
!$omp end target
!! WARNING: cgroup_alloc is in undefined state on target device!
!$omp target teams reduction(+:xbuf) thread_limit(N) &
!$omp& allocate(cgroup_alloc:xbuf) num_teams(4)
!! uses the initialized allocator
!$omp target
!$omp teams reduction(+:xbuf) thread_limit(N) &
!$omp& allocate(cgroup_alloc:xbuf) num_teams(4)
!$omp parallel do
do i = 1,N
xbuf(i) = xbuf(i) + calc(i,omp_get_team_num())
enddo
!$omp end target teams
!$omp end teams
!$omp end target
call omp_destroy_allocator(cgroup_alloc)
!! destroys the allocator after its use
!$omp target
call omp_destroy_allocator(cgroup_alloc)
!$omp end target
sum = 0
!$omp parallel do reduction(+:sum)