mirror of
https://github.com/OpenMP/Examples.git
synced 2025-04-10 16:32:11 +01:00
Merge pull request #8 from HenryJin/work
additional updates to Examples async_target and task_dep, change example naming scheme
This commit is contained in:
commit
16608643d6
@ -24,7 +24,30 @@ accessed.
|
|||||||
|
|
||||||
\cexample{async_target}{2c}
|
\cexample{async_target}{2c}
|
||||||
|
|
||||||
The Fortran example uses allocatable arrays for dynamic memory on the device.
|
The Fortran example below is similar to the C version above. Instead of pointers, though, it uses
|
||||||
|
the convenience of Fortran allocatable arrays on the device. An allocatable array has the
|
||||||
|
same behavior in a \code{map} clause as a C pointer, in this case.
|
||||||
|
|
||||||
|
If there is no shape specified for an allocatable array in a \code{map} clause, only the array descriptor
|
||||||
|
(also called a dope vector) is mapped. That is, device space is created for the descriptor, and it
|
||||||
|
is initially populated with host values. In this case, the \plc{v1} and \plc{v2} arrays will be in a
|
||||||
|
non-associated state on the device. When space for \plc{v1} and \plc{v2} is allocated on the device,
|
||||||
|
the addresses to the space will be included in their descriptors.
|
||||||
|
|
||||||
|
At the end of the first \code{target} region, the descriptor (of an unshaped specification of an allocatable
|
||||||
|
array in a \code{map} clause) is returned with the raw device address of the allocated space.
|
||||||
|
The content of the array is not returned. In the example the data in arrays \plc{v1} and \plc{v2}
|
||||||
|
are not returned. In the second \code{target} directive, the \plc{v1} and \plc{v2} descriptors are
|
||||||
|
re-created on the device with the descriptive information, and references to the
|
||||||
|
vectors point to the correct local storage of the space that was not freed in the first \code{target}
|
||||||
|
directive. At the end of the second \code{target} region, the data in array \plc{p} is copied back
|
||||||
|
to the host since \plc{p} is not an allocatable array.
|
||||||
|
|
||||||
|
A \code{depend} clause is used in the \code{task} directive to provide a wait at the beginning of the second
|
||||||
|
\code{target} region, to ensure that there is no race condition with \plc{v1} and \plc{v2} in the two tasks.
|
||||||
|
It would be noncompliant to use \plc{v1} and/or \plc{v2} in lieu of \plc{N} in the \code{depend} clauses,
|
||||||
|
because the use of non-allocated allocatable arrays as list items in the first \code{depend} clause would
|
||||||
|
lead to unspecified behavior.
|
||||||
|
|
||||||
\fexample{async_target}{2f}
|
\fexample{async_target}{2f}
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
|
|
||||||
\vspace{1.0in}
|
\vspace{1.0in}
|
||||||
|
|
||||||
\textbf{Version 4.0.2 -- February, 2015}
|
\textbf{Version 4.0.2rv1 -- February, 2015}
|
||||||
\end{center}
|
\end{center}
|
||||||
\end{adjustwidth}
|
\end{adjustwidth}
|
||||||
|
|
||||||
@ -38,5 +38,7 @@ permission of OpenMP Architecture Review Board.\end{adjustwidth}
|
|||||||
\thispagestyle{empty}
|
\thispagestyle{empty}
|
||||||
\phantom{a}
|
\phantom{a}
|
||||||
\emph{This page intentionally left blank}
|
\emph{This page intentionally left blank}
|
||||||
|
|
||||||
|
This working version implements tickets 299 and 342, along with a few other editorial changes.
|
||||||
\vfill
|
\vfill
|
||||||
|
|
||||||
|
@ -439,9 +439,9 @@
|
|||||||
}{
|
}{
|
||||||
\def\cname{#1.#2}
|
\def\cname{#1.#2}
|
||||||
% Use following line for old numbering
|
% Use following line for old numbering
|
||||||
\def\ename{\thechapter.#2}
|
% \def\ename{\thechapter.#2}
|
||||||
% Use following for mnemonics
|
% Use following for mnemonics
|
||||||
% \def\ename{\escstr{#1}.#2}
|
\def\ename{\escstr{#1}.#2}
|
||||||
}
|
}
|
||||||
\noindent
|
\noindent
|
||||||
\textit{Example \ename}
|
\textit{Example \ename}
|
||||||
|
@ -12,10 +12,11 @@ extern void init(float *, float *, int);
|
|||||||
#pragma omp end declare target
|
#pragma omp end declare target
|
||||||
extern void foo();
|
extern void foo();
|
||||||
extern void output(float *, int);
|
extern void output(float *, int);
|
||||||
void vec_mult(float *p, float *v1, float *v2, int N, int dev)
|
void vec_mult(float *p, int N, int dev)
|
||||||
{
|
{
|
||||||
|
float *v1, *v2;
|
||||||
int i;
|
int i;
|
||||||
#pragma omp task depend(out: v1, v2)
|
#pragma omp task shared(v1, v2) depend(out: v1, v2)
|
||||||
#pragma omp target device(dev) map(v1, v2)
|
#pragma omp target device(dev) map(v1, v2)
|
||||||
{
|
{
|
||||||
// check whether on device dev
|
// check whether on device dev
|
||||||
@ -26,7 +27,7 @@ void vec_mult(float *p, float *v1, float *v2, int N, int dev)
|
|||||||
init(v1, v2, N);
|
init(v1, v2, N);
|
||||||
}
|
}
|
||||||
foo(); // execute other work asychronously
|
foo(); // execute other work asychronously
|
||||||
#pragma omp task depend(in: v1, v2)
|
#pragma omp task shared(v1, v2, p) depend(in: v1, v2)
|
||||||
#pragma omp target device(dev) map(to: v1, v2) map(from: p[0:N])
|
#pragma omp target device(dev) map(to: v1, v2) map(from: p[0:N])
|
||||||
{
|
{
|
||||||
// check whether on device dev
|
// check whether on device dev
|
||||||
@ -38,5 +39,6 @@ void vec_mult(float *p, float *v1, float *v2, int N, int dev)
|
|||||||
free(v1);
|
free(v1);
|
||||||
free(v2);
|
free(v2);
|
||||||
}
|
}
|
||||||
|
#pragma omp taskwait
|
||||||
output(p, N);
|
output(p, N);
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
integer :: i, idev
|
integer :: i, idev
|
||||||
!$omp declare target (init)
|
!$omp declare target (init)
|
||||||
|
|
||||||
!$omp task depend(out: v1,v2)
|
!$omp task shared(v1,v2) depend(out: N)
|
||||||
!$omp target device(idev) map(v1,v2)
|
!$omp target device(idev) map(v1,v2)
|
||||||
if( omp_is_initial_device() ) &
|
if( omp_is_initial_device() ) &
|
||||||
stop "not executing on target device"
|
stop "not executing on target device"
|
||||||
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
call foo() ! execute other work asychronously
|
call foo() ! execute other work asychronously
|
||||||
|
|
||||||
!$omp task depend(in: v1,v2)
|
!$omp task shared(v1,v2,p) depend(in: N)
|
||||||
!$omp target device(idev) map(to: v1,v2) map(from: p)
|
!$omp target device(idev) map(to: v1,v2) map(from: p)
|
||||||
if( omp_is_initial_device() ) &
|
if( omp_is_initial_device() ) &
|
||||||
stop "not executing on target device"
|
stop "not executing on target device"
|
||||||
@ -34,6 +34,7 @@
|
|||||||
!$omp end target
|
!$omp end target
|
||||||
!$omp end task
|
!$omp end task
|
||||||
|
|
||||||
|
!$omp taskwait
|
||||||
call output(p, N)
|
call output(p, N)
|
||||||
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
@ -13,7 +13,10 @@ C[N][N] )
|
|||||||
for (i = 0; i < N; i+=BS) {
|
for (i = 0; i < N; i+=BS) {
|
||||||
for (j = 0; j < N; j+=BS) {
|
for (j = 0; j < N; j+=BS) {
|
||||||
for (k = 0; k < N; k+=BS) {
|
for (k = 0; k < N; k+=BS) {
|
||||||
#pragma omp task depend ( in: A[i:BS][k:BS], B[k:BS][j:BS] ) \
|
// Note 1: i, j, k, A, B, C are firstprivate by default
|
||||||
|
// Note 2: A, B and C are just pointers
|
||||||
|
#pragma omp task private(ii, jj, kk) \
|
||||||
|
depend ( in: A[i:BS][k:BS], B[k:BS][j:BS] ) \
|
||||||
depend ( inout: C[i:BS][j:BS] )
|
depend ( inout: C[i:BS][j:BS] )
|
||||||
for (ii = i; ii < i+BS; ii++ )
|
for (ii = i; ii < i+BS; ii++ )
|
||||||
for (jj = j; jj < j+BS; jj++ )
|
for (jj = j; jj < j+BS; jj++ )
|
||||||
|
@ -3,19 +3,22 @@
|
|||||||
! @@compilable: yes
|
! @@compilable: yes
|
||||||
! @@linkable: no
|
! @@linkable: no
|
||||||
! @@expect: success
|
! @@expect: success
|
||||||
|
! Assume BS divides N perfectly
|
||||||
subroutine matmul_depend (N, BS, A, B, C)
|
subroutine matmul_depend (N, BS, A, B, C)
|
||||||
|
implicit none
|
||||||
integer :: N, BS, BM
|
integer :: N, BS, BM
|
||||||
real, dimension(N, N) :: A, B, C
|
real, dimension(N, N) :: A, B, C
|
||||||
integer :: i, j, k, ii, jj, kk
|
integer :: i, j, k, ii, jj, kk
|
||||||
BM = BS -1
|
BM = BS - 1
|
||||||
do i = 1, N, BS
|
do i = 1, N, BS
|
||||||
do j = 1, N, BS
|
do j = 1, N, BS
|
||||||
do k = 1, N, BS
|
do k = 1, N, BS
|
||||||
!$omp task depend ( in: A(i:i+BM, k:k+BM), B(k:k+BM, j:j+BM) ) &
|
!$omp task shared(A,B,C) private(ii,jj,kk) & ! I,J,K are firstprivate by default
|
||||||
|
!$omp depend ( in: A(i:i+BM, k:k+BM), B(k:k+BM, j:j+BM) ) &
|
||||||
!$omp depend ( inout: C(i:i+BM, j:j+BM) )
|
!$omp depend ( inout: C(i:i+BM, j:j+BM) )
|
||||||
do ii = i, i+BS
|
do ii = i, i+BM
|
||||||
do jj = j, j+BS
|
do jj = j, j+BM
|
||||||
do kk = k, k+BS
|
do kk = k, k+BM
|
||||||
C(jj,ii) = C(jj,ii) + A(kk,ii) * B(jj,kk)
|
C(jj,ii) = C(jj,ii) + A(kk,ii) * B(jj,kk)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
|
Loading…
x
Reference in New Issue
Block a user