mirror of
https://github.com/OpenMP/Examples.git
synced 2025-04-03 13:21:33 +01:00
Initial add of all files
This commit is contained in:
commit
542c10d074
80
Changes.log
Normal file
80
Changes.log
Normal file
@ -0,0 +1,80 @@
|
||||
[6-Jan-2015] Version 4.0.1ltx
|
||||
Changes from 4.0.1ltx-21Nov-2014
|
||||
|
||||
1. openmp.sty and openmp-examples.tex
|
||||
enable source line numbering
|
||||
|
||||
2. Split chapters in the main file (Examples_Sects.tex) into individual files
|
||||
Makefile and openmp-examples.tex were modified to use the new list.
|
||||
|
||||
3. Additional changes related to fixing fonts and language markers
|
||||
|
||||
Below is a summary.
|
||||
|
||||
+Page 2: "non- compound" -> "non-compound"
|
||||
+Page 10: fixed mis-placed language markers
|
||||
+Chap-8, page 24: fixed variable fonts for T, P, T/P
|
||||
+Chap-19, page 79-80: added missing Fortran cont. marker
|
||||
+Chap-25, page 100: combined 25.2f & 25.3f into one Fortran marker
|
||||
+Chap-30, page 120: combined 30.2c & 30.3c into one C/C++ marker
|
||||
+Chap-30, page 122-123: added missing Fortran cont. marker
|
||||
+Chap-32, page 127: added missing Fortran cont. marker
|
||||
+Chap-36, page 138-139: added missing Fortran cont. marker
|
||||
+Chap-39, page 147: added missing Fortran cont. marker
|
||||
+Chap-50, page 182: fixed variables p, v1, v2 fonts
|
||||
+Chap-51, page 189: fixed variables p, v1, v2 fonts
|
||||
+Chap-52, page 201: fixed variable fonts, function fonts
|
||||
+Chap-53, page 205: fixed variable fonts, function fonts
|
||||
+Chap-54, page 215: fixed variable fonts
|
||||
+Chap-58, page 237: fixed variable fonts
|
||||
+Chap-58, page 237: Minor wording change to reflect the new placement of the Example header.
|
||||
|
||||
Modification applied to the following files:
|
||||
Examples_Chapt.tex
|
||||
Examples_affinity.tex
|
||||
Examples_associate.tex
|
||||
Examples_atomic_restrict.tex
|
||||
Examples_cond_comp.tex
|
||||
Examples_declare_target.tex
|
||||
Examples_fort_sa_private.tex
|
||||
Examples_fort_sp_common.tex
|
||||
Examples_reduction.tex
|
||||
Examples_target.tex
|
||||
Examples_target_data.tex
|
||||
Examples_target_update.tex
|
||||
Examples_teams.tex
|
||||
Examples_threadprivate.tex
|
||||
Examples_workshare.tex
|
||||
|
||||
4. Other notes
|
||||
|
||||
+Chap-12, page 37: placement of C/C++ marker changed, but OK
|
||||
+Chap-29, page 114: marker moved, but OK.
|
||||
+Chap-50, page 187: Example 50.4bf header added.
|
||||
Fortran marker changed, but OK
|
||||
+Chap-51, page 192: "Example 51.3c" added, and example numbering
|
||||
shifted thereafter.
|
||||
|
||||
|
||||
[21-Nov-2014] Initial 4.0.1ltx
|
||||
Changes from 4.0.1ltx-3Jun2014
|
||||
|
||||
1. openmp.sty
|
||||
change from using mnemonics
|
||||
\def\ename{\escstr{#1}.#2}
|
||||
to sequential numbering
|
||||
\def\ename{\thechapter.#2}
|
||||
|
||||
For "fnexample()" definition, firstline=6, not 8
|
||||
|
||||
2. source file changes
|
||||
sources - use the "original" sources from 4.0.1
|
||||
|
||||
3. Version Number
|
||||
openmp-examples.tex:
|
||||
change footnote "Version 4.0 - July 2013"
|
||||
to "Version 4.0.1 - February 2014"
|
||||
Title_Page.tex:
|
||||
change from "November, 2013" to "February, 2014"
|
||||
"1997-2013" -> "1997-2014"
|
||||
|
9
Examples_Chapt.tex
Normal file
9
Examples_Chapt.tex
Normal file
@ -0,0 +1,9 @@
|
||||
|
||||
\chapter*{Examples}
|
||||
\label{chap:examples}
|
||||
The following are examples of the OpenMP API directives, constructs, and routines.
|
||||
\ccppspecificstart
|
||||
A statement following a directive is compound only when necessary, and a
|
||||
non-compound statement is indented with respect to a directive preceding it.
|
||||
\ccppspecificend
|
||||
|
243
Examples_affinity.tex
Normal file
243
Examples_affinity.tex
Normal file
@ -0,0 +1,243 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{proc\_bind} Clause}
|
||||
\label{chap:affinity}
|
||||
|
||||
The following examples demonstrate how to use the \code{proc\_bind} clause to
|
||||
control the thread binding for a team of threads in a \code{parallel} region.
|
||||
The machine architecture is depicted in the figure below. It consists of two sockets,
|
||||
each equipped with a quad-core processor and configured to execute two hardware
|
||||
threads simultaneously on each core. These examples assume a contiguous core numbering
|
||||
starting from 0, such that the hardware threads 0,1 form the first physical core.
|
||||
|
||||
\ifpdf
|
||||
%\begin{figure}[htbp]
|
||||
\centerline{\includegraphics[width=3.8in,keepaspectratio=true]%
|
||||
{figs/proc_bind_fig.pdf}}
|
||||
%\end{figure}
|
||||
\fi
|
||||
|
||||
The following equivalent place list declarations consist of eight places (which
|
||||
we designate as p0 to p7):
|
||||
|
||||
\code{OMP\_PLACES=\texttt{"}\{0,1\},\{2,3\},\{4,5\},\{6,7\},\{8,9\},\{10,11\},\{12,13\},\{14,15\}\texttt{"}}
|
||||
|
||||
or
|
||||
|
||||
\code{OMP\_PLACES=\texttt{"}\{0:2\}:8:2\texttt{"}}
|
||||
|
||||
\section{Spread Affinity Policy}
|
||||
|
||||
The following example shows the result of the \code{spread} affinity policy on
|
||||
the partition list when the number of threads is less than or equal to the number
|
||||
of places in the parent's place partition, for the machine architecture depicted
|
||||
above. Note that the threads are bound to the first place of each subpartition.
|
||||
|
||||
\cexample{affinity}{1c}
|
||||
|
||||
\fexample{affinity}{1f}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially started on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
\item thread 0 executes on p0 with the place partition p0,p1
|
||||
|
||||
\item thread 1 executes on p2 with the place partition p2,p3
|
||||
|
||||
\item thread 2 executes on p4 with the place partition p4,p5
|
||||
|
||||
\item thread 3 executes on p6 with the place partition p6,p7
|
||||
\end{compactitem}
|
||||
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
\item thread 0 executes on p2 with the place partition p2,p3
|
||||
|
||||
\item thread 1 executes on p4 with the place partition p4,p5
|
||||
|
||||
\item thread 2 executes on p6 with the place partition p6,p7
|
||||
|
||||
\item thread 3 executes on p0 with the place partition p0,p1
|
||||
\end{compactitem}
|
||||
|
||||
The following example illustrates the \code{spread} thread affinity policy when
|
||||
the number of threads is greater than the number of places in the parent's place
|
||||
partition.
|
||||
|
||||
Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the
|
||||
parent's place partition. The first \plc{T/P} threads of the team (including the master
|
||||
thread) execute on the parent's place. The next \plc{T/P} threads execute on the next
|
||||
place in the place partition, and so on, with wrap around.
|
||||
|
||||
\cexample{affinity}{2c}
|
||||
|
||||
\fexample{affinity}{2f}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially started on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0,1 execute on p0 with the place partition p0
|
||||
|
||||
\item threads 2,3 execute on p1 with the place partition p1
|
||||
|
||||
\item threads 4,5 execute on p2 with the place partition p2
|
||||
|
||||
\item threads 6,7 execute on p3 with the place partition p3
|
||||
|
||||
\item threads 8,9 execute on p4 with the place partition p4
|
||||
|
||||
\item threads 10,11 execute on p5 with the place partition p5
|
||||
|
||||
\item threads 12,13 execute on p6 with the place partition p6
|
||||
|
||||
\item threads 14,15 execute on p7 with the place partition p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0,1 execute on p2 with the place partition p2
|
||||
|
||||
\item threads 2,3 execute on p3 with the place partition p3
|
||||
|
||||
\item threads 4,5 execute on p4 with the place partition p4
|
||||
|
||||
\item threads 6,7 execute on p5 with the place partition p5
|
||||
|
||||
\item threads 8,9 execute on p6 with the place partition p6
|
||||
|
||||
\item threads 10,11 execute on p7 with the place partition p7
|
||||
|
||||
\item threads 12,13 execute on p0 with the place partition p0
|
||||
|
||||
\item threads 14,15 execute on p1 with the place partition p1
|
||||
\end{compactitem}
|
||||
|
||||
\section{Close Affinity Policy}
|
||||
|
||||
The following example shows the result of the \code{close} affinity policy on
|
||||
the partition list when the number of threads is less than or equal to the number
|
||||
of places in the parent's place partition, for the machine architecture depicted above.
|
||||
The place partition is not changed by the \code{close} policy.
|
||||
|
||||
\cexample{affinity}{3c}
|
||||
|
||||
\fexample{affinity}{3f}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially started on p0, the following placement of threads will
|
||||
be applied in the \code{parallel} region:
|
||||
|
||||
\begin{compactitem}
|
||||
\item thread 0 executes on p0 with the place partition p0-p7
|
||||
|
||||
\item thread 1 executes on p1 with the place partition p0-p7
|
||||
|
||||
\item thread 2 executes on p2 with the place partition p0-p7
|
||||
|
||||
\item thread 3 executes on p3 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
\item thread 0 executes on p2 with the place partition p0-p7
|
||||
|
||||
\item thread 1 executes on p3 with the place partition p0-p7
|
||||
|
||||
\item thread 2 executes on p4 with the place partition p0-p7
|
||||
|
||||
\item thread 3 executes on p5 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
The following example illustrates the \code{close} thread affinity policy when
|
||||
the number of threads is greater than the number of places in the parent's place
|
||||
partition.
|
||||
|
||||
Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the
|
||||
parent's place partition. The first \plc{T/P} threads of the team (including the master
|
||||
thread) execute on the parent's place. The next \plc{T/P} threads execute on the next
|
||||
place in the place partition, and so on, with wrap around. The place partition
|
||||
is not changed by the \code{close} policy.
|
||||
|
||||
\cexample{affinity}{4c}
|
||||
|
||||
\fexample{affinity}{4f}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially running on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0,1 execute on p0 with the place partition p0-p7
|
||||
|
||||
\item threads 2,3 execute on p1 with the place partition p0-p7
|
||||
|
||||
\item threads 4,5 execute on p2 with the place partition p0-p7
|
||||
|
||||
\item threads 6,7 execute on p3 with the place partition p0-p7
|
||||
|
||||
\item threads 8,9 execute on p4 with the place partition p0-p7
|
||||
|
||||
\item threads 10,11 execute on p5 with the place partition p0-p7
|
||||
|
||||
\item threads 12,13 execute on p6 with the place partition p0-p7
|
||||
|
||||
\item threads 14,15 execute on p7 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0,1 execute on p2 with the place partition p0-p7
|
||||
|
||||
\item threads 2,3 execute on p3 with the place partition p0-p7
|
||||
|
||||
\item threads 4,5 execute on p4 with the place partition p0-p7
|
||||
|
||||
\item threads 6,7 execute on p5 with the place partition p0-p7
|
||||
|
||||
\item threads 8,9 execute on p6 with the place partition p0-p7
|
||||
|
||||
\item threads 10,11 execute on p7 with the place partition p0-p7
|
||||
|
||||
\item threads 12,13 execute on p0 with the place partition p0-p7
|
||||
|
||||
\item threads 14,15 execute on p1 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
\section{Master Affinity Policy}
|
||||
|
||||
The following example shows the result of the \code{master} affinity policy on
|
||||
the partition list for the machine architecture depicted above. The place partition
|
||||
is not changed by the \code{master} policy.
|
||||
|
||||
\cexample{affinity}{5c}
|
||||
|
||||
\fexample{affinity}{5f}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially running on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0-3 execute on p0 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0-3 execute on p2 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
|
35
Examples_array_sections.tex
Normal file
35
Examples_array_sections.tex
Normal file
@ -0,0 +1,35 @@
|
||||
\pagebreak
|
||||
\chapter{Array Sections in Device Constructs}
|
||||
\label{chap:array_sections}
|
||||
|
||||
The following examples show the usage of array sections in \code{map} clauses
|
||||
on \code{target} and \code{target} \code{data} constructs.
|
||||
|
||||
This example shows the invalid usage of two separate sections of the same array
|
||||
inside of a \code{target} construct.
|
||||
|
||||
\cexample{array_sections}{1c}
|
||||
|
||||
\fexample{array_sections}{1f}
|
||||
|
||||
This example shows the invalid usage of two separate sections of the same array
|
||||
inside of a \code{target} construct.
|
||||
|
||||
\cexample{array_sections}{2c}
|
||||
|
||||
\fexample{array_sections}{2f}
|
||||
|
||||
This example shows the valid usage of two separate sections of the same array inside
|
||||
of a \code{target} construct.
|
||||
|
||||
\cexample{array_sections}{3c}
|
||||
|
||||
\fexample{array_sections}{3f}
|
||||
|
||||
This example shows the valid usage of a wholly contained array section of an already
|
||||
mapped array section inside of a \code{target} construct.
|
||||
|
||||
\cexample{array_sections}{4c}
|
||||
|
||||
\fexample{array_sections}{4f}
|
||||
|
32
Examples_associate.tex
Normal file
32
Examples_associate.tex
Normal file
@ -0,0 +1,32 @@
|
||||
\pagebreak
|
||||
\chapter{Fortran \code{ASSOCIATE} Construct}
|
||||
\fortranspecificstart
|
||||
\label{chap:associate}
|
||||
|
||||
The following is an invalid example of specifying an associate name on a data-sharing attribute
|
||||
clause. The constraint in the Data Sharing Attribute Rules section in the OpenMP
|
||||
4.0 API Specifications states that an associate name preserves the association
|
||||
with the selector established at the \code{ASSOCIATE} statement. The associate
|
||||
name \plc{b} is associated with the shared variable \plc{a}. With the predetermined data-sharing
|
||||
attribute rule, the associate name \plc{b} is not allowed to be specified on the \code{private}
|
||||
clause.
|
||||
|
||||
\fnexample{associate}{1f}
|
||||
|
||||
In the next example, within the \code{parallel} construct, the association name \plc{thread\_id}
|
||||
is associated with the private copy of \plc{i}. The print statement should output the
|
||||
unique thread number.
|
||||
|
||||
\fnexample{associate}{2f}
|
||||
|
||||
The following example illustrates the effect of specifying a selector name on a data-sharing
|
||||
attribute clause. The associate name \plc{u} is associated with \plc{v} and the variable \plc{v}
|
||||
is specified on the \code{private} clause of the \code{parallel} construct.
|
||||
The construct association is established prior to the \code{parallel} region.
|
||||
The association between \plc{u} and the original \plc{v} is retained (see the Data Sharing
|
||||
Attribute Rules section in the OpenMP 4.0 API Specifications). Inside the \code{parallel}
|
||||
region, \plc{v} has the value of -1 and \plc{u} has the value of the original \plc{v}.
|
||||
|
||||
\fnexample{associate}{3f}
|
||||
\fortranspecificend
|
||||
|
31
Examples_async_target.tex
Normal file
31
Examples_async_target.tex
Normal file
@ -0,0 +1,31 @@
|
||||
\pagebreak
|
||||
\chapter{Asynchronous Execution of a \code{target} Region Using Tasks}
|
||||
\label{chap:async_target}
|
||||
|
||||
The following example shows how the \code{task} and \code{target} constructs
|
||||
are used to execute multiple \code{target} regions asynchronously. The task that
|
||||
encounters the \code{task} construct generates an explicit task that contains
|
||||
a \code{target} region. The thread executing the explicit task encounters a task
|
||||
scheduling point while waiting for the execution of the \code{target} region
|
||||
to complete, allowing the thread to switch back to the execution of the encountering
|
||||
task or one of the previously generated explicit tasks.
|
||||
|
||||
\cexample{async_target}{1c}
|
||||
|
||||
The Fortran version has an interface block that contains the \code{declare} \code{target}.
|
||||
An identical statement exists in the function declaration (not shown here).
|
||||
|
||||
\fexample{async_target}{1f}
|
||||
|
||||
The following example shows how the \code{task} and \code{target} constructs
|
||||
are used to execute multiple \code{target} regions asynchronously. The task dependence
|
||||
ensures that the storage is allocated and initialized on the device before it is
|
||||
accessed.
|
||||
|
||||
\cexample{async_target}{2c}
|
||||
|
||||
The Fortran example uses allocatable arrays for dynamic memory on the device.
|
||||
|
||||
\fexample{async_target}{2f}
|
||||
|
||||
|
44
Examples_atomic.tex
Normal file
44
Examples_atomic.tex
Normal file
@ -0,0 +1,44 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{atomic} Construct}
|
||||
\label{chap:atomic}
|
||||
|
||||
The following example avoids race conditions (simultaneous updates of an element
|
||||
of \plc{x} by multiple threads) by using the \code{atomic} construct.
|
||||
|
||||
The advantage of using the \code{atomic} construct in this example is that it
|
||||
allows updates of two different elements of \plc{x} to occur in parallel. If
|
||||
a \code{critical} construct were used instead, then all updates to elements of
|
||||
\plc{x} would be executed serially (though not in any guaranteed order).
|
||||
|
||||
Note that the \code{atomic} directive applies only to the statement immediately
|
||||
following it. As a result, elements of \plc{y} are not updated atomically in
|
||||
this example.
|
||||
|
||||
\cexample{atomic}{1c}
|
||||
|
||||
\fexample{atomic}{1f}
|
||||
|
||||
The following example illustrates the \code{read} and \code{write} clauses
|
||||
for the \code{atomic} directive. These clauses ensure that the given variable
|
||||
is read or written, respectively, as a whole. Otherwise, some other thread might
|
||||
read or write part of the variable while the current thread was reading or writing
|
||||
another part of the variable. Note that most hardware provides atomic reads and
|
||||
writes for some set of properly aligned variables of specific sizes, but not necessarily
|
||||
for all the variable types supported by the OpenMP API.
|
||||
|
||||
\cexample{atomic}{2c}
|
||||
|
||||
\fexample{atomic}{2f}
|
||||
|
||||
The following example illustrates the \code{capture} clause for the \code{atomic}
|
||||
directive. In this case the value of a variable is captured, and then the variable
|
||||
is incremented. These operations occur atomically. This particular example could
|
||||
be implemented using the fetch-and-add instruction available on many kinds of hardware.
|
||||
The example also shows a way to implement a spin lock using the \code{capture}
|
||||
and \code{read} clauses.
|
||||
|
||||
\cexample{atomic}{3c}
|
||||
|
||||
\fexample{atomic}{3f}
|
||||
|
||||
|
25
Examples_atomic_restrict.tex
Normal file
25
Examples_atomic_restrict.tex
Normal file
@ -0,0 +1,25 @@
|
||||
\pagebreak
|
||||
\chapter{Restrictions on the \code{atomic} Construct}
|
||||
\label{chap:atomic_restrict}
|
||||
|
||||
The following non-conforming examples illustrate the restrictions on the \code{atomic}
|
||||
construct.
|
||||
|
||||
\cexample{atomic_restrict}{1c}
|
||||
|
||||
\fexample{atomic_restrict}{1f}
|
||||
|
||||
\cexample{atomic_restrict}{2c}
|
||||
|
||||
\fortranspecificstart
|
||||
The following example is non-conforming because \code{I} and \code{R} reference
|
||||
the same location but have different types.
|
||||
|
||||
\fnexample{atomic_restrict}{2f}
|
||||
|
||||
Although the following example might work on some implementations, this is also
|
||||
non-conforming:
|
||||
|
||||
\fnexample{atomic_restrict}{3f}
|
||||
\fortranspecificend
|
||||
|
24
Examples_barrier_regions.tex
Normal file
24
Examples_barrier_regions.tex
Normal file
@ -0,0 +1,24 @@
|
||||
\pagebreak
|
||||
\chapter{Binding of \code{barrier} Regions}
|
||||
\label{chap:barrier_regions}
|
||||
|
||||
The binding rules call for a \code{barrier} region to bind to the closest enclosing
|
||||
\code{parallel} region.
|
||||
|
||||
In the following example, the call from the main program to \plc{sub2} is conforming
|
||||
because the \code{barrier} region (in \plc{sub3}) binds to the \code{parallel}
|
||||
region in \plc{sub2}. The call from the main program to \plc{sub1} is conforming
|
||||
because the \code{barrier} region binds to the \code{parallel} region in subroutine
|
||||
\plc{sub2}.
|
||||
|
||||
The call from the main program to \plc{sub3} is conforming because the \code{barrier}
|
||||
region binds to the implicit inactive \code{parallel} region enclosing the sequential
|
||||
part. Also note that the \code{barrier} region in \plc{sub3} when called from
|
||||
\plc{sub2} only synchronizes the team of threads in the enclosing \code{parallel}
|
||||
region and not all the threads created in \plc{sub1}.
|
||||
|
||||
\cexample{barrier_regions}{1c}
|
||||
|
||||
\fexample{barrier_regions}{1f}
|
||||
|
||||
|
42
Examples_cancellation.tex
Normal file
42
Examples_cancellation.tex
Normal file
@ -0,0 +1,42 @@
|
||||
\pagebreak
|
||||
\chapter{Cancellation Constructs}
|
||||
\label{chap:cancellation}
|
||||
|
||||
The following example shows how the \code{cancel} directive can be used to terminate
|
||||
an OpenMP region. Although the \code{cancel} construct terminates the OpenMP
|
||||
worksharing region, programmers must still track the exception through the pointer
|
||||
ex and issue a cancellation for the \code{parallel} region if an exception has
|
||||
been raised. The master thread checks the exception pointer to make sure that the
|
||||
exception is properly handled in the sequential part. If cancellation of the \code{parallel}
|
||||
region has been requested, some threads might have executed \code{phase\_1()}.
|
||||
However, it is guaranteed that none of the threads executed \code{phase\_2()}.
|
||||
|
||||
\cexample{cancellation}{1c}
|
||||
|
||||
|
||||
The following example illustrates the use of the \code{cancel} construct in error
|
||||
handling. If there is an error condition from the \code{allocate} statement,
|
||||
the cancellation is activated. The encountering thread sets the shared variable
|
||||
\code{err} and other threads of the binding thread set proceed to the end of
|
||||
the worksharing construct after the cancellation has been activated.
|
||||
|
||||
\fexample{cancellation}{1f}
|
||||
|
||||
The following example shows how to cancel a parallel search on a binary tree as
|
||||
soon as the search value has been detected. The code creates a task to descend
|
||||
into the child nodes of the current tree node. If the search value has been found,
|
||||
the code remembers the tree node with the found value through an \code{atomic}
|
||||
write to the result variable and then cancels execution of all search tasks. The
|
||||
function \code{search\_tree\_parallel} groups all search tasks into a single
|
||||
task group to control the effect of the \code{cancel taskgroup} directive. The
|
||||
\plc{level} argument is used to create undeferred tasks after the first ten
|
||||
levels of the tree.
|
||||
|
||||
\cexample{cancellation}{2c}
|
||||
|
||||
|
||||
The following is the equivalent parallel search example in Fortran.
|
||||
|
||||
\fexample{cancellation}{2f}
|
||||
|
||||
|
37
Examples_carrays_fpriv.tex
Normal file
37
Examples_carrays_fpriv.tex
Normal file
@ -0,0 +1,37 @@
|
||||
\pagebreak
|
||||
\chapter{C/C++ Arrays in a \code{firstprivate} Clause}
|
||||
\ccppspecificstart
|
||||
\label{chap:carrays_fpriv}
|
||||
|
||||
The following example illustrates the size and value of list items of array or
|
||||
pointer type in a \code{firstprivate} clause. The size of new list items is
|
||||
based on the type of the corresponding original list item, as determined by the
|
||||
base language.
|
||||
|
||||
In this example:
|
||||
|
||||
\begin{compactitem}
|
||||
\item The type of \code{A} is array of two arrays of two ints.
|
||||
|
||||
\item The type of \code{B} is adjusted to pointer to array of \code{n}
|
||||
ints, because it is a function parameter.
|
||||
|
||||
\item The type of \code{C} is adjusted to pointer to int, because
|
||||
it is a function parameter.
|
||||
|
||||
\item The type of \code{D} is array of two arrays of two ints.
|
||||
|
||||
\item The type of \code{E} is array of \code{n} arrays of \code{n}
|
||||
ints.
|
||||
\end{compactitem}
|
||||
|
||||
Note that \code{B} and \code{E} involve variable length array types.
|
||||
|
||||
The new items of array type are initialized as if each integer element of the original
|
||||
array is assigned to the corresponding element of the new array. Those of pointer
|
||||
type are initialized as if by assignment from the original item to the new item.
|
||||
|
||||
\cnexample{carrays_fpriv}{1c}
|
||||
\ccppspecificend
|
||||
|
||||
|
77
Examples_collapse.tex
Normal file
77
Examples_collapse.tex
Normal file
@ -0,0 +1,77 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{collapse} Clause}
|
||||
\label{chap:collapse}
|
||||
|
||||
In the following example, the \code{k} and \code{j} loops are associated with
|
||||
the loop construct. So the iterations of the \code{k} and \code{j} loops are
|
||||
collapsed into one loop with a larger iteration space, and that loop is then divided
|
||||
among the threads in the current team. Since the \code{i} loop is not associated
|
||||
with the loop construct, it is not collapsed, and the \code{i} loop is executed
|
||||
sequentially in its entirety in every iteration of the collapsed \code{k} and
|
||||
\code{j} loop.
|
||||
|
||||
The variable \code{j} can be omitted from the \code{private} clause when the
|
||||
\code{collapse} clause is used since it is implicitly private. However, if the
|
||||
\code{collapse} clause is omitted then \code{j} will be shared if it is omitted
|
||||
from the \code{private} clause. In either case, \code{k} is implicitly private
|
||||
and could be omitted from the \code{private} clause.
|
||||
|
||||
\cexample{collapse}{1c}
|
||||
|
||||
\fexample{collapse}{1f}
|
||||
|
||||
In the next example, the \code{k} and \code{j} loops are associated with the
|
||||
loop construct. So the iterations of the \code{k} and \code{j} loops are collapsed
|
||||
into one loop with a larger iteration space, and that loop is then divided among
|
||||
the threads in the current team.
|
||||
|
||||
The sequential execution of the iterations in the \code{k} and \code{j} loops
|
||||
determines the order of the iterations in the collapsed iteration space. This implies
|
||||
that in the sequentially last iteration of the collapsed iteration space, \code{k}
|
||||
will have the value \code{2} and \code{j} will have the value \code{3}. Since
|
||||
\code{klast} and \code{jlast} are \code{lastprivate}, their values are assigned
|
||||
by the sequentially last iteration of the collapsed \code{k} and \code{j} loop.
|
||||
This example prints: \code{2 3}.
|
||||
|
||||
\cexample{collapse}{2c}
|
||||
|
||||
\fexample{collapse}{2f}
|
||||
|
||||
The next example illustrates the interaction of the \code{collapse} and \code{ordered}
|
||||
clauses.
|
||||
|
||||
In the example, the loop construct has both a \code{collapse} clause and an \code{ordered}
|
||||
clause. The \code{collapse} clause causes the iterations of the \code{k} and
|
||||
\code{j} loops to be collapsed into one loop with a larger iteration space, and
|
||||
that loop is divided among the threads in the current team. An \code{ordered}
|
||||
clause is added to the loop construct, because an ordered region binds to the loop
|
||||
region arising from the loop construct.
|
||||
|
||||
According to the restrictions on the \code{ordered} construct in the OpenMP 4.0 specification, a thread must not execute more than one ordered region that binds
|
||||
to the same loop region. So the \code{collapse} clause is required for the example
|
||||
to be conforming. With the \code{collapse} clause, the iterations of the \code{k}
|
||||
and \code{j} loops are collapsed into one loop, and therefore only one ordered
|
||||
region will bind to the collapsed \code{k} and \code{j} loop. Without the \code{collapse}
|
||||
clause, there would be two ordered regions that bind to each iteration of the \code{k}
|
||||
loop (one arising from the first iteration of the \code{j} loop, and the other
|
||||
arising from the second iteration of the \code{j} loop).
|
||||
|
||||
The code prints
|
||||
|
||||
\code{0 1 1}
|
||||
\\
|
||||
\code{0 1 2}
|
||||
\\
|
||||
\code{0 2 1}
|
||||
\\
|
||||
\code{1 2 2}
|
||||
\\
|
||||
\code{1 3 1}
|
||||
\\
|
||||
\code{1 3 2}
|
||||
|
||||
\cexample{collapse}{3c}
|
||||
|
||||
\fexample{collapse}{3f}
|
||||
|
||||
|
21
Examples_cond_comp.tex
Normal file
21
Examples_cond_comp.tex
Normal file
@ -0,0 +1,21 @@
|
||||
\pagebreak
|
||||
\chapter{Conditional Compilation}
|
||||
\label{chap:cond_comp}
|
||||
|
||||
\ccppspecificstart
|
||||
The following example illustrates the use of conditional compilation using the
|
||||
OpenMP macro \code{\_OPENMP}. With OpenMP compilation, the \code{\_OPENMP}
|
||||
macro becomes defined.
|
||||
|
||||
\cnexample{cond_comp}{1c}
|
||||
\ccppspecificend
|
||||
|
||||
\fortranspecificstart
|
||||
The following example illustrates the use of the conditional compilation sentinel.
|
||||
With OpenMP compilation, the conditional compilation sentinel \code{!\$} is recognized
|
||||
and treated as two spaces. In fixed form source, statements guarded by the sentinel
|
||||
must start after column 6.
|
||||
|
||||
\fnexample{cond_comp}{1f}
|
||||
\fortranspecificend
|
||||
|
13
Examples_copyin.tex
Normal file
13
Examples_copyin.tex
Normal file
@ -0,0 +1,13 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{copyin} Clause}
|
||||
\label{chap:copyin}
|
||||
|
||||
The \code{copyin} clause is used to initialize threadprivate data upon entry
|
||||
to a \code{parallel} region. The value of the threadprivate variable in the master
|
||||
thread is copied to the threadprivate variable of each other team member.
|
||||
|
||||
\cexample{copyin}{1c}
|
||||
|
||||
\fexample{copyin}{1f}
|
||||
|
||||
|
51
Examples_copyprivate.tex
Normal file
51
Examples_copyprivate.tex
Normal file
@ -0,0 +1,51 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{copyprivate} Clause}
|
||||
\label{chap:copyprivate}
|
||||
|
||||
The \code{copyprivate} clause can be used to broadcast values acquired by a single
|
||||
thread directly to all instances of the private variables in the other threads.
|
||||
In this example, if the routine is called from the sequential part, its behavior
|
||||
is not affected by the presence of the directives. If it is called from a \code{parallel}
|
||||
region, then the actual arguments with which \code{a} and \code{b} are associated
|
||||
must be private.
|
||||
|
||||
The thread that executes the structured block associated with the \code{single}
|
||||
construct broadcasts the values of the private variables \code{a}, \code{b},
|
||||
\code{x}, and
|
||||
\code{y} from its implicit task's data environment to the data environments
|
||||
of the other implicit tasks in the thread team. The broadcast completes before
|
||||
any of the threads have left the barrier at the end of the construct.
|
||||
|
||||
\cexample{copyprivate}{1c}
|
||||
|
||||
\fexample{copyprivate}{1f}
|
||||
|
||||
In this example, assume that the input must be performed by the master thread.
|
||||
Since the \code{master} construct does not support the \code{copyprivate} clause,
|
||||
it cannot broadcast the input value that is read. However, \code{copyprivate}
|
||||
is used to broadcast an address where the input value is stored.
|
||||
|
||||
\cexample{copyprivate}{2c}
|
||||
|
||||
\fexample{copyprivate}{2f}
|
||||
|
||||
Suppose that the number of lock variables required within a \code{parallel} region
|
||||
cannot easily be determined prior to entering it. The \code{copyprivate} clause
|
||||
can be used to provide access to shared lock variables that are allocated within
|
||||
that \code{parallel} region.
|
||||
|
||||
\cexample{copyprivate}{3c}
|
||||
|
||||
\fortranspecificstart
|
||||
\fnexample{copyprivate}{3f}
|
||||
|
||||
Note that the effect of the \code{copyprivate} clause on a variable with the
|
||||
\code{allocatable} attribute is different than on a variable with the \code{pointer}
|
||||
attribute. The value of \code{A} is copied (as if by intrinsic assignment) and
|
||||
the pointer \code{B} is copied (as if by pointer assignment) to the corresponding
|
||||
list items in the other implicit tasks belonging to the \code{parallel} region.
|
||||
|
||||
\fnexample{copyprivate}{4f}
|
||||
\fortranspecificend
|
||||
|
||||
|
16
Examples_critical.tex
Normal file
16
Examples_critical.tex
Normal file
@ -0,0 +1,16 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{critical} Construct}
|
||||
\label{chap:critical}
|
||||
|
||||
The following example includes several \code{critical} constructs. The example
|
||||
illustrates a queuing model in which a task is dequeued and worked on. To guard
|
||||
against multiple threads dequeuing the same task, the dequeuing operation must
|
||||
be in a \code{critical} region. Because the two queues in this example are independent,
|
||||
they are protected by \code{critical} constructs with different names, \plc{xaxis}
|
||||
and \plc{yaxis}.
|
||||
|
||||
\cexample{critical}{1c}
|
||||
|
||||
\fexample{critical}{1f}
|
||||
|
||||
|
111
Examples_declare_target.tex
Normal file
111
Examples_declare_target.tex
Normal file
@ -0,0 +1,111 @@
|
||||
\pagebreak
|
||||
\chapter{\code{declare} \code{target} Construct}
|
||||
\label{chap:declare_target}
|
||||
|
||||
\section{\code{declare} \code{target} and \code{end} \code{declare} \code{target} for a Function}
|
||||
|
||||
The following example shows how the \code{declare} \code{target} directive
|
||||
is used to indicate that the corresponding call inside a \code{target} region
|
||||
is to a \code{fib} function that can execute on the default target device.
|
||||
|
||||
A version of the function is also available on the host device. When the \code{if}
|
||||
clause conditional expression on the \code{target} construct evaluates to \plc{false},
|
||||
the \code{target} region (thus \code{fib}) will execute on the host device.
|
||||
|
||||
For C/C++ codes the declaration of the function \code{fib} appears between the \code{declare}
|
||||
\code{target} and \code{end} \code{declare} \code{target} directives.
|
||||
|
||||
\cexample{declare_target}{1c}
|
||||
|
||||
The Fortran \code{fib} subroutine contains a \code{declare} \code{target} declaration
|
||||
to indicate to the compiler to create a device-executable version of the procedure.
|
||||
The subroutine name has not been included on the \code{declare} \code{target}
|
||||
directive and is, therefore, implicitly assumed.
|
||||
|
||||
The program uses the \code{module\_fib} module, which presents an explicit interface to
|
||||
the compiler with the \code{declare} \code{target} declarations for processing
|
||||
the \code{fib} call.
|
||||
|
||||
\fexample{declare_target}{1f}
|
||||
|
||||
The next Fortran example shows the use of an external subroutine. Without an explicit
|
||||
interface (through module use or an interface block) the \code{declare} \code{target}
|
||||
declarations within an external subroutine are unknown to the main program unit;
|
||||
therefore, a \code{declare} \code{target} must be provided within the program
|
||||
scope for the compiler to determine that a target binary should be available.
|
||||
|
||||
\fexample{declare_target}{2f}
|
||||
|
||||
\section{\code{declare} \code{target} Construct for Class Type}
|
||||
|
||||
The following example shows how the \code{declare} \code{target} and \code{end}
|
||||
\code{declare} \code{target} directives are used to enclose the declaration
|
||||
of a variable \plc{varY} with a class type \code{typeY}. The member function \code{typeY::foo()} cannot
|
||||
be accessed on a target device because its declaration did not appear between \code{declare}
|
||||
\code{target} and \code{end} \code{declare} \code{target} directives.
|
||||
|
||||
\cexample{declare_target}{2c}
|
||||
|
||||
\section{\code{declare} \code{target} and \code{end} \code{declare} \code{target} for Variables}
|
||||
|
||||
The following examples show how the \code{declare} \code{target} and \code{end}
|
||||
\code{declare} \code{target} directives are used to indicate that global variables
|
||||
are mapped to the implicit device data environment of each target device.
|
||||
|
||||
In the following example, the declarations of the variables \plc{p}, \plc{v1}, and \plc{v2} appear
|
||||
between \code{declare} \code{target} and \code{end} \code{declare} \code{target}
|
||||
directives indicating that the variables are mapped to the implicit device data
|
||||
environment of each target device. The \code{target} \code{update} directive
|
||||
is then used to manage the consistency of the variables \plc{p}, \plc{v1}, and \plc{v2} between the
|
||||
data environment of the encountering host device task and the implicit device data
|
||||
environment of the default target device.
|
||||
|
||||
\cexample{declare_target}{3c}
|
||||
|
||||
The Fortran version of the above C code uses a different syntax. Fortran modules
|
||||
use a list syntax on the \code{declare} \code{target} directive to declare
|
||||
mapped variables.
|
||||
|
||||
\fexample{declare_target}{3f}
|
||||
|
||||
The following example also indicates that the function \code{Pfun()} is available on the
|
||||
target device, as well as the variable \plc{Q}, which is mapped to the implicit device
|
||||
data environment of each target device. The \code{target} \code{update} directive
|
||||
is then used to manage the consistency of the variable \plc{Q} between the data environment
|
||||
of the encountering host device task and the implicit device data environment of
|
||||
the default target device.
|
||||
|
||||
In the following example, the function and variable declarations appear between
|
||||
the \code{declare} \code{target} and \code{end} \code{declare} \code{target}
|
||||
directives.
|
||||
|
||||
\cexample{declare_target}{4c}
|
||||
|
||||
The Fortran version of the above C code uses a different syntax. In Fortran modules
|
||||
a list syntax on the \code{declare} \code{target} directive is used to declare
|
||||
mapped variables and procedures. The \plc{N} and \plc{Q} variables are declared as a comma
|
||||
separated list. When the \code{declare} \code{target} directive is used to
|
||||
declare just the procedure, the procedure name need not be listed -- it is implicitly
|
||||
assumed, as illustrated in the \code{Pfun()} function.
|
||||
|
||||
\fexample{declare_target}{4f}
|
||||
|
||||
\section{\code{declare} \code{target} and \code{end} \code{declare} \code{target} with \code{declare} \code{simd}}
|
||||
|
||||
The following example shows how the \code{declare} \code{target} and \code{end}
|
||||
\code{declare} \code{target} directives are used to indicate that a function
|
||||
is available on a target device. The \code{declare} \code{simd} directive indicates
|
||||
that there is a SIMD version of the function \code{P()} that is available on the target
|
||||
device as well as one that is available on the host device.
|
||||
|
||||
\cexample{declare_target}{5c}
|
||||
|
||||
The Fortran version of the above C code uses a different syntax. Fortran modules
|
||||
use a list syntax of the \code{declare} \code{target} declaration for the mapping.
|
||||
Here the \plc{N} and \plc{Q} variables are declared in the list form as a comma separated list.
|
||||
The function declaration does not use a list and implicitly assumes the function
|
||||
name. In this Fortran example row and column indices are reversed relative to the
|
||||
C/C++ example, as is usual for codes optimized for memory access.
|
||||
|
||||
\fexample{declare_target}{5f}
|
||||
|
12
Examples_default_none.tex
Normal file
12
Examples_default_none.tex
Normal file
@ -0,0 +1,12 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{default(none)} Clause}
|
||||
\label{chap:default_none}
|
||||
|
||||
The following example distinguishes the variables that are affected by the \code{default(none)}
|
||||
clause from those that are not.
|
||||
|
||||
\cexample{default_none}{1c}
|
||||
|
||||
\fexample{default_none}{1f}
|
||||
|
||||
|
35
Examples_device.tex
Normal file
35
Examples_device.tex
Normal file
@ -0,0 +1,35 @@
|
||||
\pagebreak
|
||||
\chapter{Device Routines}
|
||||
\label{chap:device}
|
||||
|
||||
\section{\code{omp\_is\_initial\_device} Routine}
|
||||
|
||||
The following example shows how the \code{omp\_is\_initial\_device} runtime library routine
|
||||
can be used to query if a code is executing on the initial host device or on a
|
||||
target device. The example then sets the number of threads in the \code{parallel}
|
||||
region based on where the code is executing.
|
||||
|
||||
\cexample{device}{1c}
|
||||
|
||||
\fexample{device}{1f}
|
||||
|
||||
\section{\code{omp\_get\_num\_devices} Routine}
|
||||
|
||||
The following example shows how the \code{omp\_get\_num\_devices} runtime library routine
|
||||
can be used to determine the number of devices.
|
||||
|
||||
\cexample{device}{2c}
|
||||
|
||||
\fexample{device}{2f}
|
||||
|
||||
\section{\code{omp\_set\_default\_device} and \\
|
||||
\code{omp\_get\_default\_device} Routines}
|
||||
|
||||
The following example shows how the \code{omp\_set\_default\_device} and \code{omp\_get\_default\_device}
|
||||
runtime library routines can be used to set the default device and determine the
|
||||
default device respectively.
|
||||
|
||||
\cexample{device}{3c}
|
||||
|
||||
\fexample{device}{3f}
|
||||
|
12
Examples_flush_nolist.tex
Normal file
12
Examples_flush_nolist.tex
Normal file
@ -0,0 +1,12 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{flush} Construct without a List}
|
||||
\label{chap:flush_nolist}
|
||||
|
||||
The following example distinguishes the shared variables affected by a \code{flush}
|
||||
construct with no list from the shared objects that are not affected:
|
||||
|
||||
\cexample{flush_nolist}{1c}
|
||||
|
||||
\fexample{flush_nolist}{1f}
|
||||
|
||||
|
19
Examples_fort_do.tex
Normal file
19
Examples_fort_do.tex
Normal file
@ -0,0 +1,19 @@
|
||||
\pagebreak
|
||||
\chapter{Fortran Restrictions on the \code{do} Construct}
|
||||
\label{chap:fort_do}
|
||||
\fortranspecificstart
|
||||
|
||||
If an \code{end do} directive follows a \plc{do-construct} in which several
|
||||
\code{DO} statements share a \code{DO} termination statement, then a \code{do}
|
||||
directive can only be specified for the outermost of these \code{DO} statements.
|
||||
The following example contains correct usages of loop constructs:
|
||||
|
||||
\fnexample{fort_do}{1f}
|
||||
|
||||
The following example is non-conforming because the matching \code{do} directive
|
||||
for the \code{end do} does not precede the outermost loop:
|
||||
|
||||
\fnexample{fort_do}{2f}
|
||||
\fortranspecificend
|
||||
|
||||
|
22
Examples_fort_loopvar.tex
Normal file
22
Examples_fort_loopvar.tex
Normal file
@ -0,0 +1,22 @@
|
||||
\pagebreak
|
||||
\chapter{Fortran Private Loop Iteration Variables}
|
||||
\label{chap:fort_loopvar}
|
||||
\fortranspecificstart
|
||||
|
||||
In general, loop iteration variables will be private when used in the \plc{do-loop}
|
||||
of a \code{do} and \code{parallel do} construct or in sequential loops in a
|
||||
\code{parallel} construct (see the data-sharing attribute rules in the OpenMP specification). In the following example of a sequential
|
||||
loop in a \code{parallel} construct the loop iteration variable \plc{I} will
|
||||
be private.
|
||||
|
||||
\fnexample{fort_loopvar}{1f}
|
||||
|
||||
In exceptional cases, loop iteration variables can be made shared, as in the following
|
||||
example:
|
||||
|
||||
\fnexample{fort_loopvar}{2f}
|
||||
|
||||
Note however that the use of shared loop iteration variables can easily lead to
|
||||
race conditions.
|
||||
\fortranspecificend
|
||||
|
16
Examples_fort_race.tex
Normal file
16
Examples_fort_race.tex
Normal file
@ -0,0 +1,16 @@
|
||||
\pagebreak
|
||||
\chapter{Race Conditions Caused by Implied Copies of Shared Variables in Fortran}
|
||||
\fortranspecificstart
|
||||
\label{chap:fort_race}
|
||||
|
||||
The following example contains a race condition, because the shared variable, which
|
||||
is an array section, is passed as an actual argument to a routine that has an assumed-size
|
||||
array as its dummy argument. The subroutine call passing an array section argument
|
||||
may cause the compiler to copy the argument into a temporary location prior to
|
||||
the call and copy from the temporary location into the original variable when the
|
||||
subroutine returns. This copying would cause races in the \code{parallel} region.
|
||||
|
||||
\fnexample{fort_race}{1f}
|
||||
\fortranspecificend
|
||||
|
||||
|
23
Examples_fort_sa_private.tex
Normal file
23
Examples_fort_sa_private.tex
Normal file
@ -0,0 +1,23 @@
|
||||
\pagebreak
|
||||
\chapter{Fortran Restrictions on Storage Association with the \code{private} Clause}
|
||||
\fortranspecificstart
|
||||
\label{chap:fort_sa_private}
|
||||
|
||||
The following non-conforming examples illustrate the implications of the \code{private}
|
||||
clause rules with regard to storage association.
|
||||
|
||||
\fnexample{fort_sa_private}{1f}
|
||||
|
||||
\fnexample{fort_sa_private}{2f}
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
\fnexample{fort_sa_private}{3f}
|
||||
|
||||
\fnexample{fort_sa_private}{4f}
|
||||
|
||||
\fnexample{fort_sa_private}{5f}
|
||||
\fortranspecificend
|
||||
|
38
Examples_fort_sp_common.tex
Normal file
38
Examples_fort_sp_common.tex
Normal file
@ -0,0 +1,38 @@
|
||||
\pagebreak
|
||||
\chapter{Fortran Restrictions on \code{shared} and \code{private} Clauses with Common Blocks}
|
||||
\fortranspecificstart
|
||||
\label{chap:fort_sp_common}
|
||||
|
||||
When a named common block is specified in a \code{private}, \code{firstprivate},
|
||||
or \code{lastprivate} clause of a construct, none of its members may be declared
|
||||
in another data-sharing attribute clause on that construct. The following examples
|
||||
illustrate this point.
|
||||
|
||||
The following example is conforming:
|
||||
|
||||
\fnexample{fort_sp_common}{1f}
|
||||
|
||||
The following example is also conforming:
|
||||
|
||||
\fnexample{fort_sp_common}{2f}
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
The following example is conforming:
|
||||
|
||||
\fnexample{fort_sp_common}{3f}
|
||||
|
||||
The following example is non-conforming because \code{x} is a constituent element
|
||||
of \code{c}:
|
||||
|
||||
\fnexample{fort_sp_common}{4f}
|
||||
|
||||
The following example is non-conforming because a common block may not be declared
|
||||
both shared and private:
|
||||
|
||||
\fnexample{fort_sp_common}{5f}
|
||||
\fortranspecificend
|
||||
|
||||
|
18
Examples_fpriv_sections.tex
Normal file
18
Examples_fpriv_sections.tex
Normal file
@ -0,0 +1,18 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{firstprivate} Clause and the \code{sections} Construct}
|
||||
\label{chap:fpriv_sections}
|
||||
|
||||
In the following example of the \code{sections} construct the \code{firstprivate}
|
||||
clause is used to initialize the private copy of \code{section\_count} of each
|
||||
thread. The problem is that the \code{section} constructs modify \code{section\_count},
|
||||
which breaks the independence of the \code{section} constructs. When different
|
||||
threads execute each section, both sections will print the value 1. When the same
|
||||
thread executes the two sections, one section will print the value 1 and the other
|
||||
will print the value 2. Since the order of execution of the two sections in this
|
||||
case is unspecified, it is unspecified which section prints which value.
|
||||
|
||||
\cexample{fpriv_sections}{1c}
|
||||
|
||||
\fexample{fpriv_sections}{1f}
|
||||
|
||||
|
21
Examples_get_nthrs.tex
Normal file
21
Examples_get_nthrs.tex
Normal file
@ -0,0 +1,21 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{omp\_get\_num\_threads} Routine}
|
||||
\label{chap:get_nthrs}
|
||||
|
||||
In the following example, the \code{omp\_get\_num\_threads} call returns 1 in
|
||||
the sequential part of the code, so \code{np} will always be equal to 1. To determine
|
||||
the number of threads that will be deployed for the \code{parallel} region, the
|
||||
call should be inside the \code{parallel} region.
|
||||
|
||||
\cexample{get_nthrs}{1c}
|
||||
|
||||
\fexample{get_nthrs}{1f}
|
||||
|
||||
The following example shows how to rewrite this program without including a query
|
||||
for the number of threads:
|
||||
|
||||
\cexample{get_nthrs}{2c}
|
||||
|
||||
\fexample{get_nthrs}{2f}
|
||||
|
||||
|
56
Examples_icv.tex
Normal file
56
Examples_icv.tex
Normal file
@ -0,0 +1,56 @@
|
||||
\pagebreak
|
||||
\chapter{Internal Control Variables (ICVs)}
|
||||
\label{chap:icv}
|
||||
|
||||
According to the OpenMP specification, an OpenMP implementation must act as if there are ICVs that control
|
||||
the behavior of the program. This example illustrates two ICVs, \plc{nthreads-var}
|
||||
and \plc{max-active-levels-var}. The \plc{nthreads-var} ICV controls the
|
||||
number of threads requested for encountered parallel regions; there is one copy
|
||||
of this ICV per task. The \plc{max-active-levels-var} ICV controls the maximum
|
||||
number of nested active parallel regions; there is one copy of this ICV for the
|
||||
whole program.
|
||||
|
||||
In the following example, the \plc{nest-var}, \plc{max-active-levels-var},
|
||||
\plc{dyn-var}, and \plc{nthreads-var} ICVs are modified through calls to
|
||||
the runtime library routines \code{omp\_set\_nested},\\ \code{omp\_set\_max\_active\_levels},
|
||||
\code{omp\_set\_dynamic}, and \code{omp\_set\_num\_threads}, respectively. These ICVs
|
||||
affect the operation of \code{parallel} regions. Each implicit task generated
|
||||
by a \code{parallel} region has its own copy of the \plc{nest-var}, \plc{dyn-var},
|
||||
and \plc{nthreads-var} ICVs.
|
||||
|
||||
In the following example, the new value of \plc{nthreads-var} applies only to
|
||||
the implicit tasks that execute the call to \code{omp\_set\_num\_threads}. There
|
||||
is one copy of the \plc{max-active-levels-var} ICV for the whole program and
|
||||
its value is the same for all tasks. This example assumes that nested parallelism
|
||||
is supported.
|
||||
|
||||
The outer \code{parallel} region creates a team of two threads; each of the threads
|
||||
will execute one of the two implicit tasks generated by the outer \code{parallel}
|
||||
region.
|
||||
|
||||
Each implicit task generated by the outer \code{parallel} region calls \code{omp\_set\_num\_threads(3)},
|
||||
assigning the value 3 to its respective copy of \plc{nthreads-var}. Then each
|
||||
implicit task encounters an inner \code{parallel} region that creates a team
|
||||
of three threads; each of the threads will execute one of the three implicit tasks
|
||||
generated by that inner \code{parallel} region.
|
||||
|
||||
Since the outer \code{parallel} region is executed by 2 threads, and the inner
|
||||
by 3, there will be a total of 6 implicit tasks generated by the two inner \code{parallel}
|
||||
regions.
|
||||
|
||||
Each implicit task generated by an inner \code{parallel} region will execute
|
||||
the call to\\ \code{omp\_set\_num\_threads(4)}, assigning the value 4 to its respective
|
||||
copy of \plc{nthreads-var}.
|
||||
|
||||
The print statement in the outer \code{parallel} region is executed by only one
|
||||
of the threads in the team. So it will be executed only once.
|
||||
|
||||
The print statement in an inner \code{parallel} region is also executed by only
|
||||
one of the threads in the team. Since we have a total of two inner \code{parallel}
|
||||
regions, the print statement will be executed twice -- once per inner \code{parallel}
|
||||
region.
|
||||
|
||||
\cexample{icv}{1c}
|
||||
|
||||
\fexample{icv}{1f}
|
||||
|
11
Examples_init_lock.tex
Normal file
11
Examples_init_lock.tex
Normal file
@ -0,0 +1,11 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{omp\_init\_lock} Routine}
|
||||
\label{chap:init_lock}
|
||||
|
||||
The following example demonstrates how to initialize an array of locks in a \code{parallel}
|
||||
region by using \code{omp\_init\_lock}.
|
||||
|
||||
\cexample{init_lock}{1c}
|
||||
|
||||
\fexample{init_lock}{1f}
|
||||
|
14
Examples_lastprivate.tex
Normal file
14
Examples_lastprivate.tex
Normal file
@ -0,0 +1,14 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{lastprivate} Clause}
|
||||
\label{chap:lastprivate}
|
||||
|
||||
Correct execution sometimes depends on the value that the last iteration of a loop
|
||||
assigns to a variable. Such programs must list all such variables in a \code{lastprivate}
|
||||
clause so that the values of the variables are the same as when the loop is executed
|
||||
sequentially.
|
||||
|
||||
\cexample{lastprivate}{1c}
|
||||
|
||||
\fexample{lastprivate}{1f}
|
||||
|
||||
|
23
Examples_lock_owner.tex
Normal file
23
Examples_lock_owner.tex
Normal file
@ -0,0 +1,23 @@
|
||||
\pagebreak
|
||||
\chapter{Ownership of Locks}
|
||||
\label{chap:lock_owner}
|
||||
|
||||
Ownership of locks has changed since OpenMP 2.5. In OpenMP 2.5, locks are owned
|
||||
by threads; so a lock released by the \code{omp\_unset\_lock} routine must be
|
||||
owned by the same thread executing the routine. With OpenMP 3.0, locks are owned
|
||||
by task regions; so a lock released by the \code{omp\_unset\_lock} routine in
|
||||
a task region must be owned by the same task region.
|
||||
|
||||
This change in ownership requires extra care when using locks. The following program
|
||||
is conforming in OpenMP 2.5 because the thread that releases the lock \code{lck}
|
||||
in the parallel region is the same thread that acquired the lock in the sequential
|
||||
part of the program (the master thread of the parallel region and the initial thread are
|
||||
the same). However, it is not conforming in OpenMP 3.0 and 3.1, because the task
|
||||
region that releases the lock \code{lck} is different from the task region that
|
||||
acquires the lock.
|
||||
|
||||
\cexample{lock_owner}{1c}
|
||||
|
||||
\fexample{lock_owner}{1f}
|
||||
|
||||
|
13
Examples_master.tex
Normal file
13
Examples_master.tex
Normal file
@ -0,0 +1,13 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{master} Construct}
|
||||
\label{chap:master}
|
||||
|
||||
The following example demonstrates the \code{master} construct. In the example, the master
|
||||
keeps track of how many iterations have been executed and prints out a progress
|
||||
report. The other threads skip the master region without waiting.
|
||||
|
||||
\cexample{master}{1c}
|
||||
|
||||
\fexample{master}{1f}
|
||||
|
||||
|
38
Examples_mem_model.tex
Normal file
38
Examples_mem_model.tex
Normal file
@ -0,0 +1,38 @@
|
||||
\pagebreak
|
||||
\chapter{The OpenMP Memory Model}
|
||||
\label{chap:mem_model}
|
||||
|
||||
In the following example, at Print 1, the value of \plc{x} could be either 2
|
||||
or 5, depending on the timing of the threads, and the implementation of the assignment
|
||||
to \plc{x}. There are two reasons that the value at Print 1 might not be 5.
|
||||
First, Print 1 might be executed before the assignment to \plc{x} is executed.
|
||||
Second, even if Print 1 is executed after the assignment, the value 5 is not guaranteed
|
||||
to be seen by thread 1 because a flush may not have been executed by thread 0 since
|
||||
the assignment.
|
||||
|
||||
The barrier after Print 1 contains implicit flushes on all threads, as well as
|
||||
a thread synchronization, so the programmer is guaranteed that the value 5 will
|
||||
be printed by both Print 2 and Print 3.
|
||||
|
||||
\cexample{mem_model}{1c}
|
||||
|
||||
\fexample{mem_model}{1f}
|
||||
|
||||
The following example demonstrates why synchronization is difficult to perform
|
||||
correctly through variables. The value of \plc{flag} is undefined in both prints on thread
|
||||
1 and the value of \plc{data} is only well-defined in the second print.
|
||||
|
||||
\cexample{mem_model}{2c}
|
||||
|
||||
\fexample{mem_model}{2f}
|
||||
|
||||
The next example demonstrates why synchronization is difficult to perform correctly
|
||||
through variables. Because the \plc{write}(1)-\plc{flush}(1)-\plc{flush}(2)-\plc{read}(2)
|
||||
sequence cannot be guaranteed in the example, the statements on thread 0 and thread
|
||||
1 may execute in either order.
|
||||
|
||||
\cexample{mem_model}{3c}
|
||||
|
||||
\fexample{mem_model}{3f}
|
||||
|
||||
|
11
Examples_nestable_lock.tex
Normal file
11
Examples_nestable_lock.tex
Normal file
@ -0,0 +1,11 @@
|
||||
\pagebreak
|
||||
\chapter{Nestable Lock Routines}
|
||||
\label{chap:nestable_lock}
|
||||
|
||||
The following example demonstrates how a nestable lock can be used to synchronize
|
||||
updates both to a whole structure and to one of its members.
|
||||
|
||||
\cexample{nestable_lock}{1c}
|
||||
|
||||
\fexample{nestable_lock}{1f}
|
||||
|
18
Examples_nested_loop.tex
Normal file
18
Examples_nested_loop.tex
Normal file
@ -0,0 +1,18 @@
|
||||
\pagebreak
|
||||
\chapter{Nested Loop Constructs}
|
||||
\label{chap:nested_loop}
|
||||
|
||||
The following example of loop construct nesting is conforming because the inner
|
||||
and outer loop regions bind to different \code{parallel} regions:
|
||||
|
||||
\cexample{nested_loop}{1c}
|
||||
|
||||
\fexample{nested_loop}{1f}
|
||||
|
||||
The following variation of the preceding example is also conforming:
|
||||
|
||||
\cexample{nested_loop}{2c}
|
||||
|
||||
\fexample{nested_loop}{2f}
|
||||
|
||||
|
52
Examples_nesting_restrict.tex
Normal file
52
Examples_nesting_restrict.tex
Normal file
@ -0,0 +1,52 @@
|
||||
\pagebreak
|
||||
\chapter{Restrictions on Nesting of Regions}
|
||||
\label{chap:nesting_restrict}
|
||||
|
||||
The examples in this section illustrate the region nesting rules.
|
||||
|
||||
The following example is non-conforming because the inner and outer loop regions
|
||||
are closely nested:
|
||||
|
||||
\cexample{nesting_restrict}{1c}
|
||||
|
||||
\fexample{nesting_restrict}{1f}
|
||||
|
||||
The following orphaned version of the preceding example is also non-conforming:
|
||||
|
||||
\cexample{nesting_restrict}{2c}
|
||||
|
||||
\fexample{nesting_restrict}{2f}
|
||||
|
||||
The following example is non-conforming because the loop and \code{single} regions
|
||||
are closely nested:
|
||||
|
||||
\cexample{nesting_restrict}{3c}
|
||||
|
||||
\fexample{nesting_restrict}{3f}
|
||||
|
||||
The following example is non-conforming because a \code{barrier} region cannot
|
||||
be closely nested inside a loop region:
|
||||
|
||||
\cexample{nesting_restrict}{4c}
|
||||
|
||||
\fexample{nesting_restrict}{4f}
|
||||
|
||||
The following example is non-conforming because the \code{barrier} region cannot
|
||||
be closely nested inside the \code{critical} region. If this were permitted,
|
||||
it would result in deadlock due to the fact that only one thread at a time can
|
||||
enter the \code{critical} region:
|
||||
|
||||
\cexample{nesting_restrict}{5c}
|
||||
|
||||
\fexample{nesting_restrict}{5f}
|
||||
|
||||
The following example is non-conforming because the \code{barrier} region cannot
|
||||
be closely nested inside the \code{single} region. If this were permitted, it
|
||||
would result in deadlock due to the fact that only one thread executes the \code{single}
|
||||
region:
|
||||
|
||||
\cexample{nesting_restrict}{6c}
|
||||
|
||||
\fexample{nesting_restrict}{6f}
|
||||
|
||||
|
28
Examples_nowait.tex
Normal file
28
Examples_nowait.tex
Normal file
@ -0,0 +1,28 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{nowait} Clause}
|
||||
\label{chap:nowait}
|
||||
|
||||
If there are multiple independent loops within a \code{parallel} region, you
|
||||
can use the \code{nowait} clause to avoid the implied barrier at the end of the
|
||||
loop construct, as follows:
|
||||
|
||||
\cexample{nowait}{1c}
|
||||
|
||||
\fexample{nowait}{1f}
|
||||
|
||||
In the following example, static scheduling distributes the same logical iteration
|
||||
numbers to the threads that execute the three loop regions. This allows the \code{nowait}
|
||||
clause to be used, even though there is a data dependence between the loops. The
|
||||
dependence is satisfied as long as the same thread executes the same logical iteration
|
||||
numbers in each loop.
|
||||
|
||||
Note that the iteration count of the loops must be the same. The example satisfies
|
||||
this requirement, since the iteration space of the first two loops is from \code{0}
|
||||
to \code{n-1} (from \code{1} to \code{N} in the Fortran version), while the
|
||||
iteration space of the last loop is from \code{1} to \code{n} (\code{2} to
|
||||
\code{N+1} in the Fortran version).
|
||||
|
||||
\cexample{nowait}{2c}
|
||||
|
||||
\fexample{nowait}{2f}
|
||||
|
30
Examples_nthrs_dynamic.tex
Normal file
30
Examples_nthrs_dynamic.tex
Normal file
@ -0,0 +1,30 @@
|
||||
\pagebreak
|
||||
\chapter{Interaction Between the \code{num\_threads} Clause and \code{omp\_set\_dynamic}}
|
||||
\label{chap:nthrs_dynamic}
|
||||
|
||||
The following example demonstrates the \code{num\_threads} clause and the effect
|
||||
of the \\
|
||||
\code{omp\_set\_dynamic} routine on it.
|
||||
|
||||
The call to the \code{omp\_set\_dynamic} routine with argument \code{0} in
|
||||
C/C++, or \code{.FALSE.} in Fortran, disables the dynamic adjustment of the number
|
||||
of threads in OpenMP implementations that support it. In this case, 10 threads
|
||||
are provided. Note that in case of an error the OpenMP implementation is free to
|
||||
abort the program or to supply any number of threads available.
|
||||
|
||||
\cexample{nthrs_dynamic}{1c}
|
||||
|
||||
\fexample{nthrs_dynamic}{1f}
|
||||
|
||||
The call to the \code{omp\_set\_dynamic} routine with a non-zero argument in
|
||||
C/C++, or \code{.TRUE.} in Fortran, allows the OpenMP implementation to choose
|
||||
any number of threads between 1 and 10.
|
||||
|
||||
\cexample{nthrs_dynamic}{2c}
|
||||
|
||||
\fexample{nthrs_dynamic}{2f}
|
||||
|
||||
It is good practice to set the \plc{dyn-var} ICV explicitly by calling the \code{omp\_set\_dynamic}
|
||||
routine, as its default setting is implementation defined.
|
||||
|
||||
|
12
Examples_nthrs_nesting.tex
Normal file
12
Examples_nthrs_nesting.tex
Normal file
@ -0,0 +1,12 @@
|
||||
\pagebreak
|
||||
\chapter{Controlling the Number of Threads on Multiple Nesting Levels}
|
||||
\label{chap:nthrs_nesting}
|
||||
|
||||
The following examples demonstrate how to use the \code{OMP\_NUM\_THREADS} environment
|
||||
variable to control the number of threads on multiple nesting levels:
|
||||
|
||||
\cexample{nthrs_nesting}{1c}
|
||||
|
||||
\fexample{nthrs_nesting}{1f}
|
||||
|
||||
|
28
Examples_ordered.tex
Normal file
28
Examples_ordered.tex
Normal file
@ -0,0 +1,28 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{ordered} Clause and the \code{ordered} Construct}
|
||||
\label{chap:ordered}
|
||||
|
||||
Ordered constructs are useful for sequentially ordering the output from work that
|
||||
is done in parallel. The following program prints out the indices in sequential
|
||||
order:
|
||||
|
||||
\cexample{ordered}{1c}
|
||||
|
||||
\fexample{ordered}{1f}
|
||||
|
||||
It is possible to have multiple \code{ordered} constructs within a loop region
|
||||
with the \code{ordered} clause specified. The first example is non-conforming
|
||||
because all iterations execute two \code{ordered} regions. An iteration of a
|
||||
loop must not execute more than one \code{ordered} region:
|
||||
|
||||
\cexample{ordered}{2c}
|
||||
|
||||
\fexample{ordered}{2f}
|
||||
|
||||
The following is a conforming example with more than one \code{ordered} construct.
|
||||
Each iteration will execute only one \code{ordered} region:
|
||||
|
||||
\cexample{ordered}{3c}
|
||||
|
||||
\fexample{ordered}{3f}
|
||||
|
12
Examples_parallel.tex
Normal file
12
Examples_parallel.tex
Normal file
@ -0,0 +1,12 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{parallel} Construct}
|
||||
\label{chap:parallel}
|
||||
|
||||
The \code{parallel} construct can be used in coarse-grain parallel programs.
|
||||
In the following example, each thread in the \code{parallel} region decides what
|
||||
part of the global array \plc{x} to work on, based on the thread number:
|
||||
|
||||
\cexample{parallel}{1c}
|
||||
|
||||
\fexample{parallel}{1f}
|
||||
|
11
Examples_ploop.tex
Normal file
11
Examples_ploop.tex
Normal file
@ -0,0 +1,11 @@
|
||||
\chapter{A Simple Parallel Loop}
|
||||
\label{chap:ploop}
|
||||
|
||||
The following example demonstrates how to parallelize a simple loop using the parallel
|
||||
loop construct. The loop iteration variable is private by default, so it is not
|
||||
necessary to specify it explicitly in a \code{private} clause.
|
||||
|
||||
\cexample{ploop}{1c}
|
||||
|
||||
\fexample{ploop}{1f}
|
||||
|
11
Examples_pra_iterator.tex
Normal file
11
Examples_pra_iterator.tex
Normal file
@ -0,0 +1,11 @@
|
||||
\pagebreak
|
||||
\chapter{Parallel Random Access Iterator Loop}
|
||||
\ccppspecificstart
|
||||
\label{chap:pra_iterator}
|
||||
|
||||
The following example shows a parallel random access iterator loop.
|
||||
|
||||
\cnexample{pra_iterator}{1c}
|
||||
\ccppspecificend
|
||||
|
||||
|
31
Examples_private.tex
Normal file
31
Examples_private.tex
Normal file
@ -0,0 +1,31 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{private} Clause}
|
||||
\label{chap:private}
|
||||
|
||||
In the following example, the values of original list items \plc{i} and \plc{j}
|
||||
are retained on exit from the \code{parallel} region, while the private list
|
||||
items \plc{i} and \plc{j} are modified within the \code{parallel} construct.
|
||||
|
||||
\cexample{private}{1c}
|
||||
|
||||
\fexample{private}{1f}
|
||||
|
||||
In the following example, all uses of the variable \plc{a} within the loop construct
|
||||
in the routine \plc{f} refer to a private list item \plc{a}, while it is
|
||||
unspecified whether references to \plc{a} in the routine \plc{g} are to a
|
||||
private list item or the original list item.
|
||||
|
||||
\cexample{private}{2c}
|
||||
|
||||
\fexample{private}{2f}
|
||||
|
||||
The following example demonstrates that a list item that appears in a \code{private}
|
||||
clause in a \code{parallel} construct may also appear in a \code{private}
|
||||
clause in an enclosed worksharing construct, which results in an additional private
|
||||
copy.
|
||||
|
||||
\cexample{private}{3c}
|
||||
|
||||
\fexample{private}{3f}
|
||||
|
||||
|
13
Examples_psections.tex
Normal file
13
Examples_psections.tex
Normal file
@ -0,0 +1,13 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{parallel} \code{sections} Construct}
|
||||
\label{chap:psections}
|
||||
|
||||
In the following example routines \code{XAXIS}, \code{YAXIS}, and \code{ZAXIS} can
|
||||
be executed concurrently. The first \code{section} directive is optional. Note
|
||||
that all \code{section} directives need to appear in the \code{parallel sections}
|
||||
construct.
|
||||
|
||||
\cexample{psections}{1c}
|
||||
|
||||
\fexample{psections}{1f}
|
||||
|
60
Examples_reduction.tex
Normal file
60
Examples_reduction.tex
Normal file
@ -0,0 +1,60 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{reduction} Clause}
|
||||
\label{chap:reduction}
|
||||
|
||||
The following example demonstrates the \code{reduction} clause; note that some
|
||||
reductions can be expressed in the loop in several ways, as shown for the \code{max}
|
||||
and \code{min} reductions below:
|
||||
|
||||
\cexample{reduction}{1c}
|
||||
|
||||
\fexample{reduction}{1f}
|
||||
|
||||
A common implementation of the preceding example is to treat it as if it had been
|
||||
written as follows:
|
||||
|
||||
\cexample{reduction}{2c}
|
||||
|
||||
\fortranspecificstart
|
||||
\fnexample{reduction}{2f}
|
||||
|
||||
The following program is non-conforming because the reduction is on the
|
||||
\emph{intrinsic procedure name} \code{MAX} but that name has been redefined to be the variable
|
||||
named \code{MAX}.
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
\fnexample{reduction}{3f}
|
||||
|
||||
The following conforming program performs the reduction using the
|
||||
\emph{intrinsic procedure name} \code{MAX} even though the intrinsic \code{MAX} has been renamed
|
||||
to \code{REN}.
|
||||
|
||||
\fnexample{reduction}{4f}
|
||||
|
||||
The following conforming program performs the reduction using
|
||||
the \emph{intrinsic procedure name} \code{MAX} even though the intrinsic \code{MAX} has been renamed
|
||||
to \code{MIN}.
|
||||
|
||||
\fnexample{reduction}{5f}
|
||||
\fortranspecificend
|
||||
|
||||
The following example is non-conforming because the initialization (\code{a =
|
||||
0}) of the original list item \code{a} is not synchronized with the update of
|
||||
\code{a} as a result of the reduction computation in the \code{for} loop. Therefore,
|
||||
the example may print an incorrect value for \code{a}.
|
||||
|
||||
To avoid this problem, the initialization of the original list item \code{a}
|
||||
should complete before any update of \code{a} as a result of the \code{reduction}
|
||||
clause. This can be achieved by adding an explicit barrier after the assignment
|
||||
\code{a = 0}, or by enclosing the assignment \code{a = 0} in a \code{single}
|
||||
directive (which has an implied barrier), or by initializing \code{a} before
|
||||
the start of the \code{parallel} region.
|
||||
|
||||
\cexample{reduction}{3c}
|
||||
|
||||
\fexample{reduction}{6f}
|
||||
|
||||
|
24
Examples_set_dynamic_nthrs.tex
Normal file
24
Examples_set_dynamic_nthrs.tex
Normal file
@ -0,0 +1,24 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{omp\_set\_dynamic} and \\
|
||||
\code{omp\_set\_num\_threads} Routines}
|
||||
\label{chap:set_dynamic_nthrs}
|
||||
|
||||
Some programs rely on a fixed, prespecified number of threads to execute correctly.
|
||||
Because the default setting for the dynamic adjustment of the number of threads
|
||||
is implementation defined, such programs can choose to turn off the dynamic threads
|
||||
capability and set the number of threads explicitly to ensure portability. The
|
||||
following example shows how to do this using \code{omp\_set\_dynamic} and \code{omp\_set\_num\_threads}.
|
||||
|
||||
In this example, the program executes correctly only if it is executed by 16 threads.
|
||||
If the implementation is not capable of supporting 16 threads, the behavior of
|
||||
this example is implementation defined. Note that the number of threads executing
|
||||
a \code{parallel} region remains constant during the region, regardless of the
|
||||
dynamic threads setting. The dynamic threads mechanism determines the number of
|
||||
threads to use at the start of the \code{parallel} region and keeps it constant
|
||||
for the duration of the region.
|
||||
|
||||
\cexample{set_dynamic_nthrs}{1c}
|
||||
|
||||
\fexample{set_dynamic_nthrs}{1f}
|
||||
|
||||
|
19
Examples_simple_lock.tex
Normal file
19
Examples_simple_lock.tex
Normal file
@ -0,0 +1,19 @@
|
||||
\pagebreak
|
||||
\chapter{Simple Lock Routines}
|
||||
\label{chap:simple_lock}
|
||||
|
||||
In the following example, the lock routines cause the threads to be idle while
|
||||
waiting for entry to the first critical section, but to do other work while waiting
|
||||
for entry to the second. The \code{omp\_set\_lock} function blocks, but the \code{omp\_test\_lock}
|
||||
function does not, allowing the work in \code{skip} to be done.
|
||||
|
||||
Note that the argument to the lock routines should have type \code{omp\_lock\_t},
|
||||
and that there is no need to flush it.
|
||||
|
||||
\cexample{simple_lock}{1c}
|
||||
|
||||
Note that there is no need to flush the lock variable.
|
||||
|
||||
\fexample{simple_lock}{1f}
|
||||
|
||||
|
18
Examples_single.tex
Normal file
18
Examples_single.tex
Normal file
@ -0,0 +1,18 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{single} Construct}
|
||||
\label{chap:single}
|
||||
|
||||
The following example demonstrates the \code{single} construct. In the example,
|
||||
only one thread prints each of the progress messages. All other threads will skip
|
||||
the \code{single} region and stop at the barrier at the end of the \code{single}
|
||||
construct until all threads in the team have reached the barrier. If other threads
|
||||
can proceed without waiting for the thread executing the \code{single} region,
|
||||
a \code{nowait} clause can be specified, as is done in the third \code{single}
|
||||
construct in this example. The user must not make any assumptions as to which thread
|
||||
will execute a \code{single} region.
|
||||
|
||||
\cexample{single}{1c}
|
||||
|
||||
\fexample{single}{1f}
|
||||
|
||||
|
31
Examples_standalone.tex
Normal file
31
Examples_standalone.tex
Normal file
@ -0,0 +1,31 @@
|
||||
\pagebreak
|
||||
\chapter{Placement of \code{flush}, \code{barrier}, \code{taskwait}
|
||||
and \code{taskyield} Directives}
|
||||
\label{chap:standalone}
|
||||
|
||||
The following example is non-conforming, because the \code{flush}, \code{barrier},
|
||||
\code{taskwait}, and \code{taskyield} directives are stand-alone directives
|
||||
and cannot be the immediate substatement of an \code{if} statement.
|
||||
|
||||
\cexample{standalone}{1c}
|
||||
|
||||
The following example is non-conforming, because the \code{flush}, \code{barrier},
|
||||
\code{taskwait}, and \code{taskyield} directives are stand-alone directives
|
||||
and cannot be the action statement of an \code{if} statement or a labeled branch
|
||||
target.
|
||||
|
||||
\fexample{standalone}{1f}
|
||||
|
||||
The following version of the above example is conforming because the \code{flush},
|
||||
\code{barrier}, \code{taskwait}, and \code{taskyield} directives are enclosed
|
||||
in a compound statement.
|
||||
|
||||
\cexample{standalone}{2c}
|
||||
|
||||
The following example is conforming because the \code{flush}, \code{barrier},
|
||||
\code{taskwait}, and \code{taskyield} directives are enclosed in an \code{if}
|
||||
construct or follow the labeled branch target.
|
||||
|
||||
\fexample{standalone}{2f}
|
||||
|
||||
|
96
Examples_target.tex
Normal file
96
Examples_target.tex
Normal file
@ -0,0 +1,96 @@
|
||||
\pagebreak
|
||||
\chapter{\code{target} Construct}
|
||||
\label{chap:target}
|
||||
|
||||
\section{\code{target} Construct on \code{parallel} Construct}
|
||||
|
||||
The following example shows how the \code{target} construct offloads a code
|
||||
region to a target device. The variables \plc{p}, \plc{v1}, \plc{v2}, and \plc{N} are implicitly mapped
|
||||
to the target device.
|
||||
|
||||
\cexample{target}{1c}
|
||||
|
||||
\fexample{target}{1f}
|
||||
|
||||
\section{\code{target} Construct with \code{map} Clause}
|
||||
|
||||
The following example shows how the \code{target} construct offloads a code
|
||||
region to a target device. The variables \plc{p}, \plc{v1} and \plc{v2} are explicitly mapped to the
|
||||
target device using the \code{map} clause. The variable \plc{N} is implicitly mapped to
|
||||
the target device.
|
||||
|
||||
\cexample{target}{2c}
|
||||
|
||||
\fexample{target}{2f}
|
||||
|
||||
\section{\code{map} Clause with \code{to}/\code{from} map-types}
|
||||
|
||||
The following example shows how the \code{target} construct offloads a code region
|
||||
to a target device. In the \code{map} clause, the \code{to} and \code{from}
|
||||
map-types define the mapping between the original (host) data and the target (device)
|
||||
data. The \code{to} map-type specifies that the data will only be read on the
|
||||
device, and the \code{from} map-type specifies that the data will only be written
|
||||
to on the device. By specifying a guaranteed access on the device, data transfers
|
||||
can be reduced for the \code{target} region.
|
||||
|
||||
The \code{to} map-type indicates that at the start of the \code{target} region
|
||||
the variables \plc{v1} and \plc{v2} are initialized with the values of the corresponding variables
|
||||
on the host device, and at the end of the \code{target} region the variables
|
||||
\plc{v1} and \plc{v2} are not assigned to their corresponding variables on the host device.
|
||||
|
||||
The \code{from} map-type indicates that at the start of the \code{target} region
|
||||
the variable \plc{p} is not initialized with the value of the corresponding variable
|
||||
on the host device, and at the end of the \code{target} region the variable \plc{p}
|
||||
is assigned to the corresponding variable on the host device.
|
||||
|
||||
\cexample{target}{3c}
|
||||
|
||||
The \code{to} and \code{from} map-types allow programmers to optimize data
|
||||
motion. Since data for the \plc{v} arrays are not returned, and data for the \plc{p} array
|
||||
are not transferred to the device, only one-half of the data is moved, compared
|
||||
to the default behavior of an implicit mapping.
|
||||
|
||||
\fexample{target}{3f}
|
||||
|
||||
\section{\code{map} Clause with Array Sections}
|
||||
|
||||
The following example shows how the \code{target} construct offloads a code region
|
||||
to a target device. In the \code{map} clause, map-types are used to optimize
|
||||
the mapping of variables to the target device. Because variables \plc{p}, \plc{v1} and \plc{v2} are
|
||||
pointers, array section notation must be used to map the arrays. The notation \code{:N}
|
||||
is equivalent to \code{0:N}.
|
||||
|
||||
\cexample{target}{4c}
|
||||
|
||||
In C, the length of the pointed-to array must be specified. In Fortran the extent
|
||||
of the array is known and the length need not be specified. A section of the array
|
||||
can be specified with the usual Fortran syntax, as shown in the following example.
|
||||
The value 1 is assumed for the lower bound for array section \plc{v2(:N)}.
|
||||
|
||||
\fexample{target}{4f}
|
||||
|
||||
A more realistic situation in which an assumed-size array is passed to \code{vec\_mult}
|
||||
requires that the length of the arrays be specified, because the compiler does
|
||||
not know the size of the storage. A section of the array must be specified with
|
||||
the usual Fortran syntax, as shown in the following example. The value 1 is assumed
|
||||
for the lower bound for array section \plc{v2(:N)}.
|
||||
|
||||
\fexample{target}{4bf}
|
||||
|
||||
\section{\code{target} Construct with \code{if} Clause}
|
||||
|
||||
The following example shows how the \code{target} construct offloads a code region
|
||||
to a target device.
|
||||
|
||||
The \code{if} clause on the \code{target} construct indicates that if the variable
|
||||
\plc{N} is smaller than a given threshold, then the \code{target} region will be executed
|
||||
by the host device.
|
||||
|
||||
The \code{if} clause on the \code{parallel} construct indicates that if the
|
||||
variable \plc{N} is smaller than a second threshold then the \code{parallel} region
|
||||
is inactive.
|
||||
|
||||
\cexample{target}{5c}
|
||||
|
||||
\fexample{target}{5f}
|
||||
|
175
Examples_target_data.tex
Normal file
175
Examples_target_data.tex
Normal file
@ -0,0 +1,175 @@
|
||||
\pagebreak
|
||||
\chapter{\code{target} \code{data} Construct}
|
||||
\label{chap:target_data}
|
||||
|
||||
\section{Simple \code{target} \code{data} Construct}
|
||||
|
||||
This example shows how the \code{target} \code{data} construct maps variables
|
||||
to a device data environment. The \code{target} \code{data} construct creates
|
||||
a new device data environment and maps the variables \plc{v1}, \plc{v2}, and \plc{p} to the new device
|
||||
data environment. The \code{target} construct enclosed in the \code{target}
|
||||
\code{data} region creates a new device data environment, which inherits the
|
||||
variables \plc{v1}, \plc{v2}, and \plc{p} from the enclosing device data environment. The variable
|
||||
\plc{N} is mapped into the new device data environment from the encountering task's data
|
||||
environment.
|
||||
|
||||
\cexample{target_data}{1c}
|
||||
|
||||
The Fortran code passes a reference and specifies the extent of the arrays in the
|
||||
declaration. No length information is necessary in the map clause, as is required
|
||||
with C/C++ pointers.
|
||||
|
||||
\fexample{target_data}{1f}
|
||||
|
||||
\section{\code{target} \code{data} Region Enclosing Multiple \code{target} Regions}
|
||||
|
||||
The following examples show how the \code{target} \code{data} construct maps
|
||||
variables to a device data environment of a \code{target} region. The \code{target}
|
||||
\code{data} construct creates a device data environment and encloses \code{target}
|
||||
regions, which have their own device data environments. The device data environment
|
||||
of the \code{target} \code{data} region is inherited by the device data environment
|
||||
of an enclosed \code{target} region. The \code{target} \code{data} construct
|
||||
is used to create variables that will persist throughout the \code{target} \code{data}
|
||||
region.
|
||||
|
||||
In the following example the variables \plc{v1} and \plc{v2} are mapped at each \code{target}
|
||||
construct. Instead of mapping the variable \plc{p} twice, once at each \code{target}
|
||||
construct, \plc{p} is mapped once by the \code{target} \code{data} construct.
|
||||
|
||||
\cexample{target_data}{2c}
|
||||
|
||||
|
||||
The Fortran code passes references and specifies the extent of the \plc{p}, \plc{v1} and \plc{v2} arrays.
|
||||
No length information is necessary in the \code{map} clause, as is required with
|
||||
C/C++ pointers. The arrays \plc{v1} and \plc{v2} are mapped at each \code{target} construct.
|
||||
Instead of mapping the array \plc{p} twice, once at each target construct, \plc{p} is mapped
|
||||
once by the \code{target} \code{data} construct.
|
||||
|
||||
\fexample{target_data}{2f}
|
||||
|
||||
In the following example, the variable \plc{tmp} defaults to \code{tofrom} map-type
|
||||
and is mapped at each \code{target} construct. The array \plc{Q} is mapped once at
|
||||
the enclosing \code{target} \code{data} region instead of at each \code{target}
|
||||
construct.
|
||||
|
||||
\cexample{target_data}{3c}
|
||||
|
||||
In the following example the arrays \plc{v1} and \plc{v2} are mapped at each \code{target}
|
||||
construct. Instead of mapping the array \plc{Q} twice, once at each \code{target} construct,
|
||||
\plc{Q} is mapped once by the \code{target} \code{data} construct. Note, the \plc{tmp}
|
||||
variable is implicitly remapped for each \code{target} region, mapping the value
|
||||
from the device to the host at the end of the first \code{target} region, and
|
||||
from the host to the device for the second \code{target} region.
|
||||
|
||||
\fexample{target_data}{3f}
|
||||
|
||||
\section{\code{target} \code{data} Construct with Orphaned Call}
|
||||
|
||||
The following two examples show how the \code{target} \code{data} construct
|
||||
maps variables to a device data environment. The \code{target} \code{data}
|
||||
construct's device data environment encloses the \code{target} construct's device
|
||||
data environment in the function \code{vec\_mult()}.
|
||||
|
||||
When the type of the variable appearing in an array section is pointer, the pointer
|
||||
variable and the storage location of the corresponding array section are mapped
|
||||
to the device data environment. The pointer variable is treated as if it had appeared
|
||||
in a \code{map} clause with a map-type of \code{alloc}. The array section's
|
||||
storage location is mapped according to the map-type in the \code{map} clause
|
||||
(the default map-type is \code{tofrom}).
|
||||
|
||||
The \code{target} construct's device data environment inherits the storage locations
|
||||
of the array sections \plc{v1[0:N]}, \plc{v2[:N]}, and \plc{p0[0:N]} from the enclosing \code{target} \code{data}
|
||||
construct's device data environment. Neither initialization nor assignment is performed
|
||||
for the array sections in the new device data environment.
|
||||
|
||||
The pointer variables \plc{p1}, \plc{v3}, and \plc{v4} are mapped into the target construct's device
|
||||
data environment with an implicit map-type of \code{alloc} and they are assigned the address
|
||||
of the storage location associated with their corresponding array sections. Note
|
||||
that the following pairs of array section storage locations are equivalent (\plc{p0[:N]},
|
||||
\plc{p1[:N]}), (\plc{v1[:N]},\plc{v3[:N]}), and (\plc{v2[:N]},\plc{v4[:N]}).
|
||||
|
||||
\cexample{target_data}{4c}
|
||||
|
||||
The Fortran code maps the pointers and storage in an identical manner (same extent,
|
||||
but uses indices from 1 to \plc{N}).
|
||||
|
||||
The \code{target} construct's device data environment inherits the storage locations
|
||||
of the arrays \plc{v1}, \plc{v2} and \plc{p0} from the enclosing \code{target} \code{data} construct's
|
||||
device data environment. However, in Fortran the associated data of the pointer
|
||||
is known, and the shape is not required.
|
||||
|
||||
The pointer variables \plc{p1}, \plc{v3}, and \plc{v4} are mapped into the \code{target} construct's
|
||||
device data environment with an implicit map-type of \code{alloc} and they are
|
||||
assigned the address of the storage location associated with their corresponding
|
||||
array sections. Note that the following pairs of array storage locations are equivalent
|
||||
(\plc{p0},\plc{p1}), (\plc{v1},\plc{v3}), and (\plc{v2},\plc{v4}).
|
||||
|
||||
\fexample{target_data}{4f}
|
||||
|
||||
|
||||
In the following example, the variables \plc{p1}, \plc{v3}, and \plc{v4} are references to the pointer
|
||||
variables \plc{p0}, \plc{v1} and \plc{v2} respectively. The \code{target} construct's device data
|
||||
environment inherits the pointer variables \plc{p0}, \plc{v1}, and \plc{v2} from the enclosing \code{target}
|
||||
\code{data} construct's device data environment. Thus, \plc{p1}, \plc{v3}, and \plc{v4} are already
|
||||
present in the device data environment.
|
||||
|
||||
\cexample{target_data}{5c}
|
||||
|
||||
In the following example, the usual Fortran approach is used for dynamic memory.
|
||||
The \plc{p0}, \plc{v1}, and \plc{v2} arrays are allocated in the main program and passed as references
|
||||
from one routine to another. In \code{vec\_mult}, \plc{p1}, \plc{v3} and \plc{v4} are references to the
|
||||
\plc{p0}, \plc{v1}, and \plc{v2} arrays, respectively. The \code{target} construct's device data
|
||||
environment inherits the arrays \plc{p0}, \plc{v1}, and \plc{v2} from the enclosing \code{target} \code{data} construct's
|
||||
device data environment. Thus, \plc{p1}, \plc{v3}, and \plc{v4} are already present in the device
|
||||
data environment.
|
||||
|
||||
\fexample{target_data}{5f}
|
||||
|
||||
\section{\code{target} \code{data} Construct with \code{if} Clause}
|
||||
|
||||
The following two examples show how the \code{target} \code{data} construct
|
||||
maps variables to a device data environment.
|
||||
|
||||
In the following example, the \code{if} clause on the \code{target} \code{data} construct
|
||||
indicates that if the variable \plc{N} is smaller than a given threshold, then the \code{target}
|
||||
\code{data} construct will not create a device data environment.
|
||||
|
||||
The \code{target} constructs enclosed in the \code{target} \code{data} region
|
||||
must also use an \code{if} clause with the same condition, otherwise the pointer
|
||||
variable \plc{p} is implicitly mapped with a map-type of \code{tofrom}, but the storage
|
||||
location for the array section \plc{p[0:N]} will not be mapped in the device data environments
|
||||
of the \code{target} constructs.
|
||||
|
||||
\cexample{target_data}{6c}
|
||||
|
||||
The \code{if} clauses work the same way for the following Fortran code. The \code{target}
|
||||
constructs enclosed in the \code{target} \code{data} region should also use
|
||||
an \code{if} clause with the same condition, so that the \code{target} \code{data}
|
||||
region and the \code{target} region are either both created for the device, or
|
||||
are both ignored.
|
||||
|
||||
\fexample{target_data}{6f}
|
||||
|
||||
In the following example, when the \code{if} clause conditional expression on
|
||||
the \code{target} construct evaluates to \plc{false}, the \code{target} region will
|
||||
execute on the host device. However, the \code{target} \code{data} construct
|
||||
created an enclosing device data environment that mapped \plc{p[0:N]} to a device data
|
||||
environment on the default device. At the end of the \code{target} \code{data}
|
||||
region the array section \plc{p[0:N]} will be assigned from the device data environment
|
||||
to the corresponding variable in the data environment of the task that encountered
|
||||
the \code{target} \code{data} construct, resulting in undefined values in \plc{p[0:N]}.
|
||||
|
||||
\cexample{target_data}{7c}
|
||||
|
||||
The \code{if} clauses work the same way for the following Fortran code. When
|
||||
the \code{if} clause conditional expression on the \code{target} construct
|
||||
evaluates to \plc{false}, the \code{target} region will execute on the host
|
||||
device. However, the \code{target} \code{data} construct created an enclosing
|
||||
device data environment that mapped the \plc{p} array (and \plc{v1} and \plc{v2}) to a device data
|
||||
environment on the default target device. At the end of the \code{target} \code{data}
|
||||
region the \plc{p} array will be assigned from the device data environment to the corresponding
|
||||
variable in the data environment of the task that encountered the \code{target}
|
||||
\code{data} construct, resulting in undefined values in \plc{p}.
|
||||
|
||||
\fexample{target_data}{7f}
|
||||
|
53
Examples_target_update.tex
Normal file
53
Examples_target_update.tex
Normal file
@ -0,0 +1,53 @@
|
||||
\pagebreak
|
||||
\chapter{\code{target} \code{update} Construct}
|
||||
\label{chap:target_update}
|
||||
|
||||
\section{Simple \code{target} \code{data} and \code{target} \code{update} Constructs}
|
||||
|
||||
The following example shows how the \code{target} \code{update} construct updates
|
||||
variables in a device data environment.
|
||||
|
||||
The \code{target} \code{data} construct maps array sections \plc{v1[:N]} and \plc{v2[:N]}
|
||||
(arrays \plc{v1} and \plc{v2} in the Fortran code) into a device data environment.
|
||||
|
||||
The task executing on the host device encounters the first \code{target} region
|
||||
and waits for the completion of the region.
|
||||
|
||||
After the execution of the first \code{target} region, the task executing on
|
||||
the host device then assigns new values to \plc{v1[:N]} and \plc{v2[:N]} (\plc{v1} and \plc{v2} arrays
|
||||
in Fortran code) in the task's data environment by calling the function \code{init\_again()}.
|
||||
|
||||
The \code{target} \code{update} construct assigns the new values of \plc{v1} and
|
||||
\plc{v2} from the task's data environment to the corresponding mapped array sections
|
||||
in the device data environment of the \code{target} \code{data} construct.
|
||||
|
||||
The task executing on the host device then encounters the second \code{target}
|
||||
region and waits for the completion of the region.
|
||||
|
||||
The second \code{target} region uses the updated values of \plc{v1[:N]} and \plc{v2[:N]}.
|
||||
|
||||
\cexample{target_update}{1c}
|
||||
|
||||
\fexample{target_update}{1f}
|
||||
|
||||
\section{\code{target} \code{update} Construct with \code{if} Clause}
|
||||
|
||||
The following example shows how the \code{target} \code{update} construct updates
|
||||
variables in a device data environment.
|
||||
|
||||
The \code{target} \code{data} construct maps array sections \plc{v1[:N]} and \plc{v2[:N]}
|
||||
(arrays \plc{v1} and \plc{v2} in the Fortran code) into a device data environment. In between
|
||||
the two \code{target} regions, the task executing on the host device conditionally
|
||||
assigns new values to \plc{v1} and \plc{v2} in the task's data environment. The function \code{maybe\_init\_again()}
|
||||
returns \plc{true} if new data is written.
|
||||
|
||||
When the conditional expression (the return value of \code{maybe\_init\_again()}) in the
|
||||
\code{if} clause is \plc{true}, the \code{target} \code{update} construct
|
||||
assigns the new values of \plc{v1} and \plc{v2} from the task's data environment to the corresponding
|
||||
mapped array sections in the \code{target} \code{data} construct's device data
|
||||
environment.
|
||||
|
||||
\cexample{target_update}{2c}
|
||||
|
||||
\fexample{target_update}{2f}
|
||||
|
20
Examples_taskgroup.tex
Normal file
20
Examples_taskgroup.tex
Normal file
@ -0,0 +1,20 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{taskgroup} Construct}
|
||||
\label{chap:taskgroup}
|
||||
|
||||
In this example, tasks are grouped and synchronized using the \code{taskgroup}
|
||||
construct.
|
||||
|
||||
Initially, one task (the task executing the \code{start\_background\_work()}
|
||||
call) is created in the \code{parallel} region, and later a parallel tree traversal
|
||||
is started (the task executing the root of the recursive \code{compute\_tree()}
|
||||
calls). While synchronizing tasks at the end of each tree traversal, using the
|
||||
\code{taskgroup} construct ensures that the formerly started background task
|
||||
does not participate in the synchronization, and is left free to execute in parallel.
|
||||
This is in contrast to the behavior of the \code{taskwait} construct, which would
|
||||
include the background tasks in the synchronization.
|
||||
|
||||
\cexample{taskgroup}{1c}
|
||||
|
||||
\fexample{taskgroup}{1f}
|
||||
|
258
Examples_tasking.tex
Normal file
258
Examples_tasking.tex
Normal file
@ -0,0 +1,258 @@
|
||||
\pagebreak
|
||||
\chapter{Tasking Constructs}
|
||||
\label{chap:tasking}
|
||||
|
||||
The following example shows how to traverse a tree-like structure using explicit
|
||||
tasks. Note that the \code{traverse} function should be called from within a
|
||||
parallel region for the different specified tasks to be executed in parallel. Also
|
||||
note that the tasks will be executed in no specified order because there are no
|
||||
synchronization directives. Thus, assuming that the traversal will be done in post
|
||||
order, as in the sequential code, is wrong.
|
||||
|
||||
\cexample{tasking}{1c}
|
||||
|
||||
\fexample{tasking}{1f}
|
||||
|
||||
In the next example, we force a postorder traversal of the tree by adding a \code{taskwait}
|
||||
directive. Now, we can safely assume that the left and right sons have been executed
|
||||
before we process the current node.
|
||||
|
||||
\cexample{tasking}{2c}
|
||||
|
||||
\fexample{tasking}{2f}
|
||||
|
||||
The following example demonstrates how to use the \code{task} construct to process
|
||||
elements of a linked list in parallel. The thread executing the \code{single}
|
||||
region generates all of the explicit tasks, which are then executed by the threads
|
||||
in the current team. The pointer \plc{p} is \code{firstprivate} by default
|
||||
on the \code{task} construct so it is not necessary to specify it in a \code{firstprivate}
|
||||
clause.
|
||||
|
||||
\cexample{tasking}{3c}
|
||||
|
||||
\fexample{tasking}{3f}
|
||||
|
||||
The \code{fib()} function should be called from within a \code{parallel} region
|
||||
for the different specified tasks to be executed in parallel. Also, only one thread
|
||||
of the \code{parallel} region should call \code{fib()} unless multiple concurrent
|
||||
Fibonacci computations are desired.
|
||||
|
||||
\cexample{tasking}{4c}
|
||||
|
||||
\fexample{tasking}{4f}
|
||||
|
||||
Note: There are more efficient algorithms for computing Fibonacci numbers. This
|
||||
classic recursion algorithm is for illustrative purposes.
|
||||
|
||||
The following example demonstrates a way to generate a large number of tasks with
|
||||
one thread and execute them with the threads in the team. While generating these
|
||||
tasks, the implementation may reach its limit on unassigned tasks. If it does,
|
||||
the implementation is allowed to cause the thread executing the task generating
|
||||
loop to suspend its task at the task scheduling point in the \code{task} directive,
|
||||
and start executing unassigned tasks. Once the number of unassigned tasks is sufficiently
|
||||
low, the thread may resume execution of the task generating loop.
|
||||
|
||||
\cexample{tasking}{5c}
|
||||
\pagebreak
|
||||
\fexample{tasking}{5f}
|
||||
|
||||
The following example is the same as the previous one, except that the tasks are
|
||||
generated in an untied task. While generating the tasks, the implementation may
|
||||
reach its limit on unassigned tasks. If it does, the implementation is allowed
|
||||
to cause the thread executing the task generating loop to suspend its task at the
|
||||
task scheduling point in the \code{task} directive, and start executing unassigned
|
||||
tasks. If that thread begins execution of a task that takes a long time to complete,
|
||||
the other threads may complete all the other tasks before it is finished.
|
||||
|
||||
In this case, since the loop is in an untied task, any other thread is eligible
|
||||
to resume the task generating loop. In the previous examples, the other threads
|
||||
would be forced to idle until the generating thread finishes its long task, since
|
||||
the task generating loop was in a tied task.
|
||||
|
||||
\cexample{tasking}{6c}
|
||||
|
||||
\fexample{tasking}{6f}
|
||||
|
||||
The following two examples demonstrate how the scheduling rules illustrated in
|
||||
Section 2.11.3 (Task Scheduling) of the OpenMP 4.0 specification affect the usage of \code{threadprivate} variables in tasks. A \code{threadprivate}
|
||||
variable can be modified by another task that is executed by the same thread. Thus,
|
||||
the value of a \code{threadprivate} variable cannot be assumed to be unchanged
|
||||
across a task scheduling point. In untied tasks, task scheduling points may be
|
||||
added in any place by the implementation.
|
||||
|
||||
A task switch may occur at a task scheduling point. A single thread may execute
|
||||
both of the task regions that modify \code{tp}. The parts of these task regions
|
||||
in which \code{tp} is modified may be executed in any order so the resulting
|
||||
value of \code{var} can be either 1 or 2.
|
||||
|
||||
\cexample{tasking}{7c}
|
||||
|
||||
|
||||
\fexample{tasking}{7f}
|
||||
|
||||
In this example, scheduling constraints prohibit a thread in the team from executing
|
||||
a new task that modifies \code{tp} while another such task region tied to the
|
||||
same thread is suspended. Therefore, the value written will persist across the
|
||||
task scheduling point.
|
||||
|
||||
\cexample{tasking}{8c}
|
||||
|
||||
|
||||
\fexample{tasking}{8f}
|
||||
|
||||
The following two examples demonstrate how the scheduling rules illustrated in
|
||||
Section 2.11.3 (Task Scheduling) of the OpenMP 4.0 specification affect the usage of locks and critical sections in tasks. If a lock is held
|
||||
across a task scheduling point, no attempt should be made to acquire the same lock
|
||||
in any code that may be interleaved. Otherwise, a deadlock is possible.
|
||||
|
||||
In the example below, suppose the thread executing task 1 defers task 2. When
|
||||
it encounters the task scheduling point at task 3, it could suspend task 1 and
|
||||
begin task 2 which will result in a deadlock when it tries to enter critical region
|
||||
1.
|
||||
|
||||
\cexample{tasking}{9c}
|
||||
|
||||
|
||||
\fexample{tasking}{9f}
|
||||
|
||||
In the following example, \code{lock} is held across a task scheduling point.
|
||||
However, according to the scheduling restrictions, the executing thread can't
|
||||
begin executing one of the non-descendant tasks that also acquires \code{lock} before
|
||||
the task region is complete. Therefore, no deadlock is possible.
|
||||
|
||||
\cexample{tasking}{10c}
|
||||
|
||||
|
||||
\fexample{tasking}{10f}
|
||||
|
||||
The following examples illustrate the use of the \code{mergeable} clause in the
|
||||
\code{task} construct. In this first example, the \code{task} construct has
|
||||
been annotated with the \code{mergeable} clause. The addition of this clause
|
||||
allows the implementation to reuse the data environment (including the ICVs) of
|
||||
the parent task for the task inside \code{foo} if the task is included or undeferred.
|
||||
Thus, the result of the execution may differ depending on whether the task is merged
|
||||
or not. Therefore the \code{mergeable} clause needs to be used with caution. In this example,
|
||||
the use of the \code{mergeable} clause is safe. As \code{x} is a shared variable, the
|
||||
outcome does not depend on whether or not the task is merged (that is, the task
|
||||
will always increment the same variable and will always compute the same value
|
||||
for \code{x}).
|
||||
|
||||
\cexample{tasking}{11c}
|
||||
|
||||
\fexample{tasking}{11f}
|
||||
|
||||
This second example shows an incorrect use of the \code{mergeable} clause. In
|
||||
this example, the created task will access different instances of the variable
|
||||
\code{x} if the task is not merged, as \code{x} is \code{firstprivate}, but
|
||||
it will access the same variable \code{x} if the task is merged. As a result,
|
||||
the behavior of the program is unspecified and it can print two different values
|
||||
for \code{x} depending on the decisions taken by the implementation.
|
||||
|
||||
\cexample{tasking}{12c}
|
||||
|
||||
\fexample{tasking}{12f}
|
||||
|
||||
The following example shows the use of the \code{final} clause and the \code{omp\_in\_final}
|
||||
API call in a recursive binary search program. To reduce overhead, once a certain
|
||||
depth of recursion is reached the program uses the \code{final} clause to create
|
||||
only included tasks, which allow additional optimizations.
|
||||
|
||||
The use of the \code{omp\_in\_final} API call allows programmers to optimize
|
||||
their code by specifying which parts of the program are not necessary when a task
|
||||
can create only included tasks (that is, the code is inside a \code{final} task).
|
||||
In this example, the use of a different state variable is not necessary once
|
||||
the program reaches the part of the computation that is finalized, so copying from
|
||||
the parent state to the new state is eliminated. The allocation of \code{new\_state}
|
||||
in the stack could also be avoided but it would make this example less clear. The
|
||||
\code{final} clause is most effective when used in conjunction with the \code{mergeable}
|
||||
clause since all tasks created in a \code{final} task region are included tasks
|
||||
that can be merged if the \code{mergeable} clause is present.
|
||||
|
||||
\cexample{tasking}{13c}
|
||||
|
||||
\fexample{tasking}{13f}
|
||||
|
||||
The following example illustrates the difference between the \code{if} and the
|
||||
\code{final} clauses. The \code{if} clause has a local effect. In the first
|
||||
nest of tasks, the one that has the \code{if} clause will be undeferred but
|
||||
the task nested inside that task will not be affected by the \code{if} clause
|
||||
and will be created as usual. Alternatively, the \code{final} clause affects
|
||||
all \code{task} constructs in the \code{final} task region but not the \code{final}
|
||||
task itself. In the second nest of tasks, the nested tasks will be created as included
|
||||
tasks. Note also that the conditions for the \code{if} and \code{final} clauses
|
||||
are usually the opposite.
|
||||
|
||||
\cexample{tasking}{14c}
|
||||
|
||||
\fexample{tasking}{14f}
|
||||
|
||||
\section*{Task Dependences}
|
||||
|
||||
\section{Flow Dependence}
|
||||
|
||||
In this example we show a simple flow dependence expressed using the \code{depend}
|
||||
clause on the \code{task} construct.
|
||||
|
||||
\cexample{tasking}{15c}
|
||||
|
||||
\fexample{tasking}{15f}
|
||||
|
||||
The program will always print \texttt{"}x = 2\texttt{"}, because the \code{depend}
|
||||
clauses enforce the ordering of the tasks. If the \code{depend} clauses had been
|
||||
omitted, then the tasks could execute in any order and the program
|
||||
would have a race condition.
|
||||
|
||||
\section{Anti-dependence}
|
||||
|
||||
In this example we show an anti-dependence expressed using the \code{depend}
|
||||
clause on the \code{task} construct.
|
||||
|
||||
\cexample{tasking}{16c}
|
||||
|
||||
\fexample{tasking}{16f}
|
||||
|
||||
The program will always print \texttt{"}x = 1\texttt{"}, because the \code{depend}
|
||||
clauses enforce the ordering of the tasks. If the \code{depend} clauses had been
|
||||
omitted, then the tasks could execute in any order and the program would have a
|
||||
race condition.
|
||||
|
||||
\section{Output Dependence}
|
||||
|
||||
In this example we show an output dependence expressed using the \code{depend}
|
||||
clause on the \code{task} construct.
|
||||
|
||||
\cexample{tasking}{17c}
|
||||
|
||||
\fexample{tasking}{17f}
|
||||
|
||||
The program will always print \texttt{"}x = 2\texttt{"}, because the \code{depend}
|
||||
clauses enforce the ordering of the tasks. If the \code{depend} clauses had been
|
||||
omitted, then the tasks could execute in any order and the program would have a
|
||||
race condition.
|
||||
|
||||
\section{Concurrent Execution with Dependences}
|
||||
|
||||
In this example we show potentially concurrent execution of tasks using multiple
|
||||
flow dependences expressed using the \code{depend} clause on the \code{task}
|
||||
construct.
|
||||
|
||||
\cexample{tasking}{18c}
|
||||
|
||||
\fexample{tasking}{18f}
|
||||
|
||||
The last two tasks are dependent on the first task. However there is no dependence
|
||||
between the last two tasks, which may execute in any order (or concurrently if
|
||||
more than one thread is available). Thus, the possible outputs are \texttt{"}x
|
||||
+ 1 = 3. x + 2 = 4. \texttt{"} and \texttt{"}x + 2 = 4. x + 1 = 3. \texttt{"}.
|
||||
If the \code{depend} clauses had been omitted, then all of the tasks could execute
|
||||
in any order and the program would have a race condition.
|
||||
|
||||
\section{Matrix Multiplication}
|
||||
|
||||
This example shows a task-based blocked matrix multiplication. Matrices are of
|
||||
NxN elements, and the multiplication is implemented using blocks of BSxBS elements.
|
||||
|
||||
\cexample{tasking}{19c}
|
||||
|
||||
\fexample{tasking}{19f}
|
||||
|
14
Examples_taskyield.tex
Normal file
14
Examples_taskyield.tex
Normal file
@ -0,0 +1,14 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{taskyield} Directive}
|
||||
\label{chap:taskyield}
|
||||
|
||||
The following example illustrates the use of the \code{taskyield} directive.
|
||||
The tasks in the example compute something useful and then do some computation
|
||||
that must be done in a critical region. By using \code{taskyield} when a task
|
||||
cannot get access to the \code{critical} region the implementation can suspend
|
||||
the current task and schedule some other task that can do something useful.
|
||||
|
||||
\cexample{taskyield}{1c}
|
||||
|
||||
\fexample{taskyield}{1f}
|
||||
|
118
Examples_teams.tex
Normal file
118
Examples_teams.tex
Normal file
@ -0,0 +1,118 @@
|
||||
\pagebreak
|
||||
\chapter{\code{teams} Constructs}
|
||||
\label{chap:teams}
|
||||
|
||||
\section{\code{target} and \code{teams} Constructs with \code{omp\_get\_num\_teams}\\
|
||||
and \code{omp\_get\_team\_num} Routines}
|
||||
|
||||
The following example shows how the \code{target} and \code{teams} constructs
|
||||
are used to create a league of thread teams that execute a region. The \code{teams}
|
||||
construct creates a league of at most two teams where the master thread of each
|
||||
team executes the \code{teams} region.
|
||||
|
||||
The \code{omp\_get\_num\_teams} routine returns the number of teams executing in a \code{teams}
|
||||
region. The \code{omp\_get\_team\_num} routine returns the team number, which is an integer
|
||||
between 0 and one less than the value returned by \code{omp\_get\_num\_teams}. The following
|
||||
example manually distributes a loop across two teams.
|
||||
|
||||
\cexample{teams}{1c}
|
||||
|
||||
\fexample{teams}{1f}
|
||||
|
||||
\section{\code{target}, \code{teams}, and \code{distribute} Constructs}
|
||||
|
||||
The following example shows how the \code{target}, \code{teams}, and \code{distribute}
|
||||
constructs are used to execute a loop nest in a \code{target} region. The \code{teams}
|
||||
construct creates a league and the master thread of each team executes the \code{teams}
|
||||
region. The \code{distribute} construct schedules the subsequent loop iterations
|
||||
across the master threads of each team.
|
||||
|
||||
The number of teams in the league is less than or equal to the variable \plc{num\_blocks}.
|
||||
Each team in the league has a number of threads less than or equal to the variable
|
||||
\plc{block\_threads}. The iterations in the outer loop are distributed among the master
|
||||
threads of each team.
|
||||
|
||||
When a team's master thread encounters the parallel loop construct before the inner
|
||||
loop, the other threads in its team are activated. The team executes the \code{parallel}
|
||||
region and then workshares the execution of the loop.
|
||||
|
||||
Each master thread executing the \code{teams} region has a private copy of the
|
||||
variable \plc{sum} that is created by the \code{reduction} clause on the \code{teams} construct.
|
||||
The master thread and all threads in its team have a private copy of the variable
|
||||
\plc{sum} that is created by the \code{reduction} clause on the parallel loop construct.
|
||||
The second private \plc{sum} is reduced into the master thread's private copy of \plc{sum}
|
||||
created by the \code{teams} construct. At the end of the \code{teams} region,
|
||||
each master thread's private copy of \plc{sum} is reduced into the final \plc{sum} that is
|
||||
implicitly mapped into the \code{target} region.
|
||||
|
||||
\cexample{teams}{2c}
|
||||
|
||||
\fexample{teams}{2f}
|
||||
|
||||
\section{\code{target} \code{teams} and Distribute Parallel Loop Constructs}
|
||||
|
||||
The following example shows how the \code{target} \code{teams} and distribute
|
||||
parallel loop constructs are used to execute a \code{target} region. The \code{target}
|
||||
\code{teams} construct creates a league of teams where the master thread of each
|
||||
team executes the \code{teams} region.
|
||||
|
||||
The distribute parallel loop construct schedules the loop iterations across the
|
||||
master threads of each team and then across the threads of each team.
|
||||
|
||||
\cexample{teams}{3c}
|
||||
|
||||
\fexample{teams}{3f}
|
||||
|
||||
\section{\code{target} \code{teams} and Distribute Parallel Loop
|
||||
Constructs with Scheduling Clauses}
|
||||
|
||||
The following example shows how the \code{target} \code{teams} and distribute
|
||||
parallel loop constructs are used to execute a \code{target} region. The \code{teams}
|
||||
construct creates a league of at most eight teams where the master thread of each
|
||||
team executes the \code{teams} region. The number of threads in each team is
|
||||
less than or equal to 16.
|
||||
|
||||
The distribute parallel loop construct schedules the subsequent loop iterations
|
||||
across the master threads of each team and then across the threads of each team.
|
||||
|
||||
The \code{dist\_schedule} clause on the distribute parallel loop construct indicates
|
||||
that loop iterations are distributed to the master thread of each team in chunks
|
||||
of 1024 iterations.
|
||||
|
||||
The \code{schedule} clause indicates that the 1024 iterations distributed to
|
||||
a master thread are then assigned to the threads in its associated team in chunks
|
||||
of 64 iterations.
|
||||
|
||||
\cexample{teams}{4c}
|
||||
|
||||
\fexample{teams}{4f}
|
||||
|
||||
\section{\code{target} \code{teams} and \code{distribute} \code{simd} Constructs}
|
||||
|
||||
The following example shows how the \code{target} \code{teams} and \code{distribute}
|
||||
\code{simd} constructs are used to execute a loop in a \code{target} region.
|
||||
The \code{target} \code{teams} construct creates a league of teams where the
|
||||
master thread of each team executes the \code{teams} region.
|
||||
|
||||
The \code{distribute} \code{simd} construct schedules the loop iterations across
|
||||
the master thread of each team and then uses SIMD parallelism to execute the iterations.
|
||||
|
||||
\cexample{teams}{5c}
|
||||
|
||||
\fexample{teams}{5f}
|
||||
|
||||
\section{\code{target} \code{teams} and Distribute Parallel Loop SIMD Constructs}
|
||||
|
||||
The following example shows how the \code{target} \code{teams} and the distribute
|
||||
parallel loop SIMD constructs are used to execute a loop in a \code{target} \code{teams}
|
||||
region. The \code{target} \code{teams} construct creates a league of teams
|
||||
where the master thread of each team executes the \code{teams} region.
|
||||
|
||||
The distribute parallel loop SIMD construct schedules the loop iterations across
|
||||
the master thread of each team and then across the threads of each team where each
|
||||
thread uses SIMD parallelism.
|
||||
|
||||
\cexample{teams}{6c}
|
||||
|
||||
\fexample{teams}{6f}
|
||||
|
106
Examples_threadprivate.tex
Normal file
106
Examples_threadprivate.tex
Normal file
@ -0,0 +1,106 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{threadprivate} Directive}
|
||||
\label{chap:threadprivate}
|
||||
|
||||
The following examples demonstrate how to use the \code{threadprivate} directive
|
||||
to give each thread a separate counter.
|
||||
|
||||
\cexample{threadprivate}{1c}
|
||||
|
||||
\fexample{threadprivate}{1f}
|
||||
|
||||
\ccppspecificstart
|
||||
The following example uses \code{threadprivate} on a static variable:
|
||||
|
||||
\cnexample{threadprivate}{2c}
|
||||
|
||||
The following example demonstrates unspecified behavior for the initialization
|
||||
of a \code{threadprivate} variable. A \code{threadprivate} variable is initialized
|
||||
once at an unspecified point before its first reference. Because \code{a} is
|
||||
constructed using the value of \code{x} (which is modified by the statement
|
||||
\code{x++}), the value of \code{a.val} at the start of the \code{parallel}
|
||||
region could be either 1 or 2. This problem is avoided for \code{b}, which uses
|
||||
an auxiliary \code{const} variable and a copy-constructor.
|
||||
|
||||
\cnexample{threadprivate}{3c}
|
||||
\ccppspecificend
|
||||
|
||||
The following examples show non-conforming uses and correct uses of the \code{threadprivate}
|
||||
directive.
|
||||
|
||||
\fortranspecificstart
|
||||
The following example is non-conforming because the common block is not declared
|
||||
local to the subroutine that refers to it:
|
||||
|
||||
\fnexample{threadprivate}{2f}
|
||||
|
||||
The following example is also non-conforming because the common block is not declared
|
||||
local to the subroutine that refers to it:
|
||||
|
||||
\fnexample{threadprivate}{3f}
|
||||
|
||||
The following example is a correct rewrite of the previous example:
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
\fnexample{threadprivate}{4f}
|
||||
|
||||
The following is an example of the use of \code{threadprivate} for local variables:
|
||||
|
||||
\fnexample{threadprivate}{5f}
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
The above program, if executed by two threads, will print one of the following
|
||||
two sets of output:
|
||||
|
||||
\code{a = 11 12 13}
|
||||
\\
|
||||
\code{ptr = 4}
|
||||
\\
|
||||
\code{i = 15}
|
||||
|
||||
\code{A is not allocated}
|
||||
\\
|
||||
\code{ptr = 4}
|
||||
\\
|
||||
\code{i = 5}
|
||||
|
||||
or
|
||||
|
||||
\code{A is not allocated}
|
||||
\\
|
||||
\code{ptr = 4}
|
||||
\\
|
||||
\code{i = 15}
|
||||
|
||||
\code{a = 1 2 3}
|
||||
\\
|
||||
\code{ptr = 4}
|
||||
\\
|
||||
\code{i = 5}
|
||||
|
||||
The following is an example of the use of \code{threadprivate} for module variables:
|
||||
|
||||
\fnexample{threadprivate}{6f}
|
||||
\fortranspecificend
|
||||
|
||||
\ccppspecificstart
|
||||
The following example illustrates initialization of \code{threadprivate} variables
|
||||
for class-type \code{T}. \code{t1} is default constructed, \code{t2} is constructed
|
||||
using a constructor accepting one argument of integer type, and \code{t3} is copy
|
||||
constructed with argument \code{f()}:
|
||||
|
||||
\cnexample{threadprivate}{4c}
|
||||
|
||||
The following example illustrates the use of \code{threadprivate} for static
|
||||
class members. The \code{threadprivate} directive for a static class member must
|
||||
be placed inside the class definition.
|
||||
|
||||
\cnexample{threadprivate}{5c}
|
||||
\ccppspecificend
|
||||
|
76
Examples_workshare.tex
Normal file
76
Examples_workshare.tex
Normal file
@ -0,0 +1,76 @@
|
||||
\pagebreak
|
||||
\chapter{The \code{workshare} Construct}
|
||||
\fortranspecificstart
|
||||
\label{chap:workshare}
|
||||
|
||||
The following are examples of the \code{workshare} construct.
|
||||
|
||||
In the following example, \code{workshare} spreads work across the threads executing
|
||||
the \code{parallel} region, and there is a barrier after the last statement.
|
||||
Implementations must enforce Fortran execution rules inside of the \code{workshare}
|
||||
block.
|
||||
|
||||
\fnexample{workshare}{1f}
|
||||
|
||||
In the following example, the barrier at the end of the first \code{workshare}
|
||||
region is eliminated with a \code{nowait} clause. Threads doing \code{CC =
|
||||
DD} immediately begin work on \code{EE = FF} when they are done with \code{CC
|
||||
= DD}.
|
||||
|
||||
\fnexample{workshare}{2f}
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
The following example shows the use of an \code{atomic} directive inside a \code{workshare}
|
||||
construct. The computation of \code{SUM(AA)} is workshared, but the update to
|
||||
\code{R} is atomic.
|
||||
|
||||
\fnexample{workshare}{3f}
|
||||
|
||||
Fortran \code{WHERE} and \code{FORALL} statements are \emph{compound statements},
|
||||
made up of a \emph{control} part and a \emph{statement} part. When \code{workshare}
|
||||
is applied to one of these compound statements, both the control and the statement
|
||||
parts are workshared. The following example shows the use of a \code{WHERE} statement
|
||||
in a \code{workshare} construct.
|
||||
|
||||
Each task gets worked on in order by the threads:
|
||||
|
||||
\code{AA = BB} then
|
||||
\\
|
||||
\code{CC = DD} then
|
||||
\\
|
||||
\code{EE .ne. 0} then
|
||||
\\
|
||||
\code{FF = 1 / EE} then
|
||||
\\
|
||||
\code{GG = HH}
|
||||
|
||||
\fnexample{workshare}{4f}
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
\begin{figure}[t!]
|
||||
\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
\end{figure}
|
||||
|
||||
In the following example, an assignment to a shared scalar variable is performed
|
||||
by one thread in a \code{workshare} while all other threads in the team wait.
|
||||
|
||||
\fnexample{workshare}{5f}
|
||||
|
||||
The following example contains an assignment to a private scalar variable, which
|
||||
is performed by one thread in a \code{workshare} while all other threads wait.
|
||||
It is non-conforming because the private scalar variable is undefined after the
|
||||
assignment statement.
|
||||
|
||||
\fnexample{workshare}{6f}
|
||||
|
||||
Fortran execution rules must be enforced inside a \code{workshare} construct.
|
||||
The following program fragment produces the same result
|
||||
regardless of whether the code is executed sequentially or inside an OpenMP
|
||||
program with multiple threads:
|
||||
|
||||
\fnexample{workshare}{7f}
|
||||
\fortranspecificend
|
||||
|
||||
|
18
Examples_worksharing_critical.tex
Normal file
18
Examples_worksharing_critical.tex
Normal file
@ -0,0 +1,18 @@
|
||||
\pagebreak
|
||||
\chapter{Worksharing Constructs Inside a \code{critical} Construct}
|
||||
\label{chap:worksharing_critical}
|
||||
|
||||
The following example demonstrates using a worksharing construct inside a \code{critical}
|
||||
construct. This example is conforming because the worksharing \code{single}
|
||||
region is not closely nested inside the \code{critical} region. A single thread
|
||||
executes the one and only section in the \code{sections} region, and executes
|
||||
the \code{critical} region. The same thread encounters the nested \code{parallel}
|
||||
region, creates a new team of threads, and becomes the master of the new team.
|
||||
One of the threads in the new team enters the \code{single} region and increments
|
||||
\code{i} by \code{1}. At the end of this example \code{i} is equal to \code{2}.
|
||||
|
||||
\cexample{worksharing_critical}{1c}
|
||||
|
||||
\fexample{worksharing_critical}{1f}
|
||||
|
||||
|
69
Introduction_Chapt.tex
Normal file
69
Introduction_Chapt.tex
Normal file
@ -0,0 +1,69 @@
|
||||
% This is the introduction for the OpenMP Examples document.
|
||||
% This is an included file. See the master file (openmp-examples.tex) for more information.
|
||||
%
|
||||
% When editing this file:
|
||||
%
|
||||
% 1. To change formatting, appearance, or style, please edit openmp.sty.
|
||||
%
|
||||
% 2. Custom commands and macros are defined in openmp.sty.
|
||||
%
|
||||
% 3. Be kind to other editors -- keep a consistent style by copying-and-pasting to
|
||||
% create new content.
|
||||
%
|
||||
% 4. We use semantic markup, e.g. (see openmp.sty for a full list):
|
||||
% \code{} % for bold monospace keywords, code, operators, etc.
|
||||
% \plc{} % for italic placeholder names, grammar, etc.
|
||||
%
|
||||
% 5. Other recommendations:
|
||||
% Use the convenience macros defined in openmp.sty for the minor headers
|
||||
% such as Comments, Syntax, etc.
|
||||
%
|
||||
% To keep items together on the same page, prefer the use of
|
||||
% \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering.
|
||||
% When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's
|
||||
% what you mean. Use \needspace{} cautiously for troublesome paragraphs.
|
||||
%
|
||||
% Avoid absolute lengths and measures in this file; use relative units when possible.
|
||||
% Vertical space can be relative to \baselineskip or ex units. Horizontal space
|
||||
% can be relative to \linewidth or em units.
|
||||
%
|
||||
% Prefer \emph{} to italicize terminology, e.g.:
|
||||
% This is a \emph{definition}, not a placeholder.
|
||||
% This is a \plc{var-name}.
|
||||
%
|
||||
|
||||
\chapter*{Introduction}
|
||||
\label{chap:introduction}
|
||||
This collection of programming examples supplements the OpenMP API for Shared
|
||||
Memory Parallelization specifications, and is not part of the formal specifications. It
|
||||
assumes familiarity with the OpenMP specifications, and shares the typographical
|
||||
conventions used in that document.
|
||||
|
||||
\notestart
|
||||
\noteheader – This first release of the OpenMP Examples reflects the OpenMP Version 4.0
|
||||
specifications. Additional examples are being developed and will be published in future
|
||||
releases of this document.
|
||||
\noteend
|
||||
|
||||
The OpenMP API specification provides a model for parallel programming that is
|
||||
portable across shared memory architectures from different vendors. Compilers from
|
||||
numerous vendors support the OpenMP API.
|
||||
|
||||
The directives, library routines, and environment variables demonstrated in this
|
||||
document allow users to create and manage parallel programs while permitting
|
||||
portability. The directives extend the C, C++ and Fortran base languages with single
|
||||
program multiple data (SPMD) constructs, tasking constructs, device constructs,
|
||||
worksharing constructs, and synchronization constructs, and they provide support for
|
||||
sharing and privatizing data. The functionality to control the runtime environment is
|
||||
provided by library routines and environment variables. Compilers that support the
|
||||
OpenMP API often include a command line option to the compiler that activates and
|
||||
allows interpretation of all OpenMP directives.
|
||||
|
||||
Complete information about the OpenMP API and a list of the compilers that support
|
||||
the OpenMP API can be found at the OpenMP.org web site
|
||||
|
||||
\code{http://www.openmp.org}
|
||||
|
||||
|
||||
% This is the end of Introduction_Chapt.tex of the OpenMP Examples document.
|
||||
|
88
Makefile
Normal file
88
Makefile
Normal file
@ -0,0 +1,88 @@
|
||||
# Makefile for the OpenMP Examples document in LaTeX format.
|
||||
# For more information, see the master document, openmp-examples.tex.
|
||||
|
||||
version=4.0.1ltx
|
||||
default: openmp-examples.pdf
|
||||
|
||||
|
||||
CHAPTERS=Title_Page.tex \
|
||||
Introduction_Chapt.tex \
|
||||
Examples_Chapt.tex \
|
||||
Examples_ploop.tex \
|
||||
Examples_mem_model.tex \
|
||||
Examples_cond_comp.tex \
|
||||
Examples_icv.tex \
|
||||
Examples_parallel.tex \
|
||||
Examples_nthrs_nesting.tex \
|
||||
Examples_nthrs_dynamic.tex \
|
||||
Examples_affinity.tex \
|
||||
Examples_fort_do.tex \
|
||||
Examples_fort_loopvar.tex \
|
||||
Examples_nowait.tex \
|
||||
Examples_collapse.tex \
|
||||
Examples_psections.tex \
|
||||
Examples_fpriv_sections.tex \
|
||||
Examples_single.tex \
|
||||
Examples_tasking.tex \
|
||||
Examples_taskgroup.tex \
|
||||
Examples_taskyield.tex \
|
||||
Examples_workshare.tex \
|
||||
Examples_master.tex \
|
||||
Examples_critical.tex \
|
||||
Examples_worksharing_critical.tex \
|
||||
Examples_barrier_regions.tex \
|
||||
Examples_atomic.tex \
|
||||
Examples_atomic_restrict.tex \
|
||||
Examples_flush_nolist.tex \
|
||||
Examples_standalone.tex \
|
||||
Examples_ordered.tex \
|
||||
Examples_cancellation.tex \
|
||||
Examples_threadprivate.tex \
|
||||
Examples_pra_iterator.tex \
|
||||
Examples_fort_sp_common.tex \
|
||||
Examples_default_none.tex \
|
||||
Examples_fort_race.tex \
|
||||
Examples_private.tex \
|
||||
Examples_fort_sa_private.tex \
|
||||
Examples_carrays_fpriv.tex \
|
||||
Examples_lastprivate.tex \
|
||||
Examples_reduction.tex \
|
||||
Examples_copyin.tex \
|
||||
Examples_copyprivate.tex \
|
||||
Examples_nested_loop.tex \
|
||||
Examples_nesting_restrict.tex \
|
||||
Examples_set_dynamic_nthrs.tex \
|
||||
Examples_get_nthrs.tex \
|
||||
Examples_init_lock.tex \
|
||||
Examples_lock_owner.tex \
|
||||
Examples_simple_lock.tex \
|
||||
Examples_nestable_lock.tex \
|
||||
Examples_target.tex \
|
||||
Examples_target_data.tex \
|
||||
Examples_target_update.tex \
|
||||
Examples_declare_target.tex \
|
||||
Examples_teams.tex \
|
||||
Examples_async_target.tex \
|
||||
Examples_array_sections.tex \
|
||||
Examples_device.tex \
|
||||
Examples_associate.tex
|
||||
|
||||
INTERMEDIATE_FILES=openmp-examples.pdf \
|
||||
openmp-examples.toc \
|
||||
openmp-examples.idx \
|
||||
openmp-examples.aux \
|
||||
openmp-examples.ilg \
|
||||
openmp-examples.ind \
|
||||
openmp-examples.out \
|
||||
openmp-examples.log
|
||||
|
||||
openmp-examples.pdf: $(CHAPTERS) openmp.sty openmp-examples.tex openmp-logo.png
|
||||
rm -f $(INTERMEDIATE_FILES)
|
||||
pdflatex -interaction=batchmode -file-line-error openmp-examples.tex
|
||||
pdflatex -interaction=batchmode -file-line-error openmp-examples.tex
|
||||
pdflatex -interaction=batchmode -file-line-error openmp-examples.tex
|
||||
cp openmp-examples.pdf openmp-examples-${version}.pdf
|
||||
|
||||
# "default" and "clean" name actions, not files; declare them phony so that a
# stray file with either name cannot mask the rule.
.PHONY: default clean

# Remove every file listed in INTERMEDIATE_FILES (this includes
# openmp-examples.pdf itself).
clean:
	rm -f $(INTERMEDIATE_FILES)
|
||||
|
3
README
Normal file
3
README
Normal file
@ -0,0 +1,3 @@
|
||||
This is the OpenMP Examples document in LaTeX format.
|
||||
Please see the master file, openmp-examples.tex, for more information.
|
||||
|
42
Title_Page.tex
Normal file
42
Title_Page.tex
Normal file
@ -0,0 +1,42 @@
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Title page
|
||||
|
||||
\begin{titlepage}
|
||||
\begin{flushleft}
|
||||
\hspace{-6em} \includegraphics[width=0.4\textwidth]{openmp-logo.png}
|
||||
\end{flushleft}
|
||||
|
||||
\begin{adjustwidth}{-0.75in}{0in}
|
||||
\begin{center}
|
||||
\Huge
|
||||
\textsf{OpenMP\\Application Programming\\Interface}
|
||||
|
||||
% An optional subtitle can go here:
|
||||
\vspace{0.5in}\textsf{Examples}\vspace{-0.7in}
|
||||
\normalsize
|
||||
|
||||
\vspace{1.0in}
|
||||
|
||||
\textbf{Version 4.0.1.ltx -- February, 2014}
|
||||
\end{center}
|
||||
\end{adjustwidth}
|
||||
|
||||
\vspace{3.0in}
|
||||
|
||||
\begin{adjustwidth}{0pt}{1em}\setlength{\parskip}{0.25\baselineskip}%
|
||||
Copyright © 1997-2014 OpenMP Architecture Review Board.\\
|
||||
Permission to copy without fee all or part of this material is granted,
|
||||
provided the OpenMP Architecture Review Board copyright notice and
|
||||
the title of this document appear. Notice is given that copying is by
|
||||
permission of OpenMP Architecture Review Board.\end{adjustwidth}
|
||||
|
||||
\end{titlepage}
|
||||
|
||||
% Blank page
|
||||
|
||||
\clearpage
|
||||
\thispagestyle{empty}
|
||||
\phantom{a}
|
||||
\emph{This page intentionally left blank}
|
||||
\vfill
|
||||
|
BIN
figs/proc_bind_fig.pdf
Normal file
BIN
figs/proc_bind_fig.pdf
Normal file
Binary file not shown.
BIN
figs/proc_bind_fig.png
Normal file
BIN
figs/proc_bind_fig.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 13 KiB |
137
openmp-examples.tex
Normal file
137
openmp-examples.tex
Normal file
@ -0,0 +1,137 @@
|
||||
% Welcome to openmp-examples.tex.
|
||||
% This is the master LaTeX file for the OpenMP Examples document.
|
||||
%
|
||||
% The files in this set include:
|
||||
%
|
||||
% openmp-examples.tex - this file, the master file
|
||||
% Makefile - makes the document
|
||||
% openmp.sty - the main style file
|
||||
% Title_Page.tex - the title page
|
||||
% openmp-logo.png - the logo
|
||||
% Introduction_Chapt.tex - unnumbered introductory chapter
|
||||
% Examples_Chapt.tex - unnumbered chapter
|
||||
% Examples_*.tex - examples, one file per chapter (see CHAPTERS in the Makefile)
|
||||
% sources/*.c, *.f - C/C++/Fortran example source files
|
||||
%
|
||||
% When editing this file:
|
||||
%
|
||||
% 1. To change formatting, appearance, or style, please edit openmp.sty.
|
||||
%
|
||||
% 2. Custom commands and macros are defined in openmp.sty.
|
||||
%
|
||||
% 3. Be kind to other editors -- keep a consistent style by copying-and-pasting to
|
||||
% create new content.
|
||||
%
|
||||
% 4. We use semantic markup, e.g. (see openmp.sty for a full list):
|
||||
% \code{} % for bold monospace keywords, code, operators, etc.
|
||||
% \plc{} % for italic placeholder names, grammar, etc.
|
||||
%
|
||||
% 5. Other recommendations:
|
||||
% Use the convenience macros defined in openmp.sty for the minor headers
|
||||
% such as Comments, Syntax, etc.
|
||||
%
|
||||
% To keep items together on the same page, prefer the use of
|
||||
% \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering.
|
||||
% When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's
|
||||
% what you mean. Use \needspace{} cautiously for troublesome paragraphs.
|
||||
%
|
||||
% Avoid absolute lengths and measures in this file; use relative units when possible.
|
||||
% Vertical space can be relative to \baselineskip or ex units. Horizontal space
|
||||
% can be relative to \linewidth or em units.
|
||||
%
|
||||
% Prefer \emph{} to italicize terminology, e.g.:
|
||||
% This is a \emph{definition}, not a placeholder.
|
||||
% This is a \plc{var-name}.
|
||||
%
|
||||
|
||||
% The following says letter size, but the style sheet may change the size
|
||||
\documentclass[10pt,letterpaper,twoside,makeidx,hidelinks]{scrreprt}
|
||||
|
||||
% Text to appear in the footer on even-numbered pages:
|
||||
\newcommand{\footerText}{OpenMP Examples Version 4.0.1 - February 2014}
|
||||
|
||||
% Unified style sheet for OpenMP documents:
|
||||
\input{openmp.sty}
|
||||
|
||||
|
||||
\begin{document}
|
||||
\pagenumbering{roman}
|
||||
\input{Title_Page}
|
||||
|
||||
\setcounter{page}{0}
|
||||
\setcounter{tocdepth}{2}
|
||||
|
||||
\begin{spacing}{1.3}
|
||||
\tableofcontents
|
||||
\end{spacing}
|
||||
|
||||
% Line numbering on the main body text is enabled here; comment out the next line to disable it:
|
||||
\linenumbers\pagewiselinenumbers
|
||||
|
||||
\newpage\pagenumbering{arabic}
|
||||
|
||||
\input{Introduction_Chapt}
|
||||
\input{Examples_Chapt}
|
||||
|
||||
\setcounter{chapter}{0} % start chapter numbering here
|
||||
|
||||
\input{Examples_ploop}
|
||||
\input{Examples_mem_model}
|
||||
\input{Examples_cond_comp}
|
||||
\input{Examples_icv}
|
||||
\input{Examples_parallel}
|
||||
\input{Examples_nthrs_nesting}
|
||||
\input{Examples_nthrs_dynamic}
|
||||
\input{Examples_affinity}
|
||||
\input{Examples_fort_do}
|
||||
\input{Examples_fort_loopvar}
|
||||
\input{Examples_nowait}
|
||||
\input{Examples_collapse}
|
||||
\input{Examples_psections}
|
||||
\input{Examples_fpriv_sections}
|
||||
\input{Examples_single}
|
||||
\input{Examples_tasking}
|
||||
\input{Examples_taskgroup}
|
||||
\input{Examples_taskyield}
|
||||
\input{Examples_workshare}
|
||||
\input{Examples_master}
|
||||
\input{Examples_critical}
|
||||
\input{Examples_worksharing_critical}
|
||||
\input{Examples_barrier_regions}
|
||||
\input{Examples_atomic}
|
||||
\input{Examples_atomic_restrict}
|
||||
\input{Examples_flush_nolist}
|
||||
\input{Examples_standalone}
|
||||
\input{Examples_ordered}
|
||||
\input{Examples_cancellation}
|
||||
\input{Examples_threadprivate}
|
||||
\input{Examples_pra_iterator}
|
||||
\input{Examples_fort_sp_common}
|
||||
\input{Examples_default_none}
|
||||
\input{Examples_fort_race}
|
||||
\input{Examples_private}
|
||||
\input{Examples_fort_sa_private}
|
||||
\input{Examples_carrays_fpriv}
|
||||
\input{Examples_lastprivate}
|
||||
\input{Examples_reduction}
|
||||
\input{Examples_copyin}
|
||||
\input{Examples_copyprivate}
|
||||
\input{Examples_nested_loop}
|
||||
\input{Examples_nesting_restrict}
|
||||
\input{Examples_set_dynamic_nthrs}
|
||||
\input{Examples_get_nthrs}
|
||||
\input{Examples_init_lock}
|
||||
\input{Examples_lock_owner}
|
||||
\input{Examples_simple_lock}
|
||||
\input{Examples_nestable_lock}
|
||||
\input{Examples_target}
|
||||
\input{Examples_target_data}
|
||||
\input{Examples_target_update}
|
||||
\input{Examples_declare_target}
|
||||
\input{Examples_teams}
|
||||
\input{Examples_async_target}
|
||||
\input{Examples_array_sections}
|
||||
\input{Examples_device}
|
||||
\input{Examples_associate}
|
||||
\end{document}
|
||||
|
BIN
openmp-logo.png
Normal file
BIN
openmp-logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.2 KiB |
484
openmp.sty
Normal file
484
openmp.sty
Normal file
@ -0,0 +1,484 @@
|
||||
% This is openmp.sty, the preamble and style definitions for the OpenMP specification.
|
||||
% This is an include file. Please see the master file for more information.
|
||||
%
|
||||
% When editing this file:
|
||||
%
|
||||
% 1. To change formatting, appearance, or style, please edit openmp.sty.
|
||||
%
|
||||
% 2. Custom commands and macros are defined in openmp.sty.
|
||||
%
|
||||
% 3. Be kind to other editors -- keep a consistent style by copying-and-pasting to
|
||||
% create new content.
|
||||
%
|
||||
% 4. We use semantic markup, e.g. (see openmp.sty for a full list):
|
||||
% \code{} % for bold monospace keywords, code, operators, etc.
|
||||
% \plc{} % for italic placeholder names, grammar, etc.
|
||||
%
|
||||
% 5. Other recommendations:
|
||||
% Use the convenience macros defined in openmp.sty for the minor headers
|
||||
% such as Comments, Syntax, etc.
|
||||
%
|
||||
% To keep items together on the same page, prefer the use of
|
||||
% \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering.
|
||||
% When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's
|
||||
% what you mean. Use \needspace{} cautiously for troublesome paragraphs.
|
||||
%
|
||||
% Avoid absolute lengths and measures in this file; use relative units when possible.
|
||||
% Vertical space can be relative to \baselineskip or ex units. Horizontal space
|
||||
% can be relative to \linewidth or em units.
|
||||
%
|
||||
% Prefer \emph{} to italicize terminology, e.g.:
|
||||
% This is a \emph{definition}, not a placeholder.
|
||||
% This is a \plc{var-name}.
|
||||
%
|
||||
% Quick list of the environments, commands and macros supported. Search below for more details.
|
||||
%
|
||||
% \binding % makes header of the same name
|
||||
% \comments
|
||||
% \constraints
|
||||
% \crossreferences
|
||||
% \descr
|
||||
% \effect
|
||||
% \format
|
||||
% \restrictions
|
||||
% \summary
|
||||
% \syntax
|
||||
%
|
||||
% \code{} % monospace, bold
|
||||
% \plc{} % for any kind of placeholder: italic
|
||||
% \begin{codepar} % for blocks of verbatim code: monospace, bold
|
||||
% \begin{boxedcode} % outlined verbatim code for syntax definitions, prototypes, etc.
|
||||
% \begin{indentedcodelist} % used with, e.g., "where clause is one of the following:"
|
||||
%
|
||||
% \specref{} % formats the cross-reference "Section X on page Y"
|
||||
%
|
||||
% \notestart % black horizontal rule for Notes
|
||||
% \noteend
|
||||
%
|
||||
% \cspecificstart % blue horizontal rule for C-specific text
|
||||
% \cspecificend
|
||||
%
|
||||
% \cppspecificstart % blue horizontal rule for C++ -specific text
|
||||
% \cppspecificend
|
||||
%
|
||||
% \ccppspecificstart % blue horizontal rule for C / C++ -specific text
|
||||
% \ccppspecificend
|
||||
%
|
||||
% \fortranspecificstart % blue horizontal rule for Fortran-specific text
|
||||
% \fortranspecificend
|
||||
%
|
||||
% \glossaryterm % for use in formatting glossary entries
|
||||
% \glossarydefstart
|
||||
% \glossarydefend
|
||||
%
|
||||
% \compactitem % single-spaced itemized lists for the Examples doc
|
||||
% \cexample % C/C++ code example for the Examples doc
|
||||
% \fexample % Fortran code example for the Examples doc
|
||||
|
||||
|
||||
\usepackage{comment} % allow use of \begin{comment}
|
||||
\usepackage{ifpdf,ifthen} % allow conditional tests in LaTeX definitions
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Document data
|
||||
%
|
||||
\author{}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Fonts
|
||||
|
||||
\usepackage{amsmath}
|
||||
\usepackage{amsfonts}
|
||||
\usepackage{amssymb}
|
||||
\usepackage{courier}
|
||||
\usepackage{helvet}
|
||||
\usepackage[utf8]{inputenc}
|
||||
|
||||
% Main body serif font:
|
||||
\usepackage{tgtermes}
|
||||
\usepackage[T1]{fontenc}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Graphic elements
|
||||
|
||||
\usepackage{graphicx}
|
||||
\usepackage{framed} % for making boxes with \begin{framed}
|
||||
\usepackage{tikz} % for flow charts, diagrams, arrows
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Page formatting
|
||||
|
||||
\usepackage[paperwidth=7.5in, paperheight=9in,
|
||||
top=0.75in, bottom=1.0in, left=1.4in, right=0.6in]{geometry}
|
||||
|
||||
\usepackage{changepage} % allows left/right-page margin readjustments
|
||||
|
||||
\setlength{\oddsidemargin}{0.45in}
|
||||
\setlength{\evensidemargin}{0.185in}
|
||||
\raggedbottom
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Paragraph formatting
|
||||
|
||||
\usepackage{setspace} % allows use of \singlespacing, \onehalfspacing
|
||||
\usepackage{needspace} % allows use of \needspace to keep lines together
|
||||
\usepackage{parskip} % removes paragraph indenting
|
||||
|
||||
\raggedright
|
||||
\usepackage[raggedrightboxes]{ragged2e} % is this needed?
|
||||
|
||||
\lefthyphenmin=60 % only hyphenate if the left part is >= this many chars
|
||||
\righthyphenmin=60 % only hyphenate if the right part is >= this many chars
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Bulleted (itemized) lists
|
||||
% Align bullets with section header
|
||||
% Align text left
|
||||
% Small bullets
|
||||
% \compactitem for single-spaced lists (used in the Examples doc)
|
||||
|
||||
\usepackage{enumitem} % for setting margins on lists
|
||||
\setlist{leftmargin=*} % don't indent bullet items
|
||||
\renewcommand{\labelitemi}{{\normalsize$\bullet$}} % bullet size
|
||||
|
||||
% There is a \compactitem defined in package parlist (and perhaps others), however,
|
||||
% we'll define our own version of compactitem in terms of package enumitem that
|
||||
% we already use:
|
||||
\newenvironment{compactitem}
|
||||
{\begin{itemize}[itemsep=-1.2ex]}
|
||||
{\end{itemize}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Tables
|
||||
|
||||
% This allows tables to flow across page breaks, headers on each new page, etc.
|
||||
\usepackage{supertabular}
|
||||
\usepackage{caption}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Line numbering
|
||||
|
||||
\usepackage[pagewise]{lineno} % for line numbers on left side of the page
|
||||
\pagewiselinenumbers
|
||||
\setlength\linenumbersep{6em}
|
||||
\renewcommand\linenumberfont{\normalfont\small\sffamily}
|
||||
\nolinenumbers % start with line numbers off
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Footers
|
||||
|
||||
\usepackage{fancyhdr} % makes right/left footers
|
||||
\pagestyle{fancy}
|
||||
\fancyhead{} % clear all header fields
|
||||
\cfoot{}
|
||||
\renewcommand{\headrulewidth}{0pt}
|
||||
|
||||
% Left side on even pages:
|
||||
% This requires that \footerText be defined in the master document:
|
||||
\fancyfoot[LE]{\bfseries \thepage \mdseries \hspace{2em} \footerText}
|
||||
\fancyhfoffset[E]{4em}
|
||||
|
||||
% Right side on odd pages:
|
||||
\fancyfoot[RO]{\mdseries \leftmark \hspace{2em} \bfseries \thepage}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Section header format - we use four levels: \chapter \section \subsection \subsubsection.
|
||||
|
||||
\usepackage{titlesec} % format headers with \titleformat{}
|
||||
|
||||
% Format and spacing for chapter, section, subsection, and subsubsection headers:
|
||||
|
||||
\setcounter{secnumdepth}{4} % show numbers down to subsubsection level
|
||||
|
||||
\titleformat{\chapter}[display]%
|
||||
{\normalfont\sffamily\upshape\Huge\bfseries\fontsize{20}{20}\selectfont}%
|
||||
{\normalfont\sffamily\scshape\large\bfseries \hspace{-0.7in} \MakeUppercase%
|
||||
{\chaptertitlename} \thechapter}%
|
||||
{0.8in}{}[\vspace{2ex}\hrule]
|
||||
\titlespacing{\chapter}{0ex}{0em plus 1em minus 1em}{3em plus 1em minus 1em}[10em]
|
||||
|
||||
\titleformat{\section}[hang]{\huge\bfseries\sffamily\fontsize{16}{16}\selectfont}{\thesection}{1.0em}{}
|
||||
\titlespacing{\section}{-5em}{5em plus 1em minus 1em}{1em plus 0.5em minus 0em}[10em]
|
||||
|
||||
\titleformat{\subsection}[hang]{\LARGE\bfseries\sffamily\fontsize{14}{14}\selectfont}{\thesubsection}{1.0em}{}
|
||||
\titlespacing{\subsection}{-5em}{4em plus 1em minus 2.0em}{0.75em plus 0.5em minus 0em}[10em]
|
||||
|
||||
\titleformat{\subsubsection}[hang]{\needspace{1\baselineskip}%
|
||||
\Large\bfseries\sffamily\fontsize{12}{12}\selectfont}{\thesubsubsection}{1.0em}{}
|
||||
\titlespacing{\subsubsection}{-5em}{3em plus 1em minus 1em}{0.5em plus 0.5em minus 0em}[10em]
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Macros for minor headers: Summary, Syntax, Description, etc.
|
||||
% These headers are defined in terms of \paragraph
|
||||
|
||||
\titleformat{\paragraph}[block]{\large\bfseries\sffamily\fontsize{11}{11}\selectfont}{}{}{}
|
||||
\titlespacing{\paragraph}{0em}{1.5em plus 0.55em minus 0.5em}{0.0em plus 0.55em minus 0.0em}
|
||||
|
||||
% Use one of the convenience macros below, or \littleheader{} for an arbitrary header
|
||||
\newcommand{\littleheader}[1] {\paragraph*{#1}}
|
||||
|
||||
\newcommand{\binding} {\littleheader{Binding}}
|
||||
\newcommand{\comments} {\littleheader{Comments}}
|
||||
\newcommand{\constraints} {\littleheader{Constraints on Arguments}}
|
||||
\newcommand{\crossreferences} {\littleheader{Cross References}}
|
||||
\newcommand{\descr} {\littleheader{Description}}
|
||||
\newcommand{\effect} {\littleheader{Effect}}
|
||||
\newcommand{\format} {\littleheader{Format}}
|
||||
\newcommand{\restrictions} {\littleheader{Restrictions}}
|
||||
\newcommand{\summary} {\littleheader{Summary}}
|
||||
\newcommand{\syntax} {\littleheader{Syntax}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Code and placeholder semantic tagging.
|
||||
%
|
||||
% When possible, prefer semantic tags instead of typographic tags. The
|
||||
% following semantics tags are defined here:
|
||||
%
|
||||
% \code{} % for bold monospace keywords, code, operators, etc.
|
||||
% \plc{} % for italic placeholder names, grammar, etc.
|
||||
%
|
||||
% For function prototypes or other code snippets, you can use \code{} as
|
||||
% the outer wrapper, and use \plc{} inside. Example:
|
||||
%
|
||||
% \code{\#pragma omp directive ( \plc{some-placeholder-identifier} :}
|
||||
%
|
||||
% To format text in italics for emphasis (rather than text as a placeholder),
|
||||
% use the generic \emph{} command. Example:
|
||||
%
|
||||
% This sentence \emph{emphasizes some non-placeholder words}.
|
||||
|
||||
% Enable \alltt{} for formatting blocks of code:
|
||||
\usepackage{alltt}
|
||||
|
||||
% This sets the default \code{} font to tt (monospace) and bold:
|
||||
\newcommand{\code}[1]{{\texttt{\textbf{#1}}}}
|
||||
\newcommand{\nspace}[1]{{\textrm{\textmd{ }}}}
|
||||
|
||||
% This defines the \plc{} placeholder font to be roman (serif), medium weight, italic:
|
||||
\newcommand{\plc}[1] {{\textrm{\textmd{\itshape{#1}}}}}
|
||||
|
||||
% Environment for a paragraph of literal code, single-spaced, no outline, no indenting:
|
||||
\newenvironment{codepar}[1]
|
||||
{\begin{alltt}\bfseries #1}
|
||||
{\end{alltt}}
|
||||
|
||||
% For blocks of code inside a box frame:
|
||||
\newenvironment{boxedcode}[1]
|
||||
{\vspace{0.25em plus 5em minus 0.25em}\begin{framed}\begin{minipage}[t]{\textwidth}\begin{alltt}\bfseries #1}
|
||||
{\end{alltt}\end{minipage}\end{framed}\vspace{0.25em plus 5em minus 0.25em}}
|
||||
|
||||
% This sets the margins in the framed box:
|
||||
\setlength{\FrameSep}{0.6em}
|
||||
|
||||
% For indented lists of verbatim code at a relaxed line spacing,
|
||||
% e.g., for use after "where clause is one of the following:"
|
||||
\usepackage{setspace}
|
||||
\newenvironment{indentedcodelist}{%
|
||||
\begin{adjustwidth}{0.25in}{}\begin{spacing}{1.5}\begin{alltt}\bfseries}
|
||||
{\end{alltt}\end{spacing}\vspace{-0.25\baselineskip}\end{adjustwidth}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Macros for the black and blue lines and arrows delineating language-specific
|
||||
% and notes sections. Example:
|
||||
%
|
||||
% \fortranspecificstart
|
||||
% This is text that applies to Fortran.
|
||||
% \fortranspecificend
|
||||
|
||||
% local parameters for use by \linewitharrows and \notelinewitharrows:
|
||||
\newlength{\sbsz}\setlength{\sbsz}{0.05in} % size of arrows
|
||||
\newlength{\sblw}\setlength{\sblw}{1.35pt} % line width (thickness)
|
||||
\newlength{\sbtw} % text width
|
||||
\newlength{\sblen} % total width of horizontal rule
|
||||
\newlength{\sbht} % height of arrows
|
||||
\newlength{\sbhadj} % vertical adjustment for aligning arrows with the line
|
||||
\newlength{\sbns}\setlength{\sbns}{7\baselineskip} % arg for \needspace for downward arrows
|
||||
|
||||
% \notelinewitharrows is a helper command that makes a black Note marker:
|
||||
% arg 1 = 1 or -1 for up or down arrows
|
||||
% arg 2 = solid or dashed or loosely dashed, etc.
|
||||
\newcommand{\notelinewitharrows}[2]{%
|
||||
\needspace{0.1\baselineskip}%
|
||||
\vbox{\begin{tikzpicture}%
|
||||
\setlength{\sblen}{\linewidth}%
|
||||
\setlength{\sbht}{#1\sbsz}\setlength{\sbht}{1.4\sbht}%
|
||||
\setlength{\sbhadj}{#1\sblw}\setlength{\sbhadj}{0.25\sbhadj}%
|
||||
\filldraw (\sblen, 0) -- (\sblen - \sbsz, \sbht) -- (\sblen - 2\sbsz, 0) -- (\sblen, 0);
|
||||
\draw[line width=\sblw, #2] (2\sbsz - \sblw, \sbhadj) -- (\sblen - 2\sbsz + \sblw, \sbhadj);
|
||||
\filldraw (0, 0) -- (\sbsz, \sbht) -- (0 + 2\sbsz, 0) -- (0, 0);
|
||||
\end{tikzpicture}}}
|
||||
|
||||
% \linewitharrows is a helper command that makes a blue horizontal line, up or down arrows, and some text:
|
||||
% arg 1 = 1 or -1 for up or down arrows
|
||||
% arg 2 = solid or dashed or loosely dashed, etc.
|
||||
% arg 3 = text
|
||||
% arg 4 = text width
|
||||
\newcommand{\linewitharrows}[4]{%
|
||||
\needspace{0.1\baselineskip}%
|
||||
\vbox to 1\baselineskip {\begin{tikzpicture}%
|
||||
\setlength{\sbtw}{#4}%
|
||||
\setlength{\sblen}{\linewidth}%
|
||||
\setlength{\sbht}{#1\sbsz}\setlength{\sbht}{1.4\sbht}%
|
||||
\setlength{\sbhadj}{#1\sblw}\setlength{\sbhadj}{0.25\sbhadj}%
|
||||
\filldraw[color=blue!40] (\sblen, 0) -- (\sblen - \sbsz, \sbht) -- (\sblen - 2\sbsz, 0) -- (\sblen, 0);
|
||||
\draw[line width=\sblw, color=blue!40, #2] (2\sbsz - \sblw, \sbhadj) -- (0.5\sblen - 0.5\sbtw, \sbhadj);
|
||||
\draw[line width=\sblw, color=blue!40, #2] (0.5\sblen + 0.5\sbtw, \sbhadj) -- (\sblen - 2\sbsz + \sblw, \sbhadj);
|
||||
\filldraw[color=blue!40] (0, 0) -- (\sbsz, \sbht) -- (0 + 2\sbsz, 0) -- (0, 0);
|
||||
\node[color=blue!90] at (0.5\sblen, 0) {\large \textsf{\textup{#3}}};
|
||||
\end{tikzpicture}}}
|
||||
|
||||
\newcommand{\VSPb}{\vspace{0.5ex plus 5ex minus 0.25ex}}
|
||||
\newcommand{\VSPa}{\vspace{0.25ex plus 5ex minus 0.25ex}}
|
||||
|
||||
% C
|
||||
\newcommand{\cspecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C}{3em}}
|
||||
\newcommand{\cspecificend}{\linewitharrows{1}{solid}{C}{3em}\bigskip}
|
||||
|
||||
% C/C++
|
||||
\newcommand{\ccppspecificstart}{\VSPb\linewitharrows{-1}{solid}{C / C++}{6em}\VSPa}
|
||||
\newcommand{\ccppspecificend}{\VSPb\linewitharrows{1}{solid}{C / C++}{6em}\VSPa}
|
||||
|
||||
% C++
|
||||
\newcommand{\cppspecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C++}{6em}}
|
||||
\newcommand{\cppspecificend}{\linewitharrows{1}{solid}{C++}{6em}\bigskip}
|
||||
|
||||
% C90
|
||||
\newcommand{\cNinetyspecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C90}{4em}}
|
||||
\newcommand{\cNinetyspecificend}{\linewitharrows{1}{solid}{C90}{4em}\bigskip}
|
||||
|
||||
% C99
|
||||
\newcommand{\cNinetyNinespecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C99}{4em}}
|
||||
\newcommand{\cNinetyNinespecificend}{\linewitharrows{1}{solid}{C99}{4em}\bigskip}
|
||||
|
||||
% Fortran
|
||||
\newcommand{\fortranspecificstart}{\VSPb\linewitharrows{-1}{solid}{Fortran}{6em}\VSPa}
|
||||
\newcommand{\fortranspecificend}{\VSPb\linewitharrows{1}{solid}{Fortran}{6em}\VSPa}
|
||||
|
||||
% Note
|
||||
\newcommand{\notestart}{\VSPb\notelinewitharrows{-1}{solid}\VSPa}
|
||||
\newcommand{\noteend}{\VSPb\notelinewitharrows{1}{solid}\VSPa}
|
||||
|
||||
% convenience macro for formatting the word "Note:" at the beginning of note blocks:
|
||||
\newcommand{\noteheader}{{\textrm{\textsf{\textbf\textup\normalsize{{{{Note }}}}}}}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Glossary formatting
|
||||
|
||||
\newcommand{\glossaryterm}[1]{\needspace{1ex}
|
||||
\begin{adjustwidth}{-0.75in}{0.0in}
|
||||
\nolinenumbers\parbox[b][-0.95\baselineskip][t]{1.4in}{\flushright \textbf{#1}}
|
||||
\end{adjustwidth}\linenumbers}
|
||||
|
||||
\newcommand{\glossarydefstart}{
|
||||
\begin{adjustwidth}{0.79in}{0.0in}}
|
||||
|
||||
\newcommand{\glossarydefend}{
|
||||
\end{adjustwidth}\vspace{-1.5\baselineskip}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Indexing and Table of Contents
|
||||
|
||||
\usepackage{makeidx}
|
||||
\usepackage[nodotinlabels]{titletoc} % required for its [nodotinlabels] option
|
||||
|
||||
% Clickable links in TOC and index:
|
||||
\usepackage[hyperindex=true,linktocpage=true]{hyperref}
|
||||
\hypersetup{
|
||||
colorlinks = true, % Colors links instead of red boxes
|
||||
urlcolor = blue, % Color for external links
|
||||
linkcolor = blue % Color for internal links
|
||||
}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Formats a cross reference label as "Section X on page Y".
|
||||
|
||||
\newcommand{\specref}[1]{Section~\ref{#1} on page~\pageref{#1}}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Code example formatting for the Examples document
|
||||
% This defines:
|
||||
% \cexample formats blue markers, caption, and code for C/C++ examples
|
||||
% \fexample formats blue markers, caption, and code for Fortran examples
|
||||
% Thanks to Jin, Haoqiang H. for the original definitions of the following:
|
||||
|
||||
\usepackage{color,fancyvrb} % for \VerbatimInput
|
||||
\usepackage{toolbox} % for \toolboxMakeSplit
|
||||
|
||||
\renewcommand\theFancyVerbLine{\normalfont\footnotesize\sffamily S-\arabic{FancyVerbLine}}
|
||||
|
||||
\newcommand{\myreplace}[3]{\bgroup\toolboxMakeSplit*{#1}{DoSplit}%
|
||||
\long\def\DoReplace##1{\DoSplit{##1}\lefttext\righttext
|
||||
\lefttext
|
||||
\toolboxIfElse{\ifx\righttext\undefined}{}%
|
||||
{#2\expandafter\DoReplace\expandafter{\righttext}}}%
|
||||
\DoReplace{#3}\egroup}
|
||||
|
||||
\newcommand{\escstr}[1]{\myreplace{_}{\_}{#1}}
|
||||
|
||||
% \exampleheader{<mnemonic>}{<tag>} prints the italic "Example ..." caption and
% defines two names used by the example macros:
%   \cname - base name of the source file (sources/Example_\cname.<ext>)
%   \ename - label printed in the caption
% With an empty first argument both names are just <tag>.
\def\exampleheader#1#2{%
   \ifthenelse{ \equal{#1}{} }{
      \def\cname{#2}
      % The braces are required: "\def\ename\cname" would make TeX read \cname
      % and the tokens after it as delimited parameter text, not as the body,
      % and the caption below would be swallowed.
      \def\ename{\cname}
   }{
      \def\cname{#1.#2}
% Use following line for old numbering
      \def\ename{\thechapter.#2}
% Use following for mnemonics
%      \def\ename{\escstr{#1}.#2}
   }
   \noindent
   \textit{Example \ename}
   %\vspace*{-3mm}
}
|
||||
|
||||
\def\cnexample#1#2{%
|
||||
\exampleheader{#1}{#2}
|
||||
\code{\VerbatimInput[numbers=left,numbersep=10ex,firstnumber=1,firstline=8,fontsize=\small]%
|
||||
%\code{\VerbatimInput[numbers=left,firstnumber=1,firstline=8,fontsize=\small]%
|
||||
%\code{\VerbatimInput[firstline=8,fontsize=\small]%
|
||||
{sources/Example_\cname.c}}
|
||||
}
|
||||
|
||||
\def\fnexample#1#2{%
|
||||
\exampleheader{#1}{#2}
|
||||
\code{\VerbatimInput[numbers=left,numbersep=10ex,firstnumber=1,firstline=6,fontsize=\small]%
|
||||
%\code{\VerbatimInput[numbers=left,firstnumber=1,firstline=6,fontsize=\small]%
|
||||
%\code{\VerbatimInput[firstline=6,fontsize=\small]%
|
||||
{sources/Example_\cname.f}}
|
||||
}
|
||||
|
||||
\newcommand\cexample[2]{%
|
||||
\needspace{5\baselineskip}\ccppspecificstart
|
||||
\cnexample{#1}{#2}
|
||||
\ccppspecificend
|
||||
}
|
||||
|
||||
\newcommand\fexample[2]{%
|
||||
\needspace{5\baselineskip}\fortranspecificstart
|
||||
\fnexample{#1}{#2}
|
||||
\fortranspecificend
|
||||
}
|
||||
|
||||
|
||||
% Set default fonts:
|
||||
\rmfamily\mdseries\upshape\normalsize
|
||||
|
||||
|
||||
% This is the end of openmp.sty of the OpenMP specification.
|
15
sources/Example_affinity.1c.c
Normal file
15
sources/Example_affinity.1c.c
Normal file
@ -0,0 +1,15 @@
|
||||
/*
|
||||
* @@name: affinity.1c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: yes
|
||||
* @@expect: success
|
||||
*/
|
||||
void work();
|
||||
/* Program entry point: runs work() in a parallel region that requests
 * 4 threads and the "spread" thread-affinity policy.  main is declared
 * int (not void), as standard hosted C requires, and returns 0. */
int main(void)
{
   #pragma omp parallel proc_bind(spread) num_threads(4)
   {
      work();
   }
   return 0;
}
|
10
sources/Example_affinity.1f.f
Normal file
10
sources/Example_affinity.1f.f
Normal file
@ -0,0 +1,10 @@
|
||||
! @@name: affinity.1f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: yes
! @@expect: success
      program example
! Each of the 4 requested threads calls WORK; the team is requested
! with the spread binding policy.
!$omp parallel proc_bind(spread) num_threads(4)
      call work()
!$omp end parallel
      end program example
|
15
sources/Example_affinity.2c.c
Normal file
15
sources/Example_affinity.2c.c
Normal file
@ -0,0 +1,15 @@
|
||||
/*
|
||||
* @@name: affinity.2c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
void work();

/* Calls work() from every thread of a 16-thread team requested with the
 * spread binding policy. */
void foo()
{
   #pragma omp parallel proc_bind(spread) num_threads(16)
      work();
}
|
10
sources/Example_affinity.2f.f
Normal file
10
sources/Example_affinity.2f.f
Normal file
@ -0,0 +1,10 @@
|
||||
! @@name: affinity.2f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
subroutine foo()
   ! Calls work from every thread of a 16-thread team requested with
   ! the spread binding policy.
   !$omp parallel proc_bind(spread) num_threads(16)
      call work()
   !$omp end parallel
end subroutine foo
|
15
sources/Example_affinity.3c.c
Normal file
15
sources/Example_affinity.3c.c
Normal file
@ -0,0 +1,15 @@
|
||||
/*
|
||||
* @@name: affinity.3c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: yes
|
||||
* @@expect: success
|
||||
*/
|
||||
void work();

/* Runs work() on a team of 4 threads requested with the close binding
 * policy.  main returns int, as the C standard requires for a hosted
 * program, and reports success with 0. */
int main(void)
{
   #pragma omp parallel proc_bind(close) num_threads(4)
   {
      work();
   }
   return 0;
}
|
10
sources/Example_affinity.3f.f
Normal file
10
sources/Example_affinity.3f.f
Normal file
@ -0,0 +1,10 @@
|
||||
! @@name: affinity.3f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: yes
! @@expect: success
      program example
! Each of the 4 requested threads calls WORK; the team is requested
! with the close binding policy.
!$omp parallel proc_bind(close) num_threads(4)
      call work()
!$omp end parallel
      end program example
|
15
sources/Example_affinity.4c.c
Normal file
15
sources/Example_affinity.4c.c
Normal file
@ -0,0 +1,15 @@
|
||||
/*
|
||||
* @@name: affinity.4c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
void work();

/* Calls work() from every thread of a 16-thread team requested with the
 * close binding policy. */
void foo()
{
   #pragma omp parallel proc_bind(close) num_threads(16)
      work();
}
|
10
sources/Example_affinity.4f.f
Normal file
10
sources/Example_affinity.4f.f
Normal file
@ -0,0 +1,10 @@
|
||||
! @@name: affinity.4f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
subroutine foo()
   ! Calls work from every thread of a 16-thread team requested with
   ! the close binding policy.
   !$omp parallel proc_bind(close) num_threads(16)
      call work()
   !$omp end parallel
end subroutine foo
|
15
sources/Example_affinity.5c.c
Normal file
15
sources/Example_affinity.5c.c
Normal file
@ -0,0 +1,15 @@
|
||||
/*
|
||||
* @@name: affinity.5c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: yes
|
||||
* @@expect: success
|
||||
*/
|
||||
void work();

/* Runs work() on a team of 4 threads requested with the master binding
 * policy.  main returns int, as the C standard requires for a hosted
 * program, and reports success with 0. */
int main(void)
{
   #pragma omp parallel proc_bind(master) num_threads(4)
   {
      work();
   }
   return 0;
}
|
10
sources/Example_affinity.5f.f
Normal file
10
sources/Example_affinity.5f.f
Normal file
@ -0,0 +1,10 @@
|
||||
! @@name: affinity.5f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: yes
! @@expect: success
      program example
! Each of the 4 requested threads calls WORK; the team is requested
! with the master binding policy.
!$omp parallel proc_bind(master) num_threads(4)
      call work()
!$omp end parallel
      end program example
|
19
sources/Example_array_sections.1c.c
Normal file
19
sources/Example_array_sections.1c.c
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
* @@name: array_sections.1c
|
||||
* @@type: C
|
||||
* @@compilable: no
|
||||
* @@linkable: no
|
||||
* @@expect: failure
|
||||
*/
|
||||
void foo ()
{
   int A[30];

   /* Outer region maps the first four elements of A. */
   #pragma omp target data map( A[0:4] )
   {
      /* Cannot map distinct parts of the same array: the inner section
       * A[7:20] is a different part of A than the enclosing A[0:4]. */
      #pragma omp target map( A[7:20] )
         A[2] = 0;
   }
}
|
15
sources/Example_array_sections.1f.f
Normal file
15
sources/Example_array_sections.1f.f
Normal file
@ -0,0 +1,15 @@
|
||||
! @@name: array_sections.1f
|
||||
! @@type: F-free
|
||||
! @@compilable: no
|
||||
! @@linkable: no
|
||||
! @@expect: failure
|
||||
subroutine foo()
   ! Example flagged "expect: failure": the two map clauses name
   ! distinct parts of the same array.  The target region is closed
   ! with "end target" (the end directive takes no map clause), so the
   ! only non-conformance left is the one the example illustrates.
   integer :: A(30)
   A = 1
   !$omp target data map( A(1:4) )
      ! Cannot map distinct parts of the same array
      !$omp target map( A(8:27) )
         A(3) = 0
      !$omp end target
   !$omp end target data
end subroutine foo
|
23
sources/Example_array_sections.2c.c
Normal file
23
sources/Example_array_sections.2c.c
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* @@name: array_sections.2c
|
||||
* @@type: C
|
||||
* @@compilable: no
|
||||
* @@linkable: no
|
||||
* @@expect: failure
|
||||
*/
|
||||
void foo ()
{
   int A[30];
   int *p;

   #pragma omp target data map( A[0:4] )
   {
      p = A;   /* p aliases the start of A */

      /* invalid because p[3] and A[3] are the same
       * location on the host but the array section
       * specified via p[...] is not a subset of A[0:4] */
      #pragma omp target map( p[3:20] )
      {
         A[2] = 0;
         p[8] = 0;
      }
   }
}
|
20
sources/Example_array_sections.2f.f
Normal file
20
sources/Example_array_sections.2f.f
Normal file
@ -0,0 +1,20 @@
|
||||
! @@name: array_sections.2f
|
||||
! @@type: F-free
|
||||
! @@compilable: no
|
||||
! @@linkable: no
|
||||
! @@expect: failure
|
||||
subroutine foo()
   integer, target  :: A(30)
   integer, pointer :: p(:)

   A(:) = 1
   !$omp target data map( A(1:4) )
      p => A   ! p aliases the whole of A
      ! invalid because p(4) and A(4) are the same
      ! location on the host but the array section
      ! specified via p(...) is not a subset of A(1:4)
      !$omp target map( p(4:23) )
         A(3) = 0
         p(9) = 0
      !$omp end target
   !$omp end target data
end subroutine foo
|
20
sources/Example_array_sections.3c.c
Normal file
20
sources/Example_array_sections.3c.c
Normal file
@ -0,0 +1,20 @@
|
||||
/*
|
||||
* @@name: array_sections.3c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
void foo ()
{
   int A[30];
   int *p;

   /* Outer region maps A[0:4]; the inner region maps the section p[7:20]
    * through the alias p, then writes through both names. */
   #pragma omp target data map( A[0:4] )
   {
      p = A;
      #pragma omp target map( p[7:20] )
      {
         A[2] = 0;
         p[8] = 0;
      }
   }
}
|
16
sources/Example_array_sections.3f.f
Normal file
16
sources/Example_array_sections.3f.f
Normal file
@ -0,0 +1,16 @@
|
||||
! @@name: array_sections.3f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
subroutine foo()
   ! Outer region maps A(1:4); the inner region maps the section
   ! p(8:27) through the pointer alias p, then writes through both
   ! names.  The target region is closed with "end target": the end
   ! directive takes no map clause, and this file is marked compilable.
   integer,target  :: A(30)
   integer,pointer :: p(:)
   !$omp target data map( A(1:4) )
      p=>A
      !$omp target map( p(8:27) )
         A(3) = 0
         p(9) = 0
      !$omp end target
   !$omp end target data
end subroutine foo
|
21
sources/Example_array_sections.4c.c
Normal file
21
sources/Example_array_sections.4c.c
Normal file
@ -0,0 +1,21 @@
|
||||
/*
|
||||
* @@name: array_sections.4c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
void foo ()
{
   /* p is declared here: the body assigns and dereferences it, and the
    * file is marked compilable, so it cannot be left undeclared. */
   int A[30], *p;

   /* Outer region maps A[0:10]; the inner region maps p[3:7], which
    * lies inside that section when p points at A[0]. */
   #pragma omp target data map( A[0:10] )
   {
      p = &A[0];
      #pragma omp target map( p[3:7] )
      {
         A[2] = 0;
         p[8] = 0;
         A[8] = 1;
      }
   }
}
|
17
sources/Example_array_sections.4f.f
Normal file
17
sources/Example_array_sections.4f.f
Normal file
@ -0,0 +1,17 @@
|
||||
! @@name: array_sections.4f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
subroutine foo()
   integer, target  :: A(30)
   integer, pointer :: p(:)

   ! Outer region maps A(1:10); the inner region maps p(4:10) through
   ! the pointer alias p, then writes through both names.
   !$omp target data map( A(1:10) )
      p => A
      !$omp target map( p(4:10) )
         A(3) = 0
         p(9) = 0
         A(9) = 1
      !$omp end target
   !$omp end target data
end subroutine foo
|
13
sources/Example_associate.1f.f
Normal file
13
sources/Example_associate.1f.f
Normal file
@ -0,0 +1,13 @@
|
||||
! @@name: associate.1f
! @@type: F-fixed
! @@compilable: no
! @@linkable: no
! @@expect: failure
      program example
      real :: a
      real :: c
! b is an associate name for a inside the construct below.
      associate (b => a)
!$omp parallel private(b, c)        ! invalid to privatize b
      c = 2.0*b
!$omp end parallel
      end associate
      end program example
|
15
sources/Example_associate.2f.f
Normal file
15
sources/Example_associate.2f.f
Normal file
@ -0,0 +1,15 @@
|
||||
! @@name: associate.2f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: yes
! @@expect: success
      program example
      use omp_lib
      integer i
! Every thread stores its own thread number in its private copy of i
! and prints it through the associate name thread_id.
!$omp parallel private(i)
      i = omp_get_thread_num()
      associate (thread_id => i)
        print *, thread_id          ! print private i value
      end associate
!$omp end parallel
      end program example
|
16
sources/Example_associate.3f.f
Normal file
16
sources/Example_associate.3f.f
Normal file
@ -0,0 +1,16 @@
|
||||
! @@name: associate.3f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: yes
|
||||
! @@expect: success
|
||||
program example
   integer :: v

   v = 15
   ! u is associated with v before the parallel region privatizes v.
   associate (u => v)
      !$omp parallel private(v)
         v = -1
         print *, v               ! private v=-1
         print *, u               ! original v=15
      !$omp end parallel
   end associate
end program example
|
28
sources/Example_async_target.1c.c
Normal file
28
sources/Example_async_target.1c.c
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* @@name: async_target.1c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
#pragma omp declare target
|
||||
float F(float);
|
||||
#pragma omp end declare target
|
||||
#define N 1000000000
|
||||
#define CHUNKSZ 1000000
|
||||
void init(float *, int);
|
||||
float Z[N];
|
||||
void pipedF()
|
||||
{
|
||||
int C, i;
|
||||
init(Z, N);
|
||||
for (C=0; C<N; C+=CHUNKSZ)
|
||||
{
|
||||
#pragma omp task
|
||||
#pragma omp target map(Z[C:CHUNKSZ])
|
||||
#pragma omp parallel for
|
||||
for (i=0; i<CHUNKSZ; i++)
|
||||
Z[i] = F(Z[i]);
|
||||
}
|
||||
#pragma omp taskwait
|
||||
}
|
38
sources/Example_async_target.1f.f
Normal file
38
sources/Example_async_target.1f.f
Normal file
@ -0,0 +1,38 @@
|
||||
! @@name: async_target.1f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
module parameters
   ! Problem size and per-task chunk size used by pipedF.
   integer, parameter :: N       = 1000000000
   integer, parameter :: CHUNKSZ = 1000000
end module parameters
|
||||
subroutine pipedF()
   ! Applies F to every element of z, one CHUNKSZ-sized chunk per task;
   ! each task offloads its own chunk to the target device.
   use parameters, ONLY: N, CHUNKSZ
   integer :: C, i
   real    :: z(N)

   interface
      function F(z)
      !$omp declare target
        real, intent(IN) ::z
        real             ::F
      end function F
   end interface

   call init(z,N)

   do C=1,N,CHUNKSZ

      ! z is a local array, so it is listed as shared: every task must
      ! update the one array that is printed below, not a task-private
      ! copy of it.
      !$omp task shared(z)
      !$omp target map(z(C:C+CHUNKSZ-1))
      !$omp parallel do
         do i=C,C+CHUNKSZ-1
            z(i) = F(z(i))
         end do
      !$omp end target
      !$omp end task

   end do

   ! Wait for every chunk before reading z on the host.
   !$omp taskwait
   print*, z

end subroutine pipedF
|
39
sources/Example_async_target.2c.c
Normal file
39
sources/Example_async_target.2c.c
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* @@name: async_target.2c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
#include <omp.h>
#include <stdlib.h>
|
||||
/* init is called from inside a target region, so it is declared for the
 * device as well. */
#pragma omp declare target
extern void init(float *, float *, int);
#pragma omp end declare target
extern void foo();
extern void output(float *, int);

/* Computes p[i] = v1[i] * v2[i] for i in [0, N) on device dev using two
 * dependent tasks, with foo() running on the host in between.
 *
 * The first task allocates and initializes v1 and v2 inside a target
 * region; the second multiplies them into p and frees them.  Any values
 * passed in v1 and v2 are overwritten.
 *
 * v1 and v2 are listed as shared on both tasks so that the pointers
 * stored by the first task are the ones seen by the second.  init is
 * called with all three arguments its prototype declares.  output runs
 * on the host after the taskwait, once p has been mapped back.
 */
void vec_mult(float *p, float *v1, float *v2, int N, int dev)
{
   int i;

   #pragma omp task shared(v1, v2) depend(out: v1, v2)
   #pragma omp target device(dev) map(v1, v2)
   {
      // check whether on device dev
      if (omp_is_initial_device())
         abort();
      v1 = malloc(N*sizeof(float));
      v2 = malloc(N*sizeof(float));
      init(v1, v2, N);
   }

   foo(); // execute other work asynchronously

   #pragma omp task shared(v1, v2) depend(in: v1, v2)
   #pragma omp target device(dev) map(to: v1, v2) map(from: p[0:N])
   {
      // check whether on device dev
      if (omp_is_initial_device())
         abort();
      #pragma omp parallel for
      for (i=0; i<N; i++)
         p[i] = v1[i] * v2[i];
      free(v1);
      free(v2);
   }

   // both tasks must finish before p is read on the host
   #pragma omp taskwait
   output(p, N);
}
|
39
sources/Example_async_target.2f.f
Normal file
39
sources/Example_async_target.2f.f
Normal file
@ -0,0 +1,39 @@
|
||||
! @@name: async_target.2f
|
||||
! @@type: F-free
|
||||
! @@compilable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
subroutine mult(p, N, idev)
   ! Computes p(i) = v1(i) * v2(i) for i = 1..N on device idev using two
   ! dependent tasks, with foo running on the host in between.
   !
   ! v1, v2 and p are listed as shared on the tasks: the arrays the
   ! first task allocates must be the ones the second task reads, and
   ! the second task must write into the caller's p.
   use omp_lib, ONLY: omp_is_initial_device
   real             :: p(N)
   real,allocatable :: v1(:), v2(:)
   integer          :: i, idev
   !$omp declare target (init)

   !$omp task shared(v1,v2) depend(out: v1,v2)
   !$omp target device(idev) map(v1,v2)
      if( omp_is_initial_device() ) &
         stop "not executing on target device"
      allocate(v1(N), v2(N))
      call init(v1,v2,N)
   !$omp end target
   !$omp end task

   call foo()  ! execute other work asynchronously

   !$omp task shared(v1,v2,p) depend(in: v1,v2)
   !$omp target device(idev) map(to: v1,v2) map(from: p)
      if( omp_is_initial_device() ) &
         stop "not executing on target device"
      !$omp parallel do
         do i = 1,N
            p(i) = v1(i) * v2(i)
         end do
      deallocate(v1,v2)

   !$omp end target
   !$omp end task

   ! Both tasks must finish before p is read on the host.
   !$omp taskwait
   call output(p, N)

end subroutine
|
45
sources/Example_atomic.1c.c
Normal file
45
sources/Example_atomic.1c.c
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* @@name: atomic.1c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: yes
|
||||
* @@expect: success
|
||||
*/
|
||||
/* Returns i scaled by 1.0; the product is formed in double and narrowed
 * to float on return. */
float work1(int i)
{
   double scaled = 1.0 * i;
   return scaled;
}
|
||||
|
||||
/* Returns i scaled by 2.0; the product is formed in double and narrowed
 * to float on return. */
float work2(int i)
{
   double scaled = 2.0 * i;
   return scaled;
}
|
||||
|
||||
/* For each k in [0, n): atomically adds work1(k) to x[index[k]] and adds
 * work2(k) to y[k].  Only the update of x is under the atomic directive;
 * the update of y[k] is a plain statement in the loop body. */
void atomic_example(float *x, float *y, int *index, int n)
{
   int k;

   #pragma omp parallel for shared(x, y, index, n)
   for (k = 0; k < n; ++k) {
      const int slot = index[k];

      #pragma omp atomic update
      x[slot] += work1(k);

      y[k] += work2(k);
   }
}
|
||||
|
||||
/* Driver: zeroes x (1000 elements) and y (10000 elements), fills index so
 * that entry k points at slot k % 1000, then runs atomic_example over all
 * 10000 entries. */
int main()
{
   enum { NX = 1000, NY = 10000 };
   float x[NX];
   float y[NY];
   int index[NY];
   int k;

   for (k = 0; k < NX; k++)
      x[k] = 0.0;

   for (k = 0; k < NY; k++) {
      y[k] = 0.0;
      index[k] = k % NX;
   }

   atomic_example(x, y, index, NY);
   return 0;
}
|
49
sources/Example_atomic.1f.f
Normal file
49
sources/Example_atomic.1f.f
Normal file
@ -0,0 +1,49 @@
|
||||
! @@name: atomic.1f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: yes
! @@expect: success
! Returns i scaled by 1.0 as a default real.
      real function work1(i)
      integer i
      work1 = 1.0 * i
      end function work1

! Returns i scaled by 2.0 as a default real.
      real function work2(i)
      integer i
      work2 = 2.0 * i
      end function work2

! For k = 1..n: atomically adds work1(k) to x(index(k)) and adds
! work2(k) to y(k).  Only the update of x is under the atomic
! directive.
      subroutine sub(x, y, index, n)
      real x(*), y(*)
      integer index(*), n

      integer k

!$omp parallel do shared(x, y, index, n)
      do k = 1, n
!$omp atomic update
        x(index(k)) = x(index(k)) + work1(k)
        y(k) = y(k) + work2(k)
      end do

      end subroutine sub

! Driver: zeroes x and y, fills index with values in 1..1000, then
! calls sub over all 10000 entries.
      program atomic_example
      real x(1000), y(10000)
      integer index(10000)
      integer k

      x = 0.0
      y = 0.0
      do k = 1, 10000
        index(k) = mod(k, 1000) + 1
      end do

      call sub(x, y, index, 10000)

      end program atomic_example
|
27
sources/Example_atomic.2c.c
Normal file
27
sources/Example_atomic.2c.c
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* @@name: atomic.2c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
/* Returns the value of *p, read under an atomic read directive. */
int atomic_read(const int *p)
{
   int snapshot;

   /* Guarantee that the entire value of *p is read atomically.  No part
    * of *p can change during the read operation. */
   #pragma omp atomic read
   snapshot = *p;

   return snapshot;
}
|
||||
|
||||
/* Stores value into *p under an atomic write directive. */
void atomic_write(int *p, int value)
{
   /* Guarantee that value is stored atomically into *p.  No part of *p
    * can change until after the entire write operation is completed. */
   #pragma omp atomic write
   *p = value;
}
|
24
sources/Example_atomic.2f.f
Normal file
24
sources/Example_atomic.2f.f
Normal file
@ -0,0 +1,24 @@
|
||||
! @@name: atomic.2f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: no
! @@expect: success
! Returns the value of p, read under an atomic read directive.
      function atomic_read(p) result(val)
      integer :: val
      integer, intent(in) :: p
! Guarantee that the entire value of p is read atomically.  No part
! of p can change during the read operation.
!$omp atomic read
      val = p
      end function atomic_read

! Stores value into p under an atomic write directive.
      subroutine atomic_write(p, value)
      integer, intent(out) :: p
      integer, intent(in) :: value
! Guarantee that value is stored atomically into p.  No part of p
! can change until after the entire write operation is completed.
!$omp atomic write
      p = value
      end subroutine atomic_write
|
44
sources/Example_atomic.3c.c
Normal file
44
sources/Example_atomic.3c.c
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* @@name: atomic.3c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
*/
|
||||
/* Atomically reads *p and then increments it; the value *p held before
 * the increment is returned.  The locking routine later in this file is
 * built on it. */
int fetch_and_add(int *p)
{
   int previous;

   #pragma omp atomic capture
   previous = (*p)++;

   return previous;
}
|
||||
|
||||
/*
|
||||
* Use fetch_and_add to implement a lock
|
||||
*/
|
||||
struct locktype {
   int ticketnumber;   /* incremented by each caller that requests the lock */
   int turn;           /* incremented each time the lock is released */
};
|
||||
void do_locked_work(struct locktype *lock)
|
||||
{
|
||||
int atomic_read(const int *p);
|
||||
void work();
|
||||
|
||||
// Obtain the lock
|
||||
int myturn = fetch_and_add(&lock->ticketnumber);
|
||||
while (atomic_read(&lock->turn) != myturn)
|
||||
;
|
||||
// Do some work. The flush is needed to ensure visibility of
|
||||
// variables not involved in atomic directives
|
||||
|
||||
#pragma omp flush
|
||||
work();
|
||||
#pragma omp flush
|
||||
// Release the lock
|
||||
fetch_and_add(&lock->turn);
|
||||
}
|
50
sources/Example_atomic.3f.f
Normal file
50
sources/Example_atomic.3f.f
Normal file
@ -0,0 +1,50 @@
|
||||
! @@name: atomic.3f
! @@type: F-fixed
! @@compilable: yes
! @@linkable: no
! @@expect: success
! Atomically reads p and then increments it; the value p held before
! the increment is returned.  The lock in module m is built on it.
      function fetch_and_add(p) result(previous)
      integer :: previous
      integer, intent(inout) :: p

!$omp atomic capture
      previous = p
      p = p + 1
!$omp end atomic
      end function fetch_and_add

      module m
      interface
        function fetch_and_add(p)
          integer :: fetch_and_add
          integer, intent(inout) :: p
        end function fetch_and_add
        function atomic_read(p)
          integer :: atomic_read
          integer, intent(in) :: p
        end function atomic_read
      end interface

      type locktype
        integer ticketnumber
        integer turn
      end type locktype

      contains

! Takes a ticket from lock, spins until lock%turn equals that
! ticket, calls work, then advances lock%turn to release the lock.
      subroutine do_locked_work(lock)
      type(locktype), intent(inout) :: lock
      integer myturn
      integer junk

! obtain the lock
      myturn = fetch_and_add(lock%ticketnumber)
      do
        if (atomic_read(lock%turn) .eq. myturn) exit
      end do

! Do some work.  The flush is needed to ensure visibility of
! variables not involved in atomic directives
!$omp flush
      call work
!$omp flush

! Release the lock
      junk = fetch_and_add(lock%turn)
      end subroutine do_locked_work
      end module m
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user