mirror of
https://github.com/OpenMP/Examples.git
synced 2025-04-03 13:21:33 +01:00
merge with examples-internal/v5.1
This commit is contained in:
parent
60e8ece384
commit
fb0edc81e7
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{SIMD}
|
||||
\cchapter{SIMD}{SIMD}
|
||||
\label{chap:simd}
|
||||
|
||||
Single instruction, multiple data (SIMD) is a form of parallel execution
|
||||
@ -12,7 +11,7 @@ Many processors have SIMD (vector) units that can perform simultaneously
|
||||
Loops without loop-carried backward dependency (or with dependency preserved using
|
||||
ordered simd) are candidates for vectorization by the compiler for
|
||||
execution with SIMD units. In addition, with state-of-the-art vectorization
|
||||
technology and \code{declare simd} construct extensions for function vectorization
|
||||
technology and \code{declare simd} directive extensions for function vectorization
|
||||
in the OpenMP 4.5 specification, loops with function calls can be vectorized as well.
|
||||
The basic idea is that a scalar function call in a loop can be replaced by a vector version
|
||||
of the function, and the loop can be vectorized simultaneously by combining a loop
|
||||
@ -46,3 +45,8 @@ execution in different SIMD units.
|
||||
%\code{parallel for simd}).
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{SIMD/SIMD}
|
||||
\input{SIMD/linear_modifier}
|
||||
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{OpenMP Affinity}
|
||||
\cchapter{OpenMP Affinity}{affinity}
|
||||
\label{chap:openmp_affinity}
|
||||
|
||||
OpenMP Affinity consists of a \code{proc\_bind} policy (thread affinity policy) and a specification of
|
||||
@ -53,21 +52,21 @@ variables for the MPI library. %Forked threads within an MPI process
|
||||
%which sets \code{OMP\_PLACES} specifically for the MPI process.
|
||||
|
||||
Threads of a team are positioned onto places in a compact manner, a
|
||||
scattered distribution, or onto the master's place, by setting the
|
||||
scattered distribution, or onto the primary thread's place, by setting the
|
||||
\code{OMP\_PROC\_BIND} environment variable or the \code{proc\_bind} clause to
|
||||
\plc{close}, \plc{spread}, or \plc{master}, respectively. When
|
||||
\code{close}, \code{spread}, or \code{primary} (\code{master} has been deprecated), respectively. When
|
||||
\code{OMP\_PROC\_BIND} is set to FALSE no binding is enforced; and
|
||||
when the value is TRUE, the binding is implementation defined to
|
||||
a set of places in the \code{OMP\_PLACES} variable or to places
|
||||
defined by the implementation if the \code{OMP\_PLACES} variable
|
||||
is not set.
|
||||
is not set.
|
||||
|
||||
The \code{OMP\_PLACES} variable can also be set to an abstract name
|
||||
(\plc{threads}, \plc{cores}, \plc{sockets}) to specify that a place is
|
||||
(\code{threads}, \code{cores}, \code{sockets}) to specify that a place is
|
||||
either a single hardware thread, a core, or a socket, respectively.
|
||||
This description of the \code{OMP\_PLACES} is most useful when the
|
||||
number of threads is equal to the number of hardware threads, cores
|
||||
or sockets. It can also be used with a \plc{close} or \plc{spread}
|
||||
or sockets. It can also be used with a \code{close} or \code{spread}
|
||||
distribution policy when the equality doesn't hold.
|
||||
|
||||
|
||||
@ -116,3 +115,11 @@ distribution policy when the equality doesn't hold.
|
||||
% thread # 0 * * * * _ _ _ _ _ _ _ _ #mask for thread 0
|
||||
% thread # 0 _ _ _ _ * * * * _ _ _ _ #mask for thread 1
|
||||
% thread # 0 _ _ _ _ _ _ _ _ * * * * #mask for thread 2
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{affinity/affinity}
|
||||
\input{affinity/task_affinity}
|
||||
\input{affinity/affinity_display}
|
||||
\input{affinity/affinity_query}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{Data Environment}
|
||||
\cchapter{Data Environment}{data_environment}
|
||||
\label{chap:data_environment}
|
||||
The OpenMP \plc{data environment} contains data attributes of variables and
|
||||
objects. Many constructs (such as \code{parallel}, \code{simd}, \code{task})
|
||||
@ -73,3 +72,22 @@ it has been referenced (+1 on entry and -1 on exited) in nested (structured)
|
||||
map regions and/or accumulative (unstructured) mappings, determines the operation.
|
||||
Details of the \code{map} clause and reference count operation are specified
|
||||
in the \plc{map Clause} subsection of the OpenMP Specifications document.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{data_environment/threadprivate}
|
||||
\input{data_environment/default_none}
|
||||
\input{data_environment/private}
|
||||
\input{data_environment/fort_loopvar}
|
||||
\input{data_environment/fort_sp_common}
|
||||
\input{data_environment/fort_sa_private}
|
||||
\input{data_environment/carrays_fpriv}
|
||||
\input{data_environment/lastprivate}
|
||||
\input{data_environment/reduction}
|
||||
\input{data_environment/udr}
|
||||
\input{data_environment/scan}
|
||||
\input{data_environment/copyin}
|
||||
\input{data_environment/copyprivate}
|
||||
\input{data_environment/cpp_reference}
|
||||
\input{data_environment/associate}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{Devices}
|
||||
\cchapter{Devices}{devices}
|
||||
\label{chap:devices}
|
||||
|
||||
The \code{target} construct consists of a \code{target} directive
|
||||
@ -51,3 +50,26 @@ This new specification does not affect the execution of
|
||||
pre-4.5 code; it is a necessary element for asynchronous
|
||||
execution of the \code{target} region when using the new \code{nowait}
|
||||
clause introduced in OpenMP 4.5.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{devices/target}
|
||||
\input{devices/target_defaultmap}
|
||||
\input{devices/target_pointer_mapping}
|
||||
\input{devices/target_structure_mapping}
|
||||
\input{devices/target_fort_allocatable_array_mapping}
|
||||
\input{devices/array_sections}
|
||||
\input{devices/array_shaping}
|
||||
\input{devices/target_mapper}
|
||||
\input{devices/target_data}
|
||||
\input{devices/target_unstructured_data}
|
||||
\input{devices/target_update}
|
||||
\input{devices/target_associate_ptr}
|
||||
\input{devices/declare_target}
|
||||
\input{devices/teams}
|
||||
\input{devices/async_target_depend}
|
||||
\input{devices/async_target_with_tasks}
|
||||
\input{devices/async_target_nowait}
|
||||
\input{devices/async_target_nowait_depend}
|
||||
\input{devices/device}
|
||||
|
||||
|
45
Chap_directives.tex
Normal file
45
Chap_directives.tex
Normal file
@ -0,0 +1,45 @@
|
||||
\cchapter{OpenMP Directive Syntax}{directives}
|
||||
\label{chap:directive_syntax}
|
||||
|
||||
OpenMP \emph{directives} use base-language mechanisms to specify OpenMP program behavior.
|
||||
In C code, the directives are formed exclusively with pragmas, whereas in C++
|
||||
code, directives are formed from either pragmas or attributes.
|
||||
Fortran directives are formed with comments in free form and fixed form sources (codes).
|
||||
All of these mechanisms allow the compilation to ignore the OpenMP directives if
|
||||
OpenMP is not supported or enabled.
|
||||
|
||||
|
||||
The OpenMP directive is a combination of the base-language mechanism and a \plc{directive-specification},
|
||||
as shown below. The \plc{directive-specification} consists
|
||||
of the \plc{directive-name} which may seldom have arguments,
|
||||
followed by optional \plc{clauses}. Full details of the syntax can be found in the OpenMP Specification.
|
||||
Illustrations of the syntax are given in the examples.
|
||||
|
||||
The formats for combining a base-language mechanism and a \plc{directive-specification} are:
|
||||
|
||||
C/C++ pragmas
|
||||
\begin{indentedcodelist}
|
||||
\code{\#pragma omp} \plc{directive-specification}
|
||||
\end{indentedcodelist}
|
||||
|
||||
C++ attributes
|
||||
\begin{indentedcodelist}
|
||||
\code{[[omp :: directive(} \plc{directive-specification} \code{)]]}
|
||||
\code{[[using omp : directive(} \plc{directive-specification} \code{)]]}
|
||||
\end{indentedcodelist}
|
||||
|
||||
Fortran comments
|
||||
\begin{indentedcodelist}
|
||||
\code{!\$omp} \plc{directive-specification}
|
||||
\end{indentedcodelist}
|
||||
|
||||
where \code{c\$omp} and \code{*\$omp} may be used in Fortran fixed form sources.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{directives/pragmas}
|
||||
\input{directives/attributes}
|
||||
\input{directives/fixed_format_comments}
|
||||
\input{directives/free_format_comments}
|
||||
|
||||
|
25
Chap_loop_transformations.tex
Normal file
25
Chap_loop_transformations.tex
Normal file
@ -0,0 +1,25 @@
|
||||
\cchapter{Loop Transformations}{loop_transformations}
|
||||
\label{chap:loop_transformations}
|
||||
|
||||
To obtain better performance on a platform, code may need to be restructured
|
||||
relative to the way it is written (which is often for best readability).
|
||||
User-directed loop transformations accomplish this goal by providing a means
|
||||
to separate code semantics and its optimization.
|
||||
|
||||
A loop transformation construct states that a transformation operation is to be
|
||||
performed on a set of nested loops. This directive approach can target specific loops
|
||||
for transformation, rather than applying more time-consuming general compiler
|
||||
heuristic methods with compiler options that may not be able to discover
|
||||
optimal transformations.
|
||||
|
||||
Loop transformations can be augmented by preprocessor support or OpenMP \code{metadirective}
|
||||
directives, to select optimal dimension and size parameters for specific platforms,
|
||||
facilitating a single code base for multiple platforms.
|
||||
Moreover, directive-based transformations make experimenting easier,
|
||||
because specific hot spots can be affected by transformation directives.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{loop_transformations/tile}
|
||||
\input{loop_transformations/unroll}
|
||||
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{Memory Model}
|
||||
\cchapter{Memory Model}{memory_model}
|
||||
\label{chap:memory_model}
|
||||
|
||||
OpenMP provides a shared-memory model that allows all threads on a given
|
||||
@ -129,3 +128,10 @@ section of the OpenMP Specifications document.
|
||||
% in \plc{atomic Construct} subsection of the OpenMP Specifications document).
|
||||
|
||||
% Examples 1-3 show the difficulty of synchronizing threads through \code{flush} and \code{atomic} directives.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{memory_model/mem_model}
|
||||
\input{memory_model/allocators}
|
||||
\input{memory_model/fort_race}
|
||||
|
||||
|
19
Chap_ompt_interface.tex
Normal file
19
Chap_ompt_interface.tex
Normal file
@ -0,0 +1,19 @@
|
||||
\cchapter{OMPT Interface}{ompt_interface}
|
||||
\label{chap:ompt_interface}
|
||||
OMPT defines mechanisms and an API for interfacing with tools in the OpenMP program.
|
||||
|
||||
The OMPT API provides the following functionality:
|
||||
\begin{itemize}
|
||||
\addtolength{\itemindent}{1cm}
|
||||
\item examines the state associated with an OpenMP thread
|
||||
\item interprets the call stack of an OpenMP thread
|
||||
\item receives notification about OpenMP events
|
||||
\item traces activity on OpenMP target devices
|
||||
\item assesses implementation-dependent details
|
||||
\item controls a tool from an OpenMP application
|
||||
\end{itemize}
|
||||
|
||||
The following sections will illustrate basic mechanisms and operations of the OMPT API.
|
||||
|
||||
|
||||
\input{ompt_interface/ompt_start}
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{Parallel Execution}
|
||||
\cchapter{Parallel Execution}{parallel_execution}
|
||||
\label{chap:parallel_execution}
|
||||
|
||||
A single thread, the \plc{initial thread}, begins sequential execution of
|
||||
@ -10,7 +9,7 @@ A \code{parallel} construct encloses code,
|
||||
forming a parallel region. An \plc{initial thread} encountering a \code{parallel}
|
||||
region forks (creates) a team of threads at the beginning of the
|
||||
\code{parallel} region, and joins them (removes from execution) at the
|
||||
end of the region. The initial thread becomes the master thread of the team in a
|
||||
end of the region. The initial thread becomes the primary thread of the team in a
|
||||
\code{parallel} region with a \plc{thread} number equal to zero, the other
|
||||
threads are numbered from 1 to number of threads minus 1.
|
||||
A team may be comprised of just a single thread.
|
||||
@ -19,9 +18,9 @@ Each thread of a team is assigned an implicit task consisting of code within the
|
||||
parallel region. The task that creates a parallel region is suspended while the
|
||||
tasks of the team are executed. A thread is tied to its task; that is,
|
||||
only the thread assigned to the task can execute that task. After completion
|
||||
of the \code{parallel} region, the master thread resumes execution of the generating task.
|
||||
of the \code{parallel} region, the primary thread resumes execution of the generating task.
|
||||
|
||||
%After the \code{parallel} region the master thread becomes the initial
|
||||
%After the \code{parallel} region the primary thread becomes the initial
|
||||
%thread again, and continues to execute the \plc{sequential part}.
|
||||
|
||||
Any task within a \code{parallel} region is allowed to encounter another
|
||||
@ -43,7 +42,8 @@ defined. When dynamic adjustment is on and the number of threads is specified,
|
||||
the number of threads becomes an upper limit for the number of threads to be
|
||||
provided by the OpenMP runtime.
|
||||
|
||||
\pagebreak
|
||||
%\pagebreak
|
||||
\bigskip
|
||||
WORKSHARING CONSTRUCTS
|
||||
|
||||
A worksharing construct distributes the execution of the associated region
|
||||
@ -96,9 +96,33 @@ region with a single structure block (section of code). Statements in the
|
||||
by threads of the team.
|
||||
|
||||
\bigskip
|
||||
MASTER CONSTRUCT
|
||||
MASKED CONSTRUCT
|
||||
|
||||
The \code{masked} construct is not a worksharing construct. The \code{masked} region is
|
||||
executed only by the primary thread. There is no implicit barrier (and flush)
|
||||
at the end of the \code{masked} region; hence the other threads of the team continue
|
||||
execution of code statements beyond the \code{masked} region.
|
||||
The \code{master} construct, which has been deprecated in OpenMP 5.1, has identical semantics
|
||||
to the \code{masked} construct with no \code{filter} clause.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{parallel_execution/ploop}
|
||||
\input{parallel_execution/parallel}
|
||||
\input{parallel_execution/host_teams}
|
||||
\input{parallel_execution/nthrs_nesting}
|
||||
\input{parallel_execution/nthrs_dynamic}
|
||||
\input{parallel_execution/fort_do}
|
||||
\input{parallel_execution/nowait}
|
||||
\input{parallel_execution/collapse}
|
||||
\input{parallel_execution/linear_in_loop}
|
||||
\input{parallel_execution/psections}
|
||||
\input{parallel_execution/fpriv_sections}
|
||||
\input{parallel_execution/single}
|
||||
\input{parallel_execution/workshare}
|
||||
\input{parallel_execution/masked}
|
||||
\input{parallel_execution/loop}
|
||||
\input{parallel_execution/pra_iterator}
|
||||
\input{parallel_execution/set_dynamic_nthrs}
|
||||
\input{parallel_execution/get_nthrs}
|
||||
|
||||
The \code{master} construct is not a worksharing construct. The master region is
|
||||
is executed only by the master thread. There is no implicit barrier (and flush)
|
||||
at the end of the \code{master} region; hence the other threads of the team continue
|
||||
execution beyond code statements beyond the \code{master} region.
|
||||
|
@ -1,17 +1,26 @@
|
||||
\pagebreak
|
||||
\chapter{Program Control}
|
||||
\label{sec:program_control}
|
||||
\cchapter{Program Control}{program_control}
|
||||
\label{chap:program_control}
|
||||
|
||||
Some specific and elementary concepts of controlling program execution are
|
||||
illustrated in the examples of this chapter. Control can be directly
|
||||
managed with conditional control code (ifdef's with the \code{\_OPENMP}
|
||||
macro, and the Fortran sentinel (\code{!\$})
|
||||
for conditionally compiling). The \code{if} clause on some constructs
|
||||
Basic concepts and mechanisms for directing and controlling a program compilation and execution
|
||||
are provided in this introduction and illustrated in subsequent examples.
|
||||
|
||||
\bigskip
|
||||
CONDITIONAL COMPILATION and EXECUTION
|
||||
|
||||
Conditional compilation can be performed with conventional \#ifdef directives
|
||||
in C, C++, and Fortran, and additionally with the OpenMP sentinel (\code{!\$}) in Fortran.
|
||||
The \code{if} clause on some directives
|
||||
can direct the runtime to ignore or alter the behavior of the construct.
|
||||
Of course, the base-language \code{if} statements can be used to control the "execution"
|
||||
Of course, the base-language \code{if} statements can be used to control the execution
|
||||
of stand-alone directives (such as \code{flush}, \code{barrier}, \code{taskwait},
|
||||
and \code{taskyield}).
|
||||
However, the directives must appear in a block structure, and not as a substatement as shown in examples 1 and 2 of this chapter.
|
||||
However, the directives must appear in a block structure, and not as a substatement.
|
||||
The \code{metadirective} and \code{declare}~\code{variant} directives provide conditional
|
||||
selection of directives and routines for compilation (and use), respectively.
|
||||
The \code{assume} and \code{requires} directives provide invariants
|
||||
for optimizing compilation, and essential features for compilation
|
||||
and correct execution, respectively.
|
||||
|
||||
|
||||
\bigskip
|
||||
CANCELLATION
|
||||
@ -28,15 +37,15 @@ The \code{cancel} construct is also a cancellation point for any other thread of
|
||||
to also continue execution at the end of the named region.
|
||||
|
||||
Also, once the specified region has been activated for cancellation any thread that encounters
|
||||
a \code{cancellation point} construct with the same named region (\plc{construct-type-clause}),
|
||||
a \code{cancellation}~\code{point} construct with the same named region (\plc{construct-type-clause}),
|
||||
continues execution at the end of the region.
|
||||
|
||||
For an activated \code{cancel taskgroup} construct, the tasks that
|
||||
belong to the taskgroup set of the innermost enclosing taskgroup region will be canceled.
|
||||
|
||||
A task that encounters the cancel taskgroup construct continues execution at the end of its
|
||||
A task that encounters a \code{cancel}~\code{taskgroup} construct continues execution at the end of its
|
||||
task region. Any task of the taskgroup that has already begun execution will run to completion,
|
||||
unless it encounters a \code{cancellation point}; tasks that have not begun execution "may" be
|
||||
unless it encounters a \code{cancellation}~\code{point}; tasks that have not begun execution may be
|
||||
discarded as completed tasks.
|
||||
|
||||
\bigskip
|
||||
@ -44,9 +53,10 @@ CONTROL VARIABLES
|
||||
|
||||
Internal control variables (ICV) are used by implementations to hold values which control the execution
|
||||
of OpenMP regions. Control (and hence the ICVs) may be set as implementation defaults,
|
||||
or set and adjusted through environment variables, clauses, and API functions. Many of the ICV control
|
||||
values are accessible through API function calls. Also, initial ICV values are reported by the runtime
|
||||
if the \code{OMP\_DISPLAY\_ENV} environment variable has been set to \code{TRUE}.
|
||||
or set and adjusted through environment variables, clauses, and API functions.
|
||||
%Many of the ICV control values are accessible through API function calls.
|
||||
Initial ICV values are reported by the runtime
|
||||
if the \code{OMP\_DISPLAY\_ENV} environment variable has been set to \code{TRUE} or \code{VERBOSE}.
|
||||
|
||||
%As an example, the \plc{nthreads-var} is the ICV that holds the number of threads
|
||||
%to be used in a \code{parallel} region. It can be set with the \code{OMP\_NUM\_THREADS} environment variable,
|
||||
@ -59,9 +69,9 @@ CONTROL VARIABLES
|
||||
\bigskip
|
||||
NESTED CONSTRUCTS
|
||||
|
||||
Certain combinations of nested constructs are permitted, giving rise to a \plc{combined} construct
|
||||
consisting of two or more constructs. These can be used when the two (or several) constructs would be used
|
||||
immediately in succession (closely nested). A combined construct can use the clauses of the component
|
||||
Certain combinations of nested constructs are permitted, giving rise to \plc{combined} constructs
|
||||
consisting of two or more directives. These can be used when the two (or several) constructs would be used
|
||||
immediately in succession (closely nested). A \plc{combined} construct can use the clauses of the component
|
||||
constructs without restrictions.
|
||||
A \plc{composite} construct is a combined construct which has one or more clauses with (an often obviously)
|
||||
modified or restricted meaning, relative to when the constructs are uncombined. %%[appear separately (singly).
|
||||
@ -72,14 +82,32 @@ modified or restricted meaning, relative to when the constructs are uncombined.
|
||||
%the parallel loop constructs and the \code{SIMD} construct), because the \code{collapse} clause must
|
||||
%explicitly address the ordering of loop chunking \plc{and} SIMD "combined" execution.
|
||||
|
||||
Certain nestings are forbidden, and often the reasoning is obvious. Worksharing constructs cannot be nested, and
|
||||
Certain nestings are forbidden, and often the reasoning is obvious. For example, worksharing constructs cannot be nested, and
|
||||
the \code{barrier} construct cannot be nested inside a worksharing construct, or a \code{critical} construct.
|
||||
Also, \code{target} constructs cannot be nested.
|
||||
Also, \code{target} constructs cannot be nested, unless the nested target is a reverse offload.
|
||||
|
||||
The \code{parallel} construct can be nested, as well as the \code{task} construct. The parallel
|
||||
execution in the nested \code{parallel} construct(s) is control by the \code{OMP\_NESTED} and
|
||||
\code{OMP\_MAX\_ACTIVE\_LEVELS} environment variables, and the \code{omp\_set\_nested()} and
|
||||
\code{omp\_set\_max\_active\_levels()} functions.
|
||||
The \code{parallel} construct can be nested, as well as the \code{task} construct.
|
||||
The parallel execution in the nested parallel construct(s) is controlled by the
|
||||
\code{OMP\_MAX\_ACTIVE\_LEVELS} environment variable, and the \code{omp\_set\_max\_active\_levels} routine.
|
||||
Use the \code{omp\_get\_max\_active\_levels} routine to determine the maximum levels provided by an implementation.
|
||||
As of OpenMP 5.0, use of the \code{OMP\_NESTED} environment variable and the \code{omp\_set\_nested} routine
|
||||
has been deprecated.
|
||||
|
||||
More details on nesting can be found in the \plc{Nesting of Regions} of the \plc{Directives}
|
||||
chapter in the OpenMP Specifications document.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{program_control/cond_comp}
|
||||
\input{program_control/icv}
|
||||
\input{program_control/standalone}
|
||||
\input{program_control/cancellation}
|
||||
\input{program_control/requires}
|
||||
\input{program_control/variant}
|
||||
\input{program_control/metadirective}
|
||||
\input{program_control/nested_loop}
|
||||
\input{program_control/nesting_restrict}
|
||||
\input{program_control/target_offload}
|
||||
\input{program_control/interop}
|
||||
\input{program_control/utilities}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{Synchronization}
|
||||
\cchapter{Synchronization}{synchronization}
|
||||
\label{chap:synchronization}
|
||||
|
||||
The \code{barrier} construct is a stand-alone directive that requires all threads
|
||||
@ -79,3 +78,23 @@ Scheduling constraints on task execution can be prescribed by the \code{depend}
|
||||
clause to enforce dependence on previously generated tasks.
|
||||
More details on controlling task executions can be found in the \plc{Tasking} Chapter
|
||||
in the OpenMP Specifications document. %(DO REF. RIGHT.)
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{synchronization/critical}
|
||||
\input{synchronization/worksharing_critical}
|
||||
\input{synchronization/barrier_regions}
|
||||
\input{synchronization/atomic}
|
||||
\input{synchronization/atomic_restrict}
|
||||
\input{synchronization/flush_nolist}
|
||||
\input{synchronization/acquire_release}
|
||||
\input{synchronization/ordered}
|
||||
\input{synchronization/depobj}
|
||||
\input{synchronization/doacross}
|
||||
\input{synchronization/locks}
|
||||
\input{synchronization/init_lock}
|
||||
\input{synchronization/init_lock_with_hint}
|
||||
\input{synchronization/lock_owner}
|
||||
\input{synchronization/simple_lock}
|
||||
\input{synchronization/nestable_lock}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
\pagebreak
|
||||
\chapter{Tasking}
|
||||
\cchapter{Tasking}{tasking}
|
||||
\label{chap:tasking}
|
||||
|
||||
Tasking constructs provide units of work to a thread for execution.
|
||||
@ -50,3 +49,14 @@ A complete list of the tasking constructs and details of their clauses
|
||||
can be found in the \plc{Tasking Constructs} chapter of the OpenMP Specifications,
|
||||
in the \plc{OpenMP Application Programming Interface} section.
|
||||
|
||||
|
||||
%===== Examples Sections =====
|
||||
\input{tasking/tasking}
|
||||
\input{tasking/task_priority}
|
||||
\input{tasking/task_dep}
|
||||
\input{tasking/task_detach}
|
||||
\input{tasking/taskgroup}
|
||||
\input{tasking/taskyield}
|
||||
\input{tasking/taskloop}
|
||||
\input{tasking/parallel_masked_taskloop}
|
||||
|
||||
|
153
Contributions.md
Normal file
153
Contributions.md
Normal file
@ -0,0 +1,153 @@
|
||||
# Contributing
|
||||
|
||||
The usual process for adding new examples, making changes or adding corrections
|
||||
is to submit an issue for discussion and initial evaluation of changes or example additions.
|
||||
When there is a consensus at a meeting about the contribution,
|
||||
you will be asked to submit a pull request.
|
||||
|
||||
Of course, if your contribution is an obvious correction, clarification, or note, you
|
||||
may want to submit a pull request directly.
|
||||
|
||||
-----------------------------------------------------------
|
||||
|
||||
## The OpenMP Examples document
|
||||
|
||||
The OpenMP Examples document is in LaTeX format.
|
||||
Please see the master LaTeX file, `openmp-examples.tex`, for more information.
|
||||
|
||||
## Maintainer
|
||||
|
||||
[OpenMP Examples Subcommittee](http://twiki.openmp.org/twiki/bin/view/OpenMPLang/OpenMPExamplesSubCommittee)
|
||||
For a brief revision history, see `Changes.log` in the repo.
|
||||
|
||||
## Git procedure
|
||||
|
||||
* Fork your own branch of the OpenMP [examples-internal repo](https://github.com/openmp/examples-internal)
|
||||
* Clone your fork locally
|
||||
* If you are working on generic or old-version updates, create a branch off master.
|
||||
* If you are working on an example for a release candidate for version #.#, create a branch off work_#.#.
|
||||
1.) `git clone --branch <master|work_#.#> https://github.com/<my_account>/examples-internal`
|
||||
2.) `git checkout -b <branch_name>`
|
||||
3.) ... `add`, `commit`
|
||||
4.) `git push -u origin <branch_name>`
|
||||
5.) `make` or `make diff` will create a full-document pdf or just a pdf with differences (do this at any point).
|
||||
* `git status` and `git branch -a` are your friends
|
||||
* Submit an issue for your work (usually with a diff pdf), and then you will be asked to submit a pull request
|
||||
* Create an issue by selecting the [issue tab](https://github.com/openmp/examples-internal/issues) and clicking on `new issue`.
|
||||
* Use this MarkDown Cheatsheet for [issue formatting](https://wordpress.com/support/markdown-quick-reference/)
|
||||
* More MarkDown details are available [here](https://markdown-it.github.io)
|
||||
* You can cut and paste markdown formatted text in a [reader](https://dillinger.io) to see formatting effects.
|
||||
* Forced spaces are available in Markdown. On a Mac it is "option+space".
|
||||
* Polling is available. Go to [gh-poll](https://app.gh-polls.com/). Type an option on each line, then click `copy markdown`, and paste the contents into the issue. (Use preview to check your poll, and then submit it.)
|
||||
* Create a pull request
|
||||
|
||||
|
||||
## Processing source code
|
||||
|
||||
* Prepare source code (C/C++ and Fortran) and a text description (use similar styles found in recent examples)
|
||||
* Determine the *example* name `<ename>`, *sequence* number `<seq-no>` and *compiler* suffix `<csuffix>` for the example
|
||||
* The syntax is: `<ename>.<seq-no>.<csuffix>` (e.g. `affinity_display.1.f90`)
|
||||
* The example name may be a Section name (e.g. affinity), or a Subsection name (affinity_display)
|
||||
* If you are creating a new Chapter, it may be the chapter name.
|
||||
* New examples are usually added at the end of a Section or Subsection. Number it as the next number in the sequence numbers for examples in that Section or Subsection.
|
||||
* The compiler suffix `<csuffix>` is `c`, `cpp`, `f`, and `f90` for C, C++ and Fortran codes.
|
||||
* Insert the code in the sources directory for each chapter, and include the following metadata:
|
||||
* Metadata Tags for example sources:
|
||||
```
|
||||
@@name: <ename>.<seq-no>[c|cpp|f|f90]
|
||||
@@type: C|C++|F-fixed|F-free
|
||||
@@compilable: yes|no|maybe
|
||||
@@linkable: yes|no|maybe
|
||||
@@expect: success|failure|nothing|rt-error
|
||||
@@version: omp_<verno>
|
||||
```
|
||||
* **name**
|
||||
is the name of an example
|
||||
* **type**
|
||||
is the source code type, which can be translated into or from proper file extension (c,cpp,f,f90)
|
||||
* **compilable**
|
||||
indicates whether the source code is compilable
|
||||
* **linkable**
|
||||
indicates whether the source code is linkable
|
||||
* **expect**
|
||||
indicates some expected result for testing purposes. "`success|failure|nothing`" applies
|
||||
to the result of code compilation. "`rt-error`" is for a case where compilation may be
|
||||
successful, but the code contains potential runtime issues (such as race condition).
|
||||
Alternative would be to just use "`conforming`" or "`non-conforming`".
|
||||
* **version**
|
||||
indicates features for a specific OpenMP version, such as "`omp_5.0`"
|
||||
|
||||
|
||||
## Process for text
|
||||
* Create or update the description text in a Section/Subsection file under each chapter directory, usually `<chap_directory>/<ename>.tex`
|
||||
* If adding a new Subsection, just include it in the appropriate subsection file (`<subsection>.tex`)
|
||||
* If adding a new Section, create a `<section>.tex` file and add an entry in the corresponding chapter file, such as `Chap_affinity.tex`
|
||||
* If adding a new Chapter, create a `Chap_<chap_name>.tex` file with introductory text, and add a new `<section>.tex` file with text and links to the code. Update `Makefile` and `openmp-examples.tex` to include the new chapter file.
|
||||
* Commit your changes into your fork of examples-internal
|
||||
* Submit your issue at [OpenMP Examples internal repo](https://github.com/openmp/examples-internal/issues), and include a PDF when ready.
|
||||
* Examples subcommittee members can view [meeting schedule and notes](http://twiki.openmp.org/twiki/bin/view/OpenMPLang/ExamplesSchedules)
|
||||
* Shepherd your issue to acceptance (discussed at weekly Examples meeting and in issue comments)
|
||||
* When it is in a ready state, you should then submit a pull request.
|
||||
* It will be reviewed and voted on, and changes will be requested.
|
||||
* Once the last changes are made, it will be verified and merged into an appropriate branch (either the `master` branch or a working branch).
|
||||
|
||||
|
||||
|
||||
|
||||
# LaTeX macros for examples
|
||||
|
||||
* Source code with language h-rules
|
||||
```
|
||||
\cexample[<verno>]{<ename>}{<seq-no>} % for C/C++ examples
|
||||
\cppexample[<verno>]{<ename>}{<seq-no>} % for C++ examples
|
||||
\fexample[<verno>]{<ename>}{<seq-no>} % for fixed-form Fortran examples
|
||||
\ffreeexample[<verno>]{<ename>}{<seq-no>} % for free-form Fortran examples
|
||||
```
|
||||
|
||||
* Source code without language h-rules
|
||||
```
|
||||
\cnexample[<verno>]{<ename>}{<seq-no>}
|
||||
\cppnexample[<verno>]{<ename>}{<seq-no>}
|
||||
\fnexample[<verno>]{<ename>}{<seq-no>}
|
||||
\ffreenexample[<verno>]{<ename>}{<seq-no>}
|
||||
\srcnexample[<verno>]{<ename>}{<seq-no>}{<ext>}
|
||||
```
|
||||
|
||||
Optional `<verno>` can be supplied in a macro to include a specific OpenMP
|
||||
version in the example header. This option also suggests one additional
|
||||
tag (`@@version`) line is included in the corresponding source code.
|
||||
If this is not the case (i.e., no `@@version` tag line), one needs to
|
||||
prefix `<verno>` with an underscore '\_' symbol in the macro.
|
||||
|
||||
The exception is macro `\srcnexample`, for which the corresponding
|
||||
source code should not contain any `@@` metadata tags. The `ext` argument
|
||||
to this macro is the file extension (such as `h`, `hpp`, `inc`).
|
||||
|
||||
* Language h-rules
|
||||
```
|
||||
\cspecificstart, \cspecificend
|
||||
\cppspecificstart, \cppspecificend
|
||||
\ccppspecificstart, \ccppspecificend
|
||||
\fortranspecificstart, \fortranspecificend
|
||||
```
|
||||
|
||||
* Chapter and section macros
|
||||
```
|
||||
\cchapter{<Chapter Name>}{<chap_directory>}
|
||||
```
|
||||
|
||||
The `\cchapter` macro is used for starting a chapter with proper page spacing.
|
||||
`<Chapter Name>` is the name of a chapter and `<chap_directory>` is the name
|
||||
of the chapter directory. All section and subsection files for the chapter
|
||||
should be placed under `<chap_directory>`. The corresponding example sources
|
||||
should be placed under the `sources` directory inside `<chap_directory>`.
|
||||
|
||||
A previously-defined macro `\sinput{<section_file>}` to import a section
|
||||
file from `<chap_directory>` is no longer supported. Please use
|
||||
`\input{<chap_directory>/<section_file>}` explicitly.
|
||||
|
||||
* See `openmp.sty` for more information
|
||||
|
||||
### License
|
||||
|
||||
For copyright information, please see `omp_copyright.txt`.
|
21
Deprecated_Features_Chapt.tex
Normal file
21
Deprecated_Features_Chapt.tex
Normal file
@ -0,0 +1,21 @@
|
||||
\bchapter{Deprecated Features}
|
||||
\label{chap:deprecated_features}
|
||||
|
||||
Deprecation of features began in OpenMP 5.0.
|
||||
Examples that use a deprecated feature have been updated with an equivalent replacement feature.
|
||||
|
||||
Deprecations affecting examples are the following:
|
||||
\begin{description}[labelindent=5mm,font=\normalfont]
|
||||
\item[5.1] -- \ \scode{masked} construct replaces \scode{master} construct.
|
||||
\item[5.1] -- \ \scode{primary} affinity policy replaces \scode{master} affinity policy.
|
||||
\item[5.0] -- \ \scode{omp_sync_hint_*} constants replace \scode{omp_lock_hint_*} constants.
|
||||
\end{description}
|
||||
|
||||
These replacements appear in examples that illustrate, otherwise, earlier features.
|
||||
When using a compiler that is compliant with a version prior to
|
||||
the indicated version, the earlier form of
|
||||
an example is restored by a C-style conditional compilation using the \scode{_OPENMP} macro.
|
||||
|
||||
Since Fortran compilers do not preprocess codes by default, a Fortran preprocessor
|
||||
flag will be required to compile Fortran examples with the C-style conditional
|
||||
compilation statements.
|
@ -1,7 +1,6 @@
|
||||
|
||||
\chapter*{Examples}
|
||||
\bchapter{Examples}
|
||||
\label{chap:examples}
|
||||
\addcontentsline{toc}{chapter}{\protect\numberline{}Examples}
|
||||
|
||||
The following are examples of the OpenMP API directives, constructs, and routines.
|
||||
\ccppspecificstart
|
||||
A statement following a directive is compound only when necessary, and a
|
||||
@ -12,15 +11,14 @@ Each example is labeled as \plc{ename.seqno.ext}, where \plc{ename} is
|
||||
the example name, \plc{seqno} is the sequence number in a section, and
|
||||
\plc{ext} is the source file extension to indicate the code type and
|
||||
source form. \plc{ext} is one of the following:
|
||||
\begin{compactitem}
|
||||
\item \plc{c} -- C code,
|
||||
\item \plc{cpp} -- C++ code,
|
||||
\item \plc{f} -- Fortran code in fixed form, and
|
||||
\item \plc{f90} -- Fortran code in free form.
|
||||
\end{compactitem}
|
||||
\begin{description}[noitemsep,labelindent=5mm,widest=f90]
|
||||
\item[\plc{c}] -- \ C code,
|
||||
\item[\plc{cpp}] -- \ C++ code,
|
||||
\item[\plc{f}] -- \ Fortran code in fixed form, and
|
||||
\item[\plc{f90}] -- \ Fortran code in free form.
|
||||
\end{description}
|
||||
|
||||
Some of the example labels may include version information
|
||||
(\code{\small{}omp\_\plc{verno}}) to indicate features that are illustrated
|
||||
by an example for a specific OpenMP version, such as ``\plc{scan.1.c}
|
||||
\;(\code{\small{}omp\_5.0}).''
|
||||
|
||||
|
@ -1,13 +0,0 @@
|
||||
\pagebreak
|
||||
\section{The \code{master} Construct}
|
||||
\label{sec:master}
|
||||
|
||||
The following example demonstrates the master construct . In the example, the master
|
||||
keeps track of how many iterations have been executed and prints out a progress
|
||||
report. The other threads skip the master region without waiting.
|
||||
|
||||
\cexample{master}{1}
|
||||
|
||||
\fexample{master}{1}
|
||||
|
||||
|
@ -1,33 +0,0 @@
|
||||
\pagebreak
|
||||
\section{The \code{parallel master taskloop} Construct}
|
||||
\label{sec:parallel_master_taskloop}
|
||||
|
||||
In the OpenMP 5.0 Specification several combined constructs containing
|
||||
the \code{taskloop} construct were added.
|
||||
|
||||
Just as the \code{for} and \code{do} constructs have been combined
|
||||
with the \code{parallel} construct for convenience, so too, the combined
|
||||
\code{parallel}~\code{master}~\code{taskloop} and
|
||||
\code{parallel}~\code{master}~\code{taskloop}~\code{simd}
|
||||
constructs have been created for convenience.
|
||||
|
||||
In the following example the first \code{taskloop} construct is enclosed
|
||||
by the usual \code{parallel} and \code{master} constructs to form
|
||||
a team of threads, and a single task generator (master thread) for
|
||||
the \code{taskloop} construct.
|
||||
|
||||
The same OpenMP operations for the first taskloop are accomplished by the second
|
||||
taskloop with the \code{parallel}~\code{master}~\code{taskloop}
|
||||
combined construct.
|
||||
The third taskloop uses the combined \code{parallel}~\code{master}~\code{taskloop}~\code{simd}
|
||||
construct to accomplish the same behavior as closely nested \code{parallel master},
|
||||
and \code{taskloop simd} constructs.
|
||||
|
||||
As with any combined construct the clauses of the components may be used
|
||||
with appropriate restrictions. The combination of the \code{parallel}~\code{master} construct
|
||||
with the \code{taskloop} or \code{taskloop}~\code{simd} construct produces no additional
|
||||
restrictions.
|
||||
|
||||
\cexample[5.0]{parallel_master_taskloop}{1}
|
||||
|
||||
\ffreeexample[5.0]{parallel_master_taskloop}{1}
|
@ -1,54 +0,0 @@
|
||||
\pagebreak
|
||||
\section{Structure mapping}
|
||||
\label{sec:structure_mapping}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
In the example below, only structure elements \plc{S.a}, \plc{S.b} and \plc{S.p}
|
||||
of the \plc{S} structure appear in \code{map} clauses of a \code{target} construct.
|
||||
Only these components have corresponding variables and storage on the device.
|
||||
Hence, the large arrays, \plc{S.buffera} and \plc{S.bufferb}, and the \plc{S.x} component have no storage
|
||||
on the device and cannot be accessed.
|
||||
|
||||
Also, since the pointer member \plc{S.p} is used in an array section of a
|
||||
\code{map} clause, the array storage of the array section on the device,
|
||||
\plc{S.p[:N]}, is \emph{attached} to the pointer member \plc{S.p} on the device.
|
||||
Explicitly mapping the pointer member \plc{S.p} is optional in this case.
|
||||
|
||||
Note: The buffer arrays and the \plc{x} variable have been grouped together, so that
|
||||
the components that will reside on the device are all together (without gaps).
|
||||
This allows the runtime to optimize the transfer and the storage footprint on the device.
|
||||
|
||||
\cexample[5.0]{target_struct_map}{1}
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
The following example is a slight modification of the above example for
|
||||
a C++ class. In the member function \plc{SAXPY::driver}
|
||||
the array section \plc{p[:N]} is \emph{attached} to the pointer member \plc{p}
|
||||
on the device.
|
||||
|
||||
\cppexample[5.0]{target_struct_map}{2}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%In this example a pointer, \plc{p}, is mapped in a
|
||||
%\code{target}~\code{data} construct (\code{map(p)}) and remains
|
||||
%persistent throughout the \code{target}~\code{data} region. The address stored
|
||||
%on the host is not assigned to the device pointer variable, and
|
||||
%the device value is not copied back to the host at the end of the
|
||||
%region (for a pointer, it is as though \code{map(alloc:p}) is effectively
|
||||
%used). The array section, \plc{p[:N]}, is mapped on both \code{target}
|
||||
%constructs, and the pointer \plc{p} on the device is attached at the
|
||||
%beginning and detached at the end of the regions to the newly created
|
||||
%array section on the device.
|
||||
%
|
||||
%Also, in the following example the global variable, \plc{a}, becomes
|
||||
%allocated when it is first used on the device in a \code{target} region,
|
||||
%and persists on the device for all target regions. The value on the
|
||||
%device and host may be different, as shown by the print statements.
|
||||
%The values may be made consistent with the \code{update} construct,
|
||||
%as shown in the \plc{declare\_target.3.c} and \plc{declare\_target.3.f90}
|
||||
%examples.
|
||||
%
|
||||
%\cexample{target_struct_map}{2}
|
@ -1,19 +1,17 @@
|
||||
\pagebreak
|
||||
\chapter*{Foreword}
|
||||
\bchapter{Foreword}
|
||||
\label{chap:foreword}
|
||||
\addcontentsline{toc}{chapter}{\protect\numberline{}Foreword}
|
||||
|
||||
The OpenMP Examples document has been updated with new features
|
||||
found in the OpenMP 5.0 Specification. The additional examples and updates
|
||||
found in the OpenMP 5.1 Specification. The additional examples and updates
|
||||
are referenced in the Document Revision History of the Appendix on page~\pageref{chap:history}.
|
||||
|
||||
Text describing an example with a 5.0 feature specifically states
|
||||
that the feature support begins in the OpenMP 5.0 Specification. Also,
|
||||
an \code{\small omp\_5.0} keyword has been added to metadata in the source code.
|
||||
These distinctions are presented to remind readers that a 5.0 compliant
|
||||
Text describing an example with a 5.1 feature specifically states
|
||||
that the feature support begins in the OpenMP 5.1 Specification. Also,
|
||||
an \code{\small omp\_5.1} keyword is included in the metadata of the source code.
|
||||
These distinctions are presented to remind readers that a 5.1 compliant
|
||||
OpenMP implementation is necessary to use these features in codes.
|
||||
|
||||
Examples for most of the 5.0 features are included in this document,
|
||||
Examples for most of the 5.1 features are included in this document,
|
||||
and incremental releases will become available as more feature examples
|
||||
and updates are submitted, and approved by the OpenMP Examples Subcommittee.
|
||||
|
||||
@ -21,3 +19,5 @@ and updates are submitted, and approved by the OpenMP Examples Subcommittee.
|
||||
Examples Subcommittee Co-chairs: \smallskip\linebreak
|
||||
Henry Jin (\textsc{NASA} Ames Research Center) \linebreak
|
||||
Kent Milfeld (\textsc{TACC}, Texas Advanced Computing Center)
|
||||
|
||||
|
||||
|
77
History.tex
77
History.tex
@ -1,6 +1,73 @@
|
||||
\chapter{Document Revision History}
|
||||
\cchapter{Document Revision History}{history}
|
||||
\label{chap:history}
|
||||
|
||||
%=====================================
|
||||
\section{Changes from 5.0.1 to 5.1}
|
||||
\label{sec:history_501_to_51}
|
||||
|
||||
\begin{itemize}
|
||||
\item General changes:
|
||||
\begin{itemize}
|
||||
\item Replaced \code{master} construct example with equivalent \code{masked} construct example (\specref{sec:masked})
|
||||
\item Primary thread is now used to describe thread number 0 in the current team
|
||||
\item \code{primary} thread affinity policy is now used to specify that every
|
||||
thread in the team is assigned to the same place as the primary thread (\specref{subsec:affinity_primary})
|
||||
\item The \scode{omp_lock_hint_*} constants have been renamed \scode{omp_sync_hint_*} (\specref{sec:critical}, \specref{sec:locks})
|
||||
\end{itemize}
|
||||
|
||||
\item Added the following new chapters:
|
||||
\begin{itemize}
|
||||
\item Deprecated Features (on page~\pageref{chap:deprecated_features})
|
||||
\item Directive Syntax (\specref{chap:directive_syntax})
|
||||
\item Loop Transformations (\specref{chap:loop_transformations})
|
||||
\item OMPT Interface (\specref{chap:ompt_interface})
|
||||
\end{itemize}
|
||||
|
||||
\item Added the following examples for the 5.1 features:
|
||||
\begin{itemize}
|
||||
\item OpenMP directives in C++ \plc{attribute} specifiers
|
||||
(\specref{sec:attributes})
|
||||
\item Directive syntax adjustment to allow Fortran \code{BLOCK} ...
|
||||
\code{END}~\code{BLOCK} as a structured block
|
||||
(\specref{sec:fortran_free_format_comments})
|
||||
\item \code{omp\_target\_is\_accessible} API routine
|
||||
(\specref{sec:pointer_mapping})
|
||||
\item Fortran allocatable array mapping in \code{target} regions (\specref{sec:fort_allocatable_array_mapping})
|
||||
\item \code{begin}~\code{declare}~\code{target} (with
|
||||
\code{end}~\code{declare}~\code{target}) directive
|
||||
(\specref{subsec:declare_target_class})
|
||||
\item \code{tile} construct (\specref{sec:tile})
|
||||
\item \code{unroll} construct (\specref{sec:unroll})
|
||||
\item Reduction with the \code{scope} construct
|
||||
(\specref{subsec:reduction_scope})
|
||||
\item \code{metadirective} directive with dynamic \code{condition} selector
|
||||
(\specref{sec:metadirective})
|
||||
\item \code{interop} construct (\specref{sec:interop})
|
||||
\item Environment display with the \scode{omp_display_env} routine
|
||||
(\specref{subsec:display_env})
|
||||
\item \code{error} directive (\specref{subsec:error})
|
||||
\end{itemize}
|
||||
|
||||
\item Included additional examples for the 5.0 features:
|
||||
\begin{itemize}
|
||||
\item \code{collapse} clause for non-rectangular loop nest
|
||||
(\specref{sec:collapse})
|
||||
\item \code{detach} clause for tasks (\specref{sec:task_detachment})
|
||||
\item Pointer attachment for a structure member (\specref{sec:structure_mapping})
|
||||
\item Host and device pointer association with the \scode{omp_target_associate_ptr} routine (\specref{sec:target_associate_ptr})
|
||||
|
||||
\item Sample code on activating the tool interface
|
||||
(\specref{sec:ompt_start})
|
||||
\end{itemize}
|
||||
|
||||
\item Added other examples:
|
||||
\begin{itemize}
|
||||
\item The \scode{omp_get_wtime} routine (\specref{subsec:get_wtime})
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
|
||||
%=====================================
|
||||
\section{Changes from 5.0.0 to 5.0.1}
|
||||
\label{sec:history_50_to_501}
|
||||
|
||||
@ -18,7 +85,7 @@ OpenMP 3.0 and later.
|
||||
\item \code{conditional} modifier for the \code{lastprivate} clause (\specref{sec:lastprivate})
|
||||
\item \code{task} modifier for the \code{reduction} clause (\specref{subsec:task_reduction})
|
||||
\item Reduction on combined target constructs (\specref{subsec:target_reduction})
|
||||
\item Task reduction with target constructs
|
||||
\item Task reduction with \code{target} constructs
|
||||
(\specref{subsec:target_task_reduction})
|
||||
\item \code{scan} directive for returning the \emph{prefix sum} of a reduction (\specref{sec:scan})
|
||||
|
||||
@ -59,12 +126,12 @@ in \specref{sec:mem_model}.
|
||||
\item \code{mutexinoutset} task dependences (\specref{subsec:task_dep_mutexinoutset})
|
||||
\item Multidependence Iterators (in \code{depend} clauses) (\specref{subsec:depend_iterator})
|
||||
\item Combined constructs: \code{parallel}~\code{master}~\code{taskloop} and \code{parallel}~\code{master}~\code{taskloop}~\code{simd}
|
||||
(\specref{sec:parallel_master_taskloop})
|
||||
(\specref{sec:parallel_masked_taskloop})
|
||||
\item Reverse Offload through \plc{ancestor} modifier of \code{device} clause. (\specref{subsec:target_reverse_offload})
|
||||
\item Pointer Mapping - behavior of mapped pointers (\specref{sec:pointer_mapping}) %Example_target_ptr_map*
|
||||
\item Structure Mapping - behavior of mapped structures (\specref{sec:structure_mapping}) %Examples_target_structure_mapping.tex target_struct_map*
|
||||
\item Array Shaping with the \plc{shape-operator} (\specref{sec:array-shaping})
|
||||
\item The \code{declare}~\code{mapper} construct (\specref{sec:declare_mapper})
|
||||
\item The \code{declare}~\code{mapper} directive (\specref{sec:declare_mapper})
|
||||
\item Acquire and Release Semantics Synchronization: Memory ordering
|
||||
clauses \code{acquire}, \code{release}, and \code{acq\_rel} were added
|
||||
to flush and atomic constructs
|
||||
@ -150,7 +217,7 @@ Added the following new examples:
|
||||
\item array sections in device constructs (\specref{sec:array_sections})
|
||||
\item \code{target}~\code{data} construct (\specref{sec:target_data})
|
||||
\item \code{target}~\code{update} construct (\specref{sec:target_update})
|
||||
\item \code{declare}~\code{target} construct (\specref{sec:declare_target})
|
||||
\item \code{declare}~\code{target} directive (\specref{sec:declare_target})
|
||||
\item \code{teams} constructs (\specref{sec:teams})
|
||||
\item asynchronous execution of a \code{target} region using tasks (\specref{subsec:async_target_with_tasks})
|
||||
\item device runtime routines (\specref{sec:device})
|
||||
|
@ -1,5 +1,5 @@
|
||||
% This is the introduction for the OpenMP Examples document.
|
||||
% This is an included file. See the master file (openmp-examples.tex) for more information.
|
||||
% This is an included file. See the main file (openmp-examples.tex) for more information.
|
||||
%
|
||||
% When editing this file:
|
||||
%
|
||||
@ -32,9 +32,9 @@
|
||||
% This is a \plc{var-name}.
|
||||
%
|
||||
|
||||
\chapter*{Introduction}
|
||||
\bchapter{Introduction}
|
||||
\label{chap:introduction}
|
||||
\addcontentsline{toc}{chapter}{\protect\numberline{}Introduction}
|
||||
|
||||
This collection of programming examples supplements the OpenMP API for Shared
|
||||
Memory Parallelization specifications, and is not part of the formal specifications. It
|
||||
assumes familiarity with the OpenMP specifications, and shares the typographical
|
||||
@ -59,7 +59,7 @@ directory at
|
||||
\href{https://github.com/OpenMP/Examples}{https://github.com/OpenMP/Examples}.
|
||||
The codes for this OpenMP \VER{} Examples document have the tag \plc{v\VER}.
|
||||
|
||||
%\href{https://github.com/OpenMP/Examples/tree/master/sources}{https://github.com/OpenMP/Examples/sources}.
|
||||
%\href{https://github.com/OpenMP/Examples/tree/main/sources}{https://github.com/OpenMP/Examples/sources}.
|
||||
|
||||
Complete information about the OpenMP API and a list of the compilers that support
|
||||
the OpenMP API can be found at the OpenMP.org web site
|
||||
|
27
Makefile
27
Makefile
@ -1,7 +1,7 @@
|
||||
# Makefile for the OpenMP Examples document in LaTeX format.
|
||||
# For more information, see the master document, openmp-examples.tex.
|
||||
# For more information, see the main document, openmp-examples.tex.
|
||||
|
||||
version=5.0.1
|
||||
version=5.1
|
||||
default: openmp-examples.pdf
|
||||
diff: openmp-diff-abridged.pdf
|
||||
|
||||
@ -9,13 +9,16 @@ diff: openmp-diff-abridged.pdf
|
||||
CHAPTERS=Title_Page.tex \
|
||||
Foreword_Chapt.tex \
|
||||
Introduction_Chapt.tex \
|
||||
Examples_*.tex \
|
||||
History.tex
|
||||
Examples_Chapt.tex \
|
||||
Deprecated_Features_Chapt.tex \
|
||||
Chap_*.tex \
|
||||
History.tex \
|
||||
*/*.tex
|
||||
|
||||
SOURCES=sources/*.c \
|
||||
sources/*.cpp \
|
||||
sources/*.f90 \
|
||||
sources/*.f
|
||||
SOURCES=*/sources/*.c \
|
||||
*/sources/*.cpp \
|
||||
*/sources/*.f90 \
|
||||
*/sources/*.f
|
||||
|
||||
INTERMEDIATE_FILES=openmp-examples.pdf \
|
||||
openmp-examples.toc \
|
||||
@ -49,11 +52,11 @@ endif
|
||||
ifdef DIFF_FROM
|
||||
VC_DIFF_FROM := -r ${DIFF_FROM}
|
||||
else
|
||||
VC_DIFF_FROM := -r master
|
||||
VC_DIFF_FROM := -r work_5.1
|
||||
endif
|
||||
|
||||
DIFF_TO:=HEAD
|
||||
DIFF_FROM:=master
|
||||
DIFF_FROM:=work_5.1
|
||||
DIFF_TYPE:=UNDERLINE
|
||||
|
||||
COMMON_DIFF_OPTS:=--math-markup=whole \
|
||||
@ -66,6 +69,10 @@ VC_DIFF_MINIMAL_OPTS:= --only-changes --force
|
||||
|
||||
%.tmpdir: $(wildcard *.sty) $(wildcard *.png) $(wildcard *.aux) openmp-examples.pdf
|
||||
mkdir -p $@/sources
|
||||
for i in affinity devices loop_transformations parallel_execution SIMD tasking \
|
||||
data_environment memory_model program_control synchronization \
|
||||
directives ompt_interface; do \
|
||||
mkdir -p $@/$$i; ln -sf "$$PWD"/$$i/sources $@/$$i/sources; done
|
||||
mkdir -p $@/figs
|
||||
cp -f $^ "$@/"
|
||||
cp -f sources/* "$@/sources"
|
||||
|
76
README
76
README
@ -1,76 +0,0 @@
|
||||
This is the OpenMP Examples document in LaTeX format.
|
||||
Please see the master file, openmp-examples.tex, for more information.
|
||||
|
||||
For a brief revision history, please see Changes.log.
|
||||
|
||||
For copyright information, please see omp_copyright.txt.
|
||||
|
||||
|
||||
1) Process for adding an example
|
||||
|
||||
- Prepare source code and text description
|
||||
- Give a high level description in a trac ticket
|
||||
- Determine a name (ename) for the example
|
||||
- Propose a new name if creating a new chapter
|
||||
- Use the existing name if adding to an existing chapter
|
||||
- Number the example within the chapter (seq-no)
|
||||
- Create files for the source code with proper tags in
|
||||
sources/Example_<ename>.<seq-no>c.c
|
||||
sources/Example_<ename>.<seq-no>f.f
|
||||
- Create or update the description text in the chapter file
|
||||
Examples_<ename>,tex
|
||||
- If needed, add the new chapter file name in
|
||||
Makefile
|
||||
openmp-examples.tex
|
||||
- Commit the changes in git and push to the GitHub repo
|
||||
- Discuss and vote in committee
|
||||
|
||||
2) Tags (meta data) for example sources
|
||||
|
||||
@@name: <ename>.<seq-no>[c|cpp|f|f90]
|
||||
@@type: C|C++|F-fixed|F-free
|
||||
@@compilable: yes|no|maybe
|
||||
@@linkable: yes|no|maybe
|
||||
@@expect: success|failure|nothing|rt-error
|
||||
@@version: omp_<verno>
|
||||
|
||||
"name" is the name of an example
|
||||
"type" is the source code type, which can be translated into or from
|
||||
proper file extension (c,cpp,f,f90)
|
||||
"compilable" indicates whether the source code is compilable
|
||||
"linkable" indicates whether the source code is linkable
|
||||
"expect" indicates some expected result for testing purpose
|
||||
"success|failure|nothing" applies to the result of code compilation
|
||||
"rt-error" is for a case where compilation may be successful,
|
||||
but the code contains potential runtime issues (such as race condition).
|
||||
Alternative would be to just use "conforming" or "non-conforming".
|
||||
"version" indicates features for a specific OpenMP version, such as "omp_5.0"
|
||||
|
||||
3) LaTeX macros for examples
|
||||
|
||||
- Source code with language h-rules
|
||||
\cexample[<verno>]{<ename>}{<seq-no>} % for C/C++ examples
|
||||
\cppexample[<verno>]{<ename>}{<seq-no>} % for C++ examples
|
||||
\fexample[<verno>]{<ename>}{<seq-no>} % for fixed-form Fortran examples
|
||||
\ffreeexample[<verno>]{<ename>}{<seq-no>} % for free-form Fortran examples
|
||||
|
||||
- Source code without language h-rules
|
||||
\cnexample[<verno>]{<ename>}{<seq-no>}
|
||||
\cppnexample[<verno>]{<ename>}{<seq-no>}
|
||||
\fnexample[<verno>]{<ename>}{<seq-no>}
|
||||
\ffreenexample[<verno>]{<ename>}{<seq-no>}
|
||||
|
||||
Optional <verno> can be supplied in a macro to include a specific OpenMP
|
||||
version in the example header. This option also suggests one additional
|
||||
tag (@@version) line is included in the corresponding source code.
|
||||
If this is not the case (i.e., no @@version tag line), one needs to
|
||||
prefix <verno> with an underscore '_' symbol in the macro.
|
||||
|
||||
- Language h-rules
|
||||
\cspecificstart, \cspecificend
|
||||
\cppspecificstart, \cppspecificend
|
||||
\ccppspecificstart, \ccppspecificend
|
||||
\fortranspecificstart, \fortranspecificend
|
||||
|
||||
- See openmp.sty for more information
|
||||
|
10
README.md
Normal file
10
README.md
Normal file
@ -0,0 +1,10 @@
|
||||
# OpenMP Examples Document
|
||||
|
||||
This is the OpenMP Examples document in LaTeX format.
|
||||
|
||||
Please see [Contributions.md](Contributions.md) for how to contribute new examples.
|
||||
|
||||
For a brief revision history, please see [Changes.log](Changes.log).
|
||||
|
||||
For copyright information, please see [omp_copyright.txt](omp_copyright.txt).
|
||||
|
@ -1,5 +1,5 @@
|
||||
%\pagebreak
|
||||
\section{\code{simd} and \code{declare} \code{simd} Constructs}
|
||||
\section{\code{simd} and \code{declare} \code{simd} Directives}
|
||||
\label{sec:SIMD}
|
||||
|
||||
The following example illustrates the basic use of the \code{simd} construct
|
||||
@ -8,29 +8,27 @@ to assure the compiler that the loop can be vectorized.
|
||||
\cexample[4.0]{SIMD}{1}
|
||||
|
||||
\ffreeexample[4.0]{SIMD}{1}
|
||||
|
||||
\clearpage
|
||||
|
||||
|
||||
When a function can be inlined within a loop the compiler has an opportunity to
|
||||
vectorize the loop. By guaranteeing SIMD behavior of a function's operations,
|
||||
characterizing the arguments of the function and privatizing temporary
|
||||
variables of the loop, the compiler can often create faster, vector code for
|
||||
the loop. In the examples below the \code{declare} \code{simd} construct is
|
||||
the loop. In the examples below the \code{declare} \code{simd} directive is
|
||||
used on the \plc{add1} and \plc{add2} functions to enable creation of their
|
||||
corresponding SIMD function versions for execution within the associated SIMD
|
||||
loop. The functions characterize two different approaches of accessing data
|
||||
within the function: by a single variable and as an element in a data array,
|
||||
respectively. The \plc{add3} C function uses dereferencing.
|
||||
|
||||
The \code{declare} \code{simd} constructs also illustrate the use of
|
||||
The \code{declare} \code{simd} directives also illustrate the use of
|
||||
\code{uniform} and \code{linear} clauses. The \code{uniform(fact)} clause
|
||||
indicates that the variable \plc{fact} is invariant across the SIMD lanes. In
|
||||
the \plc{add2} function \plc{a} and \plc{b} are included in the \code{uniform}
|
||||
list because the C pointer and the Fortran array references are constant. The
|
||||
\plc{i} index used in the \plc{add2} function is included in a \code{linear}
|
||||
clause with a constant-linear-step of 1, to guarantee a unity increment of the
|
||||
associated loop. In the \code{declare} \code{simd} construct for the \plc{add3}
|
||||
associated loop. In the \code{declare} \code{simd} directive for the \plc{add3}
|
||||
C function the \code{linear(a,b:1)} clause instructs the compiler to generate
|
||||
unit-stride loads across the SIMD lanes; otherwise, costly \emph{gather}
|
||||
instructions would be generated for the unknown sequence of access of the
|
||||
@ -44,7 +42,7 @@ variable.
|
||||
|
||||
\ffreeexample[4.0]{SIMD}{2}
|
||||
|
||||
\pagebreak
|
||||
%\pagebreak
|
||||
A thread that encounters a SIMD construct executes a vectorized code of the
|
||||
iterations. Similar to the concerns of a worksharing loop a loop vectorized
|
||||
with a SIMD construct must assure that temporary and reduction variables are
|
||||
@ -57,7 +55,7 @@ construct.
|
||||
\ffreeexample[4.0]{SIMD}{3}
|
||||
|
||||
|
||||
\pagebreak
|
||||
%\pagebreak
|
||||
A \code{safelen(N)} clause in a \code{simd} construct assures the compiler that
|
||||
there are no loop-carried dependencies for vectors of size \plc{N} or below. If
|
||||
the \code{safelen} clause is not specified, then the default safelen value is
|
||||
@ -72,7 +70,7 @@ than 16, the behavior is undefined.
|
||||
|
||||
\ffreeexample[4.0]{SIMD}{4}
|
||||
|
||||
\pagebreak
|
||||
%\pagebreak
|
||||
The following SIMD construct instructs the compiler to collapse the \plc{i} and
|
||||
\plc{j} loops into a single SIMD loop in which SIMD chunks are executed by
|
||||
threads of the team. Within the workshared loop chunks of a thread, the SIMD
|
||||
@ -88,7 +86,7 @@ chunks are executed in the lanes of the vector units.
|
||||
\label{sec:SIMD_branch}
|
||||
|
||||
The following examples illustrate the use of the \code{declare} \code{simd}
|
||||
construct with the \code{inbranch} and \code{notinbranch} clauses. The
|
||||
directive with the \code{inbranch} and \code{notinbranch} clauses. The
|
||||
\code{notinbranch} clause informs the compiler that the function \plc{foo} is
|
||||
never called conditionally in the SIMD loop of the function \plc{myaddint}. On
|
||||
the other hand, the \code{inbranch} clause for the function \plc{goo} indicates that
|
@ -27,7 +27,7 @@ Source codes for OpenMP \PVER{} Examples can be downloaded from
|
||||
\href{https://github.com/OpenMP/Examples/tree/v\VER}{github}.\\
|
||||
|
||||
\begin{adjustwidth}{0pt}{1em}\setlength{\parskip}{0.25\baselineskip}%
|
||||
Copyright © 1997-2020 OpenMP Architecture Review Board.\\
|
||||
Copyright \copyright{} 1997-2021 OpenMP Architecture Review Board.\\
|
||||
Permission to copy without fee all or part of this material is granted,
|
||||
provided the OpenMP Architecture Review Board copyright notice and
|
||||
the title of this document appear. Notice is given that copying is by
|
||||
@ -37,14 +37,11 @@ permission of OpenMP Architecture Review Board.\end{adjustwidth}
|
||||
|
||||
% Blank page
|
||||
|
||||
\clearpage
|
||||
\thispagestyle{empty}
|
||||
\phantom{a}
|
||||
\emph{This page intentionally left blank}
|
||||
\cleardoublepage
|
||||
|
||||
%For final version, uncomment the line above, comment out the lines below
|
||||
%This working version enacted the following tickets: 287, 519, 550, 593,
|
||||
%674, 688, 689,
|
||||
%and a few other editorial changes.
|
||||
\vfill
|
||||
%\vfill
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
\pagebreak
|
||||
\section{The \code{proc\_bind} Clause}
|
||||
\section{\code{proc\_bind} Clause}
|
||||
\label{sec:affinity}
|
||||
|
||||
The following examples demonstrate how to use the \code{proc\_bind} clause to
|
||||
@ -38,8 +38,8 @@ above. Note that the threads are bound to the first place of each subpartition.
|
||||
|
||||
\fexample[4.0]{affinity}{1}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially started on p0, the following placement of threads will
|
||||
It is unspecified on which place the primary thread is initially started. If the
|
||||
primary thread is initially started on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -53,7 +53,7 @@ be applied in the parallel region:
|
||||
\end{compactitem}
|
||||
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
If the primary thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -71,7 +71,7 @@ the number of threads is greater than the number of places in the parent's place
|
||||
partition.
|
||||
|
||||
Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the
|
||||
parent's place partition. The first \plc{T/P} threads of the team (including the master
|
||||
parent's place partition. The first \plc{T/P} threads of the team (including the primary
|
||||
thread) execute on the parent's place. The next \plc{T/P} threads execute on the next
|
||||
place in the place partition, and so on, with wrap around.
|
||||
|
||||
@ -79,8 +79,8 @@ place in the place partition, and so on, with wrap around.
|
||||
|
||||
\ffreeexample[4.0]{affinity}{2}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially started on p0, the following placement of threads will
|
||||
It is unspecified on which place the primary thread is initially started. If the
|
||||
primary thread is initially started on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -101,7 +101,7 @@ be applied in the parallel region:
|
||||
\item threads 14,15 execute on p7 with the place partition p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
If the primary thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -134,8 +134,8 @@ The place partition is not changed by the \code{close} policy.
|
||||
|
||||
\fexample[4.0]{affinity}{3}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially started on p0, the following placement of threads will
|
||||
It is unspecified on which place the primary thread is initially started. If the
|
||||
primary thread is initially started on p0, the following placement of threads will
|
||||
be applied in the \code{parallel} region:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -148,7 +148,7 @@ be applied in the \code{parallel} region:
|
||||
\item thread 3 executes on p3 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
If the primary thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -166,7 +166,7 @@ the number of threads is greater than the number of places in the parent's place
|
||||
partition.
|
||||
|
||||
Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the
|
||||
parent's place partition. The first \plc{T/P} threads of the team (including the master
|
||||
parent's place partition. The first \plc{T/P} threads of the team (including the primary
|
||||
thread) execute on the parent's place. The next \plc{T/P} threads execute on the next
|
||||
place in the place partition, and so on, with wrap around. The place partition
|
||||
is not changed by the \code{close} policy.
|
||||
@ -175,8 +175,8 @@ is not changed by the \code{close} policy.
|
||||
|
||||
\ffreeexample[4.0]{affinity}{4}
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially running on p0, the following placement of threads will
|
||||
It is unspecified on which place the primary thread is initially started. If the
|
||||
primary thread is initially running on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -197,7 +197,7 @@ be applied in the parallel region:
|
||||
\item threads 14,15 execute on p7 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
If the primary thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
||||
@ -218,26 +218,27 @@ and distribution of the place partition would be as follows:
|
||||
\item threads 14,15 execute on p1 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
\subsection{Master Affinity Policy}
|
||||
\label{subsec:affinity_master}
|
||||
\subsection{Primary Affinity Policy}
|
||||
\label{subsec:affinity_primary}
|
||||
|
||||
The following example shows the result of the \code{master} affinity policy on
|
||||
The following example shows the result of the \code{primary} affinity policy on
|
||||
the partition list for the machine architecture depicted above. The place partition
|
||||
is not changed by the master policy.
|
||||
is not changed by the primary policy.
|
||||
|
||||
\cexample[4.0]{affinity}{5}
|
||||
|
||||
\fexample[4.0]{affinity}{5}
|
||||
\fexample[4.0]{affinity}{5}[1]
|
||||
\clearpage
|
||||
|
||||
It is unspecified on which place the master thread is initially started. If the
|
||||
master thread is initially running on p0, the following placement of threads will
|
||||
It is unspecified on which place the primary thread is initially started. If the
|
||||
primary thread is initially running on p0, the following placement of threads will
|
||||
be applied in the parallel region:
|
||||
|
||||
\begin{compactitem}
|
||||
\item threads 0-3 execute on p0 with the place partition p0-p7
|
||||
\end{compactitem}
|
||||
|
||||
If the master thread would initially be started on p2, the placement of threads
|
||||
If the primary thread would initially be started on p2, the placement of threads
|
||||
and distribution of the place partition would be as follows:
|
||||
|
||||
\begin{compactitem}
|
@ -12,9 +12,9 @@ at selected locations within code.
|
||||
For the first example the environment variable \code{OMP\_DISPLAY\_AFFINITY} has been
|
||||
set to \code{TRUE}, and execution occurs on an 8-core system with \code{OMP\_NUM\_THREADS} set to 8.
|
||||
|
||||
The affinity for the master thread is reported through a call to the API
|
||||
The affinity for the primary thread is reported through a call to the API
|
||||
\code{omp\_display\_affinity()} routine. For default affinity settings
|
||||
the report shows that the master thread can execute on any of the cores.
|
||||
the report shows that the primary thread can execute on any of the cores.
|
||||
In the following parallel region the affinity for each of the team threads is reported
|
||||
automatically since the \code{OMP\_DISPLAY\_AFFINITY} environment variable has been set
|
||||
to \code{TRUE}.
|
@ -2,7 +2,7 @@
|
||||
* @@name: affinity.1c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
* @@version: omp_4.0
|
||||
*/
|
@ -1,7 +1,7 @@
|
||||
! @@name: affinity.1f
|
||||
! @@type: F-fixed
|
||||
! @@compilable: yes
|
||||
! @@linkable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
! @@version: omp_4.0
|
||||
PROGRAM EXAMPLE
|
@ -2,7 +2,7 @@
|
||||
* @@name: affinity.3c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
* @@version: omp_4.0
|
||||
*/
|
@ -1,7 +1,7 @@
|
||||
! @@name: affinity.3f
|
||||
! @@type: F-fixed
|
||||
! @@compilable: yes
|
||||
! @@linkable: yes
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
! @@version: omp_4.0
|
||||
PROGRAM EXAMPLE
|
21
affinity/sources/affinity.5.c
Normal file
21
affinity/sources/affinity.5.c
Normal file
@ -0,0 +1,21 @@
|
||||
/*
|
||||
* @@name: affinity.5c
|
||||
* @@type: C
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
* @@version: omp_5.1
|
||||
*/
|
||||
#if _OPENMP < 202011
|
||||
#define primary master
|
||||
#endif
|
||||
|
||||
void work();
|
||||
int main()
|
||||
{
|
||||
#pragma omp parallel proc_bind(primary) num_threads(4)
|
||||
{
|
||||
work();
|
||||
}
|
||||
return 0;
|
||||
}
|
16
affinity/sources/affinity.5.f
Normal file
16
affinity/sources/affinity.5.f
Normal file
@ -0,0 +1,16 @@
|
||||
! @@name: affinity.5f
|
||||
! @@type: F-fixed
|
||||
! @@compilable: yes
|
||||
! @@requires: preprocessing
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
! @@version: omp_5.1
|
||||
#if _OPENMP < 202011
|
||||
#define primary master
|
||||
#endif
|
||||
|
||||
PROGRAM EXAMPLE
|
||||
!$OMP PARALLEL PROC_BIND(primary) NUM_THREADS(4)
|
||||
CALL WORK()
|
||||
!$OMP END PARALLEL
|
||||
END PROGRAM EXAMPLE
|
@ -6,7 +6,6 @@
|
||||
* @@expect: success
|
||||
* @@version: omp_5.0
|
||||
*/
|
||||
|
||||
double * alloc_init_B(double *A, int N);
|
||||
void compute_on_B(double *B, int N);
|
||||
|
@ -4,7 +4,6 @@
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
! @@version: omp_5.0
|
||||
|
||||
subroutine task_affinity(A, N)
|
||||
|
||||
external alloc_init_B
|
@ -11,7 +11,7 @@
|
||||
|
||||
int main(void){ //MAX threads = 8, single socket system
|
||||
|
||||
omp_display_affinity(NULL); //API call-- Displays Affinity of Master Thread
|
||||
omp_display_affinity(NULL); //API call-- Displays Affinity of Primary Thread
|
||||
|
||||
// API CALL OUTPUT (default format):
|
||||
//team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7
|
@ -4,17 +4,16 @@
|
||||
! @@linkable: yes
|
||||
! @@expect: success
|
||||
! @@version: omp_5.0
|
||||
|
||||
program affinity_display ! MAX threads = 8, single socket system
|
||||
|
||||
use omp_lib
|
||||
implicit none
|
||||
character(len=0) :: null
|
||||
|
||||
call omp_display_affinity(null) !API call- Displays Affinity of Master Thread
|
||||
call omp_display_affinity(null) !API call- Displays Affinity of Primary Thrd
|
||||
|
||||
! API CALL OUTPUT (default format):
|
||||
! team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7
|
||||
!team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7
|
||||
|
||||
|
||||
! OMP_DISPLAY_AFFINITY=TRUE, OMP_NUM_THREADS=8
|
@ -4,7 +4,6 @@
|
||||
! @@linkable: yes
|
||||
! @@expect: success
|
||||
! @@version: omp_5.0
|
||||
|
||||
program affinity_display
|
||||
|
||||
use omp_lib
|
@ -4,7 +4,6 @@
|
||||
! @@linkable: yes
|
||||
! @@expect: success
|
||||
! @@version: omp_5.0
|
||||
|
||||
program affinity_display
|
||||
use omp_lib
|
||||
implicit none
|
@ -4,7 +4,6 @@
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
! @@version: omp_4.5
|
||||
|
||||
subroutine socket_init(socket_num)
|
||||
use omp_lib
|
||||
integer :: socket_num, n_procs
|
@ -27,6 +27,7 @@ The association between \plc{u} and the original \plc{v} is retained (see the Da
|
||||
Attribute Rules section in the OpenMP 4.0 API Specifications). Inside the \code{parallel}
|
||||
region, \plc{v} has the value of -1 and \plc{u} has the value of the original \plc{v}.
|
||||
|
||||
\pagebreak
|
||||
\ffreenexample[4.0]{associate}{3}
|
||||
\fortranspecificend
|
||||
|
@ -1,9 +1,9 @@
|
||||
\pagebreak
|
||||
\section{The \code{copyin} Clause}
|
||||
\section{\code{copyin} Clause}
|
||||
\label{sec:copyin}
|
||||
|
||||
The \code{copyin} clause is used to initialize threadprivate data upon entry
|
||||
to a \code{parallel} region. The value of the threadprivate variable in the master
|
||||
to a \code{parallel} region. The value of the threadprivate variable in the primary
|
||||
thread is copied to the threadprivate variable of each other team member.
|
||||
|
||||
\cexample{copyin}{1}
|
@ -1,5 +1,5 @@
|
||||
\pagebreak
|
||||
\section{The \code{copyprivate} Clause}
|
||||
\section{\code{copyprivate} Clause}
|
||||
\label{sec:copyprivate}
|
||||
|
||||
The \code{copyprivate} clause can be used to broadcast values acquired by a single
|
||||
@ -20,14 +20,14 @@ any of the threads have left the barrier at the end of the construct.
|
||||
|
||||
\fexample{copyprivate}{1}
|
||||
|
||||
In this example, assume that the input must be performed by the master thread.
|
||||
Since the \code{master} construct does not support the \code{copyprivate} clause,
|
||||
In this example, assume that the input must be performed by the primary thread.
|
||||
Since the \code{masked} construct does not support the \code{copyprivate} clause,
|
||||
it cannot broadcast the input value that is read. However, \code{copyprivate}
|
||||
is used to broadcast an address where the input value is stored.
|
||||
is used to broadcast an address where the input value is stored.
|
||||
|
||||
\cexample{copyprivate}{2}
|
||||
\cexample[5.1]{copyprivate}{2}
|
||||
|
||||
\fexample{copyprivate}{2}
|
||||
\fexample[5.1]{copyprivate}{2}[1]
|
||||
|
||||
Suppose that the number of lock variables required within a \code{parallel} region
|
||||
cannot easily be determined prior to entering it. The \code{copyprivate} clause
|
@ -1,5 +1,5 @@
|
||||
\pagebreak
|
||||
\section{The \code{default(none)} Clause}
|
||||
\section{\code{default(none)} Clause}
|
||||
\label{sec:default_none}
|
||||
|
||||
The following example distinguishes the variables that are affected by the \code{default(none)}
|
@ -9,6 +9,7 @@ clause rules with regard to storage association.
|
||||
\fnexample{fort_sa_private}{1}
|
||||
|
||||
\fnexample{fort_sa_private}{2}
|
||||
\clearpage
|
||||
|
||||
\fnexample{fort_sa_private}{3}
|
||||
% blue line floater at top of this page for "Fortran, cont."
|
||||
@ -18,6 +19,6 @@ clause rules with regard to storage association.
|
||||
|
||||
\fnexample{fort_sa_private}{4}
|
||||
|
||||
\fnexample{fort_sa_private}{5}
|
||||
\fnexample[5.1]{fort_sa_private}{5}
|
||||
\fortranspecificend
|
||||
|
@ -19,6 +19,7 @@ The following example is also conforming:
|
||||
%\begin{figure}[t!]
|
||||
%\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
|
||||
%\end{figure}
|
||||
\clearpage
|
||||
|
||||
The following example is conforming:
|
||||
|
@ -1,5 +1,5 @@
|
||||
\pagebreak
|
||||
\section{The \code{lastprivate} Clause}
|
||||
\section{\code{lastprivate} Clause}
|
||||
\label{sec:lastprivate}
|
||||
|
||||
Correct execution sometimes depends on the value that the last iteration of a loop
|
@ -1,5 +1,5 @@
|
||||
\pagebreak
|
||||
\section{The \code{private} Clause}
|
||||
\section{\code{private} Clause}
|
||||
\label{sec:private}
|
||||
|
||||
In the following example, the values of original list items \plc{i} and \plc{j}
|
@ -5,7 +5,7 @@
|
||||
|
||||
This section covers ways to perform reductions in parallel, task, taskloop, and SIMD regions.
|
||||
|
||||
\subsection{The \code{reduction} Clause}
|
||||
\subsection{\code{reduction} Clause}
|
||||
\label{subsec:reduction}
|
||||
|
||||
The following example demonstrates the \code{reduction} clause; note that some
|
||||
@ -49,7 +49,7 @@ to \code{MIN}.
|
||||
\ffreenexample{reduction}{5}
|
||||
\fortranspecificend
|
||||
|
||||
\pagebreak
|
||||
%\pagebreak
|
||||
The following example is non-conforming because the initialization (\code{a =
|
||||
0}) of the original list item \code{a} is not synchronized with the update of
|
||||
\code{a} as a result of the reduction computation in the \code{for} loop. Therefore,
|
||||
@ -62,9 +62,9 @@ clause. This can be achieved by adding an explicit barrier after the assignment
|
||||
directive (which has an implied barrier), or by initializing \code{a} before
|
||||
the start of the \code{parallel} region.
|
||||
|
||||
\cexample{reduction}{6}
|
||||
\cexample[5.1]{reduction}{6}
|
||||
|
||||
\fexample{reduction}{6}
|
||||
\fexample[5.1]{reduction}{6}[1]
|
||||
|
||||
The following example demonstrates the reduction of array \plc{a}. In C/C++ this is illustrated by the explicit use of an array section \plc{a[0:N]} in the \code{reduction} clause. The corresponding Fortran example uses array syntax supported in the base language. As of the OpenMP 4.5 specification the explicit use of array section in the \code{reduction} clause in Fortran is not permitted. But this oversight has been fixed in the OpenMP 5.0 specification.
|
||||
|
||||
@ -154,7 +154,7 @@ second \code{target} construct.
|
||||
\cexample[5.0]{target_reduction}{1}
|
||||
|
||||
\ffreeexample[5.0]{target_reduction}{1}
|
||||
\clearpage
|
||||
%\clearpage
|
||||
|
||||
In the next example, the variables \code{sum1} and \code{sum2} remain on the
|
||||
device for the duration of the \code{target}~\code{data} region so that it is
|
||||
@ -184,9 +184,9 @@ task reduction will be combined (in some order) into the original variable
|
||||
listed in the \code{task\_reduction} clause before exiting the \code{taskgroup}
|
||||
region.
|
||||
|
||||
\cexample[5.0]{target_task_reduction}{1}
|
||||
\cexample[5.1]{target_task_reduction}{1}
|
||||
|
||||
\ffreeexample[5.0]{target_task_reduction}{1}
|
||||
\ffreeexample[5.1]{target_task_reduction}{1}[1]
|
||||
|
||||
In the next pair of examples, the task reduction is defined by a
|
||||
\code{reduction} clause with the \code{task} modifier, rather than a
|
||||
@ -201,13 +201,13 @@ into the original reduction variable, \code{sum}.
|
||||
Next, the \code{task} modifier is again used to define a task reduction over
|
||||
participating tasks. This time, the participating tasks are a target task
|
||||
resulting from a \code{target} construct with the \code{in\_reduction} clause,
|
||||
and the implicit task (executing on the master thread) that calls
|
||||
and the implicit task (executing on the primary thread) that calls
|
||||
\code{host\_compute}. As before, the partial results from these participating
|
||||
tasks are combined in some order into the original reduction variable.
|
||||
|
||||
\cexample[5.0]{target_task_reduction}{2b}
|
||||
\cexample[5.1]{target_task_reduction}{2b}
|
||||
|
||||
\ffreeexample[5.0]{target_task_reduction}{2b}
|
||||
\ffreeexample[5.1]{target_task_reduction}{2b}[1]
|
||||
|
||||
|
||||
\subsection{Taskloop Reduction}
|
||||
@ -266,7 +266,7 @@ by the taskloop will participate on it.
|
||||
|
||||
\cexample[5.0]{taskloop_reduction}{2}
|
||||
\ffreeexample[5.0]{taskloop_reduction}{2}
|
||||
\clearpage
|
||||
%\clearpage
|
||||
|
||||
In the OpenMP 5.0 Specification, \code{reduction} clauses for the
|
||||
\code{taskloop}~\code{simd} construct were also added.
|
||||
@ -339,8 +339,21 @@ At the end of the parallel region \plc{asum} contains the combined result of all
|
||||
%At the end of the parallel region \plc{asum} contains the combined result of all reductions.
|
||||
|
||||
|
||||
\cexample[5.0]{taskloop_simd_reduction}{1}
|
||||
\cexample[5.1]{taskloop_simd_reduction}{1}
|
||||
|
||||
\ffreeexample[5.0]{taskloop_simd_reduction}{1}
|
||||
\ffreeexample[5.1]{taskloop_simd_reduction}{1}[1]
|
||||
|
||||
|
||||
\subsection{Reduction with the \code{scope} Construct}
|
||||
\label{subsec:reduction_scope}
|
||||
|
||||
The following example illustrates the use of the \code{scope} construct
|
||||
to perform a reduction in a \code{parallel} region. The case is useful for
|
||||
producing a reduction and accessing reduction variables inside a \code{parallel} region
|
||||
without using a worksharing-loop construct.
|
||||
|
||||
\cppexample[5.1]{scope_reduction}{1}
|
||||
\clearpage
|
||||
|
||||
\ffreeexample[5.1]{scope_reduction}{1}
|
||||
|
@ -1,5 +1,5 @@
|
||||
\pagebreak
|
||||
\section{The \code{scan} Directive}
|
||||
\section{\code{scan} Directive}
|
||||
\label{sec:scan}
|
||||
|
||||
The following examples illustrate how to parallelize a loop that saves
|
@ -4,7 +4,12 @@
|
||||
* @@compilable: yes
|
||||
* @@linkable: no
|
||||
* @@expect: success
|
||||
* @@version: omp_5.1
|
||||
*/
|
||||
#if _OPENMP < 202011
|
||||
#define masked master
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -18,7 +23,7 @@ float read_next( ) {
|
||||
} /* copies the pointer only */
|
||||
|
||||
|
||||
#pragma omp master
|
||||
#pragma omp masked
|
||||
{
|
||||
scanf("%f", tmp);
|
||||
}
|
@ -1,8 +1,14 @@
|
||||
! @@name: copyprivate.2f
|
||||
! @@type: F-fixed
|
||||
! @@compilable: yes
|
||||
! @@requires: preprocessing
|
||||
! @@linkable: no
|
||||
! @@expect: success
|
||||
! @@version: omp_5.1
|
||||
#if _OPENMP < 202011
|
||||
#define MASKED MASTER
|
||||
#endif
|
||||
|
||||
REAL FUNCTION READ_NEXT()
|
||||
REAL, POINTER :: TMP
|
||||
|
||||
@ -10,9 +16,9 @@
|
||||
ALLOCATE (TMP)
|
||||
!$OMP END SINGLE COPYPRIVATE (TMP) ! copies the pointer only
|
||||
|
||||
!$OMP MASTER
|
||||
!$OMP MASKED
|
||||
READ (11) TMP
|
||||
!$OMP END MASTER
|
||||
!$OMP END MASKED
|
||||
|
||||
!$OMP BARRIER
|
||||
READ_NEXT = TMP
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user