diff --git a/Chap_SIMD.tex b/Chap_SIMD.tex index dc57ef3..8a57ac9 100644 --- a/Chap_SIMD.tex +++ b/Chap_SIMD.tex @@ -31,7 +31,7 @@ execution within loops that contain the function and have a \code{simd} directive. Clauses provide argument specifications (\code{linear}, \code{uniform}, and \code{aligned}), a requested vector length (\code{simdlen}), and designate whether the function is always/never -called conditionally in a loop (\code{branch}/\code{inbranch}). +called conditionally in a loop (\code{notinbranch}/\code{inbranch}). The latter is for optimizing performance. Also, the \code{simd} construct has been combined with the worksharing loop diff --git a/Chap_data_environment.tex b/Chap_data_environment.tex index fad3911..18e5ff7 100644 --- a/Chap_data_environment.tex +++ b/Chap_data_environment.tex @@ -22,7 +22,7 @@ Data-sharing attributes of variables can be classified as being \plc{predetermin Certain variables and objects have predetermined attributes. A commonly found case is the loop iteration variable in associated loops of a \code{for} or \code{do} construct. It has a private data-sharing attribute. -Variables with predetermined data-sharing attributes can not be listed in a data-sharing clause; but there are some +Variables with predetermined data-sharing attributes cannot be listed in a data-sharing clause; but there are some exceptions (mainly concerning loop iteration variables). Variables with explicitly determined data-sharing attributes are those that are @@ -50,7 +50,7 @@ The common \plc{list items} are arrays, array sections, scalars, pointers, and structure elements (members). Procedures and global variables have predetermined data mapping if they appear -within the list or block of a \code{declare target} directive. Also, a C/C++ pointer +within the list or block of a \code{declare}~\code{target} directive. Also, a C/C++ pointer is mapped as a zero-length array section, as is a C++ variable that is a reference to a pointer. 
% Waiting for response from Eric on this. diff --git a/Chap_devices.tex b/Chap_devices.tex index 0be8779..7647f03 100644 --- a/Chap_devices.tex +++ b/Chap_devices.tex @@ -15,7 +15,7 @@ data to the device storage. The constructs that explicitly create storage, transfer data, and free storage on the device -are catagorized as structured and unstructured. The +are categorized as structured and unstructured. The \code{target} \code{data} construct is structured. It creates a data region around \code{target} constructs, and is convenient for providing persistent data throughout multiple @@ -33,14 +33,14 @@ the device, and controls on the storage duration. There is an important change in the OpenMP 4.5 specification that alters the data model for scalar variables and C/C++ pointer variables. The default behavior for scalar variables and C/C++ pointer variables -in an 4.5 compliant code is \code{firstprivate}. Example +in a 4.5 compliant code is \code{firstprivate}. Example codes that have been updated to reflect this new behavior are annotated with a description that describes changes required for correct execution. Often it is a simple matter of mapping the variable as \code{tofrom} to obtain the intended 4.0 behavior. In OpenMP version 4.5 the mechanism for target -execution is specified as occuring through a \plc{target task}. +execution is specified as occurring through a \plc{target task}. When the \code{target} construct is encountered a new \plc{target task} is generated. The \plc{target task} completes after the \code{target} region has executed and all data @@ -59,13 +59,14 @@ clause introduced in OpenMP 4.5. 
\input{devices/target_structure_mapping} \input{devices/target_fort_allocatable_array_mapping} \input{devices/array_sections} +\input{devices/C++_virtual_functions} \input{devices/array_shaping} \input{devices/target_mapper} \input{devices/target_data} \input{devices/target_unstructured_data} \input{devices/target_update} -\input{devices/target_associate_ptr} \input{devices/declare_target} +\input{devices/lambda_expressions} \input{devices/teams} \input{devices/async_target_depend} \input{devices/async_target_with_tasks} diff --git a/Chap_directives.tex b/Chap_directives.tex index c0daeba..7a997cc 100644 --- a/Chap_directives.tex +++ b/Chap_directives.tex @@ -1,11 +1,12 @@ \cchapter{OpenMP Directive Syntax}{directives} \label{chap:directive_syntax} +\index{directive syntax} OpenMP \emph{directives} use base-language mechanisms to specify OpenMP program behavior. In C code, the directives are formed exclusively with pragmas, whereas in C++ code, directives are formed from either pragmas or attributes. Fortran directives are formed with comments in free form and fixed form sources (codes). -All of these mechanism allow the compilation to ignore the OpenMP directives if +All of these mechanisms allow the compilation to ignore the OpenMP directives if OpenMP is not supported or enabled. @@ -35,6 +36,18 @@ Fortran comments where \code{c\$omp} and \code{*\$omp} may be used in Fortran fixed form sources. +Most OpenMP directives accept clauses that alter the semantics of the directive in some way, +and some directives also accept parenthesized arguments that follow the directive name. +A clause may just be a keyword (e.g., \scode{untied}) or it may also accept argument lists +(e.g., \scode{shared(x,y,z)}) and/or optional modifiers (e.g., \scode{tofrom} in +\scode{map(tofrom:}~\scode{x,y,z)}). 
+Clause modifiers may be ``simple'' or ``complex'' -- a complex modifier consists of a +keyword followed by one or more parameters, bracketed by parentheses, while a simple +modifier does not. An example of a complex modifier is the \scode{iterator} modifier, +as in \scode{map(iterator(i=0:n),}~\scode{tofrom:}~\scode{p[i])}, or the \scode{step} modifier, as in +\scode{linear(x:}~\scode{ref,}~\scode{step(4))}. +In the preceding examples, \scode{tofrom} and \scode{ref} are simple modifiers. + %===== Examples Sections ===== \input{directives/pragmas} diff --git a/Chap_introduction.tex b/Chap_introduction.tex new file mode 100644 index 0000000..aa75d8b --- /dev/null +++ b/Chap_introduction.tex @@ -0,0 +1,73 @@ +% This is the introduction for the OpenMP Examples document. +% This is an included file. See the main file (openmp-examples.tex) for more information. +% +% When editing this file: +% +% 1. To change formatting, appearance, or style, please edit openmp.sty. +% +% 2. Custom commands and macros are defined in openmp.sty. +% +% 3. Be kind to other editors -- keep a consistent style by copying-and-pasting to +% create new content. +% +% 4. We use semantic markup, e.g. (see openmp.sty for a full list): +% \code{} % for bold monospace keywords, code, operators, etc. +% \plc{} % for italic placeholder names, grammar, etc. +% +% 5. Other recommendations: +% Use the convenience macros defined in openmp.sty for the minor headers +% such as Comments, Syntax, etc. +% +% To keep items together on the same page, prefer the use of +% \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering. +% When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's +% what you mean. Use \needspace{} cautiously for troublesome paragraphs. +% +% Avoid absolute lengths and measures in this file; use relative units when possible. +% Vertical space can be relative to \baselineskip or ex units. 
Horizontal space +% can be relative to \linewidth or em units. +% +% Prefer \emph{} to italicize terminology, e.g.: +% This is a \emph{definition}, not a placeholder. +% This is a \plc{var-name}. +% + +\cchapter{Introduction}{introduction} +\label{chap:introduction} + +This collection of programming examples supplements the OpenMP API for Shared +Memory Parallelization specifications, and is not part of the formal specifications. It +assumes familiarity with the OpenMP specifications, and shares the typographical +conventions used in that document. + +The OpenMP API specification provides a model for parallel programming that is +portable across shared memory architectures from different vendors. Compilers from +numerous vendors support the OpenMP API. + +The directives, library routines, and environment variables demonstrated in this +document allow users to create and manage parallel programs while permitting +portability. The directives extend the C, C++ and Fortran base languages with single +program multiple data (SPMD) constructs, tasking constructs, device constructs, +worksharing constructs, and synchronization constructs, and they provide support for +sharing and privatizing data. The functionality to control the runtime environment is +provided by library routines and environment variables. Compilers that support the +OpenMP API often include a command line option to the compiler that activates and +allows interpretation of all OpenMP directives. + +The documents and source codes for OpenMP Examples can be downloaded from +\href{https://github.com/OpenMP/Examples}{https://github.com/OpenMP/Examples}. +Each directory holds the contents of a chapter and has a \splc{sources} subdirectory of its codes. +The codes for this OpenMP \VER{} Examples document have the tag +\href{https://github.com/OpenMP/Examples/tree/v\VER}{\plc{v\PVER}}. 
+ +Complete information about the OpenMP API and a list of the compilers that support +the OpenMP API can be found at the OpenMP.org web site + +\code{https://www.openmp.org} + +\clearpage + +\input{introduction/Examples} + +% This is the end of introduction.tex of the OpenMP Examples document. + diff --git a/Chap_loop_transformations.tex b/Chap_loop_transformations.tex index a9f49bd..4a77e65 100644 --- a/Chap_loop_transformations.tex +++ b/Chap_loop_transformations.tex @@ -22,4 +22,5 @@ whereby specific hot spots can be affected by transformation directives. %===== Examples Sections ===== \input{loop_transformations/tile} \input{loop_transformations/unroll} +\input{loop_transformations/partial_tile} diff --git a/Chap_memory_model.tex b/Chap_memory_model.tex index c3bc1a8..c39e099 100644 --- a/Chap_memory_model.tex +++ b/Chap_memory_model.tex @@ -25,7 +25,7 @@ flush operation is characterized by its flush properties -- some combination of flushes, a \emph{flush-set}. A \emph{strong} flush will force consistency between the temporary view and the -memory for all variables in its \emph{flush-set}. Furthermore all strong flushes in a +memory for all variables in its \emph{flush-set}. Furthermore, all strong flushes in a program that have intersecting flush-sets will execute in some total order, and within a thread strong flushes may not be reordered with respect to other memory operations on variables in its flush-set. \emph{Release} and @@ -53,7 +53,7 @@ do not have a well-defined \emph{completion order}. The existence of data races in OpenMP programs result in undefined behavior, and so they should generally be avoided for programs to be correct. The completion order of accesses to a shared variable is guaranteed in OpenMP through a set of memory -consistency rules that are described in the \plc{OpenMP Memory Consitency} +consistency rules that are described in the \plc{OpenMP Memory Consistency} section of the OpenMP Specifications document. 
%This chapter also includes examples that exhibit non-sequentially consistent diff --git a/Chap_parallel_execution.tex b/Chap_parallel_execution.tex index af139a3..37e28e5 100644 --- a/Chap_parallel_execution.tex +++ b/Chap_parallel_execution.tex @@ -102,8 +102,8 @@ The \code{masked} construct is not a worksharing construct. The \code{masked} r executed only by the primary thread. There is no implicit barrier (and flush) at the end of the \code{masked} region; hence the other threads of the team continue execution beyond code statements beyond the \code{masked} region. -The \code{master} contruct, which has been deprecated in OpenMP 5.1, has identical semantics -to the \code{masked} contruct with no \code{filter} clause. +The \code{master} construct, which has been deprecated in OpenMP 5.1, has identical semantics +to the \code{masked} construct with no \code{filter} clause. %===== Examples Sections ===== diff --git a/Chap_program_control.tex b/Chap_program_control.tex index 3ed6d24..a8fd458 100644 --- a/Chap_program_control.tex +++ b/Chap_program_control.tex @@ -108,6 +108,7 @@ chapter in the OpenMP Specifications document. \input{program_control/nested_loop} \input{program_control/nesting_restrict} \input{program_control/target_offload} +\input{program_control/reproducible} \input{program_control/interop} \input{program_control/utilities} diff --git a/Chap_synchronization.tex b/Chap_synchronization.tex index 00a7cc9..7ebf3e2 100644 --- a/Chap_synchronization.tex +++ b/Chap_synchronization.tex @@ -36,7 +36,7 @@ of ordered regions while allowing code outside the region to run in parallel. Since OpenMP 4.5 the \code{ordered} construct can also be a stand-alone directive that specifies cross-iteration dependences in a doacross loop nest. 
-The \code{depend} clause uses a \code{sink} \plc{dependence-type}, along with a +The \code{depend} clause uses a \code{sink} \plc{dependence-type}, along with an iteration vector argument (vec) to indicate the iteration that satisfies the dependence. The \code{depend} clause with a \code{source} \plc{dependence-type} specifies dependence satisfaction. diff --git a/Contributions.md b/Contributions.md index c5a8dcc..f5d79b4 100644 --- a/Contributions.md +++ b/Contributions.md @@ -54,25 +54,28 @@ For a brief revision history, see `Changes.log` in the repo. * Insert the code in the sources directory for each chapter, and include the following metadata: * Metadata Tags for example sources: ``` - @@name: .[c|cpp|f|f90] + @@name: . @@type: C|C++|F-fixed|F-free + @@requires: preprocessing @@compilable: yes|no|maybe @@linkable: yes|no|maybe - @@expect: success|failure|nothing|rt-error + @@expect: success|compile-time-error|runtime-error|undefined-behavior @@version: omp_ ``` * **name** is the name of an example * **type** - is the source code type, which can be translated into or from proper file extension (c,cpp,f,f90) + is the source code type, which can be translated into or from proper file extension (C:c,C++:cpp,F-fixed:f,F-free:f90) + * **requires** + any additional requirements, currently `preprocessing` for requiring preprocessing * **compilable** indicates whether the source code is compilable * **linkable** indicates whether the source code is linkable * **expect** - indicates some expected result for testing purpose "`success|failure|nothing`" applies - to the result of code compilation "`rt-error`" is for a case where compilation may be - successful, but the code contains potential runtime issues (such as race condition). 
+ indicates some expected result for testing purpose "`success|compile-time-error|ct-error`" applies + to the result of code compilation; "`runtime-error|rt-error`" is for a case where compilation may be + successful, but the code contains potential runtime issues (such as race condition); `undefined-behavior` could result from a non-conforming code. Alternative would be to just use "`conforming`" or "`non-conforming`". * **version** indicates features for a specific OpenMP version, such as "`omp_5.0`" @@ -94,23 +97,30 @@ For a brief revision history, see `Changes.log` in the repo. -# LaTeX macros for examples +## LaTeX macros for examples +The following describes LaTeX macros defined specifically for examples. * Source code with language h-rules +* Source code without language h-rules +* Language h-rules +* Other macros +* See `openmp.sty` for more information + +### Source code with language h-rules ``` - \cexample[]{}{} % for C/C++ examples - \cppexample[]{}{} % for C++ examples - \fexample[]{}{} % for fixed-form Fortran examples - \ffreeexample[]{}{} % for free-form Fortran examples + \cexample[]{}{}[] % for C/C++ examples + \cppexample[]{}{}[] % for C++ examples + \fexample[]{}{}[] % for fixed-form Fortran examples + \ffreeexample[]{}{}[] % for free-form Fortran examples ``` -* Source code without language h-rules +### Source code without language h-rules ``` - \cnexample[]{}{} - \cppnexample[]{}{} - \fnexample[]{}{} - \ffreenexample[]{}{} - \srcnexample[]{}{}{} + \cnexample[]{}{}[] + \cppnexample[]{}{}[] + \fnexample[]{}{}[] + \ffreenexample[]{}{}[] + \srcnexample[]{}{}{}[] ``` Optional `` can be supplied in a macro to include a specific OpenMP @@ -123,7 +133,11 @@ For a brief revision history, see `Changes.log` in the repo. source code should not contain any `@@` metadata tags. The `ext` argument to this macro is the file extension (such as `h`, `hpp`, `inc`). 
-* Language h-rules + The `` option to each macro allows finer control of any additional lines + to be skipped due to the addition of new `@@` tags, such as `@@requires`. + The default value for `` is 0. + +### Language h-rules ``` \cspecificstart, \cspecificend \cppspecificstart, \cppspecificend @@ -131,9 +145,11 @@ For a brief revision history, see `Changes.log` in the repo. \fortranspecificstart, \fortranspecificend ``` -* Chapter and section macros +### Other macros ``` \cchapter{}{} + \hexentry[ext1]{}[ext2]{} + \hexmentry[ext1]{}[ext2]{}{} ``` The `\cchapter` macro is used for starting a chapter with proper page spacing. @@ -146,8 +162,15 @@ A previously-defined macro `\sinput{}` to import a section file from `` is no longer supported. Please use `\input{/}` explicitly. -* See `openmp.sty` for more information +The two macros `\hexentry` and `\hexmentry` are defined for simplifying +entries in the feature deprecation and update tables. Option `[ext1]` is +the file extension with a default value of `c` and option `[ext2]` is +the file extension for the associated second file if present. +`` is the version tag of the corresponding example +in the earlier version. `\hexentry` assumes no name change for an example +in different versions; `\hexmentry` can be used to specify a prior name +if it is different. -### License +## License For copyright information, please see `omp_copyright.txt`. diff --git a/Deprecated_Features.tex b/Deprecated_Features.tex new file mode 100644 index 0000000..bc5bd9e --- /dev/null +++ b/Deprecated_Features.tex @@ -0,0 +1,281 @@ +\cchapter{Feature Deprecations and Updates in Examples}{deprecated_features} +\label{chap:deprecated_features} +\label{sec:deprecated_features} +\index{deprecated features} + +Deprecation of features began in OpenMP 5.0. +Examples that use a deprecated feature have been updated with an equivalent +replacement feature. 
+ +Table~\ref{tab:Deprecated Features} summarizes deprecated features and +their replacements in each version. Affected examples are updated +accordingly and listed in Section~\ref{sec:Updated Examples}. + +\nolinenumbers +\renewcommand{\arraystretch}{1.4} +\tablefirsthead{% +\hline +\textbf{Version} & \textbf{Deprecated Feature} & \textbf{Replacement}\\ +\hline\\[-3.5ex] +} +\tablehead{% +\multicolumn{2}{l}{\small\slshape table continued from previous page}\\ +\hline +\textbf{Version} & \textbf{Deprecated Feature} & \textbf{Replacement}\\ +\hline\\[-3ex] +} +\tabletail{% +\hline\\[-4ex] +\multicolumn{2}{l}{\small\slshape table continued on next page}\\ +} +\tablelasttail{\hline\\[-2ex]} +\tablecaption{Deprecated Features and Their Replacements\label{tab:Deprecated Features}} +\begin{supertabular}{p{0.4in} p{2.3in} p{2.2in}} +5.2 & \scode{default} clause on metadirectives + & \scode{otherwise} clause \\ +5.2 & delimited \scode{declare}~\scode{target} directive for C/C++ + & \scode{begin}~\scode{declare}~\scode{target} directive \\ +5.2 & \scode{to} clause on \scode{declare}~\scode{target} directive + & \scode{enter} clause \\ +5.2 & non-argument \scode{destroy} clause on \scode{depobj} construct + & \scode{destroy(}\plc{argument}\code{)} \\ +5.2 & \scode{allocate} construct for Fortran \scode{ALLOCATE} statements + & \scode{allocators} construct \\ +5.2 & \scode{depend} clause on \scode{ordered} construct + & \scode{doacross} clause \\ +5.2 & \scode{linear(}\plc{modifier(list): linear-step}\code{)} clause + & \scode{linear(}\plc{list:}~\scode{step(}\plc{linear-step}\scode{)}\plc{, modifier}\scode{)} clause \\ +\hline +5.1 & \scode{master} construct + & \scode{masked} construct \\ +5.1 & \scode{master} affinity policy + & \scode{primary} affinity policy \\ +\hline +5.0 & \scode{omp_lock_hint_*} constants + & \scode{omp_sync_hint_*} constants \\[2pt] +\end{supertabular} + +\linenumbers +These replacements appear in examples that illustrate, otherwise, earlier 
features. +When using a compiler that is compliant with a version prior to +the indicated version, the earlier form of an example for a previous +version is listed as a reference. + +\newpage +\section{Updated Examples for Different Versions} +\label{sec:Updated Examples} + +The following tables list the updated examples for different versions as +a result of feature deprecation. The \emph{Earlier Version} column of +the tables shows the version tag of the earlier version. It also shows +the prior name of an example when it has been renamed. + +Table~\ref{tab:Updated Examples 5.2} lists the updated examples for OpenMP 5.2 +in the Examples Document Version +\href{https://github.com/OpenMP/Examples/tree/v5.2}{5.2}. +The \emph{Earlier Version} column of the table lists the earlier version +tags of the examples that can be found in +the Examples Document Version +\href{https://github.com/OpenMP/Examples/tree/v5.1}{5.1}. + +\index{clauses!default@\code{default}} +\index{clauses!otherwise@\code{otherwise}} +\index{clauses!to@\code{to}} +\index{clauses!enter@\code{enter}} +\index{clauses!depend@\code{depend}} +\index{clauses!doacross@\code{doacross}} +\index{clauses!linear@\code{linear}} +\index{clauses!destroy@\code{destroy}} +\index{default clause@\code{default} clause} +\index{otherwise clause@\code{otherwise} clause} +\index{to clause@\code{to} clause} +\index{enter clause@\code{enter} clause} +\index{depend clause@\code{depend} clause} +\index{doacross clause@\code{doacross} clause} +\index{linear clause@\code{linear} clause} +\index{destroy clause@\code{destroy} clause} +\index{directives!begin declare target@\code{begin}~\code{declare}~\code{target}} +\index{begin declare target directive@\code{begin}~\code{declare}~\code{target} directive} +\index{allocate construct@\code{allocate} construct} +\index{allocators construct@\code{allocators} construct} + +\nolinenumbers +\renewcommand{\arraystretch}{1.0} +\tablefirsthead{% +\hline\\[-2ex] +\textbf{Example Name} & 
\textbf{Earlier Version} & \textbf{Feature Updated} +\\[2pt] +\hline\\[-2ex] +} +\tablehead{% +\multicolumn{2}{l}{\small\slshape table continued from previous page}\\[2pt] +\hline\\[-2ex] +\textbf{Example Name} & \textbf{Earlier Version} & \textbf{Feature Updated}\\[2pt] +\hline\\[-2ex] +} +\tabletail{% +\hline\\[-2.5ex] +\multicolumn{2}{l}{\small\slshape table continued on next page}\\ +} +\tablelasttail{\hline\\[-1ex]} +\tablecaption{Updated Examples for Version 5.2\label{tab:Updated Examples 5.2}} +\begin{supertabular}{p{1.7in} p{1.2in} p{2.1in}} + \hexentry{error.1}[f90]{5.1} & + \scode{default} clause on metadirectives \\ + \hexentry{metadirective.1}[f90]{5.0} & + replaced with \scode{otherwise} clause \\ + \hexentry{metadirective.2}[f90]{5.0} & \\ + \hexentry{metadirective.3}[f90]{5.0} & \\ + \hexentry{metadirective.4}[f90]{5.1} & \\ + \hexentry{target_ptr_map.4}{5.1} & \\ + \hexentry{target_ptr_map.5}[f90]{5.1} & \\[2pt] +\hline\\[-2ex] + \hexentry[f90]{array_shaping.1}{5.0} & + \scode{to} clause on \scode{declare} \scode{target} \\ + \hexentry{target_reverse_offload.7}{5.0} & + directive replaced with \scode{enter} clause \\ + \hexentry{target_task_reduction.1}[f90]{5.1} & \\ + \hexentry{target_task_reduction.2a}[f90]{5.0} & \\ + \hexentry{target_task_reduction.2b}[f90]{5.1} &\\[2pt] +\hline\\[-2ex] + \hexentry{array_shaping.1}{5.0} & + delimited \scode{declare}~\scode{target} \\ + \hexentry{async_target.1}{4.0} & + directive replaced with \\ + \hexentry{async_target.2}{4.0} & + \scode{begin}~\scode{declare}~\scode{target} \\ + \hexentry{declare_target.1}{4.0} & + directive for C/C++ \\ + \hexentry[cpp]{declare_target.2c}{4.0} & \\ + \hexentry{declare_target.3}{4.0} & \\ + \hexentry{declare_target.4}{4.0} & \\ + \hexentry{declare_target.5}{4.0} & \\ + \hexentry{declare_target.6}{4.0} & \\ + \hexentry{declare_variant.1}{5.0} & \\ + \hexentry{device.1}{4.0} & \\ + \hexentry{metadirective.3}{5.0} & \\ + \hexentry{target_ptr_map.2}{5.0} & \\ + 
\hexentry{target_ptr_map.3a}{5.0} & \\ + \hexentry{target_ptr_map.3b}{5.0} & \\ + \hexentry{target_struct_map.1}{5.0} & \\ + \hexentry[cpp]{target_struct_map.2}{5.0} & \\ + \hexentry{target_struct_map.3}{5.0} & \\ + \hexentry{target_struct_map.4}{5.0} & \\[2pt] +\hline\\[-2ex] + \hexentry{doacross.1}[f90]{4.5} & + \scode{depend} clause on \scode{ordered} \\ + \hexentry{doacross.2}[f90]{4.5} & + construct replaced with \scode{doacross} \\ + \hexentry{doacross.3}[f90]{4.5} & + clause \\ + \hexentry{doacross.4}[f90]{4.5} & \\[2pt] +\hline\\[-2ex] + \hexentry[cpp]{linear_modifier.1}[f90]{4.5} & + modifier syntax change for \scode{linear} \\ + \hexentry[cpp]{linear_modifier.2}[f90]{4.5} & + clause on \scode{declare}~\scode{simd} directive \\ + \hexentry{linear_modifier.3}[f90]{4.5} & \\[2pt] +\hline\\[-2ex] + \hexentry[f90]{allocators.1}{5.0} & + \scode{allocate} construct replaced with \scode{allocators} construct + for Fortran allocate statements \\[2pt] +\hline\\[-2ex] + \hexentry{depobj.1}[f90]{5.0} & + argument added to \scode{destroy} clause on \scode{depobj} + construct \\[2pt] +\end{supertabular} + +\linenumbers +Table~\ref{tab:Updated Examples 5.1} lists the updated examples for OpenMP 5.1 +in the Examples Document Version +\href{https://github.com/OpenMP/Examples/tree/v5.1}{5.1}. +The \emph{Earlier Version} column of the table lists the earlier version +tags and prior names of the examples that can be found in +the Examples Document Version +\href{https://github.com/OpenMP/Examples/tree/v5.0.1}{5.0.1}. 
+ +\index{affinity!master policy@\code{master} policy} +\index{affinity!primary policy@\code{primary} policy} +\index{constructs!master@\code{master}} +\index{constructs!masked@\code{masked}} +\index{master construct@\code{master} construct} +\index{masked construct@\code{masked} construct} + +\nolinenumbers +\renewcommand{\arraystretch}{1.0} +\tablefirsthead{% +\hline\\[-2ex] +\textbf{Example Name} & \textbf{Earlier Version} & \textbf{Feature Updated} +\\[2pt] +\hline\\[-2ex] +} +\tablehead{% +\multicolumn{2}{l}{\small\slshape table continued from previous page}\\[2pt] +\hline\\[-2ex] +\textbf{Example Name} & \textbf{Earlier Version} & \textbf{Feature Updated}\\[2pt] +\hline\\[-2ex] +} +\tabletail{% +\hline\\[-2.5ex] +\multicolumn{2}{l}{\small\slshape table continued on next page}\\ +} +\tablelasttail{\hline\\[-1ex]} +\tablecaption{Updated Examples for Version 5.1\label{tab:Updated Examples 5.1}} +\begin{supertabular}{p{1.8in} p{1.4in} p{1.8in}} + \hexentry{affinity.5}[f]{4.0} & + \scode{master} affinity policy replaced with \scode{primary} policy \\[2pt] +\hline\\[-2ex] + \hexentry{async_target.3}[f90]{5.0} & + \scode{master} construct replaced \\ + \hexentry{cancellation.2}[f90]{4.0} & + with \scode{masked} construct \\ + \hexentry{copyprivate.2}[f]{3.0} & \\ + \hexentry[f]{fort_sa_private.5}{3.0} & \\ + \hexentry{lock_owner.1}[f]{3.0} & \\ + \hexmentry{masked.1}[f]{3.0}{master.1} & \\ + \hexmentry{parallel_masked_taskloop.1}[f90]{5.0}{parallel_master_taskloop.1} &\\ + \hexentry{reduction.6}[f]{3.0} & \\ + \hexentry{target_task_reduction.1}[f90]{5.0} & \\ + \hexentry{target_task_reduction.2b}[f90]{5.0} & \\ + \hexentry{taskloop_simd_reduction.1}[f90]{5.0} & \\ + \hexentry{task_detach.1}[f90]{5.0} & \\[2pt] +\end{supertabular} + +\linenumbers +Table~\ref{tab:Updated Examples 5.0} lists the updated examples for OpenMP 5.0 +in the Examples Document Version +\href{https://github.com/OpenMP/Examples/tree/v5.1}{5.1}. 
+The \emph{Earlier Version} column of the table lists the earlier version +tags of the examples that can be found in +the Examples Document Version +\href{https://github.com/OpenMP/Examples/tree/v5.0.1}{5.0.1}. + +\nolinenumbers +\renewcommand{\arraystretch}{1.0} +\tablefirsthead{% +\hline\\[-2ex] +\textbf{Example Name} & \textbf{Earlier Version} & \textbf{Feature Updated} +\\[2pt] +\hline\\[-2ex] +} +\tablehead{% +\multicolumn{2}{l}{\small\slshape table continued from previous page}\\[2pt] +\hline\\[-2ex] +\textbf{Example Name} & \textbf{Earlier Version} & \textbf{Feature Updated}\\[2pt] +\hline\\[-2ex] +} +\tabletail{% +\hline\\[-2.5ex] +\multicolumn{2}{l}{\small\slshape table continued on next page}\\ +} +\tablelasttail{\hline\\[-1ex]} +\tablecaption{Updated Examples for Version 5.0\label{tab:Updated Examples 5.0}} +\begin{supertabular}{p{1.6in} p{1.3in} p{2.1in}} + \hexentry{critical.2}[f]{4.5} & + \scode{omp_lock_hint_*} constants \\ + \hexentry[cpp]{init_lock_with_hint.1}[f]{4.5} & + replaced with \scode{omp_sync_hint_*} constants \\[2pt] +\end{supertabular} + +\linenumbers + diff --git a/Foreword_Chapt.tex b/Foreword_Chapt.tex index b1af0e3..522e07b 100644 --- a/Foreword_Chapt.tex +++ b/Foreword_Chapt.tex @@ -1,23 +1,33 @@ -\bchapter{Foreword} +\chapter*{Foreword} \label{chap:foreword} The OpenMP Examples document has been updated with new features -found in the OpenMP 5.1 Specification. The additional examples and updates +found in the OpenMP \VER\ Specification. The additional examples and updates are referenced in the Document Revision History of the Appendix on page~\pageref{chap:history}. -Text describing an example with a 5.1 feature specifically states -that the feature support begins in the OpenMP 5.1 Specification. Also, -an \code{\small omp\_5.1} keyword is included in the metadata of the source code. 
-These distinctions are presented to remind readers that a 5.1 compliant +Text describing an example with a \VER\ feature specifically states +that the feature support begins in the OpenMP \VER\ Specification. Also, +an \code{\small omp\_\VER} keyword is included in the metadata of the source code. +These distinctions are presented to remind readers that a \VER\ compliant OpenMP implementation is necessary to use these features in codes. -Examples for most of the 5.1 features are included in this document, +Examples for most of the \VER\ features are included in this document, and incremental releases will become available as more feature examples -and updates are submitted, and approved by the OpenMP Examples Subcommittee. +and updates are submitted and approved by the OpenMP Examples Subcommittee. + +Examples are accepted for this document after discussions, revisions and reviews +in the Examples Subcommittee, and two reviews/discussions and two votes +in the OpenMP Language Committee. +Draft examples are often derived from case studies for new features in the language, +and are revised to illustrate the basic application of the features with code comments, +and a text description. We are grateful to the numerous members of the Language Committee +who took the time to prepare codes and descriptions, and shepherd them through +the acceptance process. We sincerely appreciate the Examples Subcommittee members, who +actively participated and contributed in weekly meetings over the years. 
\bigskip -Examples Subcommitee Co-chairs: \smallskip\linebreak +Examples Subcommittee Co-chairs: \smallskip\linebreak Henry Jin (\textsc{NASA} Ames Research Center) \linebreak -Kent Milfeld (\textsc{TACC}, Texas Advanced Research Center) +Kent Milfeld (\textsc{TACC}, Texas Advanced Computing Center) diff --git a/History.tex b/History.tex index 75e69e9..63c766b 100644 --- a/History.tex +++ b/History.tex @@ -1,6 +1,72 @@ \cchapter{Document Revision History}{history} \label{chap:history} +%===================================== +\section{Changes from 5.1 to 5.2} +\label{sec:history_51_to_52} + +\begin{itemize} +\item General changes: +\begin{itemize} +\item Included a description of the semantics for OpenMP directive syntax + (see \specref{chap:directive_syntax}) +\item Reorganized the Introduction Chapter and moved the Feature +Deprecation Chapter to Appendix~\ref{chap:deprecated_features} +\item Included a list of examples that were updated for feature deprecation +and replacement in each version (see Appendix~\ref{sec:Updated Examples}) +\item Added Index entries +\end{itemize} + +\item Updated the examples for feature deprecation and replacement in OpenMP 5.2. +See Table~\ref{tab:Deprecated Features} and +Table~\ref{tab:Updated Examples 5.2} for details. 
+ +\item Added the following examples for the 5.2 features: +\begin{itemize} + \item Mapping class objects with virtual functions + (\specref{sec:virtual_functions}) + \item \scode{allocators} construct for Fortran \code{allocate} statement + (\specref{sec:allocators}) + \item Behavior of reallocation of variables through OpenMP allocator in + Fortran (\specref{sec:allocators}) +\end{itemize} + +\item Added the following examples for the 5.1 features: +\begin{itemize} + \item Clarification of optional \code{end} directive for strictly structured + block in Fortran (\specref{sec:fortran_free_format_comments}) + \item \scode{filter} clause on \scode{masked} construct (\specref{sec:masked}) + \item \scode{omp_all_memory} reserved locator for specifying task dependences + (\specref{subsec:depend_undefer_task}) + \item Behavior of Fortran allocatable variables in \code{target} regions + (\specref{sec:fort_allocatable_array_mapping}) + \item Device memory routines in Fortran + (\specref{subsec:target_mem_and_device_ptrs}) + \item Partial tiles from \scode{tile} construct + (\specref{sec:incomplete_tiles}) + \item Fortran associate names and selectors in \code{target} region + (\specref{sec:associate_target}) + \item \scode{allocate} directive for variable declarations and + \scode{allocate} clause on \scode{task} constructs + (\specref{sec:allocators}) + \item Controlling concurrency and reproducibility with \code{order} clause + (\specref{sec:reproducible_modifier}) +\end{itemize} + +\item Added other examples: +\begin{itemize} + \item Using lambda expressions with \scode{target} constructs + (\specref{sec:lambda_expressions}) + \item Target memory and device pointer routines + (\specref{subsec:target_mem_and_device_ptrs}) + \item Examples to illustrate the ordering properties of + the \plc{flush} operation (\specref{sec:mem_model}) + \item User selector in the \code{metadirective} directive + (\specref{sec:metadirective}) +\end{itemize} + +\end{itemize} + 
%===================================== \section{Changes from 5.0.1 to 5.1} \label{sec:history_501_to_51} diff --git a/Makefile b/Makefile index 376edae..3156ec1 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,18 @@ # Makefile for the OpenMP Examples document in LaTex format. # For more information, see the main document, openmp-examples.tex. -version=5.1 +version=5.2 default: openmp-examples.pdf diff: openmp-diff-abridged.pdf +book: BOOK_BUILD="\\\\def\\\\bookbuild{1}" +book: clean openmp-examples.pdf + cp openmp-examples-${version}.pdf openmp-examples-${version}-book.pdf CHAPTERS=Title_Page.tex \ Foreword_Chapt.tex \ - Introduction_Chapt.tex \ - Examples_Chapt.tex \ - Deprecated_Features_Chapt.tex \ Chap_*.tex \ + Deprecated_Features.tex \ History.tex \ */*.tex @@ -22,6 +23,8 @@ SOURCES=*/sources/*.c \ INTERMEDIATE_FILES=openmp-examples.pdf \ openmp-examples.toc \ + openmp-examples.lof \ + openmp-examples.lot \ openmp-examples.idx \ openmp-examples.aux \ openmp-examples.ilg \ @@ -29,20 +32,30 @@ INTERMEDIATE_FILES=openmp-examples.pdf \ openmp-examples.out \ openmp-examples.log +LATEXCMD=pdflatex -interaction=batchmode -file-line-error +LATEXDCMD=$(LATEXCMD) -draftmode + # check for branches names with "name_XXX" DIFF_TICKET_ID=$(shell git rev-parse --abbrev-ref HEAD) -openmp-examples.pdf: $(CHAPTERS) $(SOURCES) openmp.sty openmp-examples.tex openmp-logo.png +openmp-examples.pdf: $(CHAPTERS) $(SOURCES) openmp.sty openmp-examples.tex openmp-logo.png generated-include.tex rm -f $(INTERMEDIATE_FILES) - pdflatex -interaction=batchmode -file-line-error openmp-examples.tex - pdflatex -interaction=batchmode -file-line-error openmp-examples.tex - pdflatex -interaction=batchmode -file-line-error openmp-examples.tex + touch generated-include.tex + $(LATEXDCMD) openmp-examples.tex + makeindex -s openmp-index.ist openmp-examples.idx + $(LATEXDCMD) openmp-examples.tex + $(LATEXCMD) openmp-examples.tex cp openmp-examples.pdf openmp-examples-${version}.pdf clean: rm -f 
$(INTERMEDIATE_FILES) + rm -f generated-include.tex rm -f openmp-diff-full.pdf openmp-diff-abridged.pdf rm -rf *.tmpdir + cd util; make clean + +realclean: clean + rm -f openmp-examples-${version}.pdf openmp-examples-${version}-book.pdf ifdef DIFF_TO VC_DIFF_TO := -r ${DIFF_TO} @@ -52,11 +65,11 @@ endif ifdef DIFF_FROM VC_DIFF_FROM := -r ${DIFF_FROM} else - VC_DIFF_FROM := -r work_5.1 + VC_DIFF_FROM := -r work_5.2 endif DIFF_TO:=HEAD -DIFF_FROM:=work_5.1 +DIFF_FROM:=work_5.2 DIFF_TYPE:=UNDERLINE COMMON_DIFF_OPTS:=--math-markup=whole \ @@ -67,6 +80,10 @@ VC_DIFF_OPTS:=${COMMON_DIFF_OPTS} --force -c latexdiff.cfg --flatten --type="${D VC_DIFF_MINIMAL_OPTS:= --only-changes --force +generated-include.tex: + echo "$(BOOK_BUILD)" + echo "$(BOOK_BUILD)" > $@ + %.tmpdir: $(wildcard *.sty) $(wildcard *.png) $(wildcard *.aux) openmp-examples.pdf mkdir -p $@/sources for i in affinity devices loop_transformations parallel_execution SIMD tasking \ @@ -88,3 +105,5 @@ openmp-diff-minimal.pdf: diffs-slow-minimal.tmpdir env PATH="$(shell pwd)/util/latexdiff:$(PATH)" latexdiff-vc ${VC_DIFF_MINIMAL_OPTS} -d $< ${VC_DIFF_OPTS} openmp-examples.tex cp $ #define NN 1023 int a[NN]; -#pragma omp declare simd linear(ref(p)) simdlen(8) +#pragma omp declare simd linear(p: ref) simdlen(8) void add_one2(int& p) { p += 1; diff --git a/SIMD/sources/linear_modifier.1.f90 b/SIMD/sources/linear_modifier.1.f90 index 04fc8c4..5b55bf9 100644 --- a/SIMD/sources/linear_modifier.1.f90 +++ b/SIMD/sources/linear_modifier.1.f90 @@ -1,16 +1,16 @@ -! @@name: linear_modifier.1.f90 +! @@name: linear_modifier.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success -! @@version: omp_4.5 +! 
@@version: omp_5.2 module m integer, parameter :: NN = 1023 integer :: a(NN) contains subroutine add_one2(p) - !$omp declare simd(add_one2) linear(ref(p)) simdlen(8) + !$omp declare simd(add_one2) linear(p: ref) simdlen(8) implicit none integer :: p diff --git a/SIMD/sources/linear_modifier.2.cpp b/SIMD/sources/linear_modifier.2.cpp index 6b66595..a150e49 100644 --- a/SIMD/sources/linear_modifier.2.cpp +++ b/SIMD/sources/linear_modifier.2.cpp @@ -1,17 +1,17 @@ /* -* @@name: linear_modifier.2cpp +* @@name: linear_modifier.2 * @@type: C++ * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_4.5 +* @@version: omp_5.2 */ #include #define NN 1023 int a[NN]; -#pragma omp declare simd linear(ref(p)) linear(uval(i)) +#pragma omp declare simd linear(p: ref) linear(i: uval) void add_one2(int& p, const int& i) { p += i; diff --git a/SIMD/sources/linear_modifier.2.f90 b/SIMD/sources/linear_modifier.2.f90 index 911157d..6f22937 100644 --- a/SIMD/sources/linear_modifier.2.f90 +++ b/SIMD/sources/linear_modifier.2.f90 @@ -1,16 +1,16 @@ -! @@name: linear_modifier.2f90 +! @@name: linear_modifier.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success -! @@version: omp_4.5 +! 
@@version: omp_5.2 module m integer, parameter :: NN = 1023 integer :: a(NN) contains subroutine add_one2(p, i) - !$omp declare simd(add_one2) linear(ref(p)) linear(uval(i)) + !$omp declare simd(add_one2) linear(p: ref) linear(i: uval) implicit none integer :: p integer, intent(in) :: i diff --git a/SIMD/sources/linear_modifier.3.c b/SIMD/sources/linear_modifier.3.c index 186ae76..422a2e0 100644 --- a/SIMD/sources/linear_modifier.3.c +++ b/SIMD/sources/linear_modifier.3.c @@ -1,16 +1,16 @@ /* -* @@name: linear_modifier.3c +* @@name: linear_modifier.3 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_4.5 +* @@version: omp_5.2 */ #include #define N 128 -#pragma omp declare simd simdlen(4) uniform(x, y) linear(val(i):1) +#pragma omp declare simd simdlen(4) uniform(x, y) linear(i:val,step(1)) double func(double x[], double y[], int i) { return (x[i] + y[i]); diff --git a/SIMD/sources/linear_modifier.3.f90 b/SIMD/sources/linear_modifier.3.f90 index 0ac0679..f47f84f 100644 --- a/SIMD/sources/linear_modifier.3.f90 +++ b/SIMD/sources/linear_modifier.3.f90 @@ -1,13 +1,13 @@ -! @@name: linear_modifier.3f +! @@name: linear_modifier.3 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success -! @@version: omp_4.5 +! 
@@version: omp_5.2 module func_mod contains real(8) function func(x, y, i) -!$omp declare simd(func) simdlen(4) uniform(x, y) linear(val(i):1) +!$omp declare simd(func) simdlen(4) uniform(x, y) linear(i:val,step(1)) implicit none real(8), intent(in) :: x(*), y(*) integer, intent(in) :: i diff --git a/Title_Page.tex b/Title_Page.tex index b43a169..ab4d1f6 100644 --- a/Title_Page.tex +++ b/Title_Page.tex @@ -23,11 +23,12 @@ \vspace{2.3in} %was 3.0 -Source codes for OpenMP \PVER{} Examples can be downloaded from - \href{https://github.com/OpenMP/Examples/tree/v\VER}{github}.\\ +Source codes for OpenMP \VER{} Examples are available at + \href{https://github.com/OpenMP/Examples/tree/v\VER}% + {github (https://github.com/OpenMP/Examples/tree/v\VER)}.\\ \begin{adjustwidth}{0pt}{1em}\setlength{\parskip}{0.25\baselineskip}% -Copyright \copyright{} 1997-2021 OpenMP Architecture Review Board.\\ +Copyright \copyright{} 1997-2022 OpenMP Architecture Review Board.\\ Permission to copy without fee all or part of this material is granted, provided the OpenMP Architecture Review Board copyright notice and the title of this document appear. Notice is given that copying is by @@ -37,7 +38,7 @@ permission of OpenMP Architecture Review Board.\end{adjustwidth} % Blank page -\cleardoublepage +%\cleardoublepage %For final version, uncomment the line above, comment out the lines below %This working version enacted the following tickets: 287, 519, 550, 593, diff --git a/affinity/affinity.tex b/affinity/affinity.tex index c69a3b9..1402331 100644 --- a/affinity/affinity.tex +++ b/affinity/affinity.tex @@ -1,19 +1,24 @@ \pagebreak \section{\code{proc\_bind} Clause} \label{sec:affinity} +\index{affinity!proc_bind clause@\scode{proc_bind} clause} +\index{clauses!proc_bind@\scode{proc_bind}} +\index{proc_bind clause@\scode{proc_bind} clause} The following examples demonstrate how to use the \code{proc\_bind} clause to control the thread binding for a team of threads in a \code{parallel} region. 
-The machine architecture is depicted in the figure below. It consists of two sockets, +The machine architecture is depicted in Figure~\ref{fig:mach_arch}. It consists of two sockets, each equipped with a quad-core processor and configured to execute two hardware threads simultaneously on each core. These examples assume a contiguous core numbering starting from 0, such that the hardware threads 0,1 form the first physical core. \ifpdf -%\begin{figure}[htbp] -\centerline{\includegraphics[width=3.8in,keepaspectratio=true]% +\begin{figure}[htb] +\centerline{\includegraphics[width=3.0in,keepaspectratio=true]% {figs/proc_bind_fig.pdf}} -%\end{figure} +\caption{A machine architecture with two quad-core processors} +\label{fig:mach_arch} +\end{figure} \fi The following equivalent place list declarations consist of eight places (which @@ -27,6 +32,8 @@ or \subsection{Spread Affinity Policy} \label{subsec:affinity_spread} +\index{affinity!spread policy@\code{spread} policy} +\index{spread policy@\code{spread} policy} The following example shows the result of the \code{spread} affinity policy on @@ -124,6 +131,8 @@ and distribution of the place partition would be as follows: \subsection{Close Affinity Policy} \label{subsec:affinity_close} +\index{affinity!close policy@\code{close} policy} +\index{close policy@\code{close} policy} The following example shows the result of the \code{close} affinity policy on the partition list when the number of threads is less than or equal to the number @@ -220,6 +229,8 @@ and distribution of the place partition would be as follows: \subsection{Primary Affinity Policy} \label{subsec:affinity_primary} +\index{affinity!primary policy@\code{primary} policy} +\index{primary policy@\code{primary} policy} The following example shows the result of the \code{primary} affinity policy on the partition list for the machine architecture depicted above. The place partition @@ -227,7 +238,7 @@ is not changed by the primary policy. 
\cexample[4.0]{affinity}{5} -\fexample[4.0]{affinity}{5}[1] +\fexample[4.0]{affinity}{5} \clearpage It is unspecified on which place the primary thread is initially started. If the diff --git a/affinity/affinity_display.tex b/affinity/affinity_display.tex index 3cda660..fc33de3 100644 --- a/affinity/affinity_display.tex +++ b/affinity/affinity_display.tex @@ -1,5 +1,14 @@ \section{Affinity Display} \label{sec:affinity_display} +\index{affinity display!OMP_DISPLAY_AFFINITY@\scode{OMP_DISPLAY_AFFINITY}} +\index{environment variables!OMP_DISPLAY_AFFINITY@\scode{OMP_DISPLAY_AFFINITY}} +\index{OMP_DISPLAY_AFFINITY@\scode{OMP_DISPLAY_AFFINITY}} +\index{affinity display!OMP_AFFINITY_FORMAT@\scode{OMP_AFFINITY_FORMAT}} +\index{environment variables!OMP_AFFINITY_FORMAT@\scode{OMP_AFFINITY_FORMAT}} +\index{OMP_AFFINITY_FORMAT@\scode{OMP_AFFINITY_FORMAT}} +\index{affinity display!omp_display_affinity routine@\scode{omp_display_affinity} routine} +\index{routines!omp_display_affinity@\scode{omp_display_affinity}} +\index{omp_display_affinity routine@\scode{omp_display_affinity} routine} The following examples illustrate ways to display thread affinity. Automatic display of affinity can be invoked by setting @@ -49,6 +58,8 @@ where the numbers correspond to core ids for the system. Note, \code{OMP\_DISPLA set and is \code{FALSE} by default. This example shows how to use API routines to perform affinity display operations. +\index{environment variables!OMP_PLACES@\scode{OMP_PLACES}} +\index{OMP_PLACES@\scode{OMP_PLACES}} For each of the two first-level threads the \code{OMP\_PLACES} variable specifies a place with all the core-ids of the socket (\{0,2,4,6\} for one thread and \{1,3,5,7\} for the other). 
(As is sometimes the case in 2-socket systems, one socket may consist @@ -62,8 +73,14 @@ the affinities for the threads on each socket are printed according to this form \ffreeexample[5.0]{affinity_display}{2} +\index{affinity display!omp_get_affinity_format routine@\scode{omp_get_affinity_format} routine} +\index{routines!omp_get_affinity_format@\scode{omp_get_affinity_format}} +\index{omp_get_affinity_format routine@\scode{omp_get_affinity_format} routine} +\index{affinity display!omp_set_affinity_format routine@\scode{omp_set_affinity_format} routine} +\index{routines!omp_set_affinity_format@\scode{omp_set_affinity_format}} +\index{omp_set_affinity_format routine@\scode{omp_set_affinity_format} routine} The next example illustrates more details about affinity formatting. -First, the \code{omp\_get\_affininity\_format()} API routine is used to +First, the \code{omp\_get\_affinity\_format()} API routine is used to obtain the default format. The code checks to make sure the storage provides enough space to hold the format. Next, the \code{omp\_set\_affinity\_format()} API routine sets a user-defined @@ -83,6 +100,9 @@ and the "0" indicates that any unused space is to be prefixed with zeros %The period (\plc{.}) indicates right justified and \plc{0} leading zeros. %All other text in the format is just user narrative. +\index{affinity display!omp_capture_affinity routine@\scode{omp_capture_affinity} routine} +\index{routines!omp_capture_affinity@\scode{omp_capture_affinity}} +\index{omp_capture_affinity routine@\scode{omp_capture_affinity} routine} Within the parallel region the affinity for each thread is captured by \code{omp\_capture\_affinity()} into a buffer array with elements indexed by the thread number (\plc{thrd\_num}). @@ -98,6 +118,7 @@ The maximum value for the number of characters (\plc{nchars}) returned by clause and the \plc{if(nchars >= max\_req\_store) max\_req\_store=nchars} statement. 
It is used to report possible truncation (if \plc{max\_req\_store} > \plc{buffer\_store}). +\newpage \cexample[5.0]{affinity_display}{3} \ffreeexample[5.0]{affinity_display}{3} diff --git a/affinity/affinity_query.tex b/affinity/affinity_query.tex index 2cd0d57..5db9d3d 100644 --- a/affinity/affinity_query.tex +++ b/affinity/affinity_query.tex @@ -1,5 +1,18 @@ \section{Affinity Query Functions} \label{sec: affinity_query} +\index{affinity query!omp_get_num_places routine@\scode{omp_get_num_places} routine} +\index{routines!omp_get_num_places@\scode{omp_get_num_places}} +\index{omp_get_num_places routine@\scode{omp_get_num_places} routine} +\index{affinity query!omp_get_place_num routine@\scode{omp_get_place_num} routine} +\index{routines!omp_get_place_num@\scode{omp_get_place_num}} +\index{omp_get_place_num routine@\scode{omp_get_place_num} routine} +\index{affinity query!omp_get_place_num_procs routine@\scode{omp_get_place_num_procs} routine} +\index{routines!omp_get_place_num_procs@\scode{omp_get_place_num_procs}} +\index{omp_get_place_num_procs routine@\scode{omp_get_place_num_procs} routine} +\index{affinity!spread policy@\code{spread} policy} +\index{spread policy@\code{spread} policy} +\index{environment variables!OMP_PLACES@\scode{OMP_PLACES}} +\index{OMP_PLACES@\scode{OMP_PLACES}} In the example below a team of threads is generated on each socket of the system, using nested parallelism. Several query functions are used diff --git a/affinity/sources/affinity.1.c b/affinity/sources/affinity.1.c index 8ab77ba..6a408fe 100644 --- a/affinity/sources/affinity.1.c +++ b/affinity/sources/affinity.1.c @@ -1,5 +1,5 @@ /* -* @@name: affinity.1c +* @@name: affinity.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/affinity/sources/affinity.1.f b/affinity/sources/affinity.1.f index 8bb0b48..c4420f2 100644 --- a/affinity/sources/affinity.1.f +++ b/affinity/sources/affinity.1.f @@ -1,4 +1,4 @@ -! @@name: affinity.1f +! @@name: affinity.1 ! @@type: F-fixed ! 
@@compilable: yes ! @@linkable: no diff --git a/affinity/sources/affinity.2.c b/affinity/sources/affinity.2.c index 7c75ba5..00291f6 100644 --- a/affinity/sources/affinity.2.c +++ b/affinity/sources/affinity.2.c @@ -1,5 +1,5 @@ /* -* @@name: affinity.2c +* @@name: affinity.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/affinity/sources/affinity.2.f90 b/affinity/sources/affinity.2.f90 index 8baf7e8..d87bb6f 100644 --- a/affinity/sources/affinity.2.f90 +++ b/affinity/sources/affinity.2.f90 @@ -1,4 +1,4 @@ -! @@name: affinity.2f +! @@name: affinity.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/affinity/sources/affinity.3.c b/affinity/sources/affinity.3.c index 87e9524..eff8467 100644 --- a/affinity/sources/affinity.3.c +++ b/affinity/sources/affinity.3.c @@ -1,5 +1,5 @@ /* -* @@name: affinity.3c +* @@name: affinity.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/affinity/sources/affinity.3.f b/affinity/sources/affinity.3.f index a9225a7..072a976 100644 --- a/affinity/sources/affinity.3.f +++ b/affinity/sources/affinity.3.f @@ -1,4 +1,4 @@ -! @@name: affinity.3f +! @@name: affinity.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/affinity/sources/affinity.4.c b/affinity/sources/affinity.4.c index 5972414..40af217 100644 --- a/affinity/sources/affinity.4.c +++ b/affinity/sources/affinity.4.c @@ -1,5 +1,5 @@ /* -* @@name: affinity.4c +* @@name: affinity.4 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/affinity/sources/affinity.4.f90 b/affinity/sources/affinity.4.f90 index b95002b..ab9686e 100644 --- a/affinity/sources/affinity.4.f90 +++ b/affinity/sources/affinity.4.f90 @@ -1,4 +1,4 @@ -! @@name: affinity.4f +! @@name: affinity.4 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/affinity/sources/affinity.5.c b/affinity/sources/affinity.5.c index b3916b8..94500ee 100644 --- a/affinity/sources/affinity.5.c +++ b/affinity/sources/affinity.5.c @@ -1,15 +1,11 @@ /* -* @@name: affinity.5c +* @@name: affinity.5 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define primary master -#endif - void work(); int main() { diff --git a/affinity/sources/affinity.5.f b/affinity/sources/affinity.5.f index 6b85909..81531e7 100644 --- a/affinity/sources/affinity.5.f +++ b/affinity/sources/affinity.5.f @@ -1,14 +1,9 @@ -! @@name: affinity.5f +! @@name: affinity.5 ! @@type: F-fixed ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success ! @@version: omp_5.1 -#if _OPENMP < 202011 -#define primary master -#endif - PROGRAM EXAMPLE !$OMP PARALLEL PROC_BIND(primary) NUM_THREADS(4) CALL WORK() diff --git a/affinity/sources/affinity.6.c b/affinity/sources/affinity.6.c index 061af9c..829c665 100644 --- a/affinity/sources/affinity.6.c +++ b/affinity/sources/affinity.6.c @@ -1,5 +1,5 @@ /* -* @@name: affinity.1.c +* @@name: affinity.6 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/affinity/sources/affinity.6.f90 b/affinity/sources/affinity.6.f90 index 47bf55e..45f7e2b 100644 --- a/affinity/sources/affinity.6.f90 +++ b/affinity/sources/affinity.6.f90 @@ -1,4 +1,4 @@ -! @@name: affinity.6f +! @@name: affinity.6 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/affinity/sources/affinity_display.1.c b/affinity/sources/affinity_display.1.c index 8a0a98a..2ebefee 100644 --- a/affinity/sources/affinity_display.1.c +++ b/affinity/sources/affinity_display.1.c @@ -1,5 +1,5 @@ /* -* @@name: affinity_display.1.c +* @@name: affinity_display.1 * @@type: C * @@compilable: yes * @@linkable: yes @@ -9,52 +9,53 @@ #include #include -int main(void){ //MAX threads = 8, single socket system +int main(void){ //MAX threads = 8, single socket system - omp_display_affinity(NULL); //API call-- Displays Affinity of Primary Thread + //API call-- Displays Affinity of Primary Thread + omp_display_affinity(NULL); -// API CALL OUTPUT (default format): -//team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7 + // API CALL OUTPUT (default format): + // team_num= 0, nesting_level= 0, thread_num= 0, + // thread_affinity= 0,1,2,3,4,5,6,7 - - // OMP_DISPLAY_AFFINITY=TRUE, OMP_NUM_THREADS=8 + // OMP_DISPLAY_AFFINITY=TRUE, OMP_NUM_THREADS=8 #pragma omp parallel num_threads(omp_get_num_procs()) { - if(omp_get_thread_num()==0) + if(omp_get_thread_num()==0) printf("1st Parallel Region -- Affinity Reported \n"); - // DISPLAY OUTPUT (default format) has been sorted: - // team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0 - // team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 1 - // ... - // team_num= 0, nesting_level= 1, thread_num= 7, thread_affinity= 7 + // DISPLAY OUTPUT (default format) has been sorted: + // team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0 + // team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 1 + // ... 
+ // team_num= 0, nesting_level= 1, thread_num= 7, thread_affinity= 7 - // doing work here + // doing work here } #pragma omp parallel num_threads( omp_get_num_procs() ) { - if(omp_get_thread_num()==0) - printf("%s%s\n","Same Affinity as in Previous Parallel Region", - " -- no Affinity Reported\n"); + if(omp_get_thread_num()==0) + printf("%s%s\n","Same Affinity as in Previous Parallel Region", + " -- no Affinity Reported\n"); - // NO AFFINITY OUTPUT: - //(output in 1st parallel region only for OMP_DISPLAY_AFFINITY=TRUE) - - // doing more work here + // NO AFFINITY OUTPUT: + //(output in 1st parallel region only for OMP_DISPLAY_AFFINITY=TRUE) + + // doing more work here } - // Report Affinity for 1/2 number of threads + // Report Affinity for 1/2 number of threads #pragma omp parallel num_threads( omp_get_num_procs()/2 ) { - if(omp_get_thread_num()==0) + if(omp_get_thread_num()==0) printf("Report Affinity for using 1/2 of max threads.\n"); - - // DISPLAY OUTPUT (default format) has been sorted: - // team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0,1 - // team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 2,3 - // team_num= 0, nesting_level= 1, thread_num= 2, thread_affinity= 4,5 - // team_num= 0, nesting_level= 1, thread_num= 3, thread_affinity= 6,7 + + // DISPLAY OUTPUT (default format) has been sorted: + // team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0,1 + // team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 2,3 + // team_num= 0, nesting_level= 1, thread_num= 2, thread_affinity= 4,5 + // team_num= 0, nesting_level= 1, thread_num= 3, thread_affinity= 6,7 // do work } diff --git a/affinity/sources/affinity_display.1.f90 b/affinity/sources/affinity_display.1.f90 index 6a1957e..2bbc971 100644 --- a/affinity/sources/affinity_display.1.f90 +++ b/affinity/sources/affinity_display.1.f90 @@ -1,4 +1,4 @@ -! @@name: affinity_display.1.f90 +! @@name: affinity_display.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes @@ -10,13 +10,15 @@ program affinity_display ! MAX threads = 8, single socket system implicit none character(len=0) :: null - call omp_display_affinity(null) !API call- Displays Affinity of Primary Thrd + ! API call - Displays Affinity of Primary Thread + call omp_display_affinity(null) -! API CALL OUTPUT (default format): -!team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7 + ! API CALL OUTPUT (default format): + ! team_num= 0, nesting_level= 0, thread_num= 0, & + ! thread_affinity= 0,1,2,3,4,5,6,7 - ! OMP_DISPLAY_AFFINITY=TRUE, OMP_NUM_THREADS=8 + ! OMP_DISPLAY_AFFINITY=TRUE, OMP_NUM_THREADS=8 !$omp parallel num_threads(omp_get_num_procs()) @@ -24,11 +26,11 @@ program affinity_display ! MAX threads = 8, single socket system print*, "1st Parallel Region -- Affinity Reported" endif - ! DISPLAY OUTPUT (default format) has been sorted: - ! team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0 - ! team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 1 - ! ... - ! team_num= 0, nesting_level= 1, thread_num= 7, thread_affinity= 7 + ! DISPLAY OUTPUT (default format) has been sorted: + ! team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0 + ! team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 1 + ! ... + ! team_num= 0, nesting_level= 1, thread_num= 7, thread_affinity= 7 ! doing work here @@ -40,25 +42,30 @@ program affinity_display ! MAX threads = 8, single socket system print*, "Same Affinity in Parallel Region -- no Affinity Reported" endif - ! NO AFFINITY OUTPUT: - !(output in 1st parallel region only for OMP_DISPLAY_AFFINITY=TRUE) + ! NO AFFINITY OUTPUT: + ! (output in 1st parallel region only for + ! OMP_DISPLAY_AFFINITY=TRUE) ! doing more work here !$omp end parallel - ! Report Affinity for 1/2 number of threads + ! 
Report Affinity for 1/2 number of threads !$omp parallel num_threads( omp_get_num_procs()/2 ) if(omp_get_thread_num()==0) then - print*, "Different Affinity in Parallel Region -- Affinity Reported" + print*, "Altered Affinity in Parallel Region -- Affinity Reported" endif - ! DISPLAY OUTPUT (default format) has been sorted: - ! team_num= 0, nesting_level= 1, thread_num= 0, thread_affinity= 0,1 - ! team_num= 0, nesting_level= 1, thread_num= 1, thread_affinity= 2,3 - ! team_num= 0, nesting_level= 1, thread_num= 2, thread_affinity= 4,5 - ! team_num= 0, nesting_level= 1, thread_num= 3, thread_affinity= 6,7 + ! DISPLAY OUTPUT (default format) has been sorted: + ! team_num= 0, nesting_level= 1, thread_num= 0, & + ! thread_affinity= 0,1 + ! team_num= 0, nesting_level= 1, thread_num= 1, & + ! thread_affinity= 2,3 + ! team_num= 0, nesting_level= 1, thread_num= 2, & + ! thread_affinity= 4,5 + ! team_num= 0, nesting_level= 1, thread_num= 3, & + ! thread_affinity= 6,7 ! do work diff --git a/affinity/sources/affinity_display.2.c b/affinity/sources/affinity_display.2.c index aa79166..0e4b329 100644 --- a/affinity/sources/affinity_display.2.c +++ b/affinity/sources/affinity_display.2.c @@ -1,5 +1,5 @@ /* -* @@name: affinity_display.2c +* @@name: affinity_display.2 * @@type: C * @@compilable: yes * @@linkable: yes @@ -14,62 +14,65 @@ void socket_work(int socket_num, int n_thrds); int main(void) { - int n_sockets, socket_num, n_thrds_on_socket; + int n_sockets, socket_num, n_thrds_on_socket; - omp_set_nested(1); // or env var= OMP_NESTED=true - omp_set_max_active_levels(2); // or env var= OMP_MAX_ACTIVE_LEVELS=2 + omp_set_nested(1); // or env var= OMP_NESTED=true + omp_set_max_active_levels(2); // or env var= OMP_MAX_ACTIVE_LEVELS=2 - n_sockets = omp_get_num_places(); - n_thrds_on_socket = omp_get_place_num_procs(0); + n_sockets = omp_get_num_places(); + n_thrds_on_socket = omp_get_place_num_procs(0); - // OMP_NUM_THREADS=2,4 - // OMP_PLACES="{0,2,4,6},{1,3,5,7}" #2 sockets; 
even/odd proc-ids - // OMP_AFFINITY_FORMAT=\ - // "nest_level= %L, parent_thrd_num= %a, thrd_num= %n, thrd_affinity= %A" - - #pragma omp parallel num_threads(n_sockets) private(socket_num) - { - socket_num = omp_get_place_num(); +// OMP_NUM_THREADS=2,4 +// OMP_PLACES="{0,2,4,6},{1,3,5,7}" #2 sockets; even/odd proc-ids +// OMP_AFFINITY_FORMAT=\ +// "nest_level= %L, parent_thrd_num= %a, thrd_num= %n, thrd_affinity= %A" - if(socket_num==0) - printf(" LEVEL 1 AFFINITIES 1 thread/socket, %d sockets:\n\n", n_sockets); + #pragma omp parallel num_threads(n_sockets) private(socket_num) + { + socket_num = omp_get_place_num(); - omp_display_affinity(NULL); // not needed if OMP_DISPLAY_AFFINITY=TRUE + if(socket_num==0) + printf(" LEVEL 1 AFFINITIES 1 thread/socket, %d sockets:\n\n", + n_sockets); - // OUTPUT: - // LEVEL 1 AFFINITIES 1 thread/socket, 2 sockets: - // nest_level= 1, parent_thrd_num= 0, thrd_num= 0, thrd_affinity= 0,2,4,6 - // nest_level= 1, parent_thrd_num= 0, thrd_num= 1, thrd_affinity= 1,3,5,7 + // not needed if OMP_DISPLAY_AFFINITY=TRUE + omp_display_affinity(NULL); - socket_work(socket_num, n_thrds_on_socket); - } - - return 0; +// OUTPUT: +// LEVEL 1 AFFINITIES 1 thread/socket, 2 sockets: +// nest_level= 1, parent_thrd_num= 0, thrd_num= 0, thrd_affinity= 0,2,4,6 +// nest_level= 1, parent_thrd_num= 0, thrd_num= 1, thrd_affinity= 1,3,5,7 + + socket_work(socket_num, n_thrds_on_socket); + } + + return 0; } void socket_work(int socket_num, int n_thrds) -{ - #pragma omp parallel num_threads(n_thrds) - { - if(omp_get_thread_num()==0) - printf(" LEVEL 2 AFFINITIES, %d threads on socket %d\n",n_thrds, socket_num); - - omp_display_affinity(NULL); // not needed if OMP_DISPLAY_AFFINITY=TRUE - - // OUTPUT: - // LEVEL 2 AFFINITIES, 4 threads on socket 0 - // nest_level= 2, parent_thrd_num= 0, thrd_num= 0, thrd_affinity= 0 - // nest_level= 2, parent_thrd_num= 0, thrd_num= 1, thrd_affinity= 2 - // nest_level= 2, parent_thrd_num= 0, thrd_num= 2, thrd_affinity= 4 - // 
nest_level= 2, parent_thrd_num= 0, thrd_num= 3, thrd_affinity= 6 +{ + #pragma omp parallel num_threads(n_thrds) + { + if(omp_get_thread_num()==0) + printf(" LEVEL 2 AFFINITIES, %d threads on socket %d\n", + n_thrds, socket_num); + + // not needed if OMP_DISPLAY_AFFINITY=TRUE + omp_display_affinity(NULL); + + // OUTPUT: + // LEVEL 2 AFFINITIES, 4 threads on socket 0 + // nest_level= 2, parent_thrd_num= 0, thrd_num= 0, thrd_affinity= 0 + // nest_level= 2, parent_thrd_num= 0, thrd_num= 1, thrd_affinity= 2 + // nest_level= 2, parent_thrd_num= 0, thrd_num= 2, thrd_affinity= 4 + // nest_level= 2, parent_thrd_num= 0, thrd_num= 3, thrd_affinity= 6 + + // LEVEL 2 AFFINITIES, 4 threads on socket 1 + // nest_level= 2, parent_thrd_num= 1, thrd_num= 0, thrd_affinity= 1 + // nest_level= 2, parent_thrd_num= 1, thrd_num= 1, thrd_affinity= 3 + // nest_level= 2, parent_thrd_num= 1, thrd_num= 2, thrd_affinity= 5 + // nest_level= 2, parent_thrd_num= 1, thrd_num= 3, thrd_affinity= 7 - // LEVEL 2 AFFINITIES, 4 threads on socket 1 - // nest_level= 2, parent_thrd_num= 1, thrd_num= 0, thrd_affinity= 1 - // nest_level= 2, parent_thrd_num= 1, thrd_num= 1, thrd_affinity= 3 - // nest_level= 2, parent_thrd_num= 1, thrd_num= 2, thrd_affinity= 5 - // nest_level= 2, parent_thrd_num= 1, thrd_num= 3, thrd_affinity= 7 - // ... Do Some work on Socket - - } -} + } +} diff --git a/affinity/sources/affinity_display.2.f90 b/affinity/sources/affinity_display.2.f90 index f274473..3750aa8 100644 --- a/affinity/sources/affinity_display.2.f90 +++ b/affinity/sources/affinity_display.2.f90 @@ -1,4 +1,4 @@ -! @@name: affinity_display.2.f90 +! @@name: affinity_display.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes @@ -20,22 +20,26 @@ program affinity_display ! OMP_NUM_THREADS=2,4 ! OMP_PLACES="{0,2,4,6},{1,3,5,7}" #2 sockets; even/odd proc-ids ! OMP_AFFINITY_FORMAT=\ - ! 
"nest_level= %L, parent_thrd_num= %a, thrd_num= %n, thrd_affinity= %A" + !"nest_level= %L, parent_thrd_num= %a, thrd_num= %n, thrd_affinity= %A" !$omp parallel num_threads(n_sockets) private(socket_num) socket_num = omp_get_place_num() if(socket_num==0) then - write(*,'("LEVEL 1 AFFINITIES 1 thread/socket ",i0," sockets")')n_sockets + write(*,'("LEVEL 1 AFFINITIES 1 thread/socket ",i0," sockets")') & + n_sockets endif - call omp_display_affinity(null) !not needed if OMP_DISPLAY_AFFINITY=TRUE + call omp_display_affinity(null) ! not needed + ! if OMP_DISPLAY_AFFINITY=TRUE ! OUTPUT: ! LEVEL 1 AFFINITIES 1 thread/socket, 2 sockets: - ! nest_level= 1, parent_thrd_num= 0, thrd_num= 0, thrd_affinity= 0,2,4,6 - ! nest_level= 1, parent_thrd_num= 0, thrd_num= 1, thrd_affinity= 1,3,5,7 + ! nest_level= 1, parent_thrd_num= 0, thrd_num= 0, & + ! thrd_affinity= 0,2,4,6 + ! nest_level= 1, parent_thrd_num= 0, thrd_num= 1, & + ! thrd_affinity= 1,3,5,7 call socket_work(socket_num, n_thrds_on_socket) @@ -56,7 +60,8 @@ subroutine socket_work(socket_num, n_thrds) n_thrds,socket_num endif - call omp_display_affinity(null); !not needed if OMP_DISPLAY_AFFINITY=TRUE + call omp_display_affinity(null) ! not needed + ! if OMP_DISPLAY_AFFINITY=TRUE ! OUTPUT: ! 
LEVEL 2 AFFINITIES, 4 threads on socket 0 diff --git a/affinity/sources/affinity_display.3.c b/affinity/sources/affinity_display.3.c index 439a329..c09e6c3 100644 --- a/affinity/sources/affinity_display.3.c +++ b/affinity/sources/affinity_display.3.c @@ -1,5 +1,5 @@ /* -* @@name: affinity_display.3.c +* @@name: affinity_display.3 * @@type: C * @@compilable: yes * @@linkable: yes @@ -25,9 +25,9 @@ int main(void){ char **buffer; -// CODE SEGMENT 1 AFFINITY FORMAT + // CODE SEGMENT 1 AFFINITY FORMAT -// Get and Display Default Affinity Format + // Get and Display Default Affinity Format nchars = omp_get_affinity_format(default_format,(size_t)FORMAT_STORE); printf("Default Affinity Format is: %s\n",default_format); @@ -37,44 +37,49 @@ int main(void){ printf(" FORMAT_STORE to %d.\n", nchars+1); } -// Set Affinity Format + // Set Affinity Format omp_set_affinity_format(my_format); printf("Affinity Format set to: %s\n",my_format); -// CODE SEGMENT 2 CAPTURE AFFINITY + // CODE SEGMENT 2 CAPTURE AFFINITY -// Set up buffer for affinity of n threads + // Set up buffer for affinity of n threads n = omp_get_num_procs(); buffer = (char **)malloc( sizeof(char *) * n ); - for(i=0;in) exit(1); //safety: don't exceed # of buffers + //safety: don't exceed # of buffers + if(omp_get_num_threads()>n) exit(1); thrd_num=omp_get_thread_num(); - nchars=omp_capture_affinity(buffer[thrd_num],(size_t)BUFFER_STORE,NULL); + nchars=omp_capture_affinity(buffer[thrd_num], + (size_t)BUFFER_STORE,NULL); if(nchars > max_req_store) max_req_store=nchars; // ... } - for(i=0;i=BUFFER_STORE){ diff --git a/affinity/sources/affinity_display.3.f90 b/affinity/sources/affinity_display.3.f90 index a262411..5a012a3 100644 --- a/affinity/sources/affinity_display.3.f90 +++ b/affinity/sources/affinity_display.3.f90 @@ -1,4 +1,4 @@ -! @@name: affinity_display.3.f90 +! @@name: affinity_display.3 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/affinity/sources/affinity_query.1.c b/affinity/sources/affinity_query.1.c index 0cbc18a..e10f6f4 100644 --- a/affinity/sources/affinity_query.1.c +++ b/affinity/sources/affinity_query.1.c @@ -1,5 +1,5 @@ /* -* @@name: affinity_query.1c +* @@name: affinity_query.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/affinity/sources/affinity_query.1.f90 b/affinity/sources/affinity_query.1.f90 index 96dbca6..dc494de 100644 --- a/affinity/sources/affinity_query.1.f90 +++ b/affinity/sources/affinity_query.1.f90 @@ -1,4 +1,4 @@ -! @@name: affinity_query.1f +! @@name: affinity_query.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/affinity/task_affinity.tex b/affinity/task_affinity.tex index ddc2695..b3cd77d 100644 --- a/affinity/task_affinity.tex +++ b/affinity/task_affinity.tex @@ -1,5 +1,9 @@ \section{Task Affinity} \label{sec: task_affinity} +\index{affinity!task affinity} +\index{affinity!affinity clause@\code{affinity} clause} +\index{clauses!affinity@\code{affinity}} +\index{affinity clause@\code{affinity} clause} The next example illustrates the use of the \code{affinity} clause with a \code{task} construct. diff --git a/data_environment/associate.tex b/data_environment/associate.tex index 339b121..dea9204 100644 --- a/data_environment/associate.tex +++ b/data_environment/associate.tex @@ -2,6 +2,7 @@ \section{Fortran \code{ASSOCIATE} Construct} \fortranspecificstart \label{sec:associate} +\index{ASSOCIATE construct, Fortran@\code{ASSOCIATE} construct, Fortran} The following is an invalid example of specifying an associate name on a data-sharing attribute clause. The constraint in the Data Sharing Attribute Rules section in the OpenMP @@ -29,5 +30,40 @@ region, \plc{v} has the value of -1 and \plc{u} has the value of the original \p \pagebreak \ffreenexample[4.0]{associate}{3} + +% blue line floater at top of this page for "Fortran, cont." +\begin{figure}[t!] 
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em} +\end{figure} +\label{sec:associate_target} + +\bigskip +The following example illustrates mapping behavior for a Fortran +associate name and its selector for a \scode{target} construct. + +For the first 3 \scode{target} constructs the associate name \splc{a_aray} is +associated with the selector \splc{aray}, an array. +For the \scode{target} construct of code block TARGET 1 just the selector +\splc{aray} is used and is implicitly mapped, +likewise for the associate name \splc{a_aray} in the TARGET 2 block. +However, mapping an associate name and its selector is not valid for the same +\scode{target} construct. Hence the TARGET 3 block is non-conforming. + + +In TARGET 4, the \splc{scalr} selector used in the \scode{target} region +has an implicit data-sharing attribute of firstprivate since it is a scalar. +Hence, the assigned value is not returned. +In TARGET 5, the associate name \splc{a_scalr} is implicitly mapped and the +assigned value is returned to the host (default \scode{tofrom} mapping behavior). +In TARGET 6, the use of the associate name and its selector in the \scode{target} +region is conforming because the scalar firstprivate behavior of the selector +and the implicit mapping of the associate name are allowed. +At the end of the \scode{target} region only the +associate name's value is returned to the host. +In TARGET 7, the selector and associate name appear in +an explicit mapping for the same \scode{target} construct, +hence the code block is non-conforming. 
+ +\ffreenexample[5.1]{associate}{4} \fortranspecificend diff --git a/data_environment/carrays_fpriv.tex b/data_environment/carrays_fpriv.tex index f9086bb..fb94285 100644 --- a/data_environment/carrays_fpriv.tex +++ b/data_environment/carrays_fpriv.tex @@ -2,6 +2,8 @@ \section{C/C++ Arrays in a \code{firstprivate} Clause} \ccppspecificstart \label{sec:carrays_fpriv} +\index{clauses!firstprivate@\code{firstprivate}} +\index{firstprivate clause@\code{firstprivate} clause!C/C++ arrays in} The following example illustrates the size and value of list items of array or pointer type in a \code{firstprivate} clause . The size of new list items is diff --git a/data_environment/copyin.tex b/data_environment/copyin.tex index b7a9b9e..103166e 100644 --- a/data_environment/copyin.tex +++ b/data_environment/copyin.tex @@ -1,6 +1,10 @@ \pagebreak \section{\code{copyin} Clause} \label{sec:copyin} +\index{clauses!copyin@\code{copyin}} +\index{copyin clause@\code{copyin} clause} +\index{directives!threadprivate@\code{threadprivate}} +\index{threadprivate directive@\code{threadprivate} directive} The \code{copyin} clause is used to initialize threadprivate data upon entry to a \code{parallel} region. The value of the threadprivate variable in the primary diff --git a/data_environment/copyprivate.tex b/data_environment/copyprivate.tex index abf739a..8387048 100644 --- a/data_environment/copyprivate.tex +++ b/data_environment/copyprivate.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{copyprivate} Clause} \label{sec:copyprivate} +\index{clauses!copyprivate@\code{copyprivate}} +\index{copyprivate clause@\code{copyprivate} clause} The \code{copyprivate} clause can be used to broadcast values acquired by a single thread directly to all instances of the private variables in the other threads. @@ -9,6 +11,8 @@ is not affected by the presence of the directives. If it is called from a \code{ region, then the actual arguments with which \code{a} and \code{b} are associated must be private. 
+\index{constructs!single@\code{single}} +\index{single construct@\code{single} construct} The thread that executes the structured block associated with the \code{single} construct broadcasts the values of the private variables \code{a}, \code{b}, \code{x}, and @@ -20,6 +24,8 @@ any of the threads have left the barrier at the end of the construct. \fexample{copyprivate}{1} +\index{constructs!masked@\code{masked}} +\index{masked construct@\code{masked} construct} In this example, assume that the input must be performed by the primary thread. Since the \code{masked} construct does not support the \code{copyprivate} clause, it cannot broadcast the input value that is read. However, \code{copyprivate} @@ -27,7 +33,7 @@ is used to broadcast an address where the input value is stored. \cexample[5.1]{copyprivate}{2} -\fexample[5.1]{copyprivate}{2}[1] +\fexample[5.1]{copyprivate}{2} Suppose that the number of lock variables required within a \code{parallel} region cannot easily be determined prior to entering it. The \code{copyprivate} clause diff --git a/data_environment/cpp_reference.tex b/data_environment/cpp_reference.tex index 89f04d8..6022330 100644 --- a/data_environment/cpp_reference.tex +++ b/data_environment/cpp_reference.tex @@ -1,6 +1,8 @@ \section{C++ Reference in Data-Sharing Clauses} \cppspecificstart \label{sec:cpp_reference} +\index{clauses!data-sharing, C++ reference in} +\index{data-sharing clauses, C++ reference in} C++ reference types are allowed in data-sharing attribute clauses as of OpenMP 4.5, except for the \code{threadprivate}, \code{copyin} and \code{copyprivate} clauses. 
diff --git a/data_environment/default_none.tex b/data_environment/default_none.tex index 0129b88..93a189a 100644 --- a/data_environment/default_none.tex +++ b/data_environment/default_none.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{default(none)} Clause} \label{sec:default_none} +\index{clauses!default(none)@\code{default(none)}} +\index{default(none) clause@\code{default(none)} clause} The following example distinguishes the variables that are affected by the \code{default(none)} clause from those that are not. diff --git a/data_environment/fort_loopvar.tex b/data_environment/fort_loopvar.tex index 0781d40..0cd098f 100644 --- a/data_environment/fort_loopvar.tex +++ b/data_environment/fort_loopvar.tex @@ -2,6 +2,7 @@ \section{Fortran Private Loop Iteration Variables} \label{sec:fort_loopvar} \fortranspecificstart +\index{loop variables, Fortran} In general loop iteration variables will be private, when used in the \plc{do-loop} of a \code{do} and \code{parallel do} construct or in sequential loops in a diff --git a/data_environment/fort_sa_private.tex b/data_environment/fort_sa_private.tex index 1ee270b..e0e1098 100644 --- a/data_environment/fort_sa_private.tex +++ b/data_environment/fort_sa_private.tex @@ -2,6 +2,8 @@ \section{Fortran Restrictions on Storage Association with the \code{private} Clause} \fortranspecificstart \label{sec:fort_sa_private} +\index{clauses!private@\code{private}} +\index{private clause@\code{private} clause!storage association, Fortran} The following non-conforming examples illustrate the implications of the \code{private} clause rules with regard to storage association. 
diff --git a/data_environment/fort_sp_common.tex b/data_environment/fort_sp_common.tex index 712ea67..962b44d 100644 --- a/data_environment/fort_sp_common.tex +++ b/data_environment/fort_sp_common.tex @@ -2,6 +2,10 @@ \section{Fortran Restrictions on \code{shared} and \code{private} Clauses with Common Blocks} \fortranspecificstart \label{sec:fort_sp_common} +\index{clauses!private@\code{private}} +\index{clauses!shared@\code{shared}} +\index{private clause@\code{private} clause!common blocks, Fortran} +\index{shared clause@\code{shared} clause!common blocks, Fortran} When a named common block is specified in a \code{private}, \code{firstprivate}, or \code{lastprivate} clause of a construct, none of its members may be declared diff --git a/data_environment/lastprivate.tex b/data_environment/lastprivate.tex index d87783f..306b4f6 100644 --- a/data_environment/lastprivate.tex +++ b/data_environment/lastprivate.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{lastprivate} Clause} \label{sec:lastprivate} +\index{clauses!lastprivate@\code{lastprivate}} +\index{lastprivate clause@\code{lastprivate} clause} Correct execution sometimes depends on the value that the last iteration of a loop assigns to a variable. Such programs must list all such variables in a \code{lastprivate} @@ -12,6 +14,8 @@ sequentially. \fexample{lastprivate}{1} \clearpage +\index{lastprivate clause@\code{lastprivate} clause!conditional modifier@\code{conditional} modifier} +\index{conditional modifier@\code{conditional} modifier} The next example illustrates the use of the \code{conditional} modifier in a \code{lastprivate} clause to return the last value when it may not come from the last iteration of a loop. 
diff --git a/data_environment/private.tex b/data_environment/private.tex index c5f0556..2950fb0 100644 --- a/data_environment/private.tex +++ b/data_environment/private.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{private} Clause} \label{sec:private} +\index{clauses!private@\code{private}} +\index{private clause@\code{private} clause} In the following example, the values of original list items \plc{i} and \plc{j} are retained on exit from the \code{parallel} region, while the private list diff --git a/data_environment/reduction.tex b/data_environment/reduction.tex index 6876a99..2a6b4dd 100644 --- a/data_environment/reduction.tex +++ b/data_environment/reduction.tex @@ -7,6 +7,9 @@ This section covers ways to perform reductions in parallel, task, taskloop, and \subsection{\code{reduction} Clause} \label{subsec:reduction} +\index{clauses!reduction@\code{reduction}} +\index{reduction clause@\code{reduction} clause} +\index{reductions!reduction clause@\code{reduction} clause} The following example demonstrates the \code{reduction} clause; note that some reductions can be expressed in the loop in several ways, as shown for the \code{max} @@ -64,7 +67,7 @@ the start of the \code{parallel} region. \cexample[5.1]{reduction}{6} -\fexample[5.1]{reduction}{6}[1] +\fexample[5.1]{reduction}{6} The following example demonstrates the reduction of array \plc{a}. In C/C++ this is illustrated by the explicit use of an array section \plc{a[0:N]} in the \code{reduction} clause. The corresponding Fortran example uses array syntax supported in the base language. As of the OpenMP 4.5 specification the explicit use of array section in the \code{reduction} clause in Fortran is not permitted. But this oversight has been fixed in the OpenMP 5.0 specification. @@ -75,6 +78,12 @@ The following example demonstrates the reduction of array \plc{a}. 
In C/C++ thi \subsection{Task Reduction} \label{subsec:task_reduction} +\index{clauses!task_reduction@\scode{task_reduction}} +\index{task_reduction clause@\scode{task_reduction} clause} +\index{reductions!task_reduction clause@\scode{task_reduction} clause} +\index{clauses!in_reduction@\scode{in_reduction}} +\index{in_reduction clause@\scode{in_reduction} clause} +\index{reductions!in_reduction clause@\scode{in_reduction} clause} In OpenMP 5.0 the \code{task\_reduction} clause was created for the \code{taskgroup} construct, to allow reductions among explicit tasks that have an \code{in\_reduction} clause. @@ -97,6 +106,8 @@ reduction). \ffreeexample[5.0]{task_reduction}{1} +\index{reduction clause@\code{reduction} clause!task modifier@\code{task} modifier} +\index{task modifier@\code{task} modifier} In OpenMP 5.0 the \code{task} \plc{reduction-modifier} for the \code{reduction} clause was introduced to provide a means of performing reductions among implicit and explicit tasks. @@ -134,6 +145,9 @@ and list item (variable \code{x}) match as required. \subsection{Reduction on Combined Target Constructs} \label{subsec:target_reduction} +\index{reduction clause@\code{reduction} clause!on target construct@on \code{target} construct} +\index{constructs!target@\code{target}} +\index{target construct@\code{target} construct} When a \code{reduction} clause appears on a combined construct that combines a \code{target} construct with another construct, there is an implicit map @@ -174,6 +188,12 @@ first construct. 
\subsection{Task Reduction with Target Constructs} \label{subsec:target_task_reduction} +\index{in_reduction clause@\scode{in_reduction} clause} +\index{constructs!target@\code{target}} +\index{target construct@\code{target} construct} + +\index{clauses!enter@\code{enter}} +\index{enter clause@\code{enter} clause} The following examples illustrate how task reductions can apply to target tasks that result from a \code{target} construct with the \code{in\_reduction} @@ -184,34 +204,43 @@ task reduction will be combined (in some order) into the original variable listed in the \code{task\_reduction} clause before exiting the \code{taskgroup} region. -\cexample[5.1]{target_task_reduction}{1} +\cexample[5.2]{target_task_reduction}{1} -\ffreeexample[5.1]{target_task_reduction}{1}[1] +\ffreeexample[5.2]{target_task_reduction}{1} +\clearpage +\index{reduction clause@\code{reduction} clause!task modifier@\code{task} modifier} +\index{task modifier@\code{task} modifier} In the next pair of examples, the task reduction is defined by a \code{reduction} clause with the \code{task} modifier, rather than a \code{task\_reduction} clause on a \code{taskgroup} construct. Again, the partial results from the participating tasks will be combined in some order into the original reduction variable, \code{sum}. -\cexample[5.0]{target_task_reduction}{2a} +\cexample[5.2]{target_task_reduction}{2a} -\ffreeexample[5.0]{target_task_reduction}{2a} +\ffreeexample[5.2]{target_task_reduction}{2a} +\index{in_reduction clause@\scode{in_reduction} clause!with target construct@with \code{target} construct} +\index{constructs!target@\code{target}} +\index{target construct@\code{target} construct} Next, the \code{task} modifier is again used to define a task reduction over participating tasks. 
This time, the participating tasks are a target task resulting from a \code{target} construct with the \code{in\_reduction} clause, and the implicit task (executing on the primary thread) that calls -\code{host\_compute}. As before, the partial results from these paricipating +\code{host\_compute}. As before, the partial results from these participating tasks are combined in some order into the original reduction variable. -\cexample[5.1]{target_task_reduction}{2b} +\cexample[5.2]{target_task_reduction}{2b} -\ffreeexample[5.1]{target_task_reduction}{2b}[1] +\ffreeexample[5.2]{target_task_reduction}{2b} \subsection{Taskloop Reduction} \label{subsec:taskloop_reduction} +\index{reduction clause@\code{reduction} clause!on taskloop construct@on \code{taskloop} construct} +\index{constructs!taskloop@\code{taskloop}} +\index{taskloop construct@\code{taskloop} construct} In the OpenMP 5.0 Specification the \code{taskloop} construct was extended to include the reductions. @@ -249,7 +278,7 @@ reduction that has not been defined. %create a new reduction and also that all tasks generated by the taskloop will %participate on it. -The second example computes exactly the same value as in the preceding\plc{taskloop\_reduction.1} code section, +The second example computes exactly the same value as in the preceding \plc{taskloop\_reduction.1} code section, but in a very different way. First, in the \plc{array\_sum} function a \code{taskgroup} region is created that defines the scope of a new reduction using the \code{task\_reduction} clause. @@ -261,7 +290,7 @@ This is allowed because what is expressed with the \code{in\_reduction} clause is different from what is expressed with the \code{reduction} clause. 
In one case the generated tasks are specified to participate in a previously declared reduction (\code{in\_reduction} clause) whereas in the other case -creation of a new reduction is specified and also that all tasks generated +creation of a new reduction is specified and also all tasks generated by the taskloop will participate on it. \cexample[5.0]{taskloop_reduction}{2} @@ -271,6 +300,9 @@ by the taskloop will participate on it. In the OpenMP 5.0 Specification, \code{reduction} clauses for the \code{taskloop}~\code{ simd} construct were also added. +\index{reduction clause@\code{reduction} clause!on taskloop simd construct@on \code{taskloop}~\code{simd} construct} +\index{combined constructs!taskloop simd@\code{taskloop}~\code{simd}} +\index{taskloop simd construct@\code{taskloop}~\code{simd} construct} The examples below compare reductions for the \code{taskloop} and the \code{taskloop}~\code{simd} constructs. These examples illustrate the use of \code{reduction} clauses within "stand-alone" \code{taskloop} constructs, and the use of \code{in\_reduction} clauses for tasks of taskloops to participate @@ -341,11 +373,14 @@ At the end of the parallel region \plc{asum} contains the combined result of all \cexample[5.1]{taskloop_simd_reduction}{1} -\ffreeexample[5.1]{taskloop_simd_reduction}{1}[1] +\ffreeexample[5.1]{taskloop_simd_reduction}{1} \subsection{Reduction with the \code{scope} Construct} \label{subsec:reduction_scope} +\index{reduction clause@\code{reduction} clause!on scope construct@on \code{scope} construct} +\index{constructs!scope@\code{scope}} +\index{scope construct@\code{scope} construct} The following example illustrates the use of the \code{scope} construct to perform a reduction in a \code{parallel} region. 
The case is useful for diff --git a/data_environment/scan.tex b/data_environment/scan.tex index ea8b781..fd47523 100644 --- a/data_environment/scan.tex +++ b/data_environment/scan.tex @@ -1,6 +1,10 @@ \pagebreak \section{\code{scan} Directive} \label{sec:scan} +\index{directives!scan@\code{scan}} +\index{scan directive@\code{scan} directive} +\index{reduction clause@\code{reduction} clause!inscan modifier@\code{inscan} modifier} +\index{inscan modifier@\code{inscan} modifier} The following examples illustrate how to parallelize a loop that saves the \emph{prefix sum} of a reduction. This is accomplished by using @@ -9,6 +13,12 @@ variable of the scan, and specifying with a \code{scan} directive whether the storage statement includes or excludes the scan input of the present iteration (\texttt{k}). +\index{scan directive@\code{scan} directive!inclusive clause@\code{inclusive} clause} +\index{scan directive@\code{scan} directive!exclusive clause@\code{exclusive} clause} +\index{clauses!inclusive@\code{inclusive}} +\index{inclusive clause@\code{inclusive} clause} +\index{clauses!exclusive@\code{exclusive}} +\index{exclusive clause@\code{exclusive} clause} Basically, the \code{inscan} modifier connects a loop and/or SIMD reduction to the scan operation, and a \code{scan} construct with an \code{inclusive} or \code{exclusive} clause specifies whether the ``scan phase'' (lexical block diff --git a/data_environment/sources/associate.1.f b/data_environment/sources/associate.1.f index 500db20..f528b0d 100644 --- a/data_environment/sources/associate.1.f +++ b/data_environment/sources/associate.1.f @@ -1,4 +1,4 @@ -! @@name: associate.1f +! @@name: associate.1 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/data_environment/sources/associate.2.f b/data_environment/sources/associate.2.f index c0787c8..f740181 100644 --- a/data_environment/sources/associate.2.f +++ b/data_environment/sources/associate.2.f @@ -1,4 +1,4 @@ -! @@name: associate.2f +! 
@@name: associate.2 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/associate.3.f90 b/data_environment/sources/associate.3.f90 index bd2c209..4eac6eb 100644 --- a/data_environment/sources/associate.3.f90 +++ b/data_environment/sources/associate.3.f90 @@ -1,4 +1,4 @@ -! @@name: associate.3f +! @@name: associate.3 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/associate.4.f90 b/data_environment/sources/associate.4.f90 new file mode 100644 index 0000000..f586721 --- /dev/null +++ b/data_environment/sources/associate.4.f90 @@ -0,0 +1,58 @@ +! @@name: associate.4 +! @@type: F-free +! @@compilable: yes +! @@linkable: yes +! @@expect: success +! @@version: omp_5.1 +program main + integer :: scalr, aray(3) + scalr = -1 ; aray = -1 + + associate(a_scalr=>scalr, a_aray=>aray) + + !$omp target !! TARGET 1 + aray = [1,2,3] + !$omp end target + print *, a_aray, aray !! 1 2 3 1 2 3 + + !$omp target !! TARGET 2 + a_aray = [4,5,6] + !$omp end target + print *, a_aray, aray !! 4 5 6 4 5 6 + +!!!$omp target !! TARGET 3 +!! !! mapping, in this case implicit, +!! !! of aray AND a_aray NOT ALLOWED +!! aray = [4,5,6] +!! a_aray = [1,2,3] +!!!$omp end target + + + !$omp target !! TARGET 4 + scalr = 1 !! scalr is firstprivate + !$omp end target + print *, a_scalr, scalr !! -1 -1 + + !$omp target !! TARGET 5 + a_scalr = 2 !! a_scalr implicitly mapped + !$omp end target + print *, a_scalr, scalr !! 2 2 + + !$omp target !! TARGET 6 + scalr = 3 !! scalr is firstprivate + print *, a_scalr, scalr !! 2 3 + a_scalr = 4 !! a_scalr implicitly mapped + print *, a_scalr, scalr !! 4 3 + !$omp end target + print *, a_scalr, scalr !! 4 4 + +!!!$omp target map(a_scalr,scalr) !! TARGET 7 + !! mapping, in this case explicit, + !! of scalr AND a_scalr NOT ALLOWED +!! scalr = 5 +!! 
a_scalr = 5 +!!!$omp end target + + end associate + +end program diff --git a/data_environment/sources/carrays_fpriv.1.c b/data_environment/sources/carrays_fpriv.1.c index b5d3990..fa0d0b6 100644 --- a/data_environment/sources/carrays_fpriv.1.c +++ b/data_environment/sources/carrays_fpriv.1.c @@ -1,5 +1,5 @@ /* -* @@name: carrays_fpriv.1c +* @@name: carrays_fpriv.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/copyin.1.c b/data_environment/sources/copyin.1.c index 76500ff..888f746 100644 --- a/data_environment/sources/copyin.1.c +++ b/data_environment/sources/copyin.1.c @@ -1,5 +1,5 @@ /* -* @@name: copyin.1c +* @@name: copyin.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/copyin.1.f b/data_environment/sources/copyin.1.f index ecc1b71..9f86c49 100644 --- a/data_environment/sources/copyin.1.f +++ b/data_environment/sources/copyin.1.f @@ -1,4 +1,4 @@ -! @@name: copyin.1f +! @@name: copyin.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/copyprivate.1.c b/data_environment/sources/copyprivate.1.c index adbe8a8..25a4cf3 100644 --- a/data_environment/sources/copyprivate.1.c +++ b/data_environment/sources/copyprivate.1.c @@ -1,5 +1,5 @@ /* -* @@name: copyprivate.1c +* @@name: copyprivate.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/copyprivate.1.f b/data_environment/sources/copyprivate.1.f index 315a38d..dde5e73 100644 --- a/data_environment/sources/copyprivate.1.f +++ b/data_environment/sources/copyprivate.1.f @@ -1,4 +1,4 @@ -! @@name: copyprivate.1f +! @@name: copyprivate.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/data_environment/sources/copyprivate.2.c b/data_environment/sources/copyprivate.2.c index 3b46ce5..1c27ca8 100644 --- a/data_environment/sources/copyprivate.2.c +++ b/data_environment/sources/copyprivate.2.c @@ -1,15 +1,11 @@ /* -* @@name: copyprivate.2c +* @@name: copyprivate.2 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include #include diff --git a/data_environment/sources/copyprivate.2.f b/data_environment/sources/copyprivate.2.f index c78f2d7..e7c87a5 100644 --- a/data_environment/sources/copyprivate.2.f +++ b/data_environment/sources/copyprivate.2.f @@ -1,14 +1,9 @@ -! @@name: copyprivate.2f +! @@name: copyprivate.2 ! @@type: F-fixed ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success ! @@version: omp_5.1 -#if _OPENMP < 202011 -#define MASKED MASTER -#endif - REAL FUNCTION READ_NEXT() REAL, POINTER :: TMP diff --git a/data_environment/sources/copyprivate.3.c b/data_environment/sources/copyprivate.3.c index 00f6b0b..8e9d4a0 100644 --- a/data_environment/sources/copyprivate.3.c +++ b/data_environment/sources/copyprivate.3.c @@ -1,5 +1,5 @@ /* -* @@name: copyprivate.3c +* @@name: copyprivate.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/copyprivate.3.f b/data_environment/sources/copyprivate.3.f index 1926a61..c83b990 100644 --- a/data_environment/sources/copyprivate.3.f +++ b/data_environment/sources/copyprivate.3.f @@ -1,4 +1,4 @@ -! @@name: copyprivate.3f +! @@name: copyprivate.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/copyprivate.4.f b/data_environment/sources/copyprivate.4.f index 49b67b5..a6affa0 100644 --- a/data_environment/sources/copyprivate.4.f +++ b/data_environment/sources/copyprivate.4.f @@ -1,4 +1,4 @@ -! @@name: copyprivate.4f +! @@name: copyprivate.4 ! @@type: F-fixed ! 
@@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/cpp_reference.1.cpp b/data_environment/sources/cpp_reference.1.cpp index 5c63ff5..0d47862 100644 --- a/data_environment/sources/cpp_reference.1.cpp +++ b/data_environment/sources/cpp_reference.1.cpp @@ -1,5 +1,5 @@ /* -* @@name: cpp_reference.1c +* @@name: cpp_reference.1 * @@type: C++ * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/default_none.1.c b/data_environment/sources/default_none.1.c index b72af9f..c382a81 100644 --- a/data_environment/sources/default_none.1.c +++ b/data_environment/sources/default_none.1.c @@ -1,5 +1,5 @@ /* -* @@name: default_none.1c +* @@name: default_none.1 * @@type: C * @@compilable: no * @@linkable: no diff --git a/data_environment/sources/default_none.1.f b/data_environment/sources/default_none.1.f index 72a82bb..38e74e8 100644 --- a/data_environment/sources/default_none.1.f +++ b/data_environment/sources/default_none.1.f @@ -1,4 +1,4 @@ -! @@name: default_none.1f +! @@name: default_none.1 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/data_environment/sources/fort_loopvar.1.f90 b/data_environment/sources/fort_loopvar.1.f90 index dd7c55d..8505a4b 100644 --- a/data_environment/sources/fort_loopvar.1.f90 +++ b/data_environment/sources/fort_loopvar.1.f90 @@ -1,4 +1,4 @@ -! @@name: fort_loopvar.1f +! @@name: fort_loopvar.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/fort_loopvar.2.f90 b/data_environment/sources/fort_loopvar.2.f90 index 8d7f408..6275e13 100644 --- a/data_environment/sources/fort_loopvar.2.f90 +++ b/data_environment/sources/fort_loopvar.2.f90 @@ -1,4 +1,4 @@ -! @@name: fort_loopvar.2f +! @@name: fort_loopvar.2 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/data_environment/sources/fort_sa_private.1.f b/data_environment/sources/fort_sa_private.1.f index c99d747..2e22734 100644 --- a/data_environment/sources/fort_sa_private.1.f +++ b/data_environment/sources/fort_sa_private.1.f @@ -1,4 +1,4 @@ -! @@name: fort_sa_private.1f +! @@name: fort_sa_private.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/fort_sa_private.2.f b/data_environment/sources/fort_sa_private.2.f index 284bef9..ffc53bc 100644 --- a/data_environment/sources/fort_sa_private.2.f +++ b/data_environment/sources/fort_sa_private.2.f @@ -1,4 +1,4 @@ -! @@name: fort_sa_private.2f +! @@name: fort_sa_private.2 ! @@type: F-fixed ! @@compilable: maybe ! @@linkable: maybe diff --git a/data_environment/sources/fort_sa_private.3.f b/data_environment/sources/fort_sa_private.3.f index 3f1e682..cf802ed 100644 --- a/data_environment/sources/fort_sa_private.3.f +++ b/data_environment/sources/fort_sa_private.3.f @@ -1,4 +1,4 @@ -! @@name: fort_sa_private.3f +! @@name: fort_sa_private.3 ! @@type: F-fixed ! @@compilable: maybe ! @@linkable: maybe diff --git a/data_environment/sources/fort_sa_private.4.f b/data_environment/sources/fort_sa_private.4.f index 5421ca4..bffa19a 100644 --- a/data_environment/sources/fort_sa_private.4.f +++ b/data_environment/sources/fort_sa_private.4.f @@ -1,4 +1,4 @@ -! @@name: fort_sa_private.4f +! @@name: fort_sa_private.4 ! @@type: F-fixed ! @@compilable: maybe ! @@linkable: maybe diff --git a/data_environment/sources/fort_sa_private.5.f b/data_environment/sources/fort_sa_private.5.f index 80788ff..d2e31ae 100644 --- a/data_environment/sources/fort_sa_private.5.f +++ b/data_environment/sources/fort_sa_private.5.f @@ -1,4 +1,4 @@ -! @@name: fort_sa_private.5f +! @@name: fort_sa_private.5 ! @@type: F-fixed ! @@compilable: maybe ! 
@@linkable: maybe diff --git a/data_environment/sources/fort_sp_common.1.f b/data_environment/sources/fort_sp_common.1.f index 37724cc..b19ccba 100644 --- a/data_environment/sources/fort_sp_common.1.f +++ b/data_environment/sources/fort_sp_common.1.f @@ -1,4 +1,4 @@ -! @@name: fort_sp_common.1f +! @@name: fort_sp_common.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/fort_sp_common.2.f b/data_environment/sources/fort_sp_common.2.f index ba6b1de..2ac615e 100644 --- a/data_environment/sources/fort_sp_common.2.f +++ b/data_environment/sources/fort_sp_common.2.f @@ -1,4 +1,4 @@ -! @@name: fort_sp_common.2f +! @@name: fort_sp_common.2 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/fort_sp_common.3.f b/data_environment/sources/fort_sp_common.3.f index da6a267..699beb6 100644 --- a/data_environment/sources/fort_sp_common.3.f +++ b/data_environment/sources/fort_sp_common.3.f @@ -1,4 +1,4 @@ -! @@name: fort_sp_common.3f +! @@name: fort_sp_common.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/fort_sp_common.4.f b/data_environment/sources/fort_sp_common.4.f index 2d631a1..fbb4410 100644 --- a/data_environment/sources/fort_sp_common.4.f +++ b/data_environment/sources/fort_sp_common.4.f @@ -1,4 +1,4 @@ -! @@name: fort_sp_common.4f +! @@name: fort_sp_common.4 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/data_environment/sources/fort_sp_common.5.f b/data_environment/sources/fort_sp_common.5.f index c9fecc6..6e9590b 100644 --- a/data_environment/sources/fort_sp_common.5.f +++ b/data_environment/sources/fort_sp_common.5.f @@ -1,4 +1,4 @@ -! @@name: fort_sp_common.5f +! @@name: fort_sp_common.5 ! @@type: F-fixed ! @@compilable: no ! 
@@linkable: no diff --git a/data_environment/sources/lastprivate.1.c b/data_environment/sources/lastprivate.1.c index ce17512..a271e8c 100644 --- a/data_environment/sources/lastprivate.1.c +++ b/data_environment/sources/lastprivate.1.c @@ -1,5 +1,5 @@ /* -* @@name: lastprivate.1c +* @@name: lastprivate.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/lastprivate.1.f b/data_environment/sources/lastprivate.1.f index c87023f..97fa9c4 100644 --- a/data_environment/sources/lastprivate.1.f +++ b/data_environment/sources/lastprivate.1.f @@ -1,4 +1,4 @@ -! @@name: lastprivate.1f +! @@name: lastprivate.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/lastprivate.2.c b/data_environment/sources/lastprivate.2.c index 057f9eb..76dffa4 100644 --- a/data_environment/sources/lastprivate.2.c +++ b/data_environment/sources/lastprivate.2.c @@ -1,5 +1,5 @@ /* -* @@name: lastprivate.2c +* @@name: lastprivate.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/lastprivate.2.f90 b/data_environment/sources/lastprivate.2.f90 index 0ec310e..f60c1f7 100644 --- a/data_environment/sources/lastprivate.2.f90 +++ b/data_environment/sources/lastprivate.2.f90 @@ -1,4 +1,4 @@ -! @@name: lastprivate.2f +! @@name: lastprivate.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/private.1.c b/data_environment/sources/private.1.c index 2b16f50..74096fb 100644 --- a/data_environment/sources/private.1.c +++ b/data_environment/sources/private.1.c @@ -1,5 +1,5 @@ /* -* @@name: private.1c +* @@name: private.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/private.1.f b/data_environment/sources/private.1.f index fec044f..86217c3 100644 --- a/data_environment/sources/private.1.f +++ b/data_environment/sources/private.1.f @@ -1,4 +1,4 @@ -! @@name: private.1f +! @@name: private.1 ! @@type: F-fixed ! 
@@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/private.2.c b/data_environment/sources/private.2.c index bdbce8a..4c3ade2 100644 --- a/data_environment/sources/private.2.c +++ b/data_environment/sources/private.2.c @@ -1,5 +1,5 @@ /* -* @@name: private.2c +* @@name: private.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/private.2.f b/data_environment/sources/private.2.f index e38560d..df45188 100644 --- a/data_environment/sources/private.2.f +++ b/data_environment/sources/private.2.f @@ -1,4 +1,4 @@ -! @@name: private.2f +! @@name: private.2 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/private.3.c b/data_environment/sources/private.3.c index 2176aa2..17aa197 100644 --- a/data_environment/sources/private.3.c +++ b/data_environment/sources/private.3.c @@ -1,5 +1,5 @@ /* -* @@name: private.3c +* @@name: private.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/private.3.f b/data_environment/sources/private.3.f index 080884a..6103f2b 100644 --- a/data_environment/sources/private.3.f +++ b/data_environment/sources/private.3.f @@ -1,4 +1,4 @@ -! @@name: private.3f +! @@name: private.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/reduction.1.c b/data_environment/sources/reduction.1.c index 4d64017..a0df572 100644 --- a/data_environment/sources/reduction.1.c +++ b/data_environment/sources/reduction.1.c @@ -1,5 +1,5 @@ /* -* @@name: reduction.1c +* @@name: reduction.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/reduction.1.f90 b/data_environment/sources/reduction.1.f90 index 4b58299..5e3eb0d 100644 --- a/data_environment/sources/reduction.1.f90 +++ b/data_environment/sources/reduction.1.f90 @@ -1,4 +1,4 @@ -! @@name: reduction.1f +! @@name: reduction.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/data_environment/sources/reduction.2.c b/data_environment/sources/reduction.2.c index 01ede12..73aa73b 100644 --- a/data_environment/sources/reduction.2.c +++ b/data_environment/sources/reduction.2.c @@ -1,5 +1,5 @@ /* -* @@name: reduction.2c +* @@name: reduction.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/reduction.2.f90 b/data_environment/sources/reduction.2.f90 index 4e396b2..4f4afa9 100644 --- a/data_environment/sources/reduction.2.f90 +++ b/data_environment/sources/reduction.2.f90 @@ -1,4 +1,4 @@ -! @@name: reduction.2f +! @@name: reduction.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/reduction.3.c b/data_environment/sources/reduction.3.c index f1fcd34..e2eb68d 100644 --- a/data_environment/sources/reduction.3.c +++ b/data_environment/sources/reduction.3.c @@ -1,5 +1,5 @@ /* -* @@name: reduction.3c +* @@name: reduction.3 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/reduction.3.f90 b/data_environment/sources/reduction.3.f90 index 368b618..1ed3fb4 100644 --- a/data_environment/sources/reduction.3.f90 +++ b/data_environment/sources/reduction.3.f90 @@ -1,4 +1,4 @@ -! @@name: reduction.3f +! @@name: reduction.3 ! @@type: F-free ! @@compilable: no ! @@linkable: no diff --git a/data_environment/sources/reduction.4.f90 b/data_environment/sources/reduction.4.f90 index 9fb784e..5e6f81c 100644 --- a/data_environment/sources/reduction.4.f90 +++ b/data_environment/sources/reduction.4.f90 @@ -1,4 +1,4 @@ -! @@name: reduction.4f +! @@name: reduction.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/reduction.5.f90 b/data_environment/sources/reduction.5.f90 index a1477cf..b2b32a2 100644 --- a/data_environment/sources/reduction.5.f90 +++ b/data_environment/sources/reduction.5.f90 @@ -1,4 +1,4 @@ -! @@name: reduction.5f +! @@name: reduction.5 ! @@type: F-free ! 
@@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/reduction.6.c b/data_environment/sources/reduction.6.c index b470e45..95a75b1 100644 --- a/data_environment/sources/reduction.6.c +++ b/data_environment/sources/reduction.6.c @@ -1,15 +1,11 @@ /* -* @@name: reduction.6c +* @@name: reduction.6 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: rt-error * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include int main (void) diff --git a/data_environment/sources/reduction.6.f b/data_environment/sources/reduction.6.f index 44e2ff6..c6669d9 100644 --- a/data_environment/sources/reduction.6.f +++ b/data_environment/sources/reduction.6.f @@ -1,14 +1,9 @@ -! @@name: reduction.6f +! @@name: reduction.6 ! @@type: F-fixed ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: yes ! @@expect: rt-error ! @@version: omp_5.1 -#if _OPENMP < 202011 -#define MASKED MASTER -#endif - INTEGER A, I !$OMP PARALLEL SHARED(A) PRIVATE(I) diff --git a/data_environment/sources/reduction.7.c b/data_environment/sources/reduction.7.c index da24bb5..a9299f2 100644 --- a/data_environment/sources/reduction.7.c +++ b/data_environment/sources/reduction.7.c @@ -1,5 +1,5 @@ /* -* @@name: reduction.7c +* @@name: reduction.7 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/reduction.7.f90 b/data_environment/sources/reduction.7.f90 index d90e15f..f1c7407 100644 --- a/data_environment/sources/reduction.7.f90 +++ b/data_environment/sources/reduction.7.f90 @@ -1,4 +1,4 @@ -! @@name: reduction.7f +! @@name: reduction.7 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/data_environment/sources/scan.1.c b/data_environment/sources/scan.1.c index e8c2b3a..58ce56e 100644 --- a/data_environment/sources/scan.1.c +++ b/data_environment/sources/scan.1.c @@ -1,5 +1,5 @@ /* -* @@name: scan.1.c +* @@name: scan.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/scan.1.f90 b/data_environment/sources/scan.1.f90 index dd35bfc..7b09f17 100644 --- a/data_environment/sources/scan.1.f90 +++ b/data_environment/sources/scan.1.f90 @@ -1,4 +1,4 @@ -! @@name: scan.1.f +! @@name: scan.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/scan.2.c b/data_environment/sources/scan.2.c index 7ac21f6..ea59ade 100644 --- a/data_environment/sources/scan.2.c +++ b/data_environment/sources/scan.2.c @@ -1,5 +1,5 @@ /* -* @@name: scan.2.c +* @@name: scan.2 * @@type: C * @@compilable: yes * @@linkable: yes @@ -18,7 +18,8 @@ int main(void) for (int k = 0; k < N; k++) a[k] = k + 1; - // a[k] is not included in the computation of producing results in b[k] + // a[k] is not included in the computation of producing + // results in b[k] #pragma omp parallel for simd reduction(inscan,+: x) for (int k = 0; k < N; k++) { b[k] = x; diff --git a/data_environment/sources/scan.2.f90 b/data_environment/sources/scan.2.f90 index b4fdda7..18642bf 100644 --- a/data_environment/sources/scan.2.f90 +++ b/data_environment/sources/scan.2.f90 @@ -1,4 +1,4 @@ -! @@name: scan.2.f +! @@name: scan.2 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/data_environment/sources/scope_reduction.1.cpp b/data_environment/sources/scope_reduction.1.cpp index 917e377..0ef5f39 100644 --- a/data_environment/sources/scope_reduction.1.cpp +++ b/data_environment/sources/scope_reduction.1.cpp @@ -1,5 +1,5 @@ /* -* @@name: scope_reduction.1c +* @@name: scope_reduction.1 * @@type: C++ * @@compilable: yes * @@linkable: no diff --git a/data_environment/sources/scope_reduction.1.f90 b/data_environment/sources/scope_reduction.1.f90 index 678b8bc..945af3e 100644 --- a/data_environment/sources/scope_reduction.1.f90 +++ b/data_environment/sources/scope_reduction.1.f90 @@ -1,4 +1,4 @@ -! @@name: scope_reduction.1f +! @@name: scope_reduction.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/data_environment/sources/target_reduction.1.c b/data_environment/sources/target_reduction.1.c index 63f9f5c..d465357 100644 --- a/data_environment/sources/target_reduction.1.c +++ b/data_environment/sources/target_reduction.1.c @@ -1,5 +1,5 @@ /* -* @@name: target_reduction.1.c +* @@name: target_reduction.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/target_reduction.1.f90 b/data_environment/sources/target_reduction.1.f90 index 5818d0d..e2a63ac 100644 --- a/data_environment/sources/target_reduction.1.f90 +++ b/data_environment/sources/target_reduction.1.f90 @@ -1,6 +1,6 @@ -! @@name: target_reduction.1.f90 +! @@name: target_reduction.1 ! @@type: F-free -! @@compilable: yes, omp_5.0 +! @@compilable: yes ! @@linkable: yes ! @@expect: success ! 
@@version: omp_5.0 diff --git a/data_environment/sources/target_reduction.2.c b/data_environment/sources/target_reduction.2.c index f367bfd..06d3898 100644 --- a/data_environment/sources/target_reduction.2.c +++ b/data_environment/sources/target_reduction.2.c @@ -1,5 +1,5 @@ /* -* @@name: target_reduction.2.c +* @@name: target_reduction.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/target_reduction.2.f90 b/data_environment/sources/target_reduction.2.f90 index 4557181..5cf14e7 100644 --- a/data_environment/sources/target_reduction.2.f90 +++ b/data_environment/sources/target_reduction.2.f90 @@ -1,4 +1,4 @@ -! @@name: target_reduction.2.f90 +! @@name: target_reduction.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/target_task_reduction.1.c b/data_environment/sources/target_task_reduction.1.c index d2bde32..19604cc 100644 --- a/data_environment/sources/target_task_reduction.1.c +++ b/data_environment/sources/target_task_reduction.1.c @@ -1,17 +1,14 @@ /* -* @@name: target_task_reduction.1.c +* @@name: target_task_reduction.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.1 +* @@version: omp_5.2 */ -#if _OPENMP < 202011 -#define masked master -#endif #include -#pragma omp declare target to(device_compute) +#pragma omp declare target enter(device_compute) void device_compute(int *); void host_compute(int *); int main() diff --git a/data_environment/sources/target_task_reduction.1.f90 b/data_environment/sources/target_task_reduction.1.f90 index aa46179..60de459 100644 --- a/data_environment/sources/target_task_reduction.1.f90 +++ b/data_environment/sources/target_task_reduction.1.f90 @@ -1,18 +1,14 @@ -! @@name: target_task_reduction.1.f90 +! @@name: target_task_reduction.1 ! @@type: F-free ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success -! 
@@version: omp_5.1 -#if _OPENMP < 202011 -#define masked master -#endif +! @@version: omp_5.2 program target_task_reduction_ex1 interface subroutine device_compute(res) - !$omp declare target to(device_compute) + !$omp declare target enter(device_compute) integer :: res end subroutine device_compute subroutine host_compute(res) diff --git a/data_environment/sources/target_task_reduction.2a.c b/data_environment/sources/target_task_reduction.2a.c index 9802459..ceaab06 100644 --- a/data_environment/sources/target_task_reduction.2a.c +++ b/data_environment/sources/target_task_reduction.2a.c @@ -1,13 +1,13 @@ /* -* @@name: target_task_reduction.2.c +* @@name: target_task_reduction.2a * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.2 */ #include -#pragma omp declare target to(device_compute) +#pragma omp declare target enter(device_compute) extern void device_compute(int *); extern void host_compute(int *); int main() diff --git a/data_environment/sources/target_task_reduction.2a.f90 b/data_environment/sources/target_task_reduction.2a.f90 index 4487a7c..94fbd61 100644 --- a/data_environment/sources/target_task_reduction.2a.f90 +++ b/data_environment/sources/target_task_reduction.2a.f90 @@ -1,13 +1,14 @@ -! @@name: target_task_reduction.2.f90 +! @@name: target_task_reduction.2a ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success -! @@version: omp_5.0 +! 
@@version: omp_5.2 + program target_task_reduction_ex2 interface subroutine device_compute(res) - !$omp declare target to(device_compute) + !$omp declare target enter(device_compute) integer :: res end subroutine device_compute subroutine host_compute(res) diff --git a/data_environment/sources/target_task_reduction.2b.c b/data_environment/sources/target_task_reduction.2b.c index 23b39eb..a27dcc8 100644 --- a/data_environment/sources/target_task_reduction.2b.c +++ b/data_environment/sources/target_task_reduction.2b.c @@ -1,17 +1,13 @@ /* -* @@name: target_task_reduction.2b.c +* @@name: target_task_reduction.2b * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.1 +* @@version: omp_5.2 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include -#pragma omp declare target to(device_compute) +#pragma omp declare target enter(device_compute) extern void device_compute(int *); extern void host_compute(int *); int main() diff --git a/data_environment/sources/target_task_reduction.2b.f90 b/data_environment/sources/target_task_reduction.2b.f90 index ff06ab4..3e919af 100644 --- a/data_environment/sources/target_task_reduction.2b.f90 +++ b/data_environment/sources/target_task_reduction.2b.f90 @@ -1,18 +1,14 @@ -! @@name: target_task_reduction.2b.f90 +! @@name: target_task_reduction.2b ! @@type: F-free ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: yes ! @@expect: success -! @@version: omp_5.1 -#if _OPENMP < 202011 -#define masked master -#endif +! 
@@version: omp_5.2 program target_task_reduction_ex2b interface subroutine device_compute(res) - !$omp declare target to(device_compute) + !$omp declare target enter(device_compute) integer :: res end subroutine device_compute subroutine host_compute(res) diff --git a/data_environment/sources/task_reduction.1.c b/data_environment/sources/task_reduction.1.c index 00015c2..c5f880d 100644 --- a/data_environment/sources/task_reduction.1.c +++ b/data_environment/sources/task_reduction.1.c @@ -1,5 +1,5 @@ /* -* @@name: task_reduction.1c +* @@name: task_reduction.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/task_reduction.1.f90 b/data_environment/sources/task_reduction.1.f90 index 2103212..7c0bea0 100644 --- a/data_environment/sources/task_reduction.1.f90 +++ b/data_environment/sources/task_reduction.1.f90 @@ -1,4 +1,4 @@ -! @@name: task_reduction.1f90 +! @@name: task_reduction.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/task_reduction.2.c b/data_environment/sources/task_reduction.2.c index 3afbe2c..263f578 100644 --- a/data_environment/sources/task_reduction.2.c +++ b/data_environment/sources/task_reduction.2.c @@ -1,7 +1,7 @@ /* -* @@name: task_reduction.2.c +* @@name: task_reduction.2 * @@type: C -* @@compilable: yes, omp_5.0 +* @@compilable: yes * @@linkable: yes * @@expect: success * @@version: omp_5.0 diff --git a/data_environment/sources/task_reduction.2.f90 b/data_environment/sources/task_reduction.2.f90 index 5037fb9..e6ddd0b 100644 --- a/data_environment/sources/task_reduction.2.f90 +++ b/data_environment/sources/task_reduction.2.f90 @@ -1,6 +1,6 @@ -! @@name: task_reduction.2.f90 +! @@name: task_reduction.2 ! @@type: F-free -! @@compilable: yes, omp_5.0 +! @@compilable: yes ! @@linkable: yes ! @@expect: success ! 
@@version: omp_5.0 diff --git a/data_environment/sources/taskloop_reduction.1.c b/data_environment/sources/taskloop_reduction.1.c index 77b8a8d..66b8250 100644 --- a/data_environment/sources/taskloop_reduction.1.c +++ b/data_environment/sources/taskloop_reduction.1.c @@ -1,5 +1,5 @@ /* -* @@name: taskloop_reduction.1.c +* @@name: taskloop_reduction.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/taskloop_reduction.1.f90 b/data_environment/sources/taskloop_reduction.1.f90 index cb8e3f2..c810b28 100644 --- a/data_environment/sources/taskloop_reduction.1.f90 +++ b/data_environment/sources/taskloop_reduction.1.f90 @@ -1,4 +1,4 @@ -! @@name: taskloop_reduction.1.f90 +! @@name: taskloop_reduction.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/data_environment/sources/taskloop_reduction.2.c b/data_environment/sources/taskloop_reduction.2.c index c901f42..7461949 100644 --- a/data_environment/sources/taskloop_reduction.2.c +++ b/data_environment/sources/taskloop_reduction.2.c @@ -1,5 +1,5 @@ /* -* @@name: taskloop_reduction.2.c +* @@name: taskloop_reduction.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/data_environment/sources/taskloop_reduction.2.f90 b/data_environment/sources/taskloop_reduction.2.f90 index 90b5c0b..41c919a 100644 --- a/data_environment/sources/taskloop_reduction.2.f90 +++ b/data_environment/sources/taskloop_reduction.2.f90 @@ -1,4 +1,4 @@ -! @@name: taskloop_reduction.2.f90 +! @@name: taskloop_reduction.2 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/data_environment/sources/taskloop_simd_reduction.1.c b/data_environment/sources/taskloop_simd_reduction.1.c index 6ea9261..e18f06a 100644 --- a/data_environment/sources/taskloop_simd_reduction.1.c +++ b/data_environment/sources/taskloop_simd_reduction.1.c @@ -1,55 +1,51 @@ /* -* @@name: taskloop_simd_reduction.1c +* @@name: taskloop_simd_reduction.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include #define N 100 int main(){ - int i, a[N], asum=0; + int i, a[N], asum=0; - for(i=0;ia} and \textit{sp->b} +from the corresponding structure. + +CASE 3 is similar to CASE 2, except \textit{s} is instead captured by +reference by the lambda expression. As for CASE 2, the structure is first +mapped by an enclosing \scode{target}~\scode{data} construct, and then the +\scode{target} construct implicitly maps \textit{s} and the closure object +referenced by \textit{lambda3}. The effect of the map is to make the +call for \textit{lambda3} refer to the corresponding \textit{s} inside the +\scode{target} construct rather than the original \textit{s}. + +In CASE 4, the program defines a static variable \textit{ss} of the same +structure type as \textit{s}. While the body of the lambda expression refers +to \textit{ss}, it is not captured. In order for \textit{lambda4} to be +callable in the \scode{target} region, the reference to \textit{ss} should be +to a device copy of \textit{ss} that also has static storage. This is achieved +with the use of the \scode{declare}~\scode{target} directive. Inside the +\scode{target} construct, all references to \textit{ss}, including in the +\textit{lambda4()} call, will refer to the corresponding \textit{ss} that +results from the \scode{declare}~\scode{target} directive. The \scode{always} +modifier is used on the \scode{map} clause to transfer the updated values for +the structure back to the host device. 
+ +\cppnexample[5.0]{lambda_expressions}{1} +\cppspecificend diff --git a/devices/sources/array_sections.1.c b/devices/sources/array_sections.1.c index e5aa0e4..7a08979 100644 --- a/devices/sources/array_sections.1.c +++ b/devices/sources/array_sections.1.c @@ -1,5 +1,5 @@ /* -* @@name: array_sections.1c +* @@name: array_sections.1 * @@type: C * @@compilable: no * @@linkable: no diff --git a/devices/sources/array_sections.1.f90 b/devices/sources/array_sections.1.f90 index 5a565af..ec13592 100644 --- a/devices/sources/array_sections.1.f90 +++ b/devices/sources/array_sections.1.f90 @@ -1,4 +1,4 @@ -! @@name: array_sections.1f +! @@name: array_sections.1 ! @@type: F-free ! @@compilable: no ! @@linkable: no diff --git a/devices/sources/array_sections.2.c b/devices/sources/array_sections.2.c index 20239c2..b608e1a 100644 --- a/devices/sources/array_sections.2.c +++ b/devices/sources/array_sections.2.c @@ -1,5 +1,5 @@ /* -* @@name: array_sections.2c +* @@name: array_sections.2 * @@type: C * @@compilable: no * @@linkable: no diff --git a/devices/sources/array_sections.2.f90 b/devices/sources/array_sections.2.f90 index 891a386..6ec328a 100644 --- a/devices/sources/array_sections.2.f90 +++ b/devices/sources/array_sections.2.f90 @@ -1,4 +1,4 @@ -! @@name: array_sections.2f +! @@name: array_sections.2 ! @@type: F-free ! @@compilable: no ! @@linkable: no diff --git a/devices/sources/array_sections.3.c b/devices/sources/array_sections.3.c index a6641c6..bc5da8b 100644 --- a/devices/sources/array_sections.3.c +++ b/devices/sources/array_sections.3.c @@ -1,5 +1,5 @@ /* -* @@name: array_sections.3c +* @@name: array_sections.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/array_sections.3.f90 b/devices/sources/array_sections.3.f90 index 57bcc2c..3f7d77d 100644 --- a/devices/sources/array_sections.3.f90 +++ b/devices/sources/array_sections.3.f90 @@ -1,4 +1,4 @@ -! @@name: array_sections.3f +! @@name: array_sections.3 ! @@type: F-free ! 
@@compilable: yes ! @@linkable: no diff --git a/devices/sources/array_sections.4.c b/devices/sources/array_sections.4.c index fb6f4f7..abb1e3f 100644 --- a/devices/sources/array_sections.4.c +++ b/devices/sources/array_sections.4.c @@ -1,5 +1,5 @@ /* -* @@name: array_sections.4c +* @@name: array_sections.4 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/array_sections.4.f90 b/devices/sources/array_sections.4.f90 index e2c0fe9..74a7272 100644 --- a/devices/sources/array_sections.4.f90 +++ b/devices/sources/array_sections.4.f90 @@ -1,4 +1,4 @@ -! @@name: array_sections.4f +! @@name: array_sections.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/array_shaping.1.c b/devices/sources/array_shaping.1.c index e6eb40d..f92d257 100644 --- a/devices/sources/array_shaping.1.c +++ b/devices/sources/array_shaping.1.c @@ -1,14 +1,14 @@ /* -* @@name: array_shaping.1.c +* @@name: array_shaping.1 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.1 */ -#pragma omp declare target -int do_work(double *a, int nx, int ny); -int other_work(double *a, int nx, int ny); +#pragma omp begin declare target + int do_work(double *a, int nx, int ny); + int other_work(double *a, int nx, int ny); #pragma omp end declare target void exch_data(double *a, int nx, int ny); diff --git a/devices/sources/array_shaping.1.f90 b/devices/sources/array_shaping.1.f90 index 0a30422..790e45d 100644 --- a/devices/sources/array_shaping.1.f90 +++ b/devices/sources/array_shaping.1.f90 @@ -1,19 +1,20 @@ -! @@name: array_shaping.1.f90 +! @@name: array_shaping.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no ! @@expect: success -! @@version: omp_5.0 +! 
@@version: omp_5.2 + module m interface subroutine do_work(a, nx, ny) - !$omp declare target to(do_work) + !$omp declare target enter(do_work) integer, intent(in) :: nx, ny double precision a(0:nx+1,ny) end subroutine do_work subroutine other_work(a, nx, ny) - !$omp declare target to(other_work) + !$omp declare target enter(other_work) integer, intent(in) :: nx, ny double precision a(0:nx+1,ny) end subroutine other_work diff --git a/devices/sources/async_target.1.c b/devices/sources/async_target.1.c index f9bbf33..821739b 100644 --- a/devices/sources/async_target.1.c +++ b/devices/sources/async_target.1.c @@ -1,14 +1,15 @@ /* -* @@name: async_target.1c +* @@name: async_target.1 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ -#pragma omp declare target +#pragma omp begin declare target float F(float); #pragma omp end declare target + #define N 1000000000 #define CHUNKSZ 1000000 void init(float *, int); diff --git a/devices/sources/async_target.1.f90 b/devices/sources/async_target.1.f90 index e25876a..678194a 100644 --- a/devices/sources/async_target.1.f90 +++ b/devices/sources/async_target.1.f90 @@ -1,4 +1,4 @@ -! @@name: async_target.1f +! @@name: async_target.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/devices/sources/async_target.2.c b/devices/sources/async_target.2.c index 45aca25..ead4392 100644 --- a/devices/sources/async_target.2.c +++ b/devices/sources/async_target.2.c @@ -1,16 +1,18 @@ /* -* @@name: async_target.2c +* @@name: async_target.2 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ #include #include -#pragma omp declare target + +#pragma omp begin declare target extern void init(float *, float *, int); #pragma omp end declare target + extern void foo(); extern void output(float *, int); void vec_mult(float *p, int N, int dev) diff --git a/devices/sources/async_target.2.f90 b/devices/sources/async_target.2.f90 index acc409a..8ad6cb2 100644 --- a/devices/sources/async_target.2.f90 +++ b/devices/sources/async_target.2.f90 @@ -1,4 +1,4 @@ -! @@name: async_target.2f +! @@name: async_target.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/async_target.3.c b/devices/sources/async_target.3.c index 0b67cbb..93d8c51 100644 --- a/devices/sources/async_target.3.c +++ b/devices/sources/async_target.3.c @@ -1,15 +1,11 @@ /* -* @@name: async_target.3c +* @@name: async_target.3 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include #define N 1000000 //N must be even diff --git a/devices/sources/async_target.3.f90 b/devices/sources/async_target.3.f90 index ca2a05a..9eed410 100644 --- a/devices/sources/async_target.3.f90 +++ b/devices/sources/async_target.3.f90 @@ -1,14 +1,9 @@ -! @@name: async_target.3f +! @@name: async_target.3 ! @@type: F-free ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success ! 
@@version: omp_5.1 -#if _OPENMP < 202011 -#define masked master -#endif - program concurrent_async use omp_lib integer,parameter :: n=1000000 !!n must be even diff --git a/devices/sources/async_target.4.c b/devices/sources/async_target.4.c index b4843b1..6b21367 100644 --- a/devices/sources/async_target.4.c +++ b/devices/sources/async_target.4.c @@ -1,5 +1,5 @@ /* -* @@name: target.4c +* @@name: async_target.4 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/async_target.4.f90 b/devices/sources/async_target.4.f90 index f258d84..d97b46f 100644 --- a/devices/sources/async_target.4.f90 +++ b/devices/sources/async_target.4.f90 @@ -1,4 +1,4 @@ -! @@name: target.4f90 +! @@name: async_target.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/declare_target.1.c b/devices/sources/declare_target.1.c index 1e54e91..778958b 100644 --- a/devices/sources/declare_target.1.c +++ b/devices/sources/declare_target.1.c @@ -1,14 +1,15 @@ /* -* @@name: declare_target.1c +* @@name: declare_target.1 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ -#pragma omp declare target +#pragma omp begin declare target extern void fib(int N); #pragma omp end declare target + #define THRESHOLD 1000000 void fib_wrapper(int n) { diff --git a/devices/sources/declare_target.1.f90 b/devices/sources/declare_target.1.f90 index 7c06616..9cf6ead 100644 --- a/devices/sources/declare_target.1.f90 +++ b/devices/sources/declare_target.1.f90 @@ -1,4 +1,4 @@ -! @@name: declare_target.1f +! @@name: declare_target.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/devices/sources/declare_target.2.f90 b/devices/sources/declare_target.2.f90 index 2025667..b1e335a 100644 --- a/devices/sources/declare_target.2.f90 +++ b/devices/sources/declare_target.2.f90 @@ -1,4 +1,4 @@ -! @@name: declare_target.2f +! @@name: declare_target.2 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/devices/sources/declare_target.2a.cpp b/devices/sources/declare_target.2a.cpp index f788cdb..c7c3b3b 100644 --- a/devices/sources/declare_target.2a.cpp +++ b/devices/sources/declare_target.2a.cpp @@ -1,16 +1,15 @@ /* -* @@name declare_target.2a +* @@name: declare_target.2a * @@type: C++ -* @@compilable yes +* @@compilable: yes * @@linkable: yes * @@expect: success * @@version: omp_5.1 */ - #include using namespace std; - #pragma omp begin declare target // declare target -- class and function + #pragma omp begin declare target // declare target--class and function class XOR1 { int a; @@ -21,8 +20,7 @@ using namespace std; int XOR1::foo() { return a^0x01;} #pragma omp end declare target - - #pragma omp begin declare target // declare target -- class, not function + #pragma omp begin declare target // declare target--class, not function class XOR2 { int a; @@ -34,8 +32,7 @@ using namespace std; int XOR2::foo() { return a^0x01;} - - class XOR3 // declare target -- neither class nor function + class XOR3 // declare target--neither class nor function { int a; public: @@ -44,7 +41,6 @@ using namespace std; }; int XOR3::foo() { return a^0x01;} - int main (){ XOR1 my_XOR1(3); diff --git a/devices/sources/declare_target.2b_functions.cpp b/devices/sources/declare_target.2b_functions.cpp index 85cb396..7fb1e22 100644 --- a/devices/sources/declare_target.2b_functions.cpp +++ b/devices/sources/declare_target.2b_functions.cpp @@ -1,11 +1,10 @@ /* -* @@name declare_target.2c +* @@name: declare_target.2b_functions * @@type: C++ -* @@compilable yes +* @@compilable: yes * @@linkable: no * @@expect: failure * @@version: omp_5.1 */ - #include "classes.hpp" int XOR1::foo() { return a^0x01;} diff --git a/devices/sources/declare_target.2b_main.cpp b/devices/sources/declare_target.2b_main.cpp index a76c28a..cdbbdc7 100644 --- a/devices/sources/declare_target.2b_main.cpp +++ b/devices/sources/declare_target.2b_main.cpp @@ -1,7 +1,7 @@ /* -* @@name 
declare_target.2b +* @@name: declare_target.2b_main * @@type: C++ -* @@compilable yes +* @@compilable: yes * @@linkable: no * @@expect: failure * @@version: omp_5.1 diff --git a/devices/sources/declare_target.2c.cpp b/devices/sources/declare_target.2c.cpp index 9365201..075c561 100644 --- a/devices/sources/declare_target.2c.cpp +++ b/devices/sources/declare_target.2c.cpp @@ -4,7 +4,7 @@ * @@compilable: no * @@linkable: no * @@expect: failure -* @@version: omp_4.0 +* @@version: omp_5.2 */ struct typeX { @@ -16,10 +16,12 @@ class typeY public: int foo() { return a^0x01;} }; -#pragma omp declare target -struct typeX varX; // ok -class typeY varY; // ok if varY.foo() not called on target device + +#pragma omp begin declare target + struct typeX varX; // ok + class typeY varY; // ok if varY.foo() not called on target device #pragma omp end declare target + void foo() { #pragma omp target diff --git a/devices/sources/declare_target.3.c b/devices/sources/declare_target.3.c index a9c3027..a1773a3 100644 --- a/devices/sources/declare_target.3.c +++ b/devices/sources/declare_target.3.c @@ -1,17 +1,20 @@ /* -* @@name: declare_target.3c +* @@name: declare_target.3 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ #define N 1000 -#pragma omp declare target + +#pragma omp begin declare target float p[N], v1[N], v2[N]; #pragma omp end declare target + extern void init(float *, float *, int); extern void output(float *, int); + void vec_mult() { int i; diff --git a/devices/sources/declare_target.3.f90 b/devices/sources/declare_target.3.f90 index 5bb96e2..aa05f9b 100644 --- a/devices/sources/declare_target.3.f90 +++ b/devices/sources/declare_target.3.f90 @@ -1,4 +1,4 @@ -! @@name: declare_target.3f +! @@name: declare_target.3 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/devices/sources/declare_target.4.c b/devices/sources/declare_target.4.c index 16cee20..7b8670f 100644 --- a/devices/sources/declare_target.4.c +++ b/devices/sources/declare_target.4.c @@ -1,17 +1,18 @@ /* -* @@name: declare_target.4c +* @@name: declare_target.4 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ #define N 10000 -#pragma omp declare target -float Q[N][N]; -float Pfun(const int i, const int k) -{ return Q[i][k] * Q[k][i]; } + +#pragma omp begin declare target + float Q[N][N]; + float Pfun(const int i, const int k) { return Q[i][k] * Q[k][i]; } #pragma omp end declare target + float accum(int k) { float tmp = 0.0; @@ -24,5 +25,6 @@ float accum(int k) } /* Note: The variable tmp is now mapped with tofrom, for correct - execution with 4.5 (and pre-4.5) compliant compilers. See Devices Intro. + execution with 4.5 (and pre-4.5) compliant compilers. + See Devices Intro. */ diff --git a/devices/sources/declare_target.4.f90 b/devices/sources/declare_target.4.f90 index 080da71..89b6c83 100644 --- a/devices/sources/declare_target.4.f90 +++ b/devices/sources/declare_target.4.f90 @@ -1,4 +1,4 @@ -! @@name: declare_target.4f +! @@name: declare_target.4 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/devices/sources/declare_target.5.c b/devices/sources/declare_target.5.c index ced21de..66d3cfb 100644 --- a/devices/sources/declare_target.5.c +++ b/devices/sources/declare_target.5.c @@ -1,15 +1,17 @@ /* -* @@name: declare_target.5c +* @@name: declare_target.5 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ #define N 10000 #define M 1024 -#pragma omp declare target + +#pragma omp begin declare target float Q[N][N]; + #pragma omp declare simd uniform(i) linear(k) notinbranch float P(const int i, const int k) { @@ -35,5 +37,6 @@ float accum(void) } /* Note: The variable tmp is now mapped with tofrom, for correct - execution with 4.5 (and pre-4.5) compliant compilers. See Devices Intro. + execution with 4.5 (and pre-4.5) compliant compilers. + See Devices Intro. */ diff --git a/devices/sources/declare_target.5.f90 b/devices/sources/declare_target.5.f90 index 62902c3..306dbdf 100644 --- a/devices/sources/declare_target.5.f90 +++ b/devices/sources/declare_target.5.f90 @@ -1,4 +1,4 @@ -! @@name: declare_target.5f +! @@name: declare_target.5 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/devices/sources/declare_target.6.c b/devices/sources/declare_target.6.c index e8b383e..a693f56 100644 --- a/devices/sources/declare_target.6.c +++ b/devices/sources/declare_target.6.c @@ -1,10 +1,10 @@ /* -* @@name: declare_target.6.c +* @@name: declare_target.6 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.5 +* @@version: omp_5.1 */ #define N 100000000 @@ -18,7 +18,8 @@ void d_init(double *, double *, int); void s_output(float *, int); void d_output(double *, int); -#pragma omp declare target +#pragma omp begin declare target + void s_vec_mult_accum() { int i; diff --git a/devices/sources/declare_target.6.f90 b/devices/sources/declare_target.6.f90 index e702252..b957b3b 100644 --- a/devices/sources/declare_target.6.f90 +++ b/devices/sources/declare_target.6.f90 @@ -1,4 +1,4 @@ -! @@name: declare_target.6.f90 +! @@name: declare_target.6 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/device.1.c b/devices/sources/device.1.c index 3942275..162f7de 100644 --- a/devices/sources/device.1.c +++ b/devices/sources/device.1.c @@ -1,18 +1,18 @@ /* -* @@name: device.1c +* @@name: device.1 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.0 +* @@version: omp_5.1 */ #include #include -#pragma omp declare target -void vec_mult(float *p, float *v1, float *v2, int N); -extern float *p, *v1, *v2; -extern int N; +#pragma omp begin declare target + void vec_mult(float *p, float *v1, float *v2, int N); + extern float *p, *v1, *v2; + extern int N; #pragma omp end declare target extern void init_vars(float *, float *, int); diff --git a/devices/sources/device.1.f90 b/devices/sources/device.1.f90 index 6d1e1d4..29ff01e 100644 --- a/devices/sources/device.1.f90 +++ b/devices/sources/device.1.f90 @@ -1,4 +1,4 @@ -! @@name: device.1f +! @@name: device.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/devices/sources/device.2.c b/devices/sources/device.2.c index 75cdaa7..5656da7 100644 --- a/devices/sources/device.2.c +++ b/devices/sources/device.2.c @@ -1,5 +1,5 @@ /* -* @@name: device.2c +* @@name: device.2 * @@type: C * @@compilable: yes * @@linkable: no @@ -15,7 +15,9 @@ void vec_mult(float *p, float *v1, float *v2, int N) init(v1, v2, N); int ndev = omp_get_num_devices(); int do_offload = (ndev>0 && N>1000000); - #pragma omp target if(do_offload) map(to: v1[0:N], v2[:N]) map(from: p[0:N]) + #pragma omp target if(do_offload) \ + map(to: v1[0:N], v2[:N]) \ + map(from: p[0:N]) #pragma omp parallel for if(N>1000) private(i) for (i=0; i +using namespace std; + +struct S { int a; int b; }; + +int main() +{ + +// CASE 1 Lambda defined in target region + + S s = S {0,1}; + + #pragma omp target + { + auto lambda1 = [&s]() { s.a = s.b * 2; }; + s.b += 2; + lambda1(); // s.a = 3 * 2 + } + cout << s.a << " " << s.b << endl; //OUT 6 3 + +// CASE 2 Host defined lambda, Capture pointer to s + + s = {0,1}; + S *sp = &s; + auto lambda2 = [sp]() {sp->a = sp->b * 2; }; + + // closure object's sp attaches to corresponding s on target + // construct + #pragma omp target data map(sp[0]) + #pragma omp target + { + sp->b += 2; + lambda2(); + } + cout << s.a << " " << s.b << endl; //OUT 6 3 + +// CASE 3 Host defined lambda, Capture s by reference + + s = {0,1}; + auto lambda3 = [&s]() {s.a = s.b * 2; }; + + // closure object's s refers to corresponding s in target + // construct + #pragma omp target data map(s) + #pragma omp target + { + s.b += 2; + lambda3(); + } + cout << s.a << " " << s.b << endl; //OUT 6 3 + +// CASE 4 Host defined lambda, references static variable + + static S ss = {0,1}; + #pragma omp declare target enter(ss) + auto lambda4 = [&]() {ss.a = ss.b * 2; }; + + #pragma omp target map(always,from:ss) + { + ss.b += 2; + lambda4(); + } + cout << ss.a << " " << ss.b << endl; //OUT 6 3 + + return 0; +} diff --git 
a/devices/sources/target.1.c b/devices/sources/target.1.c index 4c5d094..972dd00 100644 --- a/devices/sources/target.1.c +++ b/devices/sources/target.1.c @@ -1,5 +1,5 @@ /* -* @@name: target.1c +* @@name: target.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target.1.f90 b/devices/sources/target.1.f90 index e38026a..b6362d0 100644 --- a/devices/sources/target.1.f90 +++ b/devices/sources/target.1.f90 @@ -1,4 +1,4 @@ -! @@name: target.1f +! @@name: target.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target.2.c b/devices/sources/target.2.c index 83d07e6..e4bec9f 100644 --- a/devices/sources/target.2.c +++ b/devices/sources/target.2.c @@ -1,5 +1,5 @@ /* -* @@name: target.2c +* @@name: target.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target.2.f90 b/devices/sources/target.2.f90 index 936b62b..3097078 100644 --- a/devices/sources/target.2.f90 +++ b/devices/sources/target.2.f90 @@ -1,4 +1,4 @@ -! @@name: target.2f +! @@name: target.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target.3.c b/devices/sources/target.3.c index ef05790..f3bb62d 100644 --- a/devices/sources/target.3.c +++ b/devices/sources/target.3.c @@ -1,5 +1,5 @@ /* -* @@name: target.3c +* @@name: target.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target.3.f90 b/devices/sources/target.3.f90 index 9bf85de..130cb49 100644 --- a/devices/sources/target.3.f90 +++ b/devices/sources/target.3.f90 @@ -1,4 +1,4 @@ -! @@name: target.3f +! @@name: target.3 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/devices/sources/target.4.c b/devices/sources/target.4.c index c481fd5..406393a 100644 --- a/devices/sources/target.4.c +++ b/devices/sources/target.4.c @@ -1,5 +1,5 @@ /* -* @@name: target.4c +* @@name: target.4 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target.4.f90 b/devices/sources/target.4.f90 index c6c2dfc..ac00941 100644 --- a/devices/sources/target.4.f90 +++ b/devices/sources/target.4.f90 @@ -1,4 +1,4 @@ -! @@name: target.4f +! @@name: target.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target.4b.f90 b/devices/sources/target.4b.f90 index 7b6ac71..dbef3da 100644 --- a/devices/sources/target.4b.f90 +++ b/devices/sources/target.4b.f90 @@ -1,4 +1,4 @@ -! @@name: target.4bf +! @@name: target.4b ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target.5.c b/devices/sources/target.5.c index eb93084..6d8a14b 100644 --- a/devices/sources/target.5.c +++ b/devices/sources/target.5.c @@ -1,5 +1,5 @@ /* -* @@name: target.5c +* @@name: target.5 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target.5.f90 b/devices/sources/target.5.f90 index 0e7d49c..8472623 100644 --- a/devices/sources/target.5.f90 +++ b/devices/sources/target.5.f90 @@ -1,4 +1,4 @@ -! @@name: target.5f +! @@name: target.5 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target.6.c b/devices/sources/target.6.c index 62babdc..fdc77b6 100644 --- a/devices/sources/target.6.c +++ b/devices/sources/target.6.c @@ -1,5 +1,5 @@ /* -* @@name: target.6c +* @@name: target.6 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target.6.f90 b/devices/sources/target.6.f90 index 2c508b3..66c8b4d 100644 --- a/devices/sources/target.6.f90 +++ b/devices/sources/target.6.f90 @@ -1,4 +1,4 @@ -! @@name: target.6f90 +! @@name: target.6 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/devices/sources/target_associate_ptr.1.c b/devices/sources/target_associate_ptr.1.c index 8ec88b9..edb061a 100644 --- a/devices/sources/target_associate_ptr.1.c +++ b/devices/sources/target_associate_ptr.1.c @@ -29,7 +29,8 @@ int main() { for (int ioff = 0; ioff < N; ioff += CS) { // Associate device memory with one chunk of host memory - omp_target_associate_ptr(&arr[ioff], dev_ptr, sizeof(int) * CS, 0, dev); + omp_target_associate_ptr(&arr[ioff], dev_ptr, + sizeof(int) * CS, 0, dev); printf("before: arr[%d]=%d\n", ioff, arr[ioff]); @@ -37,7 +38,7 @@ int main() { #pragma omp target update to(arr[ioff:CS]) device(dev) // Explicit mapping of arr to make sure that we use the allocated - // and associated memory. + // and associated memory. No host-device data update here. #pragma omp target map(tofrom : arr[ioff:CS]) device(dev) for (int i = 0; i < CS; i++) { arr[i+ioff]++; diff --git a/devices/sources/target_associate_ptr.1.f90 b/devices/sources/target_associate_ptr.1.f90 index 5a24029..c2bc1b8 100644 --- a/devices/sources/target_associate_ptr.1.f90 +++ b/devices/sources/target_associate_ptr.1.f90 @@ -41,7 +41,7 @@ program target_associate !$omp target update to(arr(ioff:ioff+CS-1)) device(dev) ! Explicit mapping of arr to make sure that we use the allocated - ! and associated memory. + ! and associated memory. No host-device data update here. 
!$omp target map(tofrom: arr(ioff:ioff+CS-1)) device(dev) do i = 0, CS-1 arr(i+ioff) = arr(i+ioff) + 1 diff --git a/devices/sources/target_data.1.c b/devices/sources/target_data.1.c index c13f93d..d845806 100644 --- a/devices/sources/target_data.1.c +++ b/devices/sources/target_data.1.c @@ -1,5 +1,5 @@ /* -* @@name: target_data.1c +* @@name: target_data.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_data.1.f90 b/devices/sources/target_data.1.f90 index 2c4f32d..f205693 100644 --- a/devices/sources/target_data.1.f90 +++ b/devices/sources/target_data.1.f90 @@ -1,4 +1,4 @@ -! @@name: target_data.1f +! @@name: target_data.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_data.2.c b/devices/sources/target_data.2.c index b545449..78c82c8 100644 --- a/devices/sources/target_data.2.c +++ b/devices/sources/target_data.2.c @@ -1,5 +1,5 @@ /* -* @@name: target_data.2c +* @@name: target_data.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_data.2.f90 b/devices/sources/target_data.2.f90 index 9b1fabd..08e1169 100644 --- a/devices/sources/target_data.2.f90 +++ b/devices/sources/target_data.2.f90 @@ -1,4 +1,4 @@ -! @@name: target_data.2f +! @@name: target_data.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_data.3.c b/devices/sources/target_data.3.c index e2d62d9..8da0582 100644 --- a/devices/sources/target_data.3.c +++ b/devices/sources/target_data.3.c @@ -1,5 +1,5 @@ /* -* @@name: target_data.3c +* @@name: target_data.3 * @@type: C * @@compilable: yes * @@linkable: no @@ -32,6 +32,7 @@ void gramSchmidt(float Q[][COLS], const int rows) } } -/* Note: The variable tmp is now mapped with tofrom, for correct - execution with 4.5 (and pre-4.5) compliant compilers. See Devices Intro. +/* Note: The variable tmp is now mapped with tofrom, for correct + execution with 4.5 (and pre-4.5) compliant compilers. + See Devices Intro. 
*/ diff --git a/devices/sources/target_data.3.f90 b/devices/sources/target_data.3.f90 index 1032177..272ccb3 100644 --- a/devices/sources/target_data.3.f90 +++ b/devices/sources/target_data.3.f90 @@ -1,4 +1,4 @@ -! @@name: target_data.3f +! @@name: target_data.3 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_data.4.c b/devices/sources/target_data.4.c index 89b329f..0b20044 100644 --- a/devices/sources/target_data.4.c +++ b/devices/sources/target_data.4.c @@ -1,13 +1,11 @@ /* -* @@name: target_data.4c +* @@name: target_data.4 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success * @@version: omp_4.0 */ - - void vec_mult(float*, float*, float*, int); extern void init(float*, float*, int); @@ -27,7 +25,6 @@ void foo(float *p0, float *v1, float *v2, int N) } - void vec_mult(float *p1, float *v3, float *v4, int N) { int i; @@ -38,4 +35,3 @@ void vec_mult(float *p1, float *v3, float *v4, int N) p1[i] = v3[i] * v4[i]; } } - diff --git a/devices/sources/target_data.4.f90 b/devices/sources/target_data.4.f90 index e7cd232..c5ee88b 100644 --- a/devices/sources/target_data.4.f90 +++ b/devices/sources/target_data.4.f90 @@ -1,10 +1,9 @@ -! @@name: target_data.4f +! @@name: target_data.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: no ! @@expect: success ! @@version: omp_4.0 - module mults contains subroutine foo(p0,v1,v2,N) diff --git a/devices/sources/target_data.5.cpp b/devices/sources/target_data.5.cpp index f603afd..1e6fee3 100644 --- a/devices/sources/target_data.5.cpp +++ b/devices/sources/target_data.5.cpp @@ -1,5 +1,5 @@ /* -* @@name: target_data.5c +* @@name: target_data.5 * @@type: C++ * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_data.5.f90 b/devices/sources/target_data.5.f90 index 490de6f..b35f5df 100644 --- a/devices/sources/target_data.5.f90 +++ b/devices/sources/target_data.5.f90 @@ -1,4 +1,4 @@ -! @@name: target_data.5f +! @@name: target_data.5 ! @@type: F-free ! 
@@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_data.6.c b/devices/sources/target_data.6.c index d912a4b..6b7b39e 100644 --- a/devices/sources/target_data.6.c +++ b/devices/sources/target_data.6.c @@ -1,5 +1,5 @@ /* -* @@name: target_data.6c +* @@name: target_data.6 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_data.6.f90 b/devices/sources/target_data.6.f90 index 8b197c9..a5dd554 100644 --- a/devices/sources/target_data.6.f90 +++ b/devices/sources/target_data.6.f90 @@ -1,4 +1,4 @@ -! @@name: target_data.6f +! @@name: target_data.6 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_data.7.c b/devices/sources/target_data.7.c index a07832e..89680f5 100644 --- a/devices/sources/target_data.7.c +++ b/devices/sources/target_data.7.c @@ -1,5 +1,5 @@ /* -* @@name: target_data.7c +* @@name: target_data.7 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_data.7.f90 b/devices/sources/target_data.7.f90 index da54d4e..6ac68a4 100644 --- a/devices/sources/target_data.7.f90 +++ b/devices/sources/target_data.7.f90 @@ -1,4 +1,4 @@ -! @@name: target_data.7f +! @@name: target_data.7 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_defaultmap.1.c b/devices/sources/target_defaultmap.1.c index f0b5acb..b7558de 100644 --- a/devices/sources/target_defaultmap.1.c +++ b/devices/sources/target_defaultmap.1.c @@ -1,5 +1,5 @@ /* -* @@name: target_defaultmap.1.c +* @@name: target_defaultmap.1 * @@type: C * @@compilable: yes * @@linkable: yes @@ -27,23 +27,23 @@ int main(){ S.s=0; S.A[0]=0; S.A[1]=0; // Target Region 1 - // Uses defaultmap to set scalars, aggregates & pointers - // to normal defaults. + // Uses defaultmap to set scalars, aggregates & + // pointers to normal defaults. 
#pragma omp target \ - defaultmap(firstprivate: scalar) //could also be default \ - defaultmap(tofrom: aggregate) //could also be default \ - defaultmap(default: pointer) //must be default \ + defaultmap(firstprivate: scalar) //could also be default \ + defaultmap(tofrom: aggregate)//could also be default \ + defaultmap(default: pointer) //must be default \ map(ptr2m[:N]) { - s = 3; // SCALAR firstprivate, value not returned + s = 3; //SCALAR firstprivate, value not returned - A[0] = 3; A[1] = 3; // AGGREGATE array, default map tofrom + A[0] = 3; A[1] = 3; //AGGREGATE array, default map tofrom - // AGGREGATE structure, default tofrom + //AGGREGATE structure, default tofrom S.s = 2; S.A[0] = 2; S.A[1] = 2; - ptr = &A[0]; // POINTER is private + ptr = &A[0]; //POINTER is private ptr[0] = 2; ptr[1] = 2; } @@ -60,17 +60,18 @@ int main(){ S.s +=5; S.A[0]+=5; S.A[1]+=5; } - if(s==7 && A[0]==7 && S.s==7 && S.A[0]==7) printf(" PASSED 2 of 4\n"); + if(s==7 && A[0]==7 && S.s==7 && S.A[0]==7) + printf(" PASSED 2 of 4\n"); // Target Region 3 - // defaultmap & explicit map with variables in same category + // defaultmap & explicit map with variables in same category s1=s2=s3=1; #pragma omp defaultmap(tofrom: scalar) map(firstprivate: s1,s2) { - s1 += 5; // firstprivate (s1 value not returned to host) - s2 += 5; // firstprivate (s2 value not returned to host) - s3 += s1 + s2; // mapped as tofrom + s1 += 5; // firstprivate (s1 value not returned to host) + s2 += 5; // firstprivate (s2 value not returned to host) + s3 += s1 + s2; // mapped as tofrom } if(s1==1 && s2==1 && s3==13 ) printf(" PASSED 3 of 4\n"); @@ -80,7 +81,8 @@ int main(){ S.A[0]=0; S.A[1]=0; // arrays and structure are firstprivate, and scalars are from - #pragma omp target defaultmap(firstprivate: aggregate) map(from: s1, s2) + #pragma omp target defaultmap(firstprivate: aggregate) \ + map(from: s1, s2) { A[0]+=1; S.A[0]+=1; //Aggregate changes not returned to host diff --git 
a/devices/sources/target_defaultmap.1.f90 b/devices/sources/target_defaultmap.1.f90 index 69a9d59..b5cc96f 100644 --- a/devices/sources/target_defaultmap.1.f90 +++ b/devices/sources/target_defaultmap.1.f90 @@ -1,4 +1,4 @@ -! @@name: defaultmap.1f90 +! @@name: target_defaultmap.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes @@ -32,20 +32,20 @@ program defaultmap H(1)=0; H(2)=0 !! Target Region 1 - !! Using defaultmap to set scalars, aggregates & pointers - !! and allocatables to normal defaults. + !! Using defaultmap to set scalars, aggregates & + !! pointers and allocatables to normal defaults. !$omp target & !$omp& defaultmap( firstprivate: scalar) & !$omp& defaultmap( tofrom: aggregate) & !$omp& defaultmap( tofrom: allocatable) & !$omp& defaultmap( default: pointer) - s = 3 !! SCALAR firstprivate, value not returned + s = 3 !! SCALAR firstprivate, val not returned A(1) = 3 !! AGGREGATE array, default map tofrom A(2) = 3 - D%s = 2 !! AGGR. Derived Types, default map tofrom + D%s = 2 !! AGGR. Derived Type, default map tofrom D%A(1) = 2; D%A(2) = 2 H(1) = 2; H(2) = 2 !! ALLOCATABLE, default map tofrom @@ -62,22 +62,23 @@ program defaultmap !! no implicit mapping allowed !$omp target defaultmap(none) map(tofrom: s, A, D) - s=s+5 !! All variables must be explicitly mapped + s=s+5 !! All variables must be explicitly mapped A(1)=A(1)+5; A(2)=A(2)+5 D%s=D%s+5 D%A(1)=D%A(1)+5; D%A(2)=D%A(2)+5 !$omp end target - if(s==7 .and. A(1)==7 .and. D%s==7 .and. D%A(1)==7) print*," PASSED 2 of 4" + if(s==7 .and. A(1)==7 .and. D%s==7 .and. D%A(1)==7) & + print*," PASSED 2 of 4" !! Target Region 3 - !!defaultmap & explicit map with variables in same category + !!defaultmap & explicit map with variables in same category s1=1; s2=1; s3=1 !$omp defaultmap(tofrom: scalar) map(firstprivate: s1,s2) - s1 = s1+5; !! firstprivate (s1 value not returned to host) - s2 = s2+5; !! firstprivate (s2 value not returned to host) - s3 = s3 +s1 + s2; !! mapped as tofrom + s1 = s1+5; !! 
firstprivate (s1 value not returned to host) + s2 = s2+5; !! firstprivate (s2 value not returned to host) + s3 = s3 +s1 + s2; !! mapped as tofrom !$omp end target if(s1==1 .and. s2==1 .and. s3==13) print*," PASSED 3 of 4" @@ -86,17 +87,17 @@ program defaultmap A(1)=0; A(2)=0 D%A(1)=0; D%A(2)=0 H(1)=0; H(2)=0 - !! non-allocated arrays & derived types are in AGGREGATE cat. - !! Allocatable Arrays are in ALLOCATABLE category - !! Scalars are explicitly mapped from + !! non-allocated arrays & derived types are in AGGREGATE cat. + !! Allocatable Arrays are in ALLOCATABLE category + !! Scalars are explicitly mapped from !$omp target defaultmap(firstprivate: aggregate ) & !$omp& defaultmap(firstprivate: allocatable) & !$omp& map(from: s1, s2) - A(1)=A(1)+1; D%A(1)=D%A(1)+1; H(1)=H(1)+1 !!changes not returned to host - A(2)=A(2)+1; D%A(2)=D%A(2)+1; H(2)=H(2)+1 !!changes not returned to host - s1 = A(1)+D%A(1)+H(1) !!s1 returned to host - s2 = A(2)+D%A(2)+H(1) !!s2 returned to host + A(1)=A(1)+1; D%A(1)=D%A(1)+1; H(1)=H(1)+1 !! changes to A, D%A, H + A(2)=A(2)+1; D%A(2)=D%A(2)+1; H(2)=H(2)+1 !! not returned to host + s1 = A(1)+D%A(1)+H(1) !! s1 returned to host + s2 = A(2)+D%A(2)+H(1) !! s2 returned to host !$omp end target if(A(1)==0 .and. D%A(1)==0 .and. H(1)==0 .and. s1==3) & diff --git a/devices/sources/target_fort_allocatable_map.1.f90 b/devices/sources/target_fort_allocatable_map.1.f90 index d681ce6..c20a18e 100644 --- a/devices/sources/target_fort_allocatable_map.1.f90 +++ b/devices/sources/target_fort_allocatable_map.1.f90 @@ -1,4 +1,4 @@ -! @@name: fort_allocatable_map.1f90 +! @@name: target_fort_allocatable_map.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/devices/sources/target_fort_allocatable_map.2.f90 b/devices/sources/target_fort_allocatable_map.2.f90 index 144d0eb..6c31bcb 100644 --- a/devices/sources/target_fort_allocatable_map.2.f90 +++ b/devices/sources/target_fort_allocatable_map.2.f90 @@ -1,22 +1,28 @@ -! 
@@name: fort_allocatable_map.2f90 +! @@name: target_fort_allocatable_map.2 ! @@type: F-free ! @@compilable: yes -! @@linkable: no -! @@expect: fail +! @@linkable: yes +! @@expect: unspecified ! @@version: omp_5.1 program main implicit none - integer, allocatable :: a(:,:), b(:) + integer, allocatable :: a(:,:), b(:), c(:) integer :: x(10,2) allocate(a(2,10)) - !$omp target ! Target 1 - a=x ! reshape (or resize) NOT ALLOWED (implicit change) - deallocate(a) ! allocation status change of a NOT ALLOWED - allocate(b(20)) ! allocation status change of b NOT ALLOWED - print*, "ERROR: status change and resize/shaping NOT ALLOWED in target rgn." + !$omp target + a = x ! Reshape (or resize) NOT ALLOWED (implicit change) + + deallocate(a) ! Allocation status change of "a" NOT ALLOWED. + + allocate(b(20)) ! Allocation of b * + + c = 10 ! Intrinsic assignment allocates c * + + ! * Since an explicit deallocation for b and c does not occur before + ! the end of the target region, the PROGRAM BEHAVIOR IS UNSPECIFIED. !$omp end target -end program +end program \ No newline at end of file diff --git a/devices/sources/target_fort_allocatable_map.3.f90 b/devices/sources/target_fort_allocatable_map.3.f90 index affc849..daf471a 100644 --- a/devices/sources/target_fort_allocatable_map.3.f90 +++ b/devices/sources/target_fort_allocatable_map.3.f90 @@ -1,4 +1,4 @@ -! @@name: fort_allocatable_map.3f90 +! @@name: target_fort_allocatable_map.3 ! @@type: F-free ! @@compilable: yes ! @@linkable: no @@ -9,7 +9,7 @@ contains subroutine foo(ain,bout) implicit none integer, allocatable, intent( in) :: ain(:) - integer, allocatable, intent(out) :: bout(:) !"out" causes de/reallocate + integer, allocatable, intent(out) :: bout(:) !"out" causes de/realloc !$omp declare target bout = ain end subroutine @@ -27,7 +27,8 @@ program main !$omp target - call foo(a,b) !ERROR: b deallocation/reallocation not allowed in target region + call foo(a,b) !ERROR: b deallocation/reallocation not allowed + ! 
in target region !$omp end target diff --git a/devices/sources/target_mapper.1.c b/devices/sources/target_mapper.1.c index c6f6fc2..0a01a80 100644 --- a/devices/sources/target_mapper.1.c +++ b/devices/sources/target_mapper.1.c @@ -1,5 +1,5 @@ /* -* @@name: target_mapper_map.1c +* @@name: target_mapper.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_mapper.1.f90 b/devices/sources/target_mapper.1.f90 index f063481..7ffea0a 100644 --- a/devices/sources/target_mapper.1.f90 +++ b/devices/sources/target_mapper.1.f90 @@ -1,4 +1,4 @@ -! @@name: target_mapper.1.f90 +! @@name: target_mapper.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_mapper.2.c b/devices/sources/target_mapper.2.c index b810bf6..db2b0bf 100644 --- a/devices/sources/target_mapper.2.c +++ b/devices/sources/target_mapper.2.c @@ -1,5 +1,5 @@ /* -* @@name: target_mapper_map.2.c +* @@name: target_mapper.2 * @@type: C * @@compilable: yes * @@linkable: no @@ -23,9 +23,10 @@ #pragma omp declare mapper(bottom_id: dzmat_t v) \ map(v.r_m[N/2:N/2][0:N], \ v.i_m[N/2:N/2][0:N] ) - -void dzmat_init(dzmat_t *z, int is, int ie, int n); //initialization -void host_add( dzmat_t *a, dzmat_t *b, dzmat_t *c, int n); //matrix add: c=a+b +//initialization +void dzmat_init(dzmat_t *z, int is, int ie, int n); +//matrix add: c=a+b +void host_add( dzmat_t *a, dzmat_t *b, dzmat_t *c, int n); int main() @@ -48,7 +49,7 @@ int main() dzmat_init(&a,is,ie,N); dzmat_init(&b,is,ie,N); } - + #pragma omp taskwait host_add(&a,&b,&c,N); diff --git a/devices/sources/target_mapper.2.f90 b/devices/sources/target_mapper.2.f90 index 45e2822..709d00a 100644 --- a/devices/sources/target_mapper.2.f90 +++ b/devices/sources/target_mapper.2.f90 @@ -1,4 +1,4 @@ -! @@name: target_mapper.2.f90 +! @@name: target_mapper.2 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no @@ -42,7 +42,7 @@ program main call dzmat_init(b,is,ie) !$omp end target - !omp taskwait + !$omp taskwait call host_add(a,b,c) diff --git a/devices/sources/target_mapper.3.c b/devices/sources/target_mapper.3.c index b2f6e6b..e716519 100644 --- a/devices/sources/target_mapper.3.c +++ b/devices/sources/target_mapper.3.c @@ -1,5 +1,5 @@ /* -* @@name: target_mapper_map.3.c +* @@name: target_mapper.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/devices/sources/target_mapper.3.f90 b/devices/sources/target_mapper.3.f90 index 75db578..4f954f2 100644 --- a/devices/sources/target_mapper.3.f90 +++ b/devices/sources/target_mapper.3.f90 @@ -1,4 +1,4 @@ -! @@name: target_mapper.3.f90 +! @@name: target_mapper.3 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/devices/sources/target_ptr_map.2.c b/devices/sources/target_ptr_map.2.c index f8d3195..d953761 100644 --- a/devices/sources/target_ptr_map.2.c +++ b/devices/sources/target_ptr_map.2.c @@ -1,19 +1,19 @@ /* -* @@name: target_ptr_map.2c +* @@name: target_ptr_map.2 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.1 */ #include #include #define N 100 -#pragma omp declare target -int *p; -extern void use_arg_p(int *p, int n); -extern void use_global_p( int n); +#pragma omp begin declare target + int *p; + extern void use_arg_p(int *p, int n); + extern void use_global_p( int n); #pragma omp end declare target int main() @@ -35,7 +35,8 @@ int main() return 0; } -//#pragma omp declare target (optional here because of prototype spec) +// A #pragma omp begin declare target is optional here +// because of prototype spec void use_arg_p(int *q, int n) { int i; @@ -48,6 +49,8 @@ void use_global_p(int n) int i; for (i=0; i #include @@ -24,7 +24,7 @@ int main() #pragma omp metadirective \ when(user={condition(accessible)}: target firstprivate(ptr) ) \ - default( target map(ptr[:n]) ) + otherwise( target map(ptr[:n]) ) { 
do_work(ptr, n); } diff --git a/devices/sources/target_ptr_map.5.c b/devices/sources/target_ptr_map.5.c index ed1061a..91457de 100644 --- a/devices/sources/target_ptr_map.5.c +++ b/devices/sources/target_ptr_map.5.c @@ -1,10 +1,10 @@ /* -* @@name: target_ptr_map_5.c +* @@name: target_ptr_map.5 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_5.1 +* @@version: omp_5.2 */ #include #include @@ -26,11 +26,12 @@ int main() T s = { 0, buf_size }; const int dev = omp_get_default_device(); s.ptr = (int *)malloc(buf_size); - const int accessible = omp_target_is_accessible(s.ptr, s.buf_size, dev); + const int accessible = + omp_target_is_accessible(s.ptr, s.buf_size, dev); #pragma omp metadirective \ when(user={condition(accessible)}: target) \ - default( target map(mapper(deep_copy),tofrom:s) ) + otherwise(target map(mapper(deep_copy),tofrom:s) ) { do_work(s.ptr, n); } diff --git a/devices/sources/target_ptr_map.5.f90 b/devices/sources/target_ptr_map.5.f90 index 6a59927..5795974 100644 --- a/devices/sources/target_ptr_map.5.f90 +++ b/devices/sources/target_ptr_map.5.f90 @@ -1,9 +1,9 @@ -! @@name: target_ptr_map_5.f90 +! @@name: target_ptr_map.5 ! @@type: F-free ! @@compilable: yes ! @@linkable: no ! @@expect: success -! @@version: omp_5.1 +! 
@@version: omp_5.2 program main use omp_lib @@ -33,7 +33,7 @@ program main !$omp begin metadirective & !$omp& when(user={condition(accessible)}: target) & - !$omp& default( target map(mapper(deep_copy),tofrom:s) ) + !$omp& otherwise( target map(mapper(deep_copy),tofrom:s) ) call do_work(s, n) diff --git a/devices/sources/target_reverse_offload.7.c b/devices/sources/target_reverse_offload.7.c index 42d5525..604b811 100644 --- a/devices/sources/target_reverse_offload.7.c +++ b/devices/sources/target_reverse_offload.7.c @@ -1,10 +1,10 @@ /* -* @@name: target_reverse_offload.1c +* @@name: target_reverse_offload.7 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.2 */ #include @@ -23,7 +23,7 @@ void error_handler(int wrong_value, int index) // Expecting: A[i ]=i } -#pragma omp declare target device_type(host) to(error_handler) +#pragma omp declare target device_type(host) enter(error_handler) int main() { diff --git a/devices/sources/target_reverse_offload.7.f90 b/devices/sources/target_reverse_offload.7.f90 index 6fce764..5f8b0e8 100644 --- a/devices/sources/target_reverse_offload.7.f90 +++ b/devices/sources/target_reverse_offload.7.f90 @@ -1,4 +1,4 @@ -! @@name: target_reverse_offload.1f +! @@name: target_reverse_offload.7 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/devices/sources/target_struct_map.1.c b/devices/sources/target_struct_map.1.c index 660178a..baa6797 100644 --- a/devices/sources/target_struct_map.1.c +++ b/devices/sources/target_struct_map.1.c @@ -1,10 +1,10 @@ /* -* @@name: target_struct_map.1c +* @@name: target_struct_map.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.1 */ #include #include @@ -19,7 +19,7 @@ struct foo { float *p; }; -#pragma omp declare target +#pragma omp begin declare target void saxpyfun(struct foo *S) { int i; diff --git a/devices/sources/target_struct_map.2.c b/devices/sources/target_struct_map.2.c index b99464f..f769436 100644 --- a/devices/sources/target_struct_map.2.c +++ b/devices/sources/target_struct_map.2.c @@ -1,5 +1,5 @@ /* -* @@name: target_struct_map.2c +* @@name: target_struct_map.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/devices/sources/target_struct_map.2.cpp b/devices/sources/target_struct_map.2.cpp index 303dc68..bc8fc22 100644 --- a/devices/sources/target_struct_map.2.cpp +++ b/devices/sources/target_struct_map.2.cpp @@ -1,10 +1,10 @@ /* -* @@name: target_struct_map.2cpp +* @@name: target_struct_map.2 * @@type: C++ * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.1 */ #include #include @@ -21,7 +21,7 @@ class SAXPY { void saxpyfun(float *p); }; -#pragma omp declare target +#pragma omp begin declare target void SAXPY::saxpyfun(float *q) { for(int i=0; i #include @@ -19,7 +19,7 @@ struct foo { float *p; }; -#pragma omp declare target +#pragma omp begin declare target void saxpyfun(struct foo *S) { int i; diff --git a/devices/sources/target_struct_map.4.c b/devices/sources/target_struct_map.4.c index fa0e762..34bea98 100644 --- a/devices/sources/target_struct_map.4.c +++ b/devices/sources/target_struct_map.4.c @@ -1,10 +1,10 @@ /* -* @@name: target_ptr_map.4.c +* @@name: target_struct_map.4 * @@type: C * 
@@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.1 */ #include #include @@ -19,7 +19,7 @@ struct foo { float *p; }; -#pragma omp declare target +#pragma omp begin declare target void saxpyfun(struct foo *S) { int i; @@ -41,8 +41,9 @@ int main() for(i=0; i +#pragma omp requires unified_shared_memory + +#pragma omp begin declare target +class A { + public: + virtual void vf() { std::cout << "In A\n"; } +}; + +class D: public A { + public: + void vf() override { std::cout << "In D\n"; } +}; +#pragma omp end declare target + +int main(){ + + // Section 1 -------------------------------------------------------- + D d; // D derives from A, and A::vf() is virtual + A &ar = d; // reference to Derived object d + + #pragma omp target // implicit map of ar is illegal here + { + ar.vf(); // unspecified whether A::vf() or D::vf() is called + } + + A *ap = &d; // pointer to derived object d + #pragma omp target // No need for mapping with Unified Shared Memory + { // implicit ap[:0] map is fine + ap->vf(); // calls D::vf() + } + + // Section 2 -------------------------------------------------------- + ap = nullptr; + #pragma omp target map(ap) + { + ap = new A(); + } + + ap->vf(); // illegal + + #pragma omp target + { + delete ap; + } + ap = new A(); + #pragma omp target // No need for mapping with Unified Shared Memory + { + ap->vf(); // ok + } + + return 0; +} diff --git a/devices/target.tex b/devices/target.tex index 39a9923..0c349c8 100644 --- a/devices/target.tex +++ b/devices/target.tex @@ -4,6 +4,9 @@ \subsection{\code{target} Construct on \code{parallel} Construct} \label{subsec:target_parallel} +\index{constructs!target@\code{target}} +\index{target construct@\code{target} construct} +\index{target construct@\code{target} construct!implicit mapping} This following example shows how the \code{target} construct offloads a code region to a target device. 
The variables \plc{p}, \plc{v1}, \plc{v2}, and \plc{N} are implicitly mapped @@ -15,6 +18,9 @@ to the target device. \subsection{\code{target} Construct with \code{map} Clause} \label{subsec:target_map} +\index{target construct@\code{target} construct!map clause@\code{map} clause} +\index{clauses!map@\code{map}} +\index{map clause@\code{map} clause} This following example shows how the \code{target} construct offloads a code region to a target device. The variables \plc{p}, \plc{v1} and \plc{v2} are explicitly mapped to the @@ -27,6 +33,8 @@ the target device. \subsection{\code{map} Clause with \code{to}/\code{from} map-types} \label{subsec:target_map_tofrom} +\index{map clause@\code{map} clause!to map-type@\code{to} map-type} +\index{map clause@\code{map} clause!from map-type@\code{from} map-type} The following example shows how the \code{target} construct offloads a code region to a target device. In the \code{map} clause, the \code{to} and \code{from} @@ -57,6 +65,7 @@ to the default behavior of an implicit mapping. \subsection{\code{map} Clause with Array Sections} \label{subsec:target_array_section} +\index{map clause@\code{map} clause!array sections in} The following example shows how the \code{target} construct offloads a code region to a target device. In the \code{map} clause, map-types are used to optimize @@ -84,6 +93,9 @@ for the lower bound for array section \plc{v2(:N)}. \subsection{\code{target} Construct with \code{if} Clause} \label{subsec:target_if} +\index{target construct@\code{target} construct!if clause@\code{if} clause} +\index{clauses!if@\code{if}} +\index{if clause@\code{if} clause} The following example shows how the \code{target} construct offloads a code region to a target device. @@ -114,6 +126,24 @@ to the \code{parallel} component of the combined directive. 
\subsection{Target Reverse Offload} \label{subsec:target_reverse_offload} +\index{target reverse offload!reverse_offload clause@\scode{reverse_offload} clause} +\index{target reverse offload!requires directive@\code{requires} directive} +\index{requires directive@\code{requires} directive!reverse_offload clause@\scode{reverse_offload} clause} +\index{directives!requires@\code{requires}} +\index{clauses!reverse_offload@\scode{reverse_offload}} +\index{reverse_offload clause@\scode{reverse_offload} clause} +\index{target construct@\code{target} construct!device clause@\code{device} clause} +\index{clauses!device@\code{device}} +\index{device clause@\code{device} clause!ancestor modifier@\code{ancestor} modifier} +\index{ancestor modifier@\code{ancestor} modifier} +\index{declare target directive@\code{declare}~\code{target} directive!device_type clause@\scode{device_type} clause} +\index{clauses!device_type@\scode{device_type}} +\index{device_type clause@\scode{device_type} clause} +\index{clauses!enter@\code{enter}} +\index{enter clause@\code{enter} clause} + +\index{directives!declare target@\code{declare}~\code{target}} +\index{declare target directive@\code{declare}~\code{target} directive} Beginning with OpenMP 5.0, implementations are allowed to offload back to the host (reverse offload). @@ -131,7 +161,7 @@ The \code{requires} directive (another 5.0 feature) uses the \code{reverse\_offload} clause to guarantee that the reverse offload is implemented. -Note that the \code{declare target} directive uses the +Note that the \code{declare}~\code{target} directive uses the \code{device\_type} clause (another 5.0 feature) to specify that the \plc{error\_handler} function is compiled to execute on the \plc{host} only. This ensures that no @@ -140,6 +170,6 @@ function. This feature may be necessary if the function exists in another compile unit. 
-\cexample[5.0]{target_reverse_offload}{7} +\cexample[5.2]{target_reverse_offload}{7} \ffreeexample[5.0]{target_reverse_offload}{7} diff --git a/devices/target_associate_ptr.tex b/devices/target_associate_ptr.tex index ad5adde..b4470c5 100644 --- a/devices/target_associate_ptr.tex +++ b/devices/target_associate_ptr.tex @@ -1,7 +1,12 @@ -\newpage -\section{Device and Host Memory Association} +%\newpage +\subsection{Device and Host Memory Association} +\label{subsec:target_associate_ptr} \label{sec:target_associate_ptr} +\index{routines!omp_target_associate_ptr@\scode{omp_target_associate_ptr}} +\index{omp_target_associate_ptr routine@\scode{omp_target_associate_ptr} routine} +\index{routines!omp_target_alloc@\scode{omp_target_alloc}} +\index{omp_target_alloc routine@\scode{omp_target_alloc} routine} The association of device memory with host memory can be established by calling the \scode{omp_target_associate_ptr} API routine as part of the mapping. @@ -14,6 +19,10 @@ In Fortran, the intrinsic function \scode{c_loc} is called to obtain the corresponding C pointer (\splc{h_ptr}) of \splc{arr(ioff)} for use in the call to the API routine. +\index{constructs!target update@\code{target}~\code{update}} +\index{target update construct@\code{target}~\code{update} construct} +\index{map clause@\code{map} clause!always modifier@\code{always} modifier} +\index{always modifier@\code{always} modifier} Since the reference count of the resulting mapping is infinite, it is necessary to use the \scode{target}~\scode{update} directive (or the \scode{always} modifier in a \scode{map} clause) to accomplish a @@ -25,6 +34,10 @@ when referencing the array \splc{arr} in the \scode{target} region. The device pointer \splc{dev_ptr} cannot be accessed directly after a call to the \scode{omp_target_associate_ptr} routine. 
+\index{routines!omp_target_disassociate_ptr@\scode{omp_target_disassociate_ptr}} +\index{omp_target_disassociate_ptr routine@\scode{omp_target_disassociate_ptr} routine} +\index{routines!omp_target_free@\scode{omp_target_free}} +\index{omp_target_free routine@\scode{omp_target_free} routine} After the \scode{target} region, the device pointer is disassociated from the current chunk of the host memory by calling the \scode{omp_target_disassociate_ptr} routine before working on the next chunk. The device memory is freed by calling the \scode{omp_target_free} diff --git a/devices/target_data.tex b/devices/target_data.tex index 48adeef..5a50ad7 100644 --- a/devices/target_data.tex +++ b/devices/target_data.tex @@ -4,6 +4,8 @@ \subsection{Simple \code{target} \code{data} Construct} \label{subsec:target_data_simple} +\index{constructs!target data@\code{target}~\code{data}} +\index{target data construct@\code{target}~\code{data} construct} This example shows how the \code{target} \code{data} construct maps variables to a device data environment. The \code{target} \code{data} construct creates @@ -50,6 +52,10 @@ once by the \code{target} \code{data} construct. \ffreeexample[4.0]{target_data}{2} +\index{target data construct@\code{target}~\code{data} construct!map clause@\code{map} clause} +\index{target construct@\code{target} construct!map clause@\code{map} clause} +\index{target construct@\code{target} construct!implicit mapping} +\index{map clause@\code{map} clause!tofrom map-type@\code{tofrom} map-type} In the following example, the array \plc{Q} is mapped once at the enclosing \code{target}~\code{data} region instead of at each \code{target} construct. In OpenMP 4.0, a scalar variable is implicitly mapped with the \code{tofrom} map-type. @@ -69,6 +75,7 @@ maps variables to a device data environment. The \code{target} \code{data} construct's device data environment encloses the \code{target} construct's device data environment in the function \code{vec\_mult()}. 
+\index{map clause@\code{map} clause!alloc map-type@\code{alloc} map-type} When the type of the variable appearing in an array section is pointer, the pointer variable and the storage location of the corresponding array section are mapped to the device data environment. The pointer variable is treated as if it had appeared @@ -126,6 +133,9 @@ data environment. \subsection{\code{target} \code{data} Construct with \code{if} Clause} \label{subsec:target_data_if} +\index{target data construct@\code{target}~\code{data} construct!if clause@\code{if} clause} +\index{clauses!if@\code{if}} +\index{if clause@\code{if} clause} The following two examples show how the \code{target} \code{data} construct maps variables to a device data environment. diff --git a/devices/target_defaultmap.tex b/devices/target_defaultmap.tex index 2e48fbc..e553d5c 100644 --- a/devices/target_defaultmap.tex +++ b/devices/target_defaultmap.tex @@ -1,6 +1,11 @@ \pagebreak \section{\code{defaultmap} Clause} \label{sec:defaultmap} +\index{target construct@\code{target} construct!defaultmap clause@\code{defaultmap} clause} +\index{clauses!defaultmap@\code{defaultmap}} +\index{defaultmap clause@\code{defaultmap} clause} +\index{defaultmap clause@\code{defaultmap} clause!implicit behavior} +\index{defaultmap clause@\code{defaultmap} clause!variable category} The implicitly-determined, data-mapping and data-sharing attribute rules of variables referenced in a \code{target} construct can be diff --git a/devices/target_fort_allocatable_array_mapping.tex b/devices/target_fort_allocatable_array_mapping.tex index 9e31102..b792a3e 100644 --- a/devices/target_fort_allocatable_array_mapping.tex +++ b/devices/target_fort_allocatable_array_mapping.tex @@ -1,6 +1,7 @@ \pagebreak \section{Fortran Allocatable Array Mapping} \label{sec:fort_allocatable_array_mapping} +\index{mapping!allocatable array, Fortran} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -38,12 +39,14 @@ reference 
to allocated storage) on the device, is similar to pointer attachment. \ffreeexample[5.1]{target_fort_allocatable_map}{1} -Once an allocatable variable have been allocated on the host, +Once an allocatable variable has been allocated on the host, its allocation status may not be changed in a \code{target} region, either explicitly or implicitly. The following example illustrates typical operations on allocatable variables that violate this restriction. Note, an assignment that reshapes or reassigns (causing a deallocation -and allocation) in a \code{target} region is not compliant. +and allocation) in a \code{target} region is not conforming. +Also, an initial intrinsic assignment of an allocatable variable +requires deallocation before the \scode{target} region ends. \ffreeexample[5.1]{target_fort_allocatable_map}{2} diff --git a/devices/target_mapper.tex b/devices/target_mapper.tex index 048dbc0..8a1d802 100644 --- a/devices/target_mapper.tex +++ b/devices/target_mapper.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{declare mapper} Directive} \label{sec:declare_mapper} +\index{directives!declare mapper@\code{declare}~\code{mapper}} +\index{declare mapper directive@\code{declare}~\code{mapper} directive} The following examples show how to use the \code{declare mapper} directive to prescribe a map for later use. @@ -31,6 +33,9 @@ full structure, plus the dynamic storage of the \plc{data} element. \ffreeexample[5.0]{target_mapper}{1} %\pagebreak +\index{mapping!deep copy} +\index{map clause@\code{map} clause!mapper modifier@\code{mapper} modifier} +\index{mapper modifier@\code{mapper} modifier} The next example illustrates the use of the \plc{mapper-identifier} and deep copy within a structure. The structure, \plc{dzmat\_t}, represents a complex matrix, with separate real (\plc{r\_m}) and imaginary (\plc{i\_m}) elements. 
diff --git a/devices/target_pointer_mapping.tex b/devices/target_pointer_mapping.tex index 53a2302..b0d9457 100644 --- a/devices/target_pointer_mapping.tex +++ b/devices/target_pointer_mapping.tex @@ -1,6 +1,9 @@ \pagebreak \section{Pointer Mapping} \label{sec:pointer_mapping} +\index{mapping!pointer} +\index{mapping!pointer attachment} +\index{pointer attachment} Pointers that contain host addresses require that those addresses are translated to device addresses for them to be useful in the context of a device data environment. Broadly speaking, there are two scenarios where this is important. @@ -55,13 +58,16 @@ Since \plc{ptr3} is private, the value to which it is assigned in the \cexample[5.0]{target_ptr_map}{1} +\index{directives!begin declare target@\code{begin}~\code{declare}~\code{target}} +\index{begin declare target directive@\code{begin}~\code{declare}~\code{target} directive} + In the following example the global pointer \plc{p} appears in a -\code{declare}~\code{target} directive. Hence, the pointer \plc{p} will +declare target directive. Hence, the pointer \plc{p} will persist on the device throughout executions in all \code{target} regions. The pointer is also used in an array section of a \code{map} clause on -a \code{target} construct. When storage associated with -a \code{declare}~\code{target} pointer +a \code{target} construct. When the pointer of storage associated with +a declare target directive is mapped, as for the array section \plc{p[:N]} in the \code{target} construct, the array section on the device is \emph{attached} to the device pointer \plc{p} on entry to the construct, and @@ -72,7 +78,10 @@ pointer on the device is \emph{attached}.) % For globals with declare target is there such a things a % original and corresponding? 
-\cexample[5.0]{target_ptr_map}{2} +\cexample[5.1]{target_ptr_map}{2} + +\index{directives!begin declare target@\code{begin}~\code{declare}~\code{target}} +\index{begin declare target directive@\code{begin}~\code{declare}~\code{target} directive} The following two examples illustrate subtle differences in pointer attachment to device address because of the order of data mapping. @@ -82,7 +91,7 @@ the global pointer \plc{p1} points to array \plc{x} and \plc{p2} points to array \plc{y} on the host. The array section \plc{x[:N]} is mapped by the \code{target}~\code{enter}~\code{data} directive while array \plc{y} is mapped on the \code{target} construct. -Since the \code{declare}~\code{target} directive is applied to the declaration +Since the \code{begin}~\code{declare}~\code{target} directive is applied to the declaration of \plc{p1}, \plc{p1} is a treated like a mapped variable on the \code{target} construct and references to \plc{p1} inside the construct will be to the corresponding \plc{p1} that exists on the device. However, the corresponding @@ -91,12 +100,12 @@ attachment for \plc{p1} would require that (1) \plc{p1} (or an lvalue expression that refers to the same storage as \plc{p1}) appears as a base pointer to a list item in a \code{map} clause, and (2) the construct that has the \code{map} clause causes the list item to transition from \emph{not mapped} -to \emph{mapped}. The conditions are clearly not satisifed for this example. +to \emph{mapped}. The conditions are clearly not satisfied for this example. The problem for \plc{p2} in this example is also subtle. It will be privatized inside the \code{target} construct, with a runtime check for whether the memory to which it is pointing has corresponding memory that is accessible on the -device. If this check is successful then the \plc{p2} inside the construct +device. 
If this check is successful, then the \plc{p2} inside the construct would be appropriately initialized to point to that corresponding memory. Unfortunately, despite there being an implicit map of the array \plc{y} (to which \plc{p2} is pointing) on the construct, the order of this map relative to @@ -106,7 +115,7 @@ the initialization of \plc{p2} is unspecified. Therefore, the initial value of Thus, referencing values via either \plc{p1} or \plc{p2} inside the \code{target} region would be invalid. -\cexample[5.0]{target_ptr_map}{3a} +\cexample[5.1]{target_ptr_map}{3a} In example \plc{target\_ptr\_map.3b} the mapping orders for arrays \plc{x} and \plc{y} were rearranged to allow proper pointer attachments. @@ -116,9 +125,11 @@ Pointer \plc{p2} is assigned the device address of the previously mapped array \plc{y}. Referencing values via either \plc{p1} or \plc{p2} inside the \code{target} region is now valid. -\cexample[5.0]{target_ptr_map}{3b} -\clearpage +\cexample[5.1]{target_ptr_map}{3b} +%\clearpage +\index{routines!omp_target_is_accessible@\scode{omp_target_is_accessible}} +\index{omp_target_is_accessible routine@\scode{omp_target_is_accessible} routine} In the following example, storage allocated on the host is not mapped in a \code{target} region if it is determined that the host memory is accessible from the device. On platforms that support host memory access from a target device, @@ -143,12 +154,13 @@ program, \plc{ptr} will be NULL-initialized in the \code{target} region. In the next version of the OpenMP Specification, a false presence check without the \code{firstprivate} clause will cause the pointer to retain its original value. -\cexample[5.1]{target_ptr_map}{4} +\cexample[5.2]{target_ptr_map}{4} +\index{mapping!deep copy} Similar to the previous example, the \code{omp\_target\_is\_accessible} routine is used to discover if a deep copy is required for the platform. 
Here, the \plc{deep\_copy} map, defined in the \code{declare}~\code{mapper} directive, is used if the host storage referenced by \plc{s.ptr} (or \plc{s\%ptr} in Fortran) is not accessible from the device. -\cexample[5.1]{target_ptr_map}{5} -\ffreeexample[5.1]{target_ptr_map}{5} +\cexample[5.2]{target_ptr_map}{5} +\ffreeexample[5.2]{target_ptr_map}{5} diff --git a/devices/target_structure_mapping.tex b/devices/target_structure_mapping.tex index 1bd6652..9beef2d 100644 --- a/devices/target_structure_mapping.tex +++ b/devices/target_structure_mapping.tex @@ -1,6 +1,10 @@ \pagebreak \section{Structure Mapping} \label{sec:structure_mapping} +\index{mapping!structure} + +\index{directives!begin declare target@\code{begin}~\code{declare}~\code{target}} +\index{begin declare target directive@\code{begin}~\code{declare}~\code{target} directive} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -19,7 +23,7 @@ Note: The buffer arrays and the \plc{x} variable have been grouped together, so the components that will reside on the device are all together (without gaps). This allows the runtime to optimize the transfer and the storage footprint on the device. -\cexample[5.0]{target_struct_map}{1} +\cexample[5.1]{target_struct_map}{1} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -28,7 +32,7 @@ a C++ class. In the member function \plc{SAXPY::driver} the array section \plc{p[:N]} is \emph{attached} to the pointer member \plc{p} on the device. -\cppexample[5.0]{target_struct_map}{2} +\cppexample[5.1]{target_struct_map}{2} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -77,7 +81,7 @@ must do so when the \splc{S2.p} is already present on the device or will be created on the device as a result of the same construct. 
-\cexample[5.0]{target_struct_map}{3} +\cexample[5.1]{target_struct_map}{3} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -117,5 +121,5 @@ this implicit map only ensures that storage for the members \splc{a}, \splc{b}, and \splc{p} are accessible within the corresponding \splc{S3} that is created on the device. -\cexample[5.0]{target_struct_map}{4} +\cexample[5.1]{target_struct_map}{4} diff --git a/devices/target_unstructured_data.tex b/devices/target_unstructured_data.tex index 898bf29..cf3915a 100644 --- a/devices/target_unstructured_data.tex +++ b/devices/target_unstructured_data.tex @@ -3,6 +3,10 @@ \section{\code{target} \code{enter} \code{data} and \code{target} \code{exit} \code{data} Constructs} \label{sec:target_enter_exit_data} %\section{Simple target enter data and target exit data Constructs} +\index{constructs!target enter data@\code{target} \code{enter} \code{data}} +\index{constructs!target exit data@\code{target} \code{exit} \code{data}} +\index{target enter data construct@\code{target} \code{enter} \code{data} construct} +\index{target exit data construct@\code{target} \code{exit} \code{data} construct} The structured data construct (\code{target}~\code{data}) provides persistent data on a device for subsequent \code{target} constructs as shown in the @@ -13,6 +17,10 @@ The unstructured data constructs allow the creation and deletion of data on the device at any appropriate point within the host code, as shown below with the \code{target}~\code{enter}~\code{data} and \code{target}~\code{exit}~\code{data} constructs. +\index{map clause@\code{map} clause!alloc map-type@\code{alloc} map-type} +\index{map clause@\code{map} clause!delete map-type@\code{delete} map-type} +\index{alloc map-type@\code{alloc} map-type} +\index{delete map-type@\code{delete} map-type} The following C++ code creates/deletes a vector in a constructor/destructor of a class. 
The constructor creates a vector with \code{target}~\code{enter}~\code{data} and uses an \code{alloc} modifier in the \code{map} clause to avoid copying values diff --git a/devices/target_update.tex b/devices/target_update.tex index 00bcb9c..abc91af 100644 --- a/devices/target_update.tex +++ b/devices/target_update.tex @@ -4,6 +4,18 @@ \subsection{Simple \code{target} \code{data} and \code{target} \code{update} Constructs} \label{subsec:target_data_and_update} +\index{constructs!target data@\code{target}~\code{data}} +\index{target data construct@\code{target}~\code{data} construct} +\index{constructs!target update@\code{target}~\code{update}} +\index{target update construct@\code{target}~\code{update} construct} +\index{target update construct@\code{target}~\code{update} construct!to clause@\code{to} clause} +\index{target update construct@\code{target}~\code{update} construct!from clause@\code{from} clause} +\index{target update construct@\code{target}~\code{update} construct!motion-clause@\plc{motion-clause}} +\index{clauses!motion-clause@\plc{motion-clause}} +\index{clauses!to@\code{to}} +\index{clauses!from@\code{from}} +\index{motion-clause@\plc{motion-clause}!to clause@\code{to} clause} +\index{motion-clause@\plc{motion-clause}!from clause@\code{from} clause} The following example shows how the \code{target} \code{update} construct updates variables in a device data environment. @@ -33,6 +45,9 @@ The second \code{target} region uses the updated values of \plc{v1[:N]} and \plc \subsection{\code{target} \code{update} Construct with \code{if} Clause} \label{subsec:target_update_if} +\index{target update construct@\code{target}~\code{update} construct!if clause@\code{if} clause} +\index{clauses!if@\code{if}} +\index{if clause@\code{if} clause} The following example shows how the \code{target} \code{update} construct updates variables in a device data environment. 
diff --git a/devices/teams.tex b/devices/teams.tex index 16978ab..da340ec 100644 --- a/devices/teams.tex +++ b/devices/teams.tex @@ -5,6 +5,18 @@ \subsection{\code{target} and \code{teams} Constructs with \code{omp\_get\_num\_teams}\\ and \code{omp\_get\_team\_num} Routines} \label{subsec:teams_api} +\index{constructs!target@\code{target}} +\index{target construct@\code{target} construct} +\index{constructs!teams@\code{teams}} +\index{teams construct@\code{teams} construct} +\index{combined constructs!target teams@\code{target}~\code{teams}} +\index{teams construct@\code{teams} construct!num_teams clause@\scode{num_teams} clause} +\index{clauses!num_teams@\scode{num_teams}} +\index{num_teams clause@\scode{num_teams} clause} +\index{routines!omp_get_num_teams@\scode{omp_get_num_teams}} +\index{routines!omp_get_team_num@\scode{omp_get_team_num}} +\index{omp_get_num_teams routine@\scode{omp_get_num_teams} routine} +\index{omp_get_team_num routine@\scode{omp_get_team_num} routine} The following example shows how the \code{target} and \code{teams} constructs are used to create a league of thread teams that execute a region. The \code{teams} @@ -22,6 +34,8 @@ example manually distributes a loop across two teams. \subsection{\code{target}, \code{teams}, and \code{distribute} Constructs} \label{subsec:teams_distribute} +\index{constructs!distribute@\code{distribute}} +\index{distribute construct@\code{distribute} construct} The following example shows how the \code{target}, \code{teams}, and \code{distribute} constructs are used to execute a loop nest in a \code{target} region. The \code{teams} @@ -38,6 +52,7 @@ When a team's primary thread encounters the parallel loop construct before the i loop, the other threads in its team are activated. The team executes the \code{parallel} region and then workshares the execution of the loop. 
+\index{reduction clause@\code{reduction} clause!on teams construct@on \code{teams} construct} Each primary thread executing the \code{teams} region has a private copy of the variable \plc{sum} that is created by the \code{reduction} clause on the \code{teams} construct. The primary thread and all threads in its team have a private copy of the variable @@ -48,6 +63,7 @@ each primary thread's private copy of \plc{sum} is reduced into the final \plc{s implicitly mapped into the \code{target} region. \cexample[4.0]{teams}{2} +\clearpage \ffreeexample[4.0]{teams}{2} @@ -69,6 +85,12 @@ primary threads of each team and then across the threads of each team. \subsection{\code{target} \code{teams} and Distribute Parallel Loop Constructs with Scheduling Clauses} \label{subsec:teams_distribute_parallel_schedule} +\index{distribute construct@\code{distribute} construct!dist_schedule clause@\scode{dist_schedule} clause} +\index{clauses!dist_schedule@\scode{dist_schedule}} +\index{dist_schedule clause@\scode{dist_schedule} clause} +\index{worksharing-loop constructs!schedule clause@\code{schedule} clause} +\index{clauses!schedule@\code{schedule}} +\index{schedule clause@\code{schedule} clause} The following example shows how the \code{target} \code{teams} and distribute parallel loop constructs are used to execute a \code{target} region. The \code{teams} diff --git a/directives/attributes.tex b/directives/attributes.tex index 021d805..757a00a 100644 --- a/directives/attributes.tex +++ b/directives/attributes.tex @@ -1,5 +1,7 @@ \section{C++ Attributes} \label{sec:attributes} +\index{directive syntax!attribute, C++} +\index{attribute syntax, C++} OpenMP directives for C++ can also be specified with %the implementation-defined @@ -16,7 +18,7 @@ is applied two different ways in attribute form, as shown in the ATTR 1 and ATTR In ATTR 1 the attribute syntax is used with the \code{omp ::} namespace form. 
In ATTR 2 the attribute syntax is used with the \code{using omp :} namespace form. -Next, parallization is attempted by applying directives using two different syntaxes. +Next, parallelization is attempted by applying directives using two different syntaxes. For ATTR 3 and PRAG 4, the loop parallelization will fail to compile because multiple directives that apply to the same statement must all use either the attribute syntax or the pragma syntax. The lines have been commented out and labeled INVALID. diff --git a/directives/fixed_format_comments.tex b/directives/fixed_format_comments.tex index 96989c1..b270c1f 100644 --- a/directives/fixed_format_comments.tex +++ b/directives/fixed_format_comments.tex @@ -1,6 +1,8 @@ %\pagebreak \section{Fortran Comments (Fixed Source Form)} \label{sec:fortran_fixed_format_comments} +\index{directive syntax!fixed form, Fortran} +\index{fixed form syntax, Fortran} OpenMP directives in Fortran codes with fixed source form are specified as comments with one of the \code{!\$omp}, \code{c\$omp}, and \code{*\$omp} sentinels, followed by a @@ -15,3 +17,4 @@ Here, an \code{end} directive (\code{end}~\code{parallel}) must be specified to of the \code{parallel} directive. \fexample{directive_syntax_F_fixed_comment}{1} +\clearpage diff --git a/directives/free_format_comments.tex b/directives/free_format_comments.tex index 5d874f7..32e88aa 100644 --- a/directives/free_format_comments.tex +++ b/directives/free_format_comments.tex @@ -1,6 +1,8 @@ %\pagebreak \section{Fortran Comments (Free Source Form)} \label{sec:fortran_free_format_comments} +\index{directive syntax!free form, Fortran} +\index{free form syntax, Fortran} OpenMP directives in Fortran codes with free source form are specified as comments that use the \code{!\$omp} sentinel, followed by the @@ -17,6 +19,7 @@ Here, an \code{end} directive (\code{end}~\code{parallel}) must be specified to of the \code{parallel} directive. 
\ffreeexample{directive_syntax_F_free_comment}{1} +\clearpage As of OpenMP 5.1, \code{block} and \code{end}~\code{block} statements can be used to designate a structured block for an OpenMP region, and any paired OpenMP \code{end} directive becomes optional, @@ -26,3 +29,23 @@ It was necessary to explicitly declare the \plc{i} variable, due to the \code{im it could have also been declared outside the structured block. \ffreeexample[5.1]{directive_syntax_F_block}{1} + +A Fortran BLOCK construct may eliminate the need for a paired \scode{end} directive for an OpenMP construct, +as illustrated in the following example. + +The first \code{parallel} construct is specified with an OpenMP loosely structured block +(where the first executable construct is not a Fortran 2008 BLOCK construct). +A paired \scode{end} directive must end the OpenMP construct. +The second \code{parallel} construct is specified with an OpenMP strictly structured block +(consists only of a single Fortran BLOCK construct). +The paired \scode{end} directive is optional in this case, and is not used here. + +The next two \code{parallel} directives form an enclosing outer \code{parallel} construct +and a nested inner \code{parallel} construct. The first \code{end}~\code{parallel} directive +that subsequently appears terminates the inner \code{parallel} construct, +because a paired \scode{end} directive immediately following a BLOCK construct that is +a strictly structured block of an OpenMP construct is treated as the terminating end directive +of that construct. +The next \code{end}~\code{parallel} directive is required to terminate the outer \code{parallel} construct. 
+ +\ffreeexample[5.1]{directive_syntax_F_block}{2} diff --git a/directives/pragmas.tex b/directives/pragmas.tex index 303cd5b..9bf91af 100644 --- a/directives/pragmas.tex +++ b/directives/pragmas.tex @@ -1,9 +1,11 @@ %\pagebreak \section{C/C++ Pragmas} \label{sec:pragmas} +\index{directive syntax!pragma, C/C++} +\index{pragma syntax, C/C++} OpenMP C and C++ directives can be specified with the C/C++ \code{\#pragma} directive. -An OpenMP direcitve begins with \code{\#pragma}~\code{omp} and is followed by the +An OpenMP directive begins with \code{\#pragma}~\code{omp} and is followed by the OpenMP directive name, and required and optional clauses. Lines are continued in the usual manner, and comments may be included at the end. Directives are case sensitive. diff --git a/directives/sources/directive_syntax_F_block.1.f90 b/directives/sources/directive_syntax_F_block.1.f90 index d4e9d8f..db372fe 100644 --- a/directives/sources/directive_syntax_F_block.1.f90 +++ b/directives/sources/directive_syntax_F_block.1.f90 @@ -1,4 +1,4 @@ -! @@name: directive_syntax_F_block.f90 +! @@name: directive_syntax_F_block.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/directives/sources/directive_syntax_F_block.2.f90 b/directives/sources/directive_syntax_F_block.2.f90 new file mode 100644 index 0000000..6f32472 --- /dev/null +++ b/directives/sources/directive_syntax_F_block.2.f90 @@ -0,0 +1,54 @@ +! @@name: directive_syntax_F_block.2 +! @@type: F-free +! @@compilable: yes +! @@linkable: yes +! @@expect: success +! @@version: omp_5.1 +program main + + use omp_lib + implicit none + + !$omp parallel num_threads(2) + if( omp_get_thread_num() == 0 ) & + print*, "Loosely structured block -- end required." + block ! BLOCK Fortran 2008 + if( omp_get_thread_num() == 0 ) & + print*, " --" + end block + !$omp end parallel + + !$omp parallel num_threads(2) + block + if( omp_get_thread_num() == 0 ) & + print*, "Strictly structured block -- end not required." 
+ end block + !!$omp end parallel !is optional for strictly structured block + + print*, "Sequential part" + + !$omp parallel num_threads(2) !outer parallel + if( omp_get_thread_num() == 0 ) & + print*, "Outer, loosely structured block." + !$omp parallel num_threads(2) !inner parallel + block + if( omp_get_thread_num() == 0 ) & + print*, "Inner, strictly structured block." + end block + !$omp end parallel + !$omp end parallel + ! Two end directives are required here. + ! A single "!$omp end parallel" terminator will fail. + ! 1st end directive is assumed to be for inner parallel construct. + ! 2nd end directive applies to outer parallel construct. + +end program + +!OUTPUT, in order: +! Loosely structured block -- end required. +! -- +! Strictly structured block -- end not required. +! Sequential part +! Outer, loosely structured block. +! Inner, strictly structured block. +! Inner, strictly structured block. diff --git a/directives/sources/directive_syntax_F_fixed_comment.1.f b/directives/sources/directive_syntax_F_fixed_comment.1.f index 8e761e7..8daf1a5 100644 --- a/directives/sources/directive_syntax_F_fixed_comment.1.f +++ b/directives/sources/directive_syntax_F_fixed_comment.1.f @@ -1,4 +1,4 @@ -! @@name: directive_syntax_F_fixed.1.f +! @@name: directive_syntax_F_fixed_comment.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes @@ -29,7 +29,6 @@ c$omp parallel do num_threads(NT) !comments allowed here DIR 1 end do *$omp end parallel end - ! repeated 3 times, any order ! OUTPUT: thrd no 0 ! OUTPUT: thrd no 1 diff --git a/directives/sources/directive_syntax_F_free_comment.1.f90 b/directives/sources/directive_syntax_F_free_comment.1.f90 index 32dee8d..6c8cf71 100644 --- a/directives/sources/directive_syntax_F_free_comment.1.f90 +++ b/directives/sources/directive_syntax_F_free_comment.1.f90 @@ -1,4 +1,4 @@ -! @@name: directive_syntax_F_free_comment.1.f90 +! @@name: directive_syntax_F_free_comment.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/directives/sources/directive_syntax_attribute.1.cpp b/directives/sources/directive_syntax_attribute.1.cpp index a1c988a..855e631 100644 --- a/directives/sources/directive_syntax_attribute.1.cpp +++ b/directives/sources/directive_syntax_attribute.1.cpp @@ -1,5 +1,5 @@ /* -* @@name: directive_syntax_attribute.1.cpp +* @@name: directive_syntax_attribute.1 * @@type: C++ * @@compilable: yes * @@linkable: yes @@ -15,11 +15,11 @@ #pragma omp declare simd linear(i) simdlen(8) double P(int i){ return (double)i * (double)i; } -[[ omp :: directive( declare simd linear(i) simdlen(4) ) ]] -[[ omp :: directive( declare simd linear(i) simdlen(8) ) ]] +[[omp::directive(declare simd linear(i) simdlen(4))]] +[[omp::directive(declare simd linear(i) simdlen(8))]] double Q(int i){ return (double)i * (double)i; } -int main(){ +int main() { #pragma omp parallel for num_threads(NT) // PRAG 1 for(int i=0; i,line width=1.5pt}} +\tikzset{reddot/.style={fill=red,circle,inner xsep=0mm,inner ysep=0mm,minimum width=6mm,anchor=center}} +\tikzset{bluedot/.style={fill=blue,circle,inner xsep=0mm,inner ysep=0mm,minimum width=6mm,anchor=center}} +\tikzset{dot/.style={fill=black!60,circle,inner xsep=0mm,inner ysep=0mm,minimum width=6mm,anchor=center}} +\tikzset{completetile/.style={draw=black,->,line width=0.5pt,fill=blue!20}} +\tikzset{partialtile/.style={draw=black,->,line width=0.5pt,fill=red!20}} + + + +\path[axis] (0,0) -- (6,0) node[below](i) {j}; +\foreach \x in {0,...,5} { + \path[draw] (\x,0) -- ++(0,-0.1) node[below] {\x}; +} + +\path[axis] (0,0) -- (0,6) node[left](j) {i}; +\foreach \y in {0,...,5} {% + \path[draw] (0,\y) -- ++(-0.1,0) node[left]() {\y};% +} + +\node[use as bounding box,fit={(0,0) (6,6)}]{}; + + +\path[completetile] ([xshift=-0.4cm,yshift=-0.4cm]1,1) rectangle ([xshift=0.4cm,yshift=0.4cm]2,2); +\path[completetile] ([xshift=-0.4cm,yshift=-0.4cm]3,1) rectangle ([xshift=0.4cm,yshift=0.4cm]4,2); +\path[completetile] ([xshift=-0.4cm,yshift=-0.4cm]1,3) 
rectangle ([xshift=0.4cm,yshift=0.4cm]2,4); +\path[completetile] ([xshift=-0.4cm,yshift=-0.4cm]3,3) rectangle ([xshift=0.4cm,yshift=0.4cm]4,4); + +\path[partialtile] ([xshift=-0.4cm,yshift=-0.4cm]5,5) rectangle ([xshift=0.4cm,yshift=0.4cm]6,6); +\path[partialtile] ([xshift=-0.4cm,yshift=-0.4cm]1,5) rectangle ([xshift=0.4cm,yshift=0.4cm]2,6); +\path[partialtile] ([xshift=-0.4cm,yshift=-0.4cm]3,5) rectangle ([xshift=0.4cm,yshift=0.4cm]4,6); +\path[partialtile] ([xshift=-0.4cm,yshift=-0.4cm]5,1) rectangle ([xshift=0.4cm,yshift=0.4cm]6,2); +\path[partialtile] ([xshift=-0.4cm,yshift=-0.4cm]5,3) rectangle ([xshift=0.4cm,yshift=0.4cm]6,4); + + +\foreach \x in {1,...,5} { + \foreach \y in {1,...,5} { + \node[dot] at (\x,\y) {}; + } +} +\end{tikzpicture} +\end{document} diff --git a/figs/tile-Example_tile2.pdf b/figs/tile-Example_tile2.pdf new file mode 100644 index 0000000..fff52e9 Binary files /dev/null and b/figs/tile-Example_tile2.pdf differ diff --git a/figs/tile-Example_tile2.tex b/figs/tile-Example_tile2.tex new file mode 100644 index 0000000..29f28dd --- /dev/null +++ b/figs/tile-Example_tile2.tex @@ -0,0 +1,37 @@ +\documentclass{standalone} +\usepackage{tikz} +\usetikzlibrary{fit} + +\begin{document} +\begin{tikzpicture} +\tikzset{axis/.style={draw=black,->,line width=1.5pt}} +\tikzset{reddot/.style={fill=red,circle,inner xsep=0mm,inner ysep=0mm,minimum width=6mm,anchor=center}} +\tikzset{bluedot/.style={fill=blue,circle,inner xsep=0mm,inner ysep=0mm,minimum width=6mm,anchor=center}} +\tikzset{dot/.style={fill=black!60,circle,inner xsep=0mm,inner ysep=0mm,minimum width=0.5mm,anchor=center}} +\tikzset{completetile/.style={draw=black,->,line width=0.5pt,fill=blue!20}} +\tikzset{partialtile/.style={draw=black,->,line width=0.5pt,fill=red!20}} + +\begin{scope}[x={(1mm,0mm)},y={(0mm,1mm)}] + +\foreach \x in {0,16,...,80} { + \foreach \y in {0,5,...,95} { + \path[completetile] (\x,\y) rectangle (\x+16,\y+5); + } +} + +\path[partialtile] (96,0) rectangle (100,100); + 
+\path[axis] (0,0) -- (100+7,0) node[below](i) {j}; +\foreach \x in {0,16,...,96} { + \path[draw] (\x,0) -- ++(0,-0.1cm) node[below] {\x}; +} +\path[draw] (100,0) -- ++(0,-0.1cm) node[below,xshift=5] {100}; + +\path[axis] (0,0) -- (0,100+4) node[left](j) {i}; +\foreach \y in {0,5,...,100} {% + \path[draw] (0,\y) -- ++(-0.1cm,0) node[left]() {\y};% +} + +\end{scope} +\end{tikzpicture} +\end{document} diff --git a/introduction/Examples.tex b/introduction/Examples.tex new file mode 100644 index 0000000..df393c1 --- /dev/null +++ b/introduction/Examples.tex @@ -0,0 +1,29 @@ +\section{Examples Organization} +\label{chap:examples} +\label{sec:examples} +\index{example label} +\index{example label!omp_verno@\scode{omp_}\plc{verno}} +\index{omp_verno@\scode{omp_}\plc{verno}} + +This document includes examples of the OpenMP API directives, constructs, and routines. + +Each example is labeled as \plc{ename.seqno.ext}, where \plc{ename} is +the example name, \plc{seqno} is the sequence number in a section, and +\plc{ext} is the source file extension to indicate the code type and +source form. \plc{ext} is one of the following: +\begin{description}[noitemsep,labelindent=5mm,widest=f90] +\item[\plc{c}] -- \ C code, +\item[\plc{cpp}] -- \ C++ code, +\item[\plc{f}] -- \ Fortran code in fixed form, and +\item[\plc{f90}] -- \ Fortran code in free form. +\end{description} + +Some of the example labels may include version information +(\code{\small{}omp\_\plc{verno}}) to indicate features that are illustrated +by an example for a specific OpenMP version, such as ``\plc{scan.1.c} +\;(\code{\small{}omp\_5.0}).'' + +\ccppspecificstart +A statement following a directive is compound only when necessary, and a +non-compound statement is indented with respect to a directive preceding it. 
+\ccppspecificend diff --git a/loop_transformations/partial_tile.tex b/loop_transformations/partial_tile.tex new file mode 100644 index 0000000..c243a41 --- /dev/null +++ b/loop_transformations/partial_tile.tex @@ -0,0 +1,77 @@ +\pagebreak +\section{Incomplete Tiles} +\label{sec:incomplete_tiles} + +Optimal performance for tiled loops is achieved when the loop iteration count is a multiple of the tile size. +When this condition does not exist, the implementation is free to execute the partial loops in a manner that +optimizes performance, while preserving the specified order of iterations in the complete-tile loops. + +Figure~\ref{fig:2d_tiling} shows an example of a 2-by-2 tiling for a 5-by-5 iteration space. +There are nine resulting tiles. Four are \emph{complete} 2-by-2 tiles, and the +remaining five tiles are \plc{partial} tiles. + +\begin{figure}[H] +\begin{subfigure}[b]{.5\textwidth} +\includegraphics[width=0.8\textwidth]{figs/tile-2d_tiling} +\centering +\caption{2-dimensional tiling with partial tiles}\label{fig:2d_tiling} +\end{subfigure}% +\begin{subfigure}[b]{.5\textwidth} +\includegraphics[width=0.85\textwidth]{figs/tile-Example_tile2} +\vspace*{2mm} +\centering +\caption{Partial tiles of Example \emph{partial\_tile.1}}\label{fig:Example_tile2} +\end{subfigure} +\caption{Tiling illustrations} +\end{figure} + +In the following example, function \plc{func1} uses the \code{tile} construct +with a \code{sizes(4,16)} tiling clause. Because the second tile dimension of +16 does not evenly divide into the iteration count of the j-loop, the +iterations corresponding to the remainder for the j-loop correspond to partial +tiles as shown in Figure~\ref{fig:Example_tile2}. Each remaining function +illustrates a code implementation that a compiler may generate to implement the +\code{tile} construct in \plc{func1}. + +%Iterations with the tiles can be executed in a any order, ignoring partial tile boundaries. 
+% Deepak: I don't think this first sentence is true for iterations in a partial tile. +% Only the product order will be maintained for such iterations. +The order of tile execution relative to other tiles can be changed, but execution order of +iterations within the same tile must be preserved. +Implementations must ensure that dependencies that are valid with any tile size are +preserved (including tile size of 1 and tiles as large as the iteration space). + +Functions \plc{func2} through \plc{func6} are valid implementations of \plc{func1}. +In \splc{func2} the tiling is illustrated as a pair of nested loops with a simple +adjustment in the size of the final iteration block in the \splc{j2} iteration space +for the partial tile. + +Performance of the implementation depends on the hardware architecture, the instruction set and compiler optimization goals. +Functions \plc{func3}, \plc{func4}, and \plc{func5} have the advantage that +the innermost loop for the complete tile is a constant size and can be replaced with SIMD instructions. +If the target platform has masked SIMD instructions with no overhead, then avoiding the construction of a +remainder loop, as in \plc{func5}, might be the best option. +Another option is to use a remainder loop without tiling, as shown in \plc{func6}, to reduce control-flow overhead. + +\cexample[5.1]{partial_tile}{1} +\ffreeexample[5.1]{partial_tile}{1} + + +In the following example, function \plc{func7} tiles nested loops with a size of (4,16), +resulting in partial tiles that cover the last 4 iterations of the j-loop, as +in the previous example. However, the outer loop is parallelized with a +\code{parallel} worksharing-loop construct. + +Functions \plc{func8} and \plc{func9} illustrate two implementations of the tiling +with \code{parallel} and worksharing-loop directives. Function \plc{func8} uses a single outer loop, with a \plc{min} function +to accommodate the partial tiles.
Function \plc{func9} +uses two sets of nested loops, the first iterates over the complete tiles and the +second covers iterations from the partial tiles. When fissioning loops that +are in a \code{parallel} worksharing-loop region, each iteration of each workshared loop +must be executed on the same thread as in an un-fissioned loop. The \code{schedule(static)} clause in \plc{func7} +forces the implementation to use static scheduling and allows the fission in function \plc{func9}. +When dynamic scheduling is prescribed, fissioning is not allowed. When no scheduling is specified, +the compiler implementation will select a scheduling \plc{kind} and adhere to its restrictions. + +\cexample[5.1]{partial_tile}{2} +\ffreeexample[5.1]{partial_tile}{2} diff --git a/loop_transformations/sources/partial_tile.1.c b/loop_transformations/sources/partial_tile.1.c new file mode 100644 index 0000000..4ffa3c0 --- /dev/null +++ b/loop_transformations/sources/partial_tile.1.c @@ -0,0 +1,80 @@ +/* +* @@name: partial_tile.1 +* @@type: C +* @@compilable: yes +* @@linkable: no +* @@expect: success +* @@version: omp_5.1 +*/ +int min(int a, int b){ return (a < b)?
a : b; } + +void func1(double A[100][100]) +{ + #pragma omp tile sizes(4,16) + for (int i = 0; i < 100; ++i) + for (int j = 0; j < 100; ++j) + A[i][j] = A[i][j] + 1; +} + +void func2(double A[100][100]) +{ + for (int i1 = 0; i1 < 100; i1+=4) + for (int j1 = 0; j1 < 100; j1+=16) + for (int i2 = i1; i2 < i1+4; ++i2) + for (int j2 = j1; j2 < min(j1+16,100); ++j2) + A[i2][j2] = A[i2][j2] + 1; +} + +void func3(double A[100][100]) +{ + // complete tiles + for (int i1 = 0; i1 < 100; i1+=4) + for (int j1 = 0; j1 < 96; j1+=16) + for (int i2 = i1; i2 < i1+4; ++i2) + for (int j2 = j1; j2 < j1+16; ++j2) + A[i2][j2] = A[i2][j2] + 1; + // partial tiles / remainder + for (int i1 = 0; i1 < 100; i1+=4) + for (int i2 = i1; i2 < i1+4; ++i2) + for (int j = 96; j < 100; j+=1) + A[i2][j] = A[i2][j] + 1; +} + +void func4(double A[100][100]) +{ + for (int i1 = 0; i1 < 100; i1+=4) { + // complete tiles + for (int j1 = 0; j1 < 96; j1+=16) + for (int i2 = i1; i2 < i1+4; ++i2) + for (int j2 = j1; j2 < j1+16; ++j2) + A[i2][j2] = A[i2][j2] + 1; + // partial tiles + for (int i2 = i1; i2 < i1+4; ++i2) + for (int j = 96; j < 100; j+=1) + A[i2][j] = A[i2][j] + 1; + } +} + +void func5(double A[100][100]) +{ + for (int i1 = 0; i1 < 100; i1+=4) + for (int j1 = 0; j1 < 100; j1+=16) + for (int i2 = i1; i2 < i1+4; ++i2) + for (int j2 = j1; j2 +#include + +void my_init(double *,double *,int, double *,double *,int, \ + double *,double *,int); +void lib_saxpy(double *,double *,double,int); +void my_gather(double *,double *,int); + +#pragma omp begin declare target +void my_gpu_vxv(double *, double *, int); +#pragma omp end declare target + +#define Nhb 1024*1024 // high bandwidth +#define Nbg 1024*1024*64 // big memory, default +#define Nll 1024*1024 // low latency memory + +void test_allocate() { + + double v1[Nhb], v2[Nhb]; + double v3[Nbg], v4[Nbg]; + double v5[Nll], v6[Nll]; + +/*** CASE 1: USING ALLOCATE DIRECTIVE ***/ + #pragma omp allocate(v1,v2) allocator(omp_high_bw_mem_alloc) + #pragma omp
allocate(v3,v4) allocator(omp_default_mem_alloc) + + my_init(v1,v2,Nhb, v3,v4,Nbg, v5,v6,Nll); + + lib_saxpy(v1,v2,5.0,Nhb); + + #pragma omp target map(to: v3[0:Nbg], v4[0:Nbg]) map(from:v3[0:Nbg]) + my_gpu_vxv(v3,v4,Nbg); + +/*** CASE 2: USING ALLOCATE CLAUSE ***/ + #pragma omp task private(v5,v6) \ + allocate(allocator(omp_low_lat_mem_alloc): v5,v6) + { + my_gather(v5,v6,Nll); + } + +} diff --git a/memory_model/sources/allocators.4.f90 b/memory_model/sources/allocators.4.f90 new file mode 100644 index 0000000..48ac7fa --- /dev/null +++ b/memory_model/sources/allocators.4.f90 @@ -0,0 +1,44 @@ +! @@name: allocators.4 +! @@type: F-free +! @@compilable: yes +! @@linkable: no +! @@expect: success +! @@version: omp_5.1 +subroutine test_allocate + use omp_lib + + interface + subroutine my_gpu_vxv(va,vb,n) + !$omp declare target + integer :: n + double precision :: va(n), vb(n) + end subroutine + end interface + + integer,parameter :: Nhb=1024*1024, & !! high bandwidth + Nbg=1024*1024*64,& !! big memory, default + Nll=1024*1024 !! low latency memory + + double precision :: v1(Nhb), v2(Nhb) + double precision :: v3(Nbg), v4(Nbg) + double precision :: v5(Nll), v6(Nll) + + !*** CASE 1: USING ALLOCATE DIRECTIVE ***! + !$omp allocate(v1,v2) allocator(omp_high_bw_mem_alloc) + !$omp allocate(v3,v4) allocator(omp_default_mem_alloc) + + call my_init(v1,v2,Nhb, v3,v4,Nbg, v5,v6,Nll) + + call lib_saxpy(v1,v2,5.0,Nhb) + + !$omp target map(to: v3, v4) map(from:v3) + call my_gpu_vxv(v3,v4,Nbg) + !$omp end target + + !*** CASE 2: USING ALLOCATE CLAUSE ***! + !$omp task private(v5,v6) & + !$omp& allocate(allocator(omp_low_lat_mem_alloc): v5,v6) + call my_gather(v5,v6,Nll) + !$omp end task + +end subroutine test_allocate diff --git a/memory_model/sources/fort_race.1.f90 b/memory_model/sources/fort_race.1.f90 index ba6e4b2..de69bd5 100644 --- a/memory_model/sources/fort_race.1.f90 +++ b/memory_model/sources/fort_race.1.f90 @@ -1,4 +1,4 @@ -! @@name: fort_race.1f +! @@name: fort_race.1 !
@@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/memory_model/sources/mem_model.1.c b/memory_model/sources/mem_model.1.c index cd5e543..ff20f13 100644 --- a/memory_model/sources/mem_model.1.c +++ b/memory_model/sources/mem_model.1.c @@ -1,5 +1,5 @@ /* -* @@name: mem_model.1c +* @@name: mem_model.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/memory_model/sources/mem_model.1.f90 b/memory_model/sources/mem_model.1.f90 index 435cba9..10d1194 100644 --- a/memory_model/sources/mem_model.1.f90 +++ b/memory_model/sources/mem_model.1.f90 @@ -1,4 +1,4 @@ -! @@name: mem_model.1f +! @@name: mem_model.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/memory_model/sources/mem_model.2.c b/memory_model/sources/mem_model.2.c index 0021c29..bc35df7 100644 --- a/memory_model/sources/mem_model.2.c +++ b/memory_model/sources/mem_model.2.c @@ -1,5 +1,5 @@ /* -* @@name: mem_model.2c +* @@name: mem_model.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/memory_model/sources/mem_model.2.f b/memory_model/sources/mem_model.2.f index a323551..1108d21 100644 --- a/memory_model/sources/mem_model.2.f +++ b/memory_model/sources/mem_model.2.f @@ -1,4 +1,4 @@ -! @@name: mem_model.2f +! @@name: mem_model.2 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: yes diff --git a/memory_model/sources/mem_model.3.c b/memory_model/sources/mem_model.3.c index f5bc444..117ee56 100644 --- a/memory_model/sources/mem_model.3.c +++ b/memory_model/sources/mem_model.3.c @@ -1,5 +1,5 @@ /* -* @@name: mem_model.3c +* @@name: mem_model.3 * @@type: C * @@compilable: yes * @@linkable: yes @@ -13,51 +13,53 @@ int data0 = 0, data1 = 0; int main() { - int flag=0; + int flag=0; - #pragma omp parallel num_threads(3) - { - if(omp_get_thread_num()==0) - { - data0 = 17; - #pragma omp flush - /* Set flag to release thread 1 */ - #pragma omp atomic update - flag++; - /* Flush of flag is implied by the atomic directive */ - } - else if(omp_get_thread_num()==1) - { - int flag_val = 0; - /* Loop until we see that flag reaches 1*/ - while(flag_val < 0) - { - #pragma omp atomic read - flag_val = flag; - } - #pragma omp flush(data0) - /* data0 is 17 here */ - printf("Thread 1 awoken (data0 = %d)\n", data0); - data1 = 42; - #pragma omp flush(data1) - /* Set flag to release thread 2 */ - #pragma omp atomic update - flag++; - /* Flush of flag is implied by the atomic directive */ - } - else if(omp_get_thread_num()==2) - { - int flag_val = 0; - /* Loop until we see that flag reaches 2 */ - while(flag_val < 2) - { - #pragma omp atomic read - flag_val = flag; - } - #pragma omp flush(data0,data1) - /* there is a data race here; data0 is 17 and data1 is undefined */ - printf("Thread 2 awoken (data0 = %d, data1 = %d)\n", data0, data1); - } - } - return 0; + #pragma omp parallel num_threads(3) + { + if(omp_get_thread_num()==0) + { + data0 = 17; + #pragma omp flush + /* Set flag to release thread 1 */ + #pragma omp atomic update + flag++; + /* Flush of flag is implied by the atomic directive */ + } + else if(omp_get_thread_num()==1) + { + int flag_val = 0; + /* Loop until we see that flag reaches 1*/ + while(flag_val < 0) + { + #pragma omp atomic read + flag_val = flag; + } + #pragma omp flush(data0) + /* data0 is 17 here */ + printf("Thread 1 awoken 
(data0 = %d)\n", data0); + data1 = 42; + #pragma omp flush(data1) + /* Set flag to release thread 2 */ + #pragma omp atomic update + flag++; + /* Flush of flag is implied by the atomic directive */ + } + else if(omp_get_thread_num()==2) + { + int flag_val = 0; + /* Loop until we see that flag reaches 2 */ + while(flag_val < 2) + { + #pragma omp atomic read + flag_val = flag; + } + #pragma omp flush(data0,data1) + /* there is a data race here; + data0 is 17 and data1 is undefined */ + printf("Thread 2 awoken (data0 = %d, data1 = %d)\n", + data0, data1); + } + } + return 0; } diff --git a/memory_model/sources/mem_model.3.f b/memory_model/sources/mem_model.3.f index 9a9a375..489a4a2 100644 --- a/memory_model/sources/mem_model.3.f +++ b/memory_model/sources/mem_model.3.f @@ -1,4 +1,4 @@ -! @@name: mem_model.3f +! @@name: mem_model.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/memory_model/sources/mem_model.4a.c b/memory_model/sources/mem_model.4a.c new file mode 100644 index 0000000..d5ae340 --- /dev/null +++ b/memory_model/sources/mem_model.4a.c @@ -0,0 +1,40 @@ +/* +* @@name: mem_model.4a +* @@type: C +* @@compilable: yes +* @@linkable: no +* @@expect: failure +* @@version: omp_3.1 +*/ +#include + +void flush_incorrect() +{ + int a, b; + a = b = 0; + #pragma omp parallel num_threads(2) + { + int myid = omp_get_thread_num(); + int tmp; + + if ( myid == 0 ) { // thread 0 + #pragma omp atomic write + b = 1; + #pragma omp flush(b) // flushes are not ordered + #pragma omp flush(a) // compiler may move them around + #pragma omp atomic read + tmp = a; + } + else { // thread 1 + #pragma omp atomic write + a = 1; + #pragma omp flush(a) // flushes are not ordered + #pragma omp flush(b) // compiler may move them around + #pragma omp atomic read + tmp = b; + } + if ( tmp == 0 ) { // exclusive access not guaranteed + /* protected section */ + } + } +} diff --git a/memory_model/sources/mem_model.4a.f90 b/memory_model/sources/mem_model.4a.f90 new file mode 
100644 index 0000000..9659f70 --- /dev/null +++ b/memory_model/sources/mem_model.4a.f90 @@ -0,0 +1,36 @@ +! @@name: mem_model.4a +! @@type: F-free +! @@compilable: yes +! @@linkable: no +! @@expect: failure +! @@version: omp_3.1 +subroutine flush_incorrect + use omp_lib + implicit none + integer a, b, tmp + integer myid + + a = 0; b = 0 + !$omp parallel private(myid,tmp) num_threads(2) + myid = omp_get_thread_num() + + if ( myid == 0 ) then ! thread 0 + !$omp atomic write + b = 1 + !$omp flush(b) ! flushes are not ordered + !$omp flush(a) ! compiler may move them around + !$omp atomic read + tmp = a + else ! thread 1 + !$omp atomic write + a = 1 + !$omp flush(a) ! flushes are not ordered + !$omp flush(b) ! compiler may move them around + !$omp atomic read + tmp = b + endif + if ( tmp == 0 ) then ! exclusive access not guaranteed + !! protected section + endif + !$omp end parallel +end subroutine diff --git a/memory_model/sources/mem_model.4b.c b/memory_model/sources/mem_model.4b.c new file mode 100644 index 0000000..74245ce --- /dev/null +++ b/memory_model/sources/mem_model.4b.c @@ -0,0 +1,38 @@ +/* +* @@name: mem_model.4b +* @@type: C +* @@compilable: yes +* @@linkable: no +* @@expect: success +* @@version: omp_3.1 +*/ +#include + +void flush_correct() +{ + int a, b; + a = b = 0; + #pragma omp parallel num_threads(2) + { + int myid = omp_get_thread_num(); + int tmp; + + if ( myid == 0 ) { // thread 0 + #pragma omp atomic write + b = 1; + #pragma omp flush(a,b) // flushes are ordered + #pragma omp atomic read + tmp = a; + } + else { // thread 1 + #pragma omp atomic write + a = 1; + #pragma omp flush(a,b) // flushes are ordered + #pragma omp atomic read + tmp = b; + } + if ( tmp == 0 ) { // access by single thread + /* protected section */ + } + } +} diff --git a/memory_model/sources/mem_model.4b.f90 b/memory_model/sources/mem_model.4b.f90 new file mode 100644 index 0000000..75abf16 --- /dev/null +++ b/memory_model/sources/mem_model.4b.f90 @@ -0,0 +1,34 @@ +! 
@@name: mem_model.4b +! @@type: F-free +! @@compilable: yes +! @@linkable: no +! @@expect: success +! @@version: omp_3.1 +subroutine flush_correct + use omp_lib + implicit none + integer a, b, tmp + integer myid + + a = 0; b = 0 + !$omp parallel private(myid,tmp) num_threads(2) + myid = omp_get_thread_num() + + if ( myid == 0 ) then ! thread 0 + !$omp atomic write + b = 1 + !$omp flush(a,b) ! flushes are ordered + !$omp atomic read + tmp = a + else ! thread 1 + !$omp atomic write + a = 1 + !$omp flush(a,b) ! flushes are ordered + !$omp atomic read + tmp = b + endif + if ( tmp == 0 ) then ! access by single thread + !! protected section + endif + !$omp end parallel +end subroutine diff --git a/omp_copyright.txt b/omp_copyright.txt index 1d1e053..8f3e7cd 100644 --- a/omp_copyright.txt +++ b/omp_copyright.txt @@ -1,4 +1,4 @@ -Copyright (c) 1997-2021 OpenMP Architecture Review Board. +Copyright (c) 1997-2022 OpenMP Architecture Review Board. All rights reserved. Permission to redistribute and use without fee all or part of the source diff --git a/ompt_interface/ompt_start.tex b/ompt_interface/ompt_start.tex index 806d584..2c09c66 100644 --- a/ompt_interface/ompt_start.tex +++ b/ompt_interface/ompt_start.tex @@ -5,6 +5,10 @@ There are three steps an OpenMP implementation takes to activate a tool. This section explains how the tool and an OpenMP implementation interact to accomplish tool activation. +\index{OMPT interface!activating} +\index{OMPT interface!ompt_start_tool routine@\scode{ompt_start_tool} routine} +\index{routines!ompt_start_tool@\scode{ompt_start_tool}} +\index{ompt_start_tool routine@\scode{ompt_start_tool} routine} Step 1. \emph{Determine Whether to Initialize} \begin{adjustwidth}{2.5em}{0pt} A tool is activated by the OMPT interface when it returns a non-NULL pointer to an \code{ompt\_start\_tool\_result\_t} structure on a call to \code{ompt\_start\_tool} by the OpenMP implementation. 
@@ -13,7 +17,7 @@ There are three ways that a tool can provide a definition of \code{ompt\_start\_ (2) Introducing a dynamically linked library that includes the tool's definition of \code{ompt\_start\_tool} into the application's address space. (3) Providing the name of a dynamically linked library appropriate for the architecture -and operating system used by the application in the \code{tool-libraries-var} ICV. +and operating system used by the application in the \plc{tool-libraries-var} ICV. \end{adjustwidth} Step 2. \emph{Initializing a First-Party tool} @@ -24,6 +28,9 @@ the tool initializer specified in this structure prior to the occurrence of any \end{adjustwidth} +\index{OMPT interface!ompt_set_callback routine@\scode{ompt_set_callback} routine} +\index{routines!ompt_set_callback@\scode{ompt_set_callback}} +\index{ompt_set_callback routine@\scode{ompt_set_callback} routine} Step 3. \emph{Monitoring Activity on the Host} \begin{adjustwidth}{2.5em}{0pt} To monitor execution of an OpenMP program on the host device, a tool's initializer @@ -37,7 +44,7 @@ as \code{ompt\_set\_callback}, which has the following possible return codes: \h \code{ompt\_set\_sometimes\_paired}, \code{ompt\_set\_always}. -If the \code{ompt\_set\_callback} runtime entry point is called outside a tool’s initializer, +If the \code{ompt\_set\_callback} runtime entry point is called outside a tool's initializer, registration of supported callbacks may fail with a return code of \code{ompt\_set\_error}. All callbacks registered with \code{ompt\_set\_callback} or returned by \code{ompt\_get\_callback} use the dummy type signature \code{ompt\_callback\_t}. 
While this is a compromise, it is diff --git a/ompt_interface/sources/ompt_start.1.c b/ompt_interface/sources/ompt_start.1.c index 3cbaa67..60cd32e 100644 --- a/ompt_interface/sources/ompt_start.1.c +++ b/ompt_interface/sources/ompt_start.1.c @@ -1,5 +1,5 @@ /* -* @@name: ompt_start.1.c +* @@name: ompt_start.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/openmp-examples.tex b/openmp-examples.tex index 4503a30..ce5a64b 100644 --- a/openmp-examples.tex +++ b/openmp-examples.tex @@ -9,10 +9,8 @@ % Title_Page.tex - the title page % openmplogo.png - the logo % Forward_Chapt.tex - unnumbered introductory chapter -% Introduction_Chapt.tex - unnumbered introductory chapter -% Examples_Chapt.tex - unnumbered chapter -% Examples_Sects.tex - examples -% sources/*.c, *.f - C/C++/Fortran example source files +% Chap_*.tex - example chapters +% */sources/*.c, *.f - C/C++/Fortran example source files % % When editing this file: % @@ -49,40 +47,47 @@ \documentclass[10pt,letterpaper,twoside,makeidx,hidelinks]{scrreprt} % Text to appear in the footer on even-numbered pages: -\newcommand{\VER}{5.1} -\newcommand{\PVER}{\VER{}} -\newcommand{\VERDATE}{August 2021} +\newcommand{\VER}{5.2} % Supported Spec Version +\newcommand{\PVER}{5.2} % Examples Document Version +\newcommand{\VERDATE}{April 2022} \newcommand{\footerText}{OpenMP Examples Version \PVER{} - \VERDATE} +% input a generated file with additional definitions +\input{generated-include} + % Unified style sheet for OpenMP documents: \input{openmp.sty} +\makeindex \begin{document} \pagenumbering{roman} \input{Title_Page} + \input{Foreword_Chapt} + \thispagestyle{empty} + \newpage + \setcounter{page}{1} \setcounter{tocdepth}{2} \begin{spacing}{1.3} \tableofcontents + \clearpage + \listoffigures + \vspace*{5ex} + \listoftables \end{spacing} % Uncomment the next line to enable line numbering on the main body text: \linenumbers\pagewiselinenumbers - \input{Foreword_Chapt} - \cleardoublepage \pagenumbering{arabic} - 
\input{Introduction_Chapt} - \input{Examples_Chapt} - \input{Deprecated_Features_Chapt} - \setcounter{chapter}{0} % start chapter numbering here + \input{Chap_introduction} \input{Chap_directives} \input{Chap_parallel_execution} \input{Chap_affinity} @@ -100,7 +105,14 @@ \setcounter{chapter}{0} % restart chapter numbering with "letter A" \renewcommand{\thechapter}{\Alph{chapter}}% \appendix + \input{Deprecated_Features} \input{History} + \nolinenumbers + \clearpage + \phantomsection + \addcontentsline{toc}{chapter}{Index} + \printindex + \end{document} diff --git a/openmp-index.ist b/openmp-index.ist new file mode 100644 index 0000000..12c295b --- /dev/null +++ b/openmp-index.ist @@ -0,0 +1,4 @@ +headings_flag 1 +heading_prefix "{\\bfseries " +heading_suffix "}\\nopagebreak\n" +delim_0 ",\\penalty1000\\ " diff --git a/openmp.sty b/openmp.sty index 541a220..05e52f7 100644 --- a/openmp.sty +++ b/openmp.sty @@ -108,18 +108,33 @@ \usepackage{graphicx} \usepackage{framed} % for making boxes with \begin{framed} \usepackage{tikz} % for flow charts, diagrams, arrows - +\usepackage{subcaption} % For subfigure +\usepackage{float} % To fix location of figure: \begin{figure}[H] for no float %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Page formatting - -\usepackage[paperwidth=7.5in, paperheight=9in, - top=0.75in, bottom=1.0in, left=1.4in, right=0.6in]{geometry} - +% +% The PDF and book version need different margin spaces. The bookbuild +% macro is set by the build system (see Makefile) to determine whether +% we are building the PDF or the print-on-demand book version of the spec. 
+% +\ifdefined\bookbuild + % we are building the book version of the spec, so we need to have a bit + % more margin for the publisher to print + \usepackage[paperwidth=8in, paperheight=10in, + top=1.25in, bottom=1.5in, left=1.65in, right=0.85in]{geometry} + \setlength{\oddsidemargin}{0.875in} + \setlength{\evensidemargin}{0.385in} +\else + % we are building the PDF version of the spec, so we can use the default + % margins + \usepackage[paperwidth=7.5in, paperheight=9in, + top=0.75in, bottom=1.0in, left=1.4in, right=0.6in]{geometry} + \setlength{\oddsidemargin}{0.45in} + \setlength{\evensidemargin}{0.185in} +\fi \usepackage{changepage} % allows left/right-page margin readjustments -\setlength{\oddsidemargin}{0.45in} -\setlength{\evensidemargin}{0.185in} \raggedbottom @@ -219,6 +234,10 @@ \setlength{\cftbeforetoctitleskip}{1.0ex} \setlength{\cftaftertoctitleskip}{3.0ex} +\setlength{\cftbeforeloftitleskip}{1.0ex} +\setlength{\cftafterloftitleskip}{3.0ex} +\setlength{\cftbeforelottitleskip}{1.0ex} +\setlength{\cftafterlottitleskip}{3.0ex} \renewcommand{\cftchapaftersnum}{} \makeatletter \renewcommand{\l@section}{\@dottedtocline{1}{1.5em}{2.6em}} @@ -436,6 +455,11 @@ \newcommand{\specref}[1]{Section~\ref{#1} on page~\pageref{#1}} +% For caption for supertabular and figure, by yanyh15 +\captionsetup[table]{labelfont={sf,sc,bf},textfont=normalfont,singlelinecheck=off,labelformat=simple,labelsep=colon,aboveskip=00pt,belowskip=10pt} + +\captionsetup[figure]{labelfont={sf,sc,bf},textfont=normalfont,singlelinecheck=off,labelformat=simple,labelsep=colon} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Code example formatting for the Examples document @@ -485,7 +509,7 @@ \def\vername{\;\;(\code{\small{}omp\_\myver{#6}})} } \noindent - \textit{Example \ename}\vername + \hypertarget{ex:\cname}{\textit{Example \ename}}\vername \def\fcnt{\the\cnt} %\vspace*{-3mm} 
\code{\VerbatimInput[numbers=left,numbersep=5ex,firstnumber=1,firstline=\fcnt,fontsize=\small]% @@ -536,6 +560,17 @@ \fortranspecificend } +\newcommandx*\hexentry[4][1=c,3=]{% + \hyperlink{ex:#2.#1}{\splc{#2.#1}}% + \ifthenelse{ \equal{#3}{} }{}{,~\hyperlink{ex:#2.#3}{\plc{#3}}}% + & #4%:~\splc{same name} +} +\newcommandx*\hexmentry[5][1=c,3=]{% + \hyperlink{ex:#2.#1}{\splc{#2.#1}}% + \ifthenelse{ \equal{#3}{} }{}{,~\hyperlink{ex:#2.#3}{\plc{#3}}}% + & #4:~\splc{#5.#1}\ifthenelse{ \equal{#3}{} }{}{,~\plc{#3}} +} + % Set default fonts: \rmfamily\mdseries\upshape\normalsize diff --git a/parallel_execution/collapse.tex b/parallel_execution/collapse.tex index 7591c6b..b93c33c 100644 --- a/parallel_execution/collapse.tex +++ b/parallel_execution/collapse.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{collapse} Clause} \label{sec:collapse} +\index{clauses!collapse@\code{collapse}} +\index{collapse clause@\code{collapse} clause} In the following example, the \code{k} and \code{j} loops are associated with the loop construct. So the iterations of the \code{k} and \code{j} loops are @@ -37,6 +39,10 @@ This example prints: \code{2 3}. \fexample[3.0]{collapse}{2} +\index{clauses!collapse@\code{collapse}} +\index{collapse clause@\code{collapse} clause} +\index{clauses!ordered@\code{ordered}} +\index{ordered clause@\code{ordered} clause} The next example illustrates the interaction of the \code{collapse} and \code{ordered} clauses. @@ -44,7 +50,7 @@ In the example, the loop construct has both a \code{collapse} clause and an \cod clause. The \code{collapse} clause causes the iterations of the \code{k} and \code{j} loops to be collapsed into one loop with a larger iteration space, and that loop is divided among the threads in the current team. An \code{ordered} -clause is added to the loop construct, because an ordered region binds to the loop +clause is added to the loop construct because an ordered region binds to the loop region arising from the loop construct. 
According to Section 2.12.8 of the OpenMP 4.0 specification, @@ -77,6 +83,7 @@ The code prints \clearpage +\index{non-rectangular loop nest} The following example illustrates the collapse of a non-rectangular loop nest, a new feature in OpenMP 5.0. In a loop nest, a non-rectangular loop has a loop bound that references the iteration variable of an enclosing loop. diff --git a/parallel_execution/fort_do.tex b/parallel_execution/fort_do.tex index f8fc15e..07b80d4 100644 --- a/parallel_execution/fort_do.tex +++ b/parallel_execution/fort_do.tex @@ -1,6 +1,8 @@ \pagebreak \section{Fortran Restrictions on the \code{do} Construct} \label{sec:fort_do} +\index{constructs!do@\code{do}} +\index{do construct@\code{do} construct} \fortranspecificstart If an \code{end do} directive follows a \plc{do-construct} in which several diff --git a/parallel_execution/fpriv_sections.tex b/parallel_execution/fpriv_sections.tex index 319586e..eb864c9 100644 --- a/parallel_execution/fpriv_sections.tex +++ b/parallel_execution/fpriv_sections.tex @@ -1,6 +1,12 @@ \pagebreak \section{\code{firstprivate} Clause and \code{sections} Construct} \label{sec:fpriv_sections} +\index{constructs!sections@\code{sections}} +\index{sections construct@\code{sections} construct} +\index{constructs!section@\code{section}} +\index{section construct@\code{section} construct} +\index{clauses!firstprivate@\code{firstprivate}} +\index{firstprivate clause@\code{firstprivate} clause} In the following example of the \code{sections} construct the \code{firstprivate} clause is used to initialize the private copy of \code{section\_count} of each diff --git a/parallel_execution/get_nthrs.tex b/parallel_execution/get_nthrs.tex index e11311c..c837d8d 100644 --- a/parallel_execution/get_nthrs.tex +++ b/parallel_execution/get_nthrs.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{omp\_get\_num\_threads} Routine} \label{sec:get_nthrs} +\index{routines!omp_get_num_threads@\scode{omp_get_num_threads}} +\index{omp_get_num_threads 
routine@\scode{omp_get_num_threads} routine} In the following example, the \code{omp\_get\_num\_threads} call returns 1 in the sequential part of the code, so \code{np} will always be equal to 1. To determine diff --git a/parallel_execution/host_teams.tex b/parallel_execution/host_teams.tex index 6ff9a4f..9ddc968 100644 --- a/parallel_execution/host_teams.tex +++ b/parallel_execution/host_teams.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{teams} Construct on Host} \label{sec:host_teams} +\index{constructs!teams@\code{teams}} +\index{teams construct@\code{teams} construct} %{\color{blue} ... } {\color{violet} ... } Originally the \code{teams} construct was created for devices (such as GPUs) diff --git a/parallel_execution/linear_in_loop.tex b/parallel_execution/linear_in_loop.tex index 17d79f6..1ef0522 100644 --- a/parallel_execution/linear_in_loop.tex +++ b/parallel_execution/linear_in_loop.tex @@ -1,5 +1,7 @@ \section{\code{linear} Clause in Loop Constructs} \label{sec:linear_in_loop} +\index{clauses!linear@\code{linear}} +\index{linear clause@\code{linear} clause} The following example shows the use of the \code{linear} clause in a loop construct to allow the proper parallelization of a loop that contains diff --git a/parallel_execution/loop.tex b/parallel_execution/loop.tex index 11b6300..bc8a3fd 100644 --- a/parallel_execution/loop.tex +++ b/parallel_execution/loop.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{loop} Construct} \label{sec:loop} +\index{constructs!loop@\code{loop}} +\index{loop construct@\code{loop} construct} The following example illustrates the use of the OpenMP 5.0 \code{loop} construct for the execution of a loop. 
diff --git a/parallel_execution/masked.tex b/parallel_execution/masked.tex index f421bed..3cd14c2 100644 --- a/parallel_execution/masked.tex +++ b/parallel_execution/masked.tex @@ -1,10 +1,22 @@ \pagebreak \section{\code{masked} Construct} \label{sec:masked} +\index{constructs!masked@\code{masked}} +\index{masked construct@\code{masked} construct} +\index{masked construct@\code{masked} construct!filter clause@\code{filter} clause} +\index{clauses!filter@\code{filter}} +\index{filter clause@\code{filter} clause} -The following example demonstrates the masked construct. In the example, the primary thread -keeps track of how many iterations have been executed and prints out a progress -report. The other threads skip the \code{masked} region without waiting. +The following example demonstrates the \code{masked} construct. +In the example, the primary thread (thread number 0) +keeps track of how many iterations have been executed and prints out +a progress report in the iteration loop. +The other threads skip the \code{masked} region without waiting. +The \code{filter} clause can be used to specify a thread number other +than that of the primary thread to execute a structured block, as illustrated by +the second \code{masked} construct after the iteration loop. +If the thread specified in a \code{filter} clause does not exist +in the team, then the structured block is not executed by any thread. 
\cexample[5.1]{masked}{1} diff --git a/parallel_execution/nowait.tex b/parallel_execution/nowait.tex index ec97ab1..c9f753c 100644 --- a/parallel_execution/nowait.tex +++ b/parallel_execution/nowait.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{nowait} Clause} \label{sec:nowait} +\index{clauses!nowait@\code{nowait}} +\index{nowait clause@\code{nowait} clause} If there are multiple independent loops within a \code{parallel} region, you can use the \code{nowait} clause to avoid the implied barrier at the end of the @@ -10,6 +12,8 @@ loop construct, as follows: \fexample{nowait}{1} +\index{loop scheduling!static} +\index{static scheduling} In the following example, static scheduling distributes the same logical iteration numbers to the threads that execute the three loop regions. This allows the \code{nowait} clause to be used, even though there is a data dependence between the loops. The diff --git a/parallel_execution/nthrs_dynamic.tex b/parallel_execution/nthrs_dynamic.tex index 802674a..98fc374 100644 --- a/parallel_execution/nthrs_dynamic.tex +++ b/parallel_execution/nthrs_dynamic.tex @@ -1,6 +1,10 @@ \pagebreak \section{Interaction Between the \code{num\_threads} Clause and \code{omp\_set\_dynamic}} \label{sec:nthrs_dynamic} +\index{clauses!num_threads@\scode{num_threads}} +\index{num_threads clause@\scode{num_threads} clause} +\index{routines!omp_set_dynamic@\scode{omp_set_dynamic}} +\index{omp_set_dynamic routine@\scode{omp_set_dynamic} routine} The following example demonstrates the \code{num\_threads} clause and the effect of the \\ diff --git a/parallel_execution/nthrs_nesting.tex b/parallel_execution/nthrs_nesting.tex index 11028cf..972f886 100644 --- a/parallel_execution/nthrs_nesting.tex +++ b/parallel_execution/nthrs_nesting.tex @@ -1,6 +1,8 @@ \pagebreak \section{Controlling the Number of Threads on Multiple Nesting Levels} \label{sec:nthrs_nesting} +\index{environment variables!OMP_NUM_THREADS@\scode{OMP_NUM_THREADS}} 
+\index{OMP_NUM_THREADS@\scode{OMP_NUM_THREADS}} The following examples demonstrate how to use the \code{OMP\_NUM\_THREADS} environment variable to control the number of threads on multiple nesting levels: diff --git a/parallel_execution/order_clause.tex b/parallel_execution/order_clause.tex new file mode 100644 index 0000000..5b153b0 --- /dev/null +++ b/parallel_execution/order_clause.tex @@ -0,0 +1,37 @@ +\pagebreak +\section{Controlling Concurrency and Reproducibility with the \code{order} Clause} +\label{sec:order_clause} + +The \code{order} clause is used for controlling the parallel execution of loop +iterations for one or more loops that are associated with a directive. It is +specified with a clause argument and an optional modifier. The only supported +argument as of OpenMP 5.2 is the keyword \code{concurrent}, which indicates that +the loop iterations may execute concurrently, including iterations in the same +chunk per the loop schedule. Because of the relaxed execution permitted with an +\code{order(concurrent)} clause, codes must not assume that any cross-iteration +data dependences would be preserved or that any two iterations will execute on +the same thread. + +The first example in this section demonstrates the use of the +\code{order(concurrent)} clause, without any modifiers, for controlling the +parallel execution of loop iterations. + +\cexample[5.1]{order}{1} + +\fexample[5.1]{order}{1} + +Modifiers to the \code{order} clause may be specified to control the +reproducibility of the loop schedule for the associated loop(s). A reproducible +loop schedule will consistently yield the same mapping of iterations to threads +(or SIMD lanes) if the directive name, loop schedule, iteration space, and +binding region remain the same. The \code{reproducible} modifier indicates the +loop schedule must be reproducible, while the \code{unconstrained} modifier +indicates that the loop schedule is not reproducible. 
+ +The next example demonstrates the use of the \code{order(concurrent)} clause +with modifiers for additionally controlling the reproducibility of a loop's +schedule. + +\cexample[5.1]{order}{2} + +\fexample[5.1]{order}{2} diff --git a/parallel_execution/parallel.tex b/parallel_execution/parallel.tex index 8cdeee6..449b564 100644 --- a/parallel_execution/parallel.tex +++ b/parallel_execution/parallel.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{parallel} Construct} \label{sec:parallel} +\index{constructs!parallel@\code{parallel}} +\index{parallel construct@\code{parallel} construct} The \code{parallel} construct can be used in coarse-grain parallel programs. In the following example, each thread in the \code{parallel} region decides what diff --git a/parallel_execution/ploop.tex b/parallel_execution/ploop.tex index 6bd03d3..ca447f9 100644 --- a/parallel_execution/ploop.tex +++ b/parallel_execution/ploop.tex @@ -1,9 +1,19 @@ \pagebreak \section{A Simple Parallel Loop} \label{sec:ploop} +\index{combined constructs!parallel worksharing-loop} +\index{constructs!parallel@\code{parallel}} +\index{parallel construct@\code{parallel} construct} +\index{worksharing-loop constructs!for@\code{for}} +\index{worksharing-loop constructs!do@\code{do}} +\index{constructs!for@\code{for}} +\index{constructs!do@\code{do}} +\index{for construct@\code{for} construct} +\index{do construct@\code{do} construct} -The following example demonstrates how to parallelize a simple loop using the parallel -loop construct. The loop iteration variable is private by default, so it is not +The following example demonstrates how to parallelize a simple loop +using the parallel worksharing-loop +construct. The loop iteration variable is private by default, so it is not necessary to specify it explicitly in a \code{private} clause. 
\cexample{ploop}{1} diff --git a/parallel_execution/pra_iterator.tex b/parallel_execution/pra_iterator.tex index 1595a53..c71f9c5 100644 --- a/parallel_execution/pra_iterator.tex +++ b/parallel_execution/pra_iterator.tex @@ -2,6 +2,7 @@ \section{Parallel Random Access Iterator Loop} \cppspecificstart \label{sec:pra_iterator} +\index{random access iterator, C++} The following example shows a parallel random access iterator loop. diff --git a/parallel_execution/psections.tex b/parallel_execution/psections.tex index b5a7f43..690133a 100644 --- a/parallel_execution/psections.tex +++ b/parallel_execution/psections.tex @@ -1,11 +1,13 @@ \pagebreak \section{\code{parallel} \code{sections} Construct} \label{sec:psections} +\index{combined constructs!parallel sections@\code{parallel}~\code{sections}} +\index{parallel sections construct@\code{parallel}~\code{sections} construct} In the following example routines \code{XAXIS}, \code{YAXIS}, and \code{ZAXIS} can be executed concurrently. The first \code{section} directive is optional. Note -that all \code{section} directives need to appear in the \code{parallel sections} -construct. +that all \code{section} directives need to appear in the +\code{parallel}~\code{sections} construct. \cexample{psections}{1} diff --git a/parallel_execution/set_dynamic_nthrs.tex b/parallel_execution/set_dynamic_nthrs.tex index 9bcf686..370cbd6 100644 --- a/parallel_execution/set_dynamic_nthrs.tex +++ b/parallel_execution/set_dynamic_nthrs.tex @@ -2,6 +2,10 @@ \section{\code{omp\_set\_dynamic} and \\ \code{omp\_set\_num\_threads} Routines} \label{sec:set_dynamic_nthrs} +\index{routines!omp_set_dynamic@\scode{omp_set_dynamic}} +\index{omp_set_dynamic routine@\scode{omp_set_dynamic} routine} +\index{routines!omp_set_num_threads@\scode{omp_set_num_threads}} +\index{omp_set_num_threads routine@\scode{omp_set_num_threads} routine} Some programs rely on a fixed, prespecified number of threads to execute correctly. 
Because the default setting for the dynamic adjustment of the number of threads diff --git a/parallel_execution/single.tex b/parallel_execution/single.tex index c434e88..4605908 100644 --- a/parallel_execution/single.tex +++ b/parallel_execution/single.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{single} Construct} \label{sec:single} +\index{constructs!single@\code{single}} +\index{single construct@\code{single} construct} The following example demonstrates the \code{single} construct. In the example, only one thread prints each of the progress messages. All other threads will skip diff --git a/parallel_execution/sources/collapse.1.c b/parallel_execution/sources/collapse.1.c index 0478795..b0741eb 100644 --- a/parallel_execution/sources/collapse.1.c +++ b/parallel_execution/sources/collapse.1.c @@ -1,5 +1,5 @@ /* -* @@name: collapse.1c +* @@name: collapse.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/collapse.1.f b/parallel_execution/sources/collapse.1.f index 47faa16..f8ac7f2 100644 --- a/parallel_execution/sources/collapse.1.f +++ b/parallel_execution/sources/collapse.1.f @@ -1,4 +1,4 @@ -! @@name: collapse.1f +! @@name: collapse.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/collapse.2.c b/parallel_execution/sources/collapse.2.c index 2f63aeb..57e6ed2 100644 --- a/parallel_execution/sources/collapse.2.c +++ b/parallel_execution/sources/collapse.2.c @@ -1,5 +1,5 @@ /* -* @@name: collapse.2c +* @@name: collapse.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/collapse.2.f b/parallel_execution/sources/collapse.2.f index 1c65f76..b0e7853 100644 --- a/parallel_execution/sources/collapse.2.f +++ b/parallel_execution/sources/collapse.2.f @@ -1,4 +1,4 @@ -! @@name: collapse.2f +! @@name: collapse.2 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: yes diff --git a/parallel_execution/sources/collapse.3.c b/parallel_execution/sources/collapse.3.c index 30d5411..1d005b4 100644 --- a/parallel_execution/sources/collapse.3.c +++ b/parallel_execution/sources/collapse.3.c @@ -1,5 +1,5 @@ /* -* @@name: collapse.3c +* @@name: collapse.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/collapse.3.f b/parallel_execution/sources/collapse.3.f index b970ba0..970ba6c 100644 --- a/parallel_execution/sources/collapse.3.f +++ b/parallel_execution/sources/collapse.3.f @@ -1,4 +1,4 @@ -! @@name: collapse.3f +! @@name: collapse.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/collapse.4.c b/parallel_execution/sources/collapse.4.c index b43d525..e25134b 100644 --- a/parallel_execution/sources/collapse.4.c +++ b/parallel_execution/sources/collapse.4.c @@ -1,5 +1,5 @@ /* -* @@name: collapse.4c +* @@name: collapse.4 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/collapse.4.f90 b/parallel_execution/sources/collapse.4.f90 index c5a0653..9595067 100644 --- a/parallel_execution/sources/collapse.4.f90 +++ b/parallel_execution/sources/collapse.4.f90 @@ -1,4 +1,4 @@ -! @@name: collapse.4f +! @@name: collapse.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/fort_do.1.f b/parallel_execution/sources/fort_do.1.f index 947d820..c0c03ca 100644 --- a/parallel_execution/sources/fort_do.1.f +++ b/parallel_execution/sources/fort_do.1.f @@ -1,4 +1,4 @@ -! @@name: fort_do.1f +! @@name: fort_do.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/fort_do.2.f b/parallel_execution/sources/fort_do.2.f index e83e27b..5bd2549 100644 --- a/parallel_execution/sources/fort_do.2.f +++ b/parallel_execution/sources/fort_do.2.f @@ -1,4 +1,4 @@ -! @@name: fort_do.2f +! @@name: fort_do.2 ! @@type: F-fixed ! @@compilable: no ! 
@@linkable: no diff --git a/parallel_execution/sources/fpriv_sections.1.c b/parallel_execution/sources/fpriv_sections.1.c index a11efbc..613d944 100644 --- a/parallel_execution/sources/fpriv_sections.1.c +++ b/parallel_execution/sources/fpriv_sections.1.c @@ -1,5 +1,5 @@ /* -* @@name: fpriv_sections.1c +* @@name: fpriv_sections.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/fpriv_sections.1.f90 b/parallel_execution/sources/fpriv_sections.1.f90 index 53aa6ae..1417797 100644 --- a/parallel_execution/sources/fpriv_sections.1.f90 +++ b/parallel_execution/sources/fpriv_sections.1.f90 @@ -1,4 +1,4 @@ -! @@name: fpriv_sections.1f +! @@name: fpriv_sections.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/parallel_execution/sources/get_nthrs.1.c b/parallel_execution/sources/get_nthrs.1.c index 875d541..d763de3 100644 --- a/parallel_execution/sources/get_nthrs.1.c +++ b/parallel_execution/sources/get_nthrs.1.c @@ -1,5 +1,5 @@ /* -* @@name: get_nthrs.1c +* @@name: get_nthrs.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/get_nthrs.1.f b/parallel_execution/sources/get_nthrs.1.f index 706ab0a..675ddf2 100644 --- a/parallel_execution/sources/get_nthrs.1.f +++ b/parallel_execution/sources/get_nthrs.1.f @@ -1,4 +1,4 @@ -! @@name: get_nthrs.1f +! @@name: get_nthrs.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/parallel_execution/sources/get_nthrs.2.c b/parallel_execution/sources/get_nthrs.2.c index f4d4efb..710a813 100644 --- a/parallel_execution/sources/get_nthrs.2.c +++ b/parallel_execution/sources/get_nthrs.2.c @@ -1,5 +1,5 @@ /* -* @@name: get_nthrs.2c +* @@name: get_nthrs.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/get_nthrs.2.f b/parallel_execution/sources/get_nthrs.2.f index 913f05f..ff1aa82 100644 --- a/parallel_execution/sources/get_nthrs.2.f +++ b/parallel_execution/sources/get_nthrs.2.f @@ -1,4 +1,4 @@ -! @@name: get_nthrs.2f +! @@name: get_nthrs.2 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/host_teams.1.c b/parallel_execution/sources/host_teams.1.c index 4cb5d6d..23ca869 100644 --- a/parallel_execution/sources/host_teams.1.c +++ b/parallel_execution/sources/host_teams.1.c @@ -1,5 +1,5 @@ /* -* @@name: host_teams.2.c +* @@name: host_teams.1 * @@type: C * @@compilable: yes * @@linkable: yes @@ -17,6 +17,8 @@ int main(){ float sp_x[N], sp_y[N], sp_a=0.0001e0; double dp_x[N], dp_y[N], dp_a=0.0001e0; + max_thrds = omp_get_num_procs()/nteams_required; + // Create 2 teams, each team works in a different precision #pragma omp teams num_teams(nteams_required) \ thread_limit(max_thrds) private(tm_id) @@ -28,7 +30,7 @@ int main(){ exit(0); } - if(tm_id == 0) // Do Single Precision Work (SAXPY) with this team + if(tm_id == 0) // Do Single Precision Work (SAXPY) with this team { #pragma omp parallel { @@ -40,7 +42,7 @@ int main(){ } } - if(tm_id == 1) // Do Double Precision Work (DAXPY) with this team + if(tm_id == 1) // Do Double Precision Work (DAXPY) with this team { #pragma omp parallel { diff --git a/parallel_execution/sources/host_teams.1.f90 b/parallel_execution/sources/host_teams.1.f90 index 0ea5ac0..eb068b8 100644 --- a/parallel_execution/sources/host_teams.1.f90 +++ b/parallel_execution/sources/host_teams.1.f90 @@ -1,4 +1,4 @@ -! 
@@name: host_teams.2.f90 +! @@name: host_teams.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes @@ -12,10 +12,11 @@ program main real :: sp_x(N), sp_y(N), sp_a=0.0001e0 double precision :: dp_x(N), dp_y(N), dp_a=0.0001d0 - max_thrds = omp_get_num_procs()/nteams_required + max_thrds = omp_get_num_procs()/nteams_required !! Create 2 teams, each team works in a different precision - !$omp teams num_teams(nteams_required) thread_limit(max_thrds) private(tm_id) + !$omp teams num_teams(nteams_required) thread_limit(max_thrds) & + !$omp& private(tm_id) tm_id = omp_get_team_num() @@ -23,7 +24,8 @@ program main stop "error: Insufficient teams on host, 2 required." endif - if(tm_id == 0) then !! Do Single Precision Work (SAXPY) with this team + !! Do Single Precision Work (SAXPY) with this team + if(tm_id == 0) then !$omp parallel !$omp do !! init @@ -40,7 +42,8 @@ program main endif - if(tm_id == 1) then !! Do Double Precision Work (DAXPY) with this team + !! Do Double Precision Work (DAXPY) with this team + if(tm_id == 1) then !$omp parallel !$omp do !! init @@ -58,8 +61,10 @@ program main endif !$omp end teams - write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16 )') N, sp_x(N), dp_x(N) - write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16 )') N/2, sp_x(N/2), dp_x(N/2) + write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16 )') & + N, sp_x(N), dp_x(N) + write(*,'( "i=",i4," sp|dp= ", e15.7, d25.16 )') & + N/2, sp_x(N/2), dp_x(N/2) !! i=1000 sp|dp= 0.1000000E+04 0.1000000010000000D+04 !! 
i= 500 sp|dp= 0.5000000E+03 0.5000000050000000D+03 end program diff --git a/parallel_execution/sources/linear_in_loop.1.c b/parallel_execution/sources/linear_in_loop.1.c index 7222bab..3875e47 100644 --- a/parallel_execution/sources/linear_in_loop.1.c +++ b/parallel_execution/sources/linear_in_loop.1.c @@ -1,5 +1,5 @@ /* -* @@name: linear_in_loop.1c +* @@name: linear_in_loop.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/linear_in_loop.1.f90 b/parallel_execution/sources/linear_in_loop.1.f90 index fad40fc..92ebe9b 100644 --- a/parallel_execution/sources/linear_in_loop.1.f90 +++ b/parallel_execution/sources/linear_in_loop.1.f90 @@ -1,4 +1,4 @@ -! @@name: linear_in_loop.1f +! @@name: linear_in_loop.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/parallel_execution/sources/loop.1.c b/parallel_execution/sources/loop.1.c index 4acbc3b..4baffaf 100644 --- a/parallel_execution/sources/loop.1.c +++ b/parallel_execution/sources/loop.1.c @@ -1,5 +1,5 @@ /* -* @@name: loop.2c +* @@name: loop.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/loop.1.f90 b/parallel_execution/sources/loop.1.f90 index 97e0953..72c0ea8 100644 --- a/parallel_execution/sources/loop.1.f90 +++ b/parallel_execution/sources/loop.1.f90 @@ -1,4 +1,4 @@ -! @@name: loop.2f90 +! @@name: loop.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/parallel_execution/sources/masked.1.c b/parallel_execution/sources/masked.1.c index 41e1e38..284cc7d 100644 --- a/parallel_execution/sources/masked.1.c +++ b/parallel_execution/sources/masked.1.c @@ -1,5 +1,5 @@ /* -* @@name: masked.1c +* @@name: masked.1 * @@type: C * @@compilable: yes * @@linkable: no @@ -17,7 +17,7 @@ void masked_example( float* x, float* xold, int n, float tol ) c = 0; #pragma omp parallel { - do{ + do { #pragma omp for private(i) for( i = 1; i < n-1; ++i ){ xold[i] = x[i]; @@ -33,11 +33,18 @@ void masked_example( float* x, float* xold, int n, float tol ) error = y - x[i]; if( error > tol || error < -tol ) ++toobig; } - #pragma omp masked + #pragma omp masked // primary thread (thread 0) { ++c; printf( "iteration %d, toobig=%d\n", c, toobig ); } - }while( toobig > 0 ); + } while( toobig > 0 ); + #pragma omp barrier + #pragma omp masked filter(1) // thread 1 + { + // The printf statement will not be executed + // if the number of threads is less than 2. + printf( "total number of iterations = %d\n", c ); + } } } diff --git a/parallel_execution/sources/masked.1.f b/parallel_execution/sources/masked.1.f index ae293cc..6af4ab2 100644 --- a/parallel_execution/sources/masked.1.f +++ b/parallel_execution/sources/masked.1.f @@ -1,4 +1,4 @@ -! @@name: masked.1f +! @@name: masked.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no @@ -28,10 +28,16 @@ ERROR = Y-X(I) IF( ERROR > TOL .OR. ERROR < -TOL ) TOOBIG = TOOBIG+1 ENDDO -!$OMP MASKED +!$OMP MASKED ! primary thread (thread 0) C = C + 1 PRINT *, 'Iteration ', C, 'TOOBIG=', TOOBIG !$OMP END MASKED ENDDO +!$OMP BARRIER +!$OMP MASKED FILTER(1) ! thread 1 + ! The print statement will not be executed + ! if the number of threads is less than 2. 
+ PRINT *, 'Total number of iterations =', C +!$OMP END MASKED !$OMP END PARALLEL END SUBROUTINE MASKED_EXAMPLE diff --git a/parallel_execution/sources/nowait.1.c b/parallel_execution/sources/nowait.1.c index a907c70..b715a74 100644 --- a/parallel_execution/sources/nowait.1.c +++ b/parallel_execution/sources/nowait.1.c @@ -1,5 +1,5 @@ /* -* @@name: nowait.1c +* @@name: nowait.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/nowait.1.f b/parallel_execution/sources/nowait.1.f index 1e84a9d..f5beab0 100644 --- a/parallel_execution/sources/nowait.1.f +++ b/parallel_execution/sources/nowait.1.f @@ -1,4 +1,4 @@ -! @@name: nowait.1f +! @@name: nowait.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/nowait.2.c b/parallel_execution/sources/nowait.2.c index b390bde..3e9ab8c 100644 --- a/parallel_execution/sources/nowait.2.c +++ b/parallel_execution/sources/nowait.2.c @@ -1,5 +1,5 @@ /* -* @@name: nowait.2c +* @@name: nowait.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/nowait.2.f90 b/parallel_execution/sources/nowait.2.f90 index 0b3b1f6..7119c37 100644 --- a/parallel_execution/sources/nowait.2.f90 +++ b/parallel_execution/sources/nowait.2.f90 @@ -1,4 +1,4 @@ -! @@name: nowait.2f +! @@name: nowait.2 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/parallel_execution/sources/nthrs_dynamic.1.c b/parallel_execution/sources/nthrs_dynamic.1.c index 4c59042..bbf0314 100644 --- a/parallel_execution/sources/nthrs_dynamic.1.c +++ b/parallel_execution/sources/nthrs_dynamic.1.c @@ -1,5 +1,5 @@ /* -* @@name: nthrs_dynamic.1c +* @@name: nthrs_dynamic.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/nthrs_dynamic.1.f b/parallel_execution/sources/nthrs_dynamic.1.f index 614f9b7..b01bf04 100644 --- a/parallel_execution/sources/nthrs_dynamic.1.f +++ b/parallel_execution/sources/nthrs_dynamic.1.f @@ -1,4 +1,4 @@ -! @@name: nthrs_dynamic.1f +! @@name: nthrs_dynamic.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/parallel_execution/sources/nthrs_dynamic.2.c b/parallel_execution/sources/nthrs_dynamic.2.c index 48bd8d1..18247ba 100644 --- a/parallel_execution/sources/nthrs_dynamic.2.c +++ b/parallel_execution/sources/nthrs_dynamic.2.c @@ -1,5 +1,5 @@ /* -* @@name: nthrs_dynamic.2c +* @@name: nthrs_dynamic.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/nthrs_dynamic.2.f b/parallel_execution/sources/nthrs_dynamic.2.f index 00a3eab..d03cd82 100644 --- a/parallel_execution/sources/nthrs_dynamic.2.f +++ b/parallel_execution/sources/nthrs_dynamic.2.f @@ -1,4 +1,4 @@ -! @@name: nthrs_dynamic.2f +! @@name: nthrs_dynamic.2 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: yes diff --git a/parallel_execution/sources/nthrs_nesting.1.c b/parallel_execution/sources/nthrs_nesting.1.c index 967f252..a1fd908 100644 --- a/parallel_execution/sources/nthrs_nesting.1.c +++ b/parallel_execution/sources/nthrs_nesting.1.c @@ -1,5 +1,5 @@ /* -* @@name: nthrs_nesting.1c +* @@name: nthrs_nesting.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/nthrs_nesting.1.f b/parallel_execution/sources/nthrs_nesting.1.f index 463af89..5242675 100644 --- a/parallel_execution/sources/nthrs_nesting.1.f +++ b/parallel_execution/sources/nthrs_nesting.1.f @@ -1,4 +1,4 @@ -! @@name: nthrs_nesting.1f +! @@name: nthrs_nesting.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/parallel_execution/sources/parallel.1.c b/parallel_execution/sources/parallel.1.c index b9262a7..b459723 100644 --- a/parallel_execution/sources/parallel.1.c +++ b/parallel_execution/sources/parallel.1.c @@ -1,5 +1,5 @@ /* -* @@name: parallel.1c +* @@name: parallel.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/parallel_execution/sources/parallel.1.f b/parallel_execution/sources/parallel.1.f index 44e156b..c9ae814 100644 --- a/parallel_execution/sources/parallel.1.f +++ b/parallel_execution/sources/parallel.1.f @@ -1,4 +1,4 @@ -! @@name: parallel.1f +! @@name: parallel.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/parallel_execution/sources/ploop.1.c b/parallel_execution/sources/ploop.1.c index dde798f..cca0999 100644 --- a/parallel_execution/sources/ploop.1.c +++ b/parallel_execution/sources/ploop.1.c @@ -1,5 +1,5 @@ /* -* @@name: ploop.1c +* @@name: ploop.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/ploop.1.f b/parallel_execution/sources/ploop.1.f index 2e31c3c..7a535c6 100644 --- a/parallel_execution/sources/ploop.1.f +++ b/parallel_execution/sources/ploop.1.f @@ -1,4 +1,4 @@ -! @@name: ploop.1f +! @@name: ploop.1 ! @@type: F-fixed ! 
@@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/pra_iterator.1.cpp b/parallel_execution/sources/pra_iterator.1.cpp index 569f3b0..1b52049 100644 --- a/parallel_execution/sources/pra_iterator.1.cpp +++ b/parallel_execution/sources/pra_iterator.1.cpp @@ -1,5 +1,5 @@ /* -* @@name: pra_iterator.1c +* @@name: pra_iterator.1 * @@type: C++ * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/psections.1.c b/parallel_execution/sources/psections.1.c index 7e1a351..add6dc4 100644 --- a/parallel_execution/sources/psections.1.c +++ b/parallel_execution/sources/psections.1.c @@ -1,5 +1,5 @@ /* -* @@name: psections.1c +* @@name: psections.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/psections.1.f b/parallel_execution/sources/psections.1.f index 3b17366..ae063dd 100644 --- a/parallel_execution/sources/psections.1.f +++ b/parallel_execution/sources/psections.1.f @@ -1,4 +1,4 @@ -! @@name: psections.1f +! @@name: psections.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/set_dynamic_nthrs.1.c b/parallel_execution/sources/set_dynamic_nthrs.1.c index a22a750..ecec374 100644 --- a/parallel_execution/sources/set_dynamic_nthrs.1.c +++ b/parallel_execution/sources/set_dynamic_nthrs.1.c @@ -1,5 +1,5 @@ /* -* @@name: set_dynamic_nthrs.1c +* @@name: set_dynamic_nthrs.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/set_dynamic_nthrs.1.f b/parallel_execution/sources/set_dynamic_nthrs.1.f index 7fd3c0a..ebef98e 100644 --- a/parallel_execution/sources/set_dynamic_nthrs.1.f +++ b/parallel_execution/sources/set_dynamic_nthrs.1.f @@ -1,4 +1,4 @@ -! @@name: set_dynamic_nthrs.1f +! @@name: set_dynamic_nthrs.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/parallel_execution/sources/single.1.c b/parallel_execution/sources/single.1.c index 2169314..584e743 100644 --- a/parallel_execution/sources/single.1.c +++ b/parallel_execution/sources/single.1.c @@ -1,5 +1,5 @@ /* -* @@name: single.1c +* @@name: single.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/parallel_execution/sources/single.1.f b/parallel_execution/sources/single.1.f index 4ce6d85..45d8944 100644 --- a/parallel_execution/sources/single.1.f +++ b/parallel_execution/sources/single.1.f @@ -1,4 +1,4 @@ -! @@name: single.1f +! @@name: single.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/parallel_execution/sources/workshare.1.f b/parallel_execution/sources/workshare.1.f index c77c4de..2624352 100644 --- a/parallel_execution/sources/workshare.1.f +++ b/parallel_execution/sources/workshare.1.f @@ -1,4 +1,4 @@ -! @@name: workshare.1f +! @@name: workshare.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/workshare.2.f b/parallel_execution/sources/workshare.2.f index 76f9475..f9dcb62 100644 --- a/parallel_execution/sources/workshare.2.f +++ b/parallel_execution/sources/workshare.2.f @@ -1,4 +1,4 @@ -! @@name: workshare.2f +! @@name: workshare.2 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/workshare.3.f b/parallel_execution/sources/workshare.3.f index 3eb8334..2a4b0ae 100644 --- a/parallel_execution/sources/workshare.3.f +++ b/parallel_execution/sources/workshare.3.f @@ -1,4 +1,4 @@ -! @@name: workshare.3f +! @@name: workshare.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/workshare.4.f b/parallel_execution/sources/workshare.4.f index af945aa..2c86d44 100644 --- a/parallel_execution/sources/workshare.4.f +++ b/parallel_execution/sources/workshare.4.f @@ -1,4 +1,4 @@ -! @@name: workshare.4f +! @@name: workshare.4 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/parallel_execution/sources/workshare.5.f b/parallel_execution/sources/workshare.5.f index 612bae6..6569ee1 100644 --- a/parallel_execution/sources/workshare.5.f +++ b/parallel_execution/sources/workshare.5.f @@ -1,4 +1,4 @@ -! @@name: workshare.5f +! @@name: workshare.5 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/workshare.6.f b/parallel_execution/sources/workshare.6.f index 5b4f6dd..4a1be20 100644 --- a/parallel_execution/sources/workshare.6.f +++ b/parallel_execution/sources/workshare.6.f @@ -1,4 +1,4 @@ -! @@name: workshare.6f +! @@name: workshare.6 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/sources/workshare.7.f b/parallel_execution/sources/workshare.7.f index 51a086e..6f94fb8 100644 --- a/parallel_execution/sources/workshare.7.f +++ b/parallel_execution/sources/workshare.7.f @@ -1,4 +1,4 @@ -! @@name: workshare.7f +! @@name: workshare.7 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/parallel_execution/workshare.tex b/parallel_execution/workshare.tex index 4e15b1c..2e5af8e 100644 --- a/parallel_execution/workshare.tex +++ b/parallel_execution/workshare.tex @@ -2,6 +2,8 @@ \section{\code{workshare} Construct} \fortranspecificstart \label{sec:workshare} +\index{constructs!workshare@\code{workshare}} +\index{workshare construct@\code{workshare} construct} The following are examples of the \code{workshare} construct. 
diff --git a/program_control/cancellation.tex b/program_control/cancellation.tex index f2ee3dd..1daebf9 100644 --- a/program_control/cancellation.tex +++ b/program_control/cancellation.tex @@ -1,7 +1,12 @@ \pagebreak \section{Cancellation Constructs} \label{sec:cancellation} +\index{cancellation!cancel construct@\code{cancel} construct} +\index{constructs!cancel@\code{cancel}} +\index{cancel construct@\code{cancel} construct} +\index{cancellation!for parallel region@for \code{parallel} region} +\index{cancellation!for worksharing region} The following example shows how the \code{cancel} directive can be used to terminate an OpenMP region. Although the \code{cancel} construct terminates the OpenMP worksharing region, programmers must still track the exception through the pointer @@ -14,6 +19,9 @@ However, it is guaranteed that none of the threads executed \code{phase\_2()}. \cppexample[4.0]{cancellation}{1} +\index{cancellation!cancellation point construct@\code{cancellation}~\code{point} construct} +\index{constructs!cancellation point@\code{cancellation}~\code{point}} +\index{cancellation point construct@\code{cancellation}~\code{point} construct} The following example illustrates the use of the \code{cancel} construct in error handling. If there is an error condition from the \code{allocate} statement, the cancellation is activated. The encountering thread sets the shared variable @@ -24,6 +32,7 @@ the worksharing construct after the cancellation has been activated. \clearpage +\index{cancellation!for taskgroup region@for \code{taskgroup} region} The following example shows how to cancel a parallel search on a binary tree as soon as the search value has been detected. The code creates a task to descend into the child nodes of the current tree node. If the search value has been found, @@ -39,6 +48,6 @@ levels of the tree. The following is the equivalent parallel search example in Fortran. 
-\ffreeexample[5.1]{cancellation}{2}[1] +\ffreeexample[5.1]{cancellation}{2} diff --git a/program_control/cond_comp.tex b/program_control/cond_comp.tex index 68ad0e3..727b7f1 100644 --- a/program_control/cond_comp.tex +++ b/program_control/cond_comp.tex @@ -1,6 +1,8 @@ \pagebreak \section{Conditional Compilation} \label{sec:cond_comp} +\index{conditional compilation!_OPENMP macro@\scode{_OPENMP} macro} +\index{conditional compilation!sentinel} \ccppspecificstart The following example illustrates the use of conditional compilation using the diff --git a/program_control/icv.tex b/program_control/icv.tex index d3c004a..4802728 100644 --- a/program_control/icv.tex +++ b/program_control/icv.tex @@ -1,6 +1,7 @@ \pagebreak \section{Internal Control Variables (ICVs)} \label{sec:icv} +\index{internal control variables} According to Section 2.3 of the OpenMP 4.0 specification, an OpenMP implementation must act as if there are ICVs that control the behavior of the program. This example illustrates two ICVs, \plc{nthreads-var} diff --git a/program_control/interop.tex b/program_control/interop.tex index 6902067..f42b503 100644 --- a/program_control/interop.tex +++ b/program_control/interop.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{interop} Construct} \label{sec:interop} +\index{constructs!interop@\code{interop}} +\index{interop construct@\code{interop} construct} The \scode{interop} construct allows OpenMP to interoperate with foreign runtime environments. In the example below, asynchronous cuda memory copies and a \splc{cublasDaxpy} routine are executed @@ -8,6 +10,12 @@ in a cuda stream. Also, an asynchronous target task execution (having a \scode{n and two explicit tasks are executed through OpenMP directives. Scheduling dependences (synchronization) are imposed on the foreign stream and the OpenMP tasks through \scode{depend} clauses. 
+\index{interop construct@\code{interop} construct!init clause@\code{init} clause} +\index{init clause@\code{init} clause} +\index{clauses!init@\code{init}} +\index{interop construct@\code{interop} construct!depend clause@\code{depend} clause} +\index{depend clause@\code{depend} clause} +\index{clauses!depend@\code{depend}} First, an interop object, \splc{obj}, is initialized for synchronization by including the \scode{targetsync} \splc{interop-type} in the interop \scode{init} clause (\scode{init(}~\scode{targetsync,obj}~\scode{)}). @@ -15,10 +23,17 @@ The object provides access to the foreign runtime. The \scode{depend} clause provides a dependence behavior for foreign tasks associated with a valid object. +\index{routines!omp_get_interop_int@\scode{omp_get_interop_int}} +\index{omp_get_interop_int routine@\scode{omp_get_interop_int} routine} Next, the \scode{omp_get_interop_int} routine is used to extract the foreign runtime id (\scode{omp_ipr_fr_id}), and a test in the next statement ensures that the cuda runtime (\scode{omp_ifr_cuda}) is available. +\index{routines!omp_get_interop_ptr@\scode{omp_get_interop_ptr}} +\index{omp_get_interop_ptr routine@\scode{omp_get_interop_ptr} routine} +\index{interop construct@\code{interop} construct!destroy clause@\code{destroy} clause} +\index{destroy clause@\code{destroy} clause} +\index{clauses!destroy@\code{destroy}} Within the block for executing the \splc{cublasDaxpy} routine, a stream is acquired with the \scode{omp_get_interop_ptr} routine, which returns a cuda stream (\splc{s}). 
The stream is included in the cublas handle, and used directly in the asynchronous memory diff --git a/program_control/metadirective.tex b/program_control/metadirective.tex index efcdb45..8bbef39 100644 --- a/program_control/metadirective.tex +++ b/program_control/metadirective.tex @@ -1,24 +1,34 @@ \pagebreak \section{Metadirectives} \label{sec:metadirective} +\index{directives!metadirective@\code{metadirective}} +\index{metadirective directive@\code{metadirective} directive} +\index{metadirective directive@\code{metadirective} directive!when clause@\code{when} clause} +\index{metadirective directive@\code{metadirective} directive!otherwise clause@\code{otherwise} clause} +\index{clauses!when@\code{when}} +\index{when clause@\code{when} clause} +\index{clauses!otherwise@\code{otherwise}} +\index{otherwise clause@\code{otherwise} clause} A \code{metadirective} directive provides a mechanism to select a directive in a \code{when} clause to be used, depending upon one or more contexts: implementation, available devices and the present enclosing construct. -The directive in a \code{default} clause is used when a directive of the +The directive in an \code{otherwise} clause is used when a directive of the \code{when} clause is not selected. +\index{context selector!construct@\plc{construct}} In the \code{when} clause the \plc{context selector} (or just \plc{selector}) defines traits that are evaluated for selection of the directive that follows the selector. This "selectable" directive is called a \plc{directive variant}. Traits are grouped by \plc{construct}, \plc{implementation} and \plc{device} \plc{sets} to be used by a selector of the same name. 
+\index{context selector!device@\plc{device}} In the first example the architecture trait \plc{arch} of the -\plc{device} selector set specifies that if an \plc{nvptx} (NVIDIA) architecture is +\plc{device} selector set specifies that if an \plc{nvptx} architecture is active in the OpenMP context, then the \code{teams}~\code{loop} \plc{directive variant} is selected as the directive; otherwise, the \code{parallel}~\code{loop} -\plc{directive variant} of the \code{default} clause is selected as the directive. +\plc{directive variant} of the \code{otherwise} clause is selected as the directive. That is, if a \plc{device} of \plc{nvptx} architecture is supported by the implementation within the enclosing \code{target} construct, its \plc{directive variant} is selected. The architecture names, such as \plc{nvptx}, are implementation defined. @@ -27,14 +37,14 @@ a device number, while \plc{device}, as used in the \code{metadirective} directive as selector set, has traits of \plc{kind}, \plc{isa} and \plc{arch}. +\cexample[5.2]{metadirective}{1} -\cexample[5.0]{metadirective}{1} - -\ffreeexample[5.0]{metadirective}{1} +\ffreeexample[5.2]{metadirective}{1} %\pagebreak +\index{context selector!implementation@\plc{implementation}} In the second example, the \plc{implementation} selector set is specified -in the \code{when} clause to distinguish between AMD and NVIDIA platforms. +in the \code{when} clause to distinguish between platforms. Additionally, specific architectures are specified with the \plc{device} selector set. @@ -42,19 +52,27 @@ In the code, different \code{teams} constructs are employed as determined by the \code{metadirective} directive. The number of teams is restricted by a \code{num\_teams} clause and a thread limit is also set by a \code{thread\_limit} clause for -\plc{vendor} AMD and NVIDIA platforms and specific architecture +\plc{vendor} platforms and specific architecture traits. 
Otherwise, just the \code{teams} construct is used without -any clauses, as prescribed by the \code{default} clause. +any clauses, as prescribed by the \code{otherwise} clause. -\cexample[5.0]{metadirective}{2} +\cexample[5.2]{metadirective}{2} -\ffreeexample[5.0]{metadirective}{2} +\ffreeexample[5.2]{metadirective}{2} +\clearpage + +\index{context selector!construct@\plc{construct}} + +\index{directives!declare target@\code{declare}~\code{target}} +\index{declare target directive@\code{declare}~\code{target} directive} + +\index{directives!begin declare target@\code{begin}~\code{declare}~\code{target}} +\index{begin declare target directive@\code{begin}~\code{declare}~\code{target} directive} -%\pagebreak In the third example, a \plc{construct} selector set is specified in the \code{when} clause. Here, a \code{metadirective} directive is used within a function that is also -compiled as a function for a target device as directed by the \code{declare}~\code{target} directive. +compiled as a function for a target device as directed by a declare target directive. The \plc{target} directive name of the \code{construct} selector ensures that the \code{distribute}~\code{parallel}~\code{for/do} construct is employed for the target compilation. Otherwise, for the host-compiled version the \code{parallel}~\code{for/do}~\code{simd} construct is used. @@ -81,10 +99,12 @@ the \code{target}~\code{teams} construct has been hoisted out of the function, a as the \plc{variant} directive of the \code{metadirective} directive within the function. 
%%%%%%%% -\cexample[5.0]{metadirective}{3} +\cexample[5.2]{metadirective}{3} -\ffreeexample[5.0]{metadirective}{3} +\ffreeexample[5.2]{metadirective}{3} +\index{context selector!user@\plc{user}} +\index{context selector!condition selector@\code{condition} selector} The \code{user} selector set can be used in a metadirective to select directives at execution time when the \code{condition(}~\plc{boolean-expr}~\code{)} selector expression is not a constant expression. @@ -117,7 +137,18 @@ is used.) %``work balance'' function might be a more practical approach for setting the schedule kind. -\cexample[5.1]{metadirective}{4} +\cexample[5.2]{metadirective}{4} -\ffreeexample[5.1]{metadirective}{4} +\ffreeexample[5.2]{metadirective}{4} + +Metadirectives can be used in conjunction with templates as shown in the C++ code below. +Here the template definition generates two versions of the Fibonacci function. +The \splc{tasking} boolean is used in the \scode{condition} selector to enable tasking. +The true form implements a parallel version with \scode{task} and \scode{taskwait} +constructs as in the \splc{tasking.4.c} code in Section~\ref{sec:task_taskwait}. +The false form implements a serial version without any tasking constructs. +Note that the serial version is used in the parallel function for optimally +processing numbers less than 8. 
+ +\cppexample[5.0]{metadirective}{5} diff --git a/program_control/nested_loop.tex b/program_control/nested_loop.tex index 0131725..2fb9270 100644 --- a/program_control/nested_loop.tex +++ b/program_control/nested_loop.tex @@ -1,6 +1,7 @@ \pagebreak \section{Nested Loop Constructs} \label{sec:nested_loop} +\index{nested loop constructs} The following example of loop construct nesting is conforming because the inner and outer loop regions bind to different \code{parallel} regions: diff --git a/program_control/nesting_restrict.tex b/program_control/nesting_restrict.tex index 4502290..c276254 100644 --- a/program_control/nesting_restrict.tex +++ b/program_control/nesting_restrict.tex @@ -2,6 +2,7 @@ \section{Restrictions on Nesting of Regions} \label{sec:nesting_restrict} +\index{region nesting rules} The examples in this section illustrate the region nesting rules. The following example is non-conforming because the inner and outer loop regions diff --git a/program_control/reproducible.tex b/program_control/reproducible.tex new file mode 100644 index 0000000..1e05921 --- /dev/null +++ b/program_control/reproducible.tex @@ -0,0 +1,62 @@ +\pagebreak +\section{Controlling Concurrency and Reproducibility with +the \code{order} Clause} +\label{sec:reproducible_modifier} + +\index{clauses!order(concurrent)@\code{order(concurrent)}} +\index{order(concurrent) clause@\code{order(concurrent)} clause} + +The \code{order} clause is used for controlling the parallel execution of +loop iterations for one or more loops that are associated with a directive. +It is specified with a clause argument and optional modifier. +The only supported argument, introduced in OpenMP 5.0, is the keyword +\code{concurrent} which indicates that the loop iterations may execute +concurrently, including iterations in the same chunk per the loop schedule. 
+Because of the relaxed execution permitted with an \code{order(concurrent)} +clause, codes must not assume that any cross-iteration data dependences +would be preserved or that any two iterations may execute on the same thread. + +The following example in this section demonstrates the use of +the \code{order(concurrent)} clause, without any modifiers, for controlling +the parallel execution of loop iterations. +The \code{order(concurrent)} clause cannot be used for the second and third +\code{parallel}~\code{for}/\code{do} constructs because of either having +data dependences or accessing threadprivate variables. + +\cexample[5.0]{reproducible}{1} + +\ffreeexample[5.0]{reproducible}{1} + +\index{order(concurrent) clause@\code{order(concurrent)} clause!reproducible modifier@\code{reproducible} modifier} +\index{order(concurrent) clause@\code{order(concurrent)} clause!unconstrained modifier@\code{unconstrained} modifier} +Modifiers to the \code{order} clause, introduced in OpenMP 5.1, may be +specified to control the reproducibility of the loop schedule for +the associated loop(s). A reproducible loop schedule will consistently +yield the same mapping of iterations to threads (or SIMD lanes) if the +directive name, loop schedule, iteration space, and binding region remain +the same. The \code{reproducible} modifier indicates the loop schedule must +be reproducible, while the \code{unconstrained} modifier indicates that +the loop schedule is not reproducible. +If a modifier is not specified, then the \code{order} clause does not affect +the reproducibility of the loop schedule. + +The next example demonstrates the use of the \code{order(concurrent)} clause +with modifiers for additionally controlling the reproducibility of a loop's +schedule. 
+The two worksharing-loop constructs in the first \code{parallel} construct +specify that the loops have reproducible schedules; thus, memory effects from iteration \plc{i} from the first loop will be observable to iteration \plc{i} +in the second loop. +In the second \code{parallel} construct, the \code{order} clause does not +control reproducibility for the loop schedules. However, since both loops +specify the same static schedules, the schedules are reproducible and the +data dependences between the loops are preserved by the execution. +In the third \code{parallel} construct, the \code{order} clause indicates +that the loops are not reproducible, overriding the default reproducibility +prescribed by the specified static schedule. Consequently, +the \code{nowait} clause on the first worksharing-loop construct should not +be used, so that the data dependences are preserved by the execution. + +\cexample[5.1]{reproducible}{2} + +\ffreeexample[5.1]{reproducible}{2} + diff --git a/program_control/requires.tex b/program_control/requires.tex index a5c5276..2c8b6c3 100644 --- a/program_control/requires.tex +++ b/program_control/requires.tex @@ -1,11 +1,16 @@ \pagebreak \section{\code{requires} Directive} \label{sec:requires} +\index{directives!requires@\code{requires}} +\index{requires directive@\code{requires} directive} The declarative \code{requires} directive can be used to specify features that an implementation must provide to compile and execute correctly. 
+\index{requires directive@\code{requires} directive!unified_shared_memory clause@\scode{unified_shared_memory} clause} +\index{clauses!unified_shared_memory@\scode{unified_shared_memory}} +\index{unified_shared_memory clause@\scode{unified_shared_memory} clause} In the following example the \code{unified\_shared\_memory} clause of the \code{requires} directive ensures that the host and all devices accessible through OpenMP provide a \plc{unified address} space diff --git a/program_control/sources/cancellation.1.cpp b/program_control/sources/cancellation.1.cpp index 8d99779..47b5732 100644 --- a/program_control/sources/cancellation.1.cpp +++ b/program_control/sources/cancellation.1.cpp @@ -1,5 +1,5 @@ /* -* @@name: cancellation.1c +* @@name: cancellation.1 * @@type: C++ * @@compilable: yes * @@linkable: no @@ -30,11 +30,11 @@ void example() { // still must remember exception for later handling #pragma omp atomic write ex = e; - // cancel worksharing construct + // cancel worksharing construct #pragma omp cancel for } } - // if an exception has been raised, cancel parallel region + // if an exception has been raised, cancel parallel region if (ex) { #pragma omp cancel parallel } @@ -42,7 +42,8 @@ void example() { #pragma omp barrier phase_2(); } - // continue here if an exception has been thrown in the worksharing loop + // continue here if an exception has been thrown in + // the worksharing loop if (ex) { // handle exception stored in ex } diff --git a/program_control/sources/cancellation.1.f90 b/program_control/sources/cancellation.1.f90 index 8410b1d..c2a977f 100644 --- a/program_control/sources/cancellation.1.f90 +++ b/program_control/sources/cancellation.1.f90 @@ -1,4 +1,4 @@ -! @@name: cancellation.1f +! @@name: cancellation.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/program_control/sources/cancellation.2.c b/program_control/sources/cancellation.2.c index 2170277..575a0fa 100644 --- a/program_control/sources/cancellation.2.c +++ b/program_control/sources/cancellation.2.c @@ -1,15 +1,11 @@ /* -* @@name: cancellation.2c +* @@name: cancellation.2 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include typedef struct binary_tree_s { diff --git a/program_control/sources/cancellation.2.f90 b/program_control/sources/cancellation.2.f90 index 2934724..4b37723 100644 --- a/program_control/sources/cancellation.2.f90 +++ b/program_control/sources/cancellation.2.f90 @@ -1,14 +1,9 @@ -! @@name: cancellation.2f +! @@name: cancellation.2 ! @@type: F-free ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success ! @@version: omp_5.1 -#if _OPENMP < 202011 -#define masked master -#endif - module parallel_search type binary_tree integer :: value @@ -21,7 +16,8 @@ contains type(binary_tree), intent(in), pointer :: tree integer, intent(in) :: value, level type(binary_tree), pointer :: found - type(binary_tree), pointer :: found_left => NULL(), found_right => NULL() + type(binary_tree), pointer :: found_left => NULL(), & + found_right => NULL() if (associated(tree)) then if (tree%value .eq. value) then diff --git a/program_control/sources/cond_comp.1.c b/program_control/sources/cond_comp.1.c index fc6267e..717ef7d 100644 --- a/program_control/sources/cond_comp.1.c +++ b/program_control/sources/cond_comp.1.c @@ -1,5 +1,5 @@ /* -* @@name: cond_comp.1c +* @@name: cond_comp.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/program_control/sources/cond_comp.1.f b/program_control/sources/cond_comp.1.f index 0deaa06..7c76a18 100644 --- a/program_control/sources/cond_comp.1.f +++ b/program_control/sources/cond_comp.1.f @@ -1,4 +1,4 @@ -! @@name: cond_comp.1f +! @@name: cond_comp.1 ! 
@@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/program_control/sources/declare_variant.1.c b/program_control/sources/declare_variant.1.c index f7f457e..312f148 100644 --- a/program_control/sources/declare_variant.1.c +++ b/program_control/sources/declare_variant.1.c @@ -1,10 +1,10 @@ /* -* @@name: declare_variant.1c +* @@name: declare_variant.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.1 */ #define N 100 @@ -27,7 +27,7 @@ void p_vxv(int *v1,int *v2,int *v3,int n) // function variant for (int i= 0; i< n; i++) v3[i] = v1[i] * v2[i]*3; } -#pragma omp declare target +#pragma omp begin declare target void t_vxv(int *v1,int *v2,int *v3,int n) // function variant { #pragma omp distribute simd diff --git a/program_control/sources/declare_variant.1.f90 b/program_control/sources/declare_variant.1.f90 index 06d425f..11f26db 100644 --- a/program_control/sources/declare_variant.1.f90 +++ b/program_control/sources/declare_variant.1.f90 @@ -1,4 +1,4 @@ -! @@name: declare_variant.1f90 +! @@name: declare_variant.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/program_control/sources/declare_variant.2.c b/program_control/sources/declare_variant.2.c index 96533e3..85764ee 100644 --- a/program_control/sources/declare_variant.2.c +++ b/program_control/sources/declare_variant.2.c @@ -1,5 +1,5 @@ /* -* @@name: declare_variant.2c +* @@name: declare_variant.2 * @@type: C * @@compilable: yes * @@linkable: no @@ -13,20 +13,20 @@ void avx512_saxpy(int, float, float *, float *); #pragma omp declare variant( avx512_saxpy ) \ match( device={isa("core-avx512")} ) -void base_saxpy(int n, float s, float *x, float *y) // base function +void base_saxpy(int n, float s, float *x, float *y) // base function { #pragma omp parallel for for(int i=0; i #include @@ -45,7 +45,8 @@ int main() base_saxpy(N,s,x,y); - printf("y[0],y[N-1]: %5.0f %5.0f\n",y[0],y[N-1]); //output: y... 
3 3000 + printf("y[0],y[N-1]: %5.0f %5.0f\n",y[0],y[N-1]); + //output: y[0],y[N-1]: 3 3000 return 0; } diff --git a/program_control/sources/declare_variant.2.f90 b/program_control/sources/declare_variant.2.f90 index cdf08c8..012cc10 100644 --- a/program_control/sources/declare_variant.2.f90 +++ b/program_control/sources/declare_variant.2.f90 @@ -1,4 +1,4 @@ -! @@name: declare_variant.2f90 +! @@name: declare_variant.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/program_control/sources/display_env.1.c b/program_control/sources/display_env.1.c index 2519313..1bf5c53 100644 --- a/program_control/sources/display_env.1.c +++ b/program_control/sources/display_env.1.c @@ -1,5 +1,5 @@ /* -* @@name: display_env.1.c +* @@name: display_env.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/program_control/sources/display_env.1.f90 b/program_control/sources/display_env.1.f90 index a0f813f..89998ef 100644 --- a/program_control/sources/display_env.1.f90 +++ b/program_control/sources/display_env.1.f90 @@ -1,4 +1,4 @@ -! @@name: display_env.1.f90 +! @@name: display_env.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/program_control/sources/error.1.c b/program_control/sources/error.1.c index ce7a980..459561d 100644 --- a/program_control/sources/error.1.c +++ b/program_control/sources/error.1.c @@ -1,10 +1,10 @@ /* -* @@name: error.1c +* @@name: error.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.1 +* @@version: omp_5.2 */ #include #include @@ -12,8 +12,8 @@ int main(){ #pragma omp metadirective \ - when( implementation={vendor(gnu)}: nothing ) \ - default(error at(compilation) severity(fatal) \ + when(implementation={vendor(gnu)}: nothing ) \ + otherwise(error at(compilation) severity(fatal) \ message("GNU compiler required.")) if( omp_get_num_procs() < 3 ){ diff --git a/program_control/sources/error.1.f90 b/program_control/sources/error.1.f90 index 2772e24..0493050 100644 --- a/program_control/sources/error.1.f90 +++ b/program_control/sources/error.1.f90 @@ -1,17 +1,16 @@ -! @@name: error.1f +! @@name: error.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success -! @@version: omp_5.1 - +! @@version: omp_5.2 program main use omp_lib !$omp metadirective & -!$omp& when( implementation={vendor(gnu)}: nothing ) & -!$omp& default( error at(compilation) severity(fatal) & -!$omp& message( "GNU compiler required." ) ) +!$omp& when( implementation={vendor(gnu)}: nothing ) & +!$omp& otherwise( error at(compilation) severity(fatal) & +!$omp& message( "GNU compiler required." 
) ) if( omp_get_num_procs() < 3 ) then diff --git a/program_control/sources/get_wtime.1.c b/program_control/sources/get_wtime.1.c index d85bf95..31fede0 100644 --- a/program_control/sources/get_wtime.1.c +++ b/program_control/sources/get_wtime.1.c @@ -1,5 +1,5 @@ /* -* @@name: get_wtime.1c +* @@name: get_wtime.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/program_control/sources/get_wtime.1.f90 b/program_control/sources/get_wtime.1.f90 index 80e9580..40f1d00 100644 --- a/program_control/sources/get_wtime.1.f90 +++ b/program_control/sources/get_wtime.1.f90 @@ -1,4 +1,4 @@ -! @@name: get_wtime.1f +! @@name: get_wtime.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/program_control/sources/icv.1.c b/program_control/sources/icv.1.c index 3528a06..34aa133 100644 --- a/program_control/sources/icv.1.c +++ b/program_control/sources/icv.1.c @@ -1,5 +1,5 @@ /* -* @@name: icv.1c +* @@name: icv.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/program_control/sources/icv.1.f b/program_control/sources/icv.1.f index 247dffd..0d4fe79 100644 --- a/program_control/sources/icv.1.f +++ b/program_control/sources/icv.1.f @@ -1,4 +1,4 @@ -! @@name: icv.1f +! @@name: icv.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: yes diff --git a/program_control/sources/interop.1.c b/program_control/sources/interop.1.c index 73cf052..3d7bff5 100644 --- a/program_control/sources/interop.1.c +++ b/program_control/sources/interop.1.c @@ -1,5 +1,5 @@ /* -* @@name: interop.1c +* @@name: interop.1 * @@type: C * @@compilable: no * @@linkable: no @@ -16,14 +16,14 @@ void myVectorSet(int n, double s, double *x) { - for(int i=0; i #include @@ -21,11 +20,13 @@ int main() //Driver { #pragma omp target device(idev) #pragma omp metadirective \ - when( implementation={vendor(nvidia)}, device={arch("kepler")}: \ - teams num_teams(512) thread_limit(32) ) \ - when( implementation={vendor(amd)}, device={arch("fiji" )}: \ - teams num_teams(512) thread_limit(64) ) \ - default( \ + when( implementation={vendor(nvidia)}, \ + device={arch("kepler")}: \ + teams num_teams(512) thread_limit(32) ) \ + when( implementation={vendor(amd)}, \ + device={arch("fiji" )}: \ + teams num_teams(512) thread_limit(64) ) \ + otherwise( \ teams) #pragma omp distribute parallel for for (i=0; i #include #define N 1000 -#pragma omp declare target +#pragma omp begin declare target void exp_pi_diff(double *d, double my_pi){ #pragma omp metadirective \ - when( construct={target}: distribute parallel for ) \ - default( parallel for simd) + when( construct={target}: distribute parallel for ) \ + otherwise( parallel for simd ) for(int i = 0; i @@ -19,7 +19,8 @@ void foo(int *a, int n, bool use_gpu) when( user={condition(use_gpu)}: \ target teams distribute parallel for \ private(b) map(from:a[0:n]) ) \ - default( parallel for ) + otherwise( \ + parallel for ) for (int i=0; i + +// revised Fibonacci from tasking.4.c example + +template +int fib(int n) { + int i, j; + if (n<2) { + return n; + } else if ( tasking && n<8 ) { // serial/taskless cutoff for n<8 + return fib(n); + } else { + #pragma omp metadirective \ + when(user={condition(tasking)}: task shared(i)) + { + i=fib(n-1); + } + #pragma omp metadirective \ + 
when(user={condition(tasking)}: task shared(j)) + { + j=fib(n-2); + } + #pragma omp metadirective \ + when(user={condition(tasking)}: taskwait) + return i+j; + } +} + +int main(int argc, char** argv) { + int n = 15; + #pragma omp parallel + #pragma omp single + { + printf("fib(%i) = %i\n", n, fib(n)); + } + return 0; +} +// OUTPUT: +// fib(15) = 610 diff --git a/program_control/sources/nested_loop.1.c b/program_control/sources/nested_loop.1.c index c8419fb..7d99e06 100644 --- a/program_control/sources/nested_loop.1.c +++ b/program_control/sources/nested_loop.1.c @@ -1,5 +1,5 @@ /* -* @@name: nested_loop.1c +* @@name: nested_loop.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/program_control/sources/nested_loop.1.f b/program_control/sources/nested_loop.1.f index 1bb917e..8e63641 100644 --- a/program_control/sources/nested_loop.1.f +++ b/program_control/sources/nested_loop.1.f @@ -1,4 +1,4 @@ -! @@name: nested_loop.1f +! @@name: nested_loop.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/program_control/sources/nested_loop.2.c b/program_control/sources/nested_loop.2.c index 4a626a5..6f1aaf5 100644 --- a/program_control/sources/nested_loop.2.c +++ b/program_control/sources/nested_loop.2.c @@ -1,5 +1,5 @@ /* -* @@name: nested_loop.2c +* @@name: nested_loop.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/program_control/sources/nested_loop.2.f b/program_control/sources/nested_loop.2.f index 1a8c6b9..3b7780e 100644 --- a/program_control/sources/nested_loop.2.f +++ b/program_control/sources/nested_loop.2.f @@ -1,4 +1,4 @@ -! @@name: nested_loop.2f +! @@name: nested_loop.2 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/program_control/sources/nesting_restrict.1.c b/program_control/sources/nesting_restrict.1.c index 2d46ace..7e84326 100644 --- a/program_control/sources/nesting_restrict.1.c +++ b/program_control/sources/nesting_restrict.1.c @@ -1,5 +1,5 @@ /* -* @@name: nesting_restrict.1c +* @@name: nesting_restrict.1 * @@type: C * @@compilable: no * @@linkable: no diff --git a/program_control/sources/nesting_restrict.1.f b/program_control/sources/nesting_restrict.1.f index f0a1f0f..0280b6a 100644 --- a/program_control/sources/nesting_restrict.1.f +++ b/program_control/sources/nesting_restrict.1.f @@ -1,4 +1,4 @@ -! @@name: nesting_restrict.1f +! @@name: nesting_restrict.1 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/program_control/sources/nesting_restrict.2.c b/program_control/sources/nesting_restrict.2.c index dc31379..a3fe551 100644 --- a/program_control/sources/nesting_restrict.2.c +++ b/program_control/sources/nesting_restrict.2.c @@ -1,5 +1,5 @@ /* -* @@name: nesting_restrict.2c +* @@name: nesting_restrict.2 * @@type: C * @@compilable: maybe * @@linkable: no diff --git a/program_control/sources/nesting_restrict.2.f b/program_control/sources/nesting_restrict.2.f index 955b233..1396151 100644 --- a/program_control/sources/nesting_restrict.2.f +++ b/program_control/sources/nesting_restrict.2.f @@ -1,4 +1,4 @@ -! @@name: nesting_restrict.2f +! @@name: nesting_restrict.2 ! @@type: F-fixed ! @@compilable: maybe ! 
@@linkable: no diff --git a/program_control/sources/nesting_restrict.3.c b/program_control/sources/nesting_restrict.3.c index b99fe45..1f69e6e 100644 --- a/program_control/sources/nesting_restrict.3.c +++ b/program_control/sources/nesting_restrict.3.c @@ -1,5 +1,5 @@ /* -* @@name: nesting_restrict.3c +* @@name: nesting_restrict.3 * @@type: C * @@compilable: no * @@linkable: no diff --git a/program_control/sources/nesting_restrict.3.f b/program_control/sources/nesting_restrict.3.f index 556f708..8ff3ee4 100644 --- a/program_control/sources/nesting_restrict.3.f +++ b/program_control/sources/nesting_restrict.3.f @@ -1,4 +1,4 @@ -! @@name: nesting_restrict.3f +! @@name: nesting_restrict.3 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/program_control/sources/nesting_restrict.4.c b/program_control/sources/nesting_restrict.4.c index 45f3c35..1f08d01 100644 --- a/program_control/sources/nesting_restrict.4.c +++ b/program_control/sources/nesting_restrict.4.c @@ -1,5 +1,5 @@ /* -* @@name: nesting_restrict.4c +* @@name: nesting_restrict.4 * @@type: C * @@compilable: no * @@linkable: no diff --git a/program_control/sources/nesting_restrict.4.f b/program_control/sources/nesting_restrict.4.f index dd4a246..2720f09 100644 --- a/program_control/sources/nesting_restrict.4.f +++ b/program_control/sources/nesting_restrict.4.f @@ -1,4 +1,4 @@ -! @@name: nesting_restrict.4f +! @@name: nesting_restrict.4 ! @@type: F-fixed ! @@compilable: no ! 
@@linkable: no diff --git a/program_control/sources/nesting_restrict.5.c b/program_control/sources/nesting_restrict.5.c index 152c090..4fc6c36 100644 --- a/program_control/sources/nesting_restrict.5.c +++ b/program_control/sources/nesting_restrict.5.c @@ -1,5 +1,5 @@ /* -* @@name: nesting_restrict.5c +* @@name: nesting_restrict.5 * @@type: C * @@compilable: no * @@linkable: no diff --git a/program_control/sources/nesting_restrict.5.f b/program_control/sources/nesting_restrict.5.f index 7094064..1fc97dd 100644 --- a/program_control/sources/nesting_restrict.5.f +++ b/program_control/sources/nesting_restrict.5.f @@ -1,4 +1,4 @@ -! @@name: nesting_restrict.5f +! @@name: nesting_restrict.5 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/program_control/sources/nesting_restrict.6.c b/program_control/sources/nesting_restrict.6.c index 265555b..ca71c93 100644 --- a/program_control/sources/nesting_restrict.6.c +++ b/program_control/sources/nesting_restrict.6.c @@ -1,5 +1,5 @@ /* -* @@name: nesting_restrict.6c +* @@name: nesting_restrict.6 * @@type: C * @@compilable: no * @@linkable: no diff --git a/program_control/sources/nesting_restrict.6.f b/program_control/sources/nesting_restrict.6.f index 3912be2..3eb1cb3 100644 --- a/program_control/sources/nesting_restrict.6.f +++ b/program_control/sources/nesting_restrict.6.f @@ -1,4 +1,4 @@ -! @@name: nesting_restrict.6f +! @@name: nesting_restrict.6 ! @@type: F-fixed ! @@compilable: no ! 
@@linkable: no diff --git a/program_control/sources/reproducible.1.c b/program_control/sources/reproducible.1.c new file mode 100644 index 0000000..95e7b13 --- /dev/null +++ b/program_control/sources/reproducible.1.c @@ -0,0 +1,50 @@ +/* +* @@name: reproducible.1 +* @@type: C +* @@compilable: yes +* @@linkable: yes +* @@expect: success +* @@version: omp_5.0 +*/ +#include <stdio.h> +#include <omp.h> + +int main() +{ + const int n = 1000; + int v[n], u[n]; + static int sum; + #pragma omp threadprivate(sum) + + // no data dependences, so can execute concurrently + #pragma omp parallel for order(concurrent) + for (int i = 0; i < n; i++) { + u[i] = i; + v[i] = i; + v[i] += u[i] * u[i]; + } + + // with data dependences, so cannot execute iterations + // concurrently with the order(concurrent) clause + #pragma omp parallel for ordered + for (int i = 1; i < n; i++) { + v[i] += u[i] * u[i]; + #pragma omp ordered + v[i] += v[i-1]; + } + + sum = 0; + // accessing a threadprivate variable, which would not be + // permitted if the order(concurrent) clause was present + #pragma omp parallel for copyin(sum) + for (int i = 0; i < n; i++) { + sum += v[i]; + } + + #pragma omp parallel + { + printf("sum = %d on thread %d\n", sum, omp_get_thread_num()); + } + + return 0; +} diff --git a/program_control/sources/reproducible.1.f90 b/program_control/sources/reproducible.1.f90 new file mode 100644 index 0000000..ecbbe4f --- /dev/null +++ b/program_control/sources/reproducible.1.f90 @@ -0,0 +1,46 @@ +! @@name: reproducible.1 +! @@type: F-free +! @@compilable: yes +! @@linkable: yes +! @@expect: success +! @@version: omp_5.0 +program main + use omp_lib + implicit none + integer, parameter :: n = 1000 + integer :: v(n), u(n) + integer :: i + integer, save :: sum + !$omp threadprivate(sum) + + !! no data dependences, so can execute concurrently + !$omp parallel do order(concurrent) + do i = 1, n + u(i) = i + v(i) = i + v(i) = v(i) + u(i) * u(i) + end do + + !! 
with data dependences, so cannot execute iterations + !! concurrently with the order(concurrent) clause + !$omp parallel do ordered + do i = 2, n + v(i) = v(i) + u(i) * u(i) + !$omp ordered + v(i) = v(i) + v(i-1) + !$omp end ordered + end do + + sum = 0 + !! accessing a threadprivate variable, which would not be + !! permitted if the order(concurrent) clause was present + !$omp parallel do copyin(sum) + do i = 2, n + sum = sum + v(i) + end do + + !$omp parallel + print *,"sum = ",sum," on thread ", omp_get_thread_num() + !$omp end parallel + +end program diff --git a/program_control/sources/reproducible.2.c b/program_control/sources/reproducible.2.c new file mode 100644 index 0000000..4fd84f8 --- /dev/null +++ b/program_control/sources/reproducible.2.c @@ -0,0 +1,61 @@ +/* +* @@name: reproducible.2 +* @@type: C +* @@compilable: yes +* @@linkable: yes +* @@expect: success +* @@version: omp_5.1 +*/ +#include + +int main() +{ + const int n = 1000; + int v[n], u[n]; + + #pragma omp parallel + { + // reproducible schedules are used for the following two constructs + #pragma omp for order(reproducible: concurrent) nowait + for (int i = 0; i < n; i++) { + u[i] = i; + v[i] = i; + } + #pragma omp for order(reproducible: concurrent) + for (int i = 0; i < n; i++) { + v[i] += u[i] * u[i]; + } + } + + #pragma omp parallel + { + // static schedules preserve data dependences between the loops + #pragma omp for schedule(static) order(concurrent) nowait + for (int i = 0; i < n; i++) { + u[i] = i; + v[i] = i; + } + #pragma omp for schedule(static) order(concurrent) + for (int i = 0; i < n; i++) { + v[i] += u[i] * u[i]; + } + } + + #pragma omp parallel + { + // the default reproducibility by the static schedule is not + // preserved due to the unconstrained order clause. + // use of nowait here could result in data race. 
+ #pragma omp for schedule(static) order(unconstrained: concurrent) + for (int i = 0; i < n; i++) { + u[i] = i; + v[i] = i; + } + #pragma omp for schedule(static) order(unconstrained: concurrent) + for (int i = 0; i < n; i++) { + v[i] += u[i] * u[i]; + } + } + + return 0; +} diff --git a/program_control/sources/reproducible.2.f90 b/program_control/sources/reproducible.2.f90 new file mode 100644 index 0000000..bb9100f --- /dev/null +++ b/program_control/sources/reproducible.2.f90 @@ -0,0 +1,54 @@ +! @@name: reproducible.2 +! @@type: F-free +! @@compilable: yes +! @@linkable: yes +! @@expect: success +! @@version: omp_5.1 +program main + implicit none + integer, parameter :: n = 1000 + integer :: v(n), u(n) + integer :: i + + !$omp parallel + !! reproducible schedules are used for the following two constructs + !$omp do order(reproducible: concurrent) nowait + do i = 1, n + u(i) = i + v(i) = i + end do + !$omp do order(reproducible: concurrent) + do i = 1, n + v(i) = v(i) + u(i) * u(i) + end do + !$omp end parallel + + !$omp parallel + !! static schedules preserve data dependences between the loops + !$omp do schedule(static) order(concurrent) nowait + do i = 1, n + u(i) = i + v(i) = i + end do + !$omp do schedule(static) order(concurrent) + do i = 1, n + v(i) = v(i) + u(i) * u(i) + end do + !$omp end parallel + + !$omp parallel + !! the default reproducibility by the static schedule is not + !! preserved due to the unconstrained order clause. + !! use of nowait here could result in data race. 
+ !$omp do schedule(static) order(unconstrained: concurrent) + do i = 1, n + u(i) = i + v(i) = i + end do + !$omp do schedule(static) order(unconstrained: concurrent) + do i = 1, n + v(i) = v(i) + u(i) * u(i) + end do + !$omp end parallel + +end program diff --git a/program_control/sources/requires.1.cpp b/program_control/sources/requires.1.cpp index 31760e5..4db42fe 100644 --- a/program_control/sources/requires.1.cpp +++ b/program_control/sources/requires.1.cpp @@ -1,5 +1,5 @@ /* -* @@name: requires.1cpp +* @@name: requires.1 * @@type: C++ * @@compilable: yes * @@linkable: yes diff --git a/program_control/sources/requires.1.f90 b/program_control/sources/requires.1.f90 index e5ed110..5955693 100644 --- a/program_control/sources/requires.1.f90 +++ b/program_control/sources/requires.1.f90 @@ -1,4 +1,4 @@ -! @@name: requires.1f90 +! @@name: requires.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/program_control/sources/standalone.1.c b/program_control/sources/standalone.1.c index 24d875d..3216c78 100644 --- a/program_control/sources/standalone.1.c +++ b/program_control/sources/standalone.1.c @@ -1,5 +1,5 @@ /* -* @@name: standalone.1c +* @@name: standalone.1 * @@type: C * @@compilable: no * @@linkable: no diff --git a/program_control/sources/standalone.1.f90 b/program_control/sources/standalone.1.f90 index f994c29..fbf7b0c 100644 --- a/program_control/sources/standalone.1.f90 +++ b/program_control/sources/standalone.1.f90 @@ -1,4 +1,4 @@ -! @@name: standalone.1f +! @@name: standalone.1 ! @@type: F-free ! @@compilable: no ! 
@@linkable: no diff --git a/program_control/sources/standalone.2.c b/program_control/sources/standalone.2.c index 19f1420..837a6a6 100644 --- a/program_control/sources/standalone.2.c +++ b/program_control/sources/standalone.2.c @@ -1,5 +1,5 @@ /* -* @@name: standalone.2c +* @@name: standalone.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/program_control/sources/standalone.2.f90 b/program_control/sources/standalone.2.f90 index 3eec837..cfac4e0 100644 --- a/program_control/sources/standalone.2.f90 +++ b/program_control/sources/standalone.2.f90 @@ -1,4 +1,4 @@ -! @@name: standalone.2f +! @@name: standalone.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/program_control/sources/target_offload_control.1.c b/program_control/sources/target_offload_control.1.c index 5b7bab2..bed104a 100644 --- a/program_control/sources/target_offload_control.1.c +++ b/program_control/sources/target_offload_control.1.c @@ -1,12 +1,11 @@ /* -* @@name: target_offload_control.1c +* @@name: target_offload_control.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success * @@version: omp_5.0 */ - #include #include #include @@ -18,13 +17,13 @@ typedef enum offload_policy offload_policy_t get_offload_policy() -{ - char *env, *end; +{ + char *env, *end; size_t n; env = getenv("OMP_TARGET_OFFLOAD"); if(env == NULL) return NOTSET; - + end = env + strlen(env); //Find trimmed beginning/end while ( *env && isspace(*(env )) ) env++; while (end != env && isspace(*(end-1)) ) end--; @@ -37,43 +36,50 @@ offload_policy_t get_offload_policy() else return UNKNOWN ; } - + int main() { int i; int device_num, on_init_dev; - // get policy from OMP_TARGET_OFFLOAD variable + // get policy from OMP_TARGET_OFFLOAD variable offload_policy_t policy = get_offload_policy(); if(_OPENMP< 201811) { - printf("Warning: OMP_TARGET_OFFLOAD NOT supported by VER. 
%d\n",_OPENMP ); - printf(" If OMP_TARGET_OFFLOAD is set, it will be ignored.\n"); + printf("Warning: OMP_TARGET_OFFLOAD NOT supported, version %d\n", + _OPENMP ); + printf(" If OMP_TARGET_OFFLOAD is set, " + "it will be ignored.\n"); } - // Set target device number to an unavailable - // device to test offload policy. + // Set target device number to an unavailable + // device to test offload policy. device_num = omp_get_num_devices() + 1; - // Policy: + // Policy: printf("OMP_TARGET_OFFLOAD Policy: "); - if (policy==MANDATORY) printf("MANDATORY-Terminate if dev. not avail\n"); - else if(policy==DISABLED ) printf("DISABLED -(if supported) Only on Host\n"); - else if(policy==DEFAULT ) printf("DEFAULT -On host if device not avail\n"); - else if(policy==UNKNOWN ) printf("OMP_TARGET_OFFLOAD has unknown value\n" ); - else if(policy==NOTSET ) printf("OMP_TARGET_OFFLOAD not set\n" ); + if (policy==MANDATORY) + printf("MANDATORY-Terminate if dev. not avail\n"); + else if(policy==DISABLED ) + printf("DISABLED -(if supported) Only on Host\n"); + else if(policy==DEFAULT ) + printf("DEFAULT -On host if device not avail\n"); + else if(policy==UNKNOWN ) + printf("OMP_TARGET_OFFLOAD has unknown value\n" ); + else if(policy==NOTSET ) + printf("OMP_TARGET_OFFLOAD not set\n" ); + - on_init_dev = 1; - // device# out of range--not supported + // device# out of range--not supported #pragma omp target device(device_num) map(tofrom: on_init_dev) on_init_dev=omp_is_initial_device(); if (policy == MANDATORY && _OPENMP >= 201811) - printf("ERROR: OpenMP 5.0 implementation ignored MANDATORY policy.\n"); + printf("ERROR: OpenMP implementation ignored MANDATORY policy.\n"); - printf("Target region executed on init dev %s\n", + printf("Target region executed on init dev %s\n", on_init_dev ? 
"TRUE":"FALSE"); return 0; diff --git a/program_control/sources/target_offload_control.1.f90 b/program_control/sources/target_offload_control.1.f90 index 85cb956..6dbc3e6 100644 --- a/program_control/sources/target_offload_control.1.f90 +++ b/program_control/sources/target_offload_control.1.f90 @@ -1,11 +1,9 @@ -! @@name: target_offload_control.1f90 +! @@name: target_offload_control.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success ! @@version: omp_5.0 - - module offload_policy implicit none integer, parameter :: LEN_POLICY=10 @@ -43,38 +41,42 @@ program policy_test policy = get_offload_policy() !!Get OMP_TARGET_OFFLOAD value if (OPENMP_VERSION < 201811) then - print*,"Warning: OMP_TARGET_OFFLOAD NOT supported by VER.",OPENMP_VERSION + print*,"Warning: OMP_TARGET_OFFLOAD NOT supported by VER.", & + OPENMP_VERSION print*," If OMP_TARGET_OFFLOAD is set, it will be ignored." endif - !Set target device number to an unavailable device to test offload policy. + ! Set target device number to an unavailable device + ! to test offload policy. device_num = omp_get_num_devices() + 1 - !!Report OMP_TARGET_OFFOAD value + !! Report OMP_TARGET_OFFOAD value select CASE (policy) case("MANDATORY") - print*,"Policy: MANDATORY-Terminate if dev. not avail." + print*,"Policy: MANDATORY-Terminate if dev. not avail." case("DISABLED") - print*,"Policy: DISABLED-(if supported) Only on Host." + print*,"Policy: DISABLED-(if supported) Only on Host." case("DEFAULT") - print*,"Policy: DEFAULT On host if device not avail." + print*,"Policy: DEFAULT On host if device not avail." case("NOTSET") - print*," OMP_TARGET_OFFLOAD is not set." + print*," OMP_TARGET_OFFLOAD is not set." case DEFAULT - print*," OMP_TARGET_OFFLOAD has unknown value." - print*," UPPER CASE VALUE=",policy + print*," OMP_TARGET_OFFLOAD has unknown value." + print*," UPPER CASE VALUE=",policy end select on_init_dev = .FALSE. - !! device# out of range--not supported + !! 
device# out of range--not supported !$omp target device(device_num) map(tofrom: on_init_dev) on_init_dev=omp_is_initial_device() !$omp end target if (policy=="MANDATORY" .and. OPENMP_VERSION>=201811) then - print*,"OMP ERROR: OpenMP 5.0 implementation ignored MANDATORY policy." - print*," Termination should have occurred at target directive." + print*,"OMP ERROR: ", & + "OpenMP 5.0 implementation ignored MANDATORY policy." + print*," Termination should have occurred", & + " at target directive." endif print*, "Target executed on init dev (T|F): ", on_init_dev diff --git a/program_control/standalone.tex b/program_control/standalone.tex index 4b6a60b..fb89ea2 100644 --- a/program_control/standalone.tex +++ b/program_control/standalone.tex @@ -2,6 +2,15 @@ \section{Placement of \code{flush}, \code{barrier}, \code{taskwait} and \code{taskyield} Directives} \label{sec:standalone} +\index{standalone directive placement} +\index{constructs!flush@\code{flush}} +\index{constructs!barrier@\code{barrier}} +\index{constructs!taskwait@\code{taskwait}} +\index{constructs!taskyield@\code{taskyield}} +\index{flush construct@\code{flush} construct} +\index{barrier construct@\code{barrier} construct} +\index{taskwait construct@\code{taskwait} construct} +\index{taskyield construct@\code{taskyield} construct} The following example is non-conforming, because the \code{flush}, \code{barrier}, \code{taskwait}, and \code{taskyield} directives are stand-alone directives diff --git a/program_control/target_offload.tex b/program_control/target_offload.tex index 995f300..b589f7e 100644 --- a/program_control/target_offload.tex +++ b/program_control/target_offload.tex @@ -1,6 +1,8 @@ \pagebreak \section{Target Offload} \label{sec:target_offload} +\index{environment variables!OMP_TARGET_OFFLOAD@\scode{OMP_TARGET_OFFLOAD}} +\index{OMP_TARGET_OFFLOAD@\scode{OMP_TARGET_OFFLOAD}} In the OpenMP 5.0 implementation the \code{OMP\_TARGET\_OFFLOAD} environment variable was defined to change 
\plc{default} offload behavior. diff --git a/program_control/utilities.tex b/program_control/utilities.tex index 16752ff..6d3b65c 100644 --- a/program_control/utilities.tex +++ b/program_control/utilities.tex @@ -6,6 +6,10 @@ This section contains examples of utility routines and features. %--------------------------- \subsection{Timing Routines} \label{subsec:get_wtime} +\index{routines!omp_get_wtime@\scode{omp_get_wtime}} +\index{omp_get_wtime routine@\scode{omp_get_wtime} routine} +\index{routines!omp_get_wtick@\scode{omp_get_wtick}} +\index{omp_get_wtick routine@\scode{omp_get_wtick} routine} The \scode{omp_get_wtime} routine can be used to measure the elapsed wall clock time (in seconds) of code execution in a program. @@ -21,6 +25,12 @@ the \scode{omp_get_wtick} routine. The following example shows a use case. %--------------------------- \subsection{Environment Display} \label{subsec:display_env} +\index{environment display!OMP_DISPLAY_ENV@\scode{OMP_DISPLAY_ENV}} +\index{environment variables!OMP_DISPLAY_ENV@\scode{OMP_DISPLAY_ENV}} +\index{OMP_DISPLAY_ENV@\scode{OMP_DISPLAY_ENV}} +\index{environment display!omp_display_env routine@\scode{omp_display_env} routine} +\index{routines!omp_display_env@\scode{omp_display_env}} +\index{omp_display_env routine@\scode{omp_display_env} routine} The OpenMP version number and the values of ICVs associated with the relevant environment variables can be displayed at runtime by setting @@ -50,7 +60,6 @@ setting the \scode{OMP_DISPLAY_ENV} to \code{TRUE} or \code{VERBOSE}. 
\ffreeexample[5.1]{display_env}{1} \clearpage -\emph{Note}: A sample output from the execution of the code might look like: {\small\begin{verbatim} OPENMP DISPLAY ENVIRONMENT BEGIN @@ -81,6 +90,14 @@ A sample output from the execution of the code might look like: %--------------------------- \subsection{\code{error} Directive} \label{subsec:error} +\index{directives!error@\code{error}} +\index{error directive@\code{error} directive} +\index{error directive@\code{error} directive!at clause@\code{at} clause} +\index{clauses!at@\code{at}} +\index{at clause@\code{at} clause} +\index{error directive@\code{error} directive!severity clause@\code{severity} clause} +\index{clauses!severity@\code{severity}} +\index{severity clause@\code{severity} clause} The \code{error} directive provides a consistent method for C, C++, and Fortran to emit a \plc{fatal} or \plc{warning} message at \plc{compilation} or \plc{execution} time, as determined by a \code{severity} @@ -90,7 +107,7 @@ and \code{severity(fatal)} were specified. The C, C++, and Fortran examples below show all the cases for reporting messages. 
-\cexample[5.1]{error}{1} -\ffreeexample[5.1]{error}{1} +\cexample[5.2]{error}{1} +\ffreeexample[5.2]{error}{1} diff --git a/program_control/variant.tex b/program_control/variant.tex index 004f6bb..31d7542 100644 --- a/program_control/variant.tex +++ b/program_control/variant.tex @@ -1,6 +1,17 @@ \pagebreak \section{\code{declare}~\code{variant} Directive} \label{sec:declare_variant} +\index{directives!declare variant@\code{declare}~\code{variant}} +\index{declare variant directive@\code{declare}~\code{variant} directive} +\index{declare variant directive@\code{declare}~\code{variant} directive!match clause@\code{match} clause} +\index{clauses!match@\code{match}} +\index{match clause@\code{match} clause} + +\index{directives!declare target@\code{declare}~\code{target}} +\index{declare target directive@\code{declare}~\code{target} directive} + +\index{directives!begin declare target@\code{begin}~\code{declare}~\code{target}} +\index{begin declare target directive@\code{begin}~\code{declare}~\code{target} directive} %A \code{declare variant} directive specifies that the following function is an alternate function, %a \plc{function variant}, to be used in place of the specified \plc{base function} @@ -30,7 +41,7 @@ are defined for the first two regions by using \plc{parallel} and \plc{target} s the \plc{construct} trait set) in a \code{match} clause. The \plc{p\_vxv()} function variant includes a \code{for} construct (\code{do} construct for Fortran) for the \code{parallel} region, while \plc{t\_vxv()} includes a \code{distribute}~\code{simd} construct for the \code{target} region. -The \plc{t\_vxv()} function is explicitly compiled for the device using a \code{declare}~\code{target} directive. +The \plc{t\_vxv()} function is explicitly compiled for the device using a declare target directive. 
Since the two \code{declare}~\code{variant} directives have no selectors that match traits for the context of the base function call in the sequential part of the program, the base \plc{vxv()} function is used there, @@ -46,7 +57,7 @@ the purpose of a function variant is to produce the same results by a different %\code{teams distribute simd} in the variant function would produce non conforming code. %\pagebreak -\cexample[5.0]{declare_variant}{1} +\cexample[5.1]{declare_variant}{1} \ffreeexample[5.0]{declare_variant}{1} diff --git a/sources/README b/sources/README index 193c0c8..5310289 100644 --- a/sources/README +++ b/sources/README @@ -5,8 +5,10 @@ under the corresponding chapter directory: ../affinity/sources ../data_environment/sources ../devices/sources +../directives/sources ../loop_transformations/sources ../memory_model/sources +../ompt_interface/sources ../parallel_execution/sources ../program_control/sources ../synchronization/sources diff --git a/sources/check_tags b/sources/check_tags new file mode 100755 index 0000000..3bc6ee3 --- /dev/null +++ b/sources/check_tags @@ -0,0 +1,14 @@ +#!/bin/bash + +# call ../util/chk_tags.x to check tags in source codes + +BASE_DIR=$(dirname $0) +UTIL_DIR=${BASE_DIR}/../util +chk_tags=$UTIL_DIR/chk_tags.x +CURR_DIR=$(pwd) + +cd $UTIL_DIR; make chk_tags.x; cd $CURR_DIR + +echo "Running $chk_tags ..." 
+$chk_tags ${BASE_DIR}/../*/sources/* 2>&1 | tee -a chk_tags.log + diff --git a/synchronization/acquire_release.tex b/synchronization/acquire_release.tex index 3f91622..0adab37 100644 --- a/synchronization/acquire_release.tex +++ b/synchronization/acquire_release.tex @@ -18,6 +18,20 @@ %%%%%%%%%%%%%%%%%% +\index{flushes!acquire} +\index{flushes!release} +\index{clauses!memory ordering clauses} +\index{memory ordering clauses!acquire@\code{acquire}} +\index{acquire clause@\code{acquire} clause} +\index{memory ordering clauses!release@\code{release}} +\index{release clause@\code{release} clause} +\index{memory ordering clauses!acq_rel@\scode{acq_rel}} +\index{acq_rel clause@\scode{acq_rel} clause} +\index{flush construct@\code{flush} construct} +\index{atomic construct@\code{atomic} construct} +\index{clauses!acquire@\code{acquire}} +\index{clauses!release@\code{release}} +\index{clauses!acq_rel@\scode{acq_rel}} As explained in the Memory Model chapter of this document, a flush operation may be an \emph{acquire flush} and/or a \emph{release flush}, and OpenMP 5.0 defines acquire/release semantics in terms of these fundamental flush @@ -45,6 +59,9 @@ acquire flush and release flush, respectively. %%%%%%%%%%%%%%%%%% +\index{constructs!critical@\code{critical}} +\index{critical construct@\code{critical} construct} +\index{flushes!implicit} The first example illustrates how the release and acquire flushes implied by a \code{critical} region guarantee a value written by the first thread is visible to a read of the value on the second thread. Thread 0 writes to \plc{x} and @@ -56,8 +73,8 @@ consistent with the program order of the thread. Meanwhile, thread 1 executes a \plc{x}; again, the execution of the \code{critical} regions happen before the read from \plc{x} based on the program order of the thread. 
The \code{critical} regions executed by the two threads execute in a serial manner, with a -pair-wise synchronization from the exit of one \code{critical} region to the -entry to the next \code{critical} region. These pair-wise synchronizations +pairwise synchronization from the exit of one \code{critical} region to the +entry to the next \code{critical} region. These pairwise synchronizations result from the implicit release flushes that occur on exit from \code{critical} regions and the implicit acquire flushes that occur on entry to \code{critical} regions; hence, the execution of each \code{critical} region in @@ -70,6 +87,18 @@ by thread 0 and the read from \plc{x} by thread 1, and so thread 1 must see that \cexample[5.0]{acquire_release}{1} \ffreeexample[5.0]{acquire_release}{1} +\index{constructs!atomic@\code{atomic}} +\index{atomic construct@\code{atomic} construct} +\index{atomic construct@\code{atomic} construct!write clause@\code{write} clause} +\index{atomic construct@\code{atomic} construct!read clause@\code{read} clause} +\index{atomic construct@\code{atomic} construct!memory ordering clauses} +\index{write clause@\code{write} clause} +\index{read clause@\code{read} clause} +\index{clauses!write@\code{write}} +\index{clauses!read@\code{read}} +\index{memory ordering clauses!seq_cst@\scode{seq_cst}} +\index{seq_cst clause@\scode{seq_cst} clause} +\index{clauses!seq_cst@\scode{seq_cst}} In the second example, the \code{critical} constructs are exchanged with \code{atomic} constructs that have \textit{explicit} memory ordering specified. 
When the atomic read operation on thread 1 reads a non-zero value from \plc{y}, this @@ -81,6 +110,9 @@ assignment to \plc{x} on thread 0 happens before the read of \plc{x} on thread \ffreeexample[5.0]{acquire_release}{2} \pagebreak +\index{constructs!atomic@\code{atomic}} +\index{atomic construct@\code{atomic} construct!relaxed atomic operations} +\index{flush construct@\code{flush} construct} In the third example, \code{atomic} constructs that specify relaxed atomic operations are used with explicit \code{flush} directives to enforce memory ordering between the two threads. The explicit \code{flush} directive on thread diff --git a/synchronization/atomic.tex b/synchronization/atomic.tex index 1449439..e373dbc 100644 --- a/synchronization/atomic.tex +++ b/synchronization/atomic.tex @@ -1,6 +1,11 @@ \pagebreak \section{\code{atomic} Construct} \label{sec:atomic} +\index{constructs!atomic@\code{atomic}} +\index{atomic construct@\code{atomic} construct} +\index{atomic construct@\code{atomic} construct!update clause@\code{update} clause} +\index{clauses!update@\code{update}} +\index{update clause@\code{update} clause} The following example avoids race conditions (simultaneous updates of an element of \plc{x} by multiple threads) by using the \code{atomic} construct . @@ -18,6 +23,12 @@ this example. \fexample[3.1]{atomic}{1} +\index{atomic construct@\code{atomic} construct!write clause@\code{write} clause} +\index{atomic construct@\code{atomic} construct!read clause@\code{read} clause} +\index{write clause@\code{write} clause} +\index{clauses!write@\code{write}} +\index{read clause@\code{read} clause} +\index{clauses!read@\code{read}} The following example illustrates the \code{read} and \code{write} clauses for the \code{atomic} directive. These clauses ensure that the given variable is read or written, respectively, as a whole. Otherwise, some other thread might @@ -30,9 +41,12 @@ for all the variable types supported by the OpenMP API. 
\fexample[3.1]{atomic}{2} +\index{atomic construct@\code{atomic} construct!capture clause@\code{capture} clause} +\index{capture clause@\code{capture} clause} +\index{clauses!capture@\code{capture}} The following example illustrates the \code{capture} clause for the \code{atomic} directive. In this case the value of a variable is captured, and then the variable -is incremented. These operations occur atomically. This particular example could +is incremented. These operations occur atomically. This example could be implemented using the fetch-and-add instruction available on many kinds of hardware. The example also shows a way to implement a spin lock using the \code{capture} and \code{read} clauses. diff --git a/synchronization/atomic_restrict.tex b/synchronization/atomic_restrict.tex index 93080da..c5d6f9c 100644 --- a/synchronization/atomic_restrict.tex +++ b/synchronization/atomic_restrict.tex @@ -1,6 +1,8 @@ \pagebreak \section{Restrictions on the \code{atomic} Construct} \label{sec:atomic_restrict} +\index{constructs!atomic@\code{atomic}} +\index{atomic construct@\code{atomic} construct} The following non-conforming examples illustrate the restrictions on the \code{atomic} construct. diff --git a/synchronization/barrier_regions.tex b/synchronization/barrier_regions.tex index 8cd2b5c..a975d38 100644 --- a/synchronization/barrier_regions.tex +++ b/synchronization/barrier_regions.tex @@ -1,6 +1,7 @@ \pagebreak \section{Binding of \code{barrier} Regions} \label{sec:barrier_regions} +\index{binding!barrier regions@\code{barrier} regions} The binding rules call for a \code{barrier} region to bind to the closest enclosing \code{parallel} region. 
diff --git a/synchronization/critical.tex b/synchronization/critical.tex index fdee1c0..cd6fc7b 100644 --- a/synchronization/critical.tex +++ b/synchronization/critical.tex @@ -1,6 +1,11 @@ \pagebreak \section{\code{critical} Construct} \label{sec:critical} +\index{constructs!critical@\code{critical}} +\index{critical construct@\code{critical} construct} +\index{critical construct@\code{critical} construct!hint clause@\code{hint} clause} +\index{clauses!hint@\code{hint}} +\index{hint clause@\code{hint} clause} The following example includes several \code{critical} constructs. The example illustrates a queuing model in which a task is dequeued and worked on. To guard @@ -15,6 +20,6 @@ and \plc{yaxis}. The following example extends the previous example by adding the \code{hint} clause to the \code{critical} constructs. -\cexample{critical}{2} +\cexample[5.0]{critical}{2} -\fexample[4.5]{critical}{2}[1] +\fexample[5.0]{critical}{2} diff --git a/synchronization/depobj.tex b/synchronization/depobj.tex index e16c36e..7b8183a 100644 --- a/synchronization/depobj.tex +++ b/synchronization/depobj.tex @@ -1,6 +1,11 @@ \pagebreak \section{\code{depobj} Construct} \label{sec:depobj} +\index{constructs!depobj@\code{depobj}} +\index{depobj construct@\code{depobj} construct} +\index{depobj construct@\code{depobj} construct!depend clause@\code{depend} clause} +\index{depend clause@\code{depend} clause} +\index{clauses!depend@\code{depend}} The stand-alone \code{depobj} construct provides a mechanism to create a \plc{depend object} that expresses a dependence to be @@ -12,6 +17,12 @@ and it is stored in the depend object. The depend object is represented by a variable of type \code{omp\_depend\_t} in C/C++ (by a scalar variable of integer kind \code{omp\_depend\_kind} in Fortran). 
+\index{depobj construct@\code{depobj} construct!update clause@\code{update} clause} +\index{update clause@\code{update} clause} +\index{clauses!update@\code{update}} +\index{depobj construct@\code{depobj} construct!destroy clause@\code{destroy} clause} +\index{destroy clause@\code{destroy} clause} +\index{clauses!destroy@\code{destroy}} In the example below the stand-alone \code{depobj} construct uses the \code{depend}, \code{update} and \code{destroy} clauses to \plc{initialize}, \plc{update} and \plc{uninitialize} @@ -44,6 +55,6 @@ effectively destroying the depend object. After an object has been uninitialized it can be initialized again with a new dependence type \emph{and} a new variable. -\cexample[5.0]{depobj}{1} +\cexample[5.2]{depobj}{1} -\ffreeexample[5.0]{depobj}{1} +\ffreeexample[5.2]{depobj}{1} diff --git a/synchronization/doacross.tex b/synchronization/doacross.tex index 30925b1..7abe1ff 100644 --- a/synchronization/doacross.tex +++ b/synchronization/doacross.tex @@ -1,27 +1,37 @@ \pagebreak \section{Doacross Loop Nest} \label{sec:doacross} +\index{dependences!doacross loop nest} +\index{doacross loop nest!ordered construct@\code{ordered} construct} +\index{ordered construct@\code{ordered} construct!doacross loop nest} +\index{doacross loop nest!doacross clause@\code{doacross} clause} +\index{constructs!ordered@\code{ordered}} +\index{clauses!doacross@\code{doacross}} +\index{doacross clause@\code{doacross} clause} An \code{ordered} clause can be used on a loop construct with an integer parameter argument to define the number of associated loops within a \plc{doacross loop nest} where cross-iteration dependences exist. -A \code{depend} clause on an \code{ordered} construct within an ordered +A \code{doacross} clause on an \code{ordered} construct within an ordered loop describes the dependences of the \plc{doacross} loops. 
-In the code below, the \code{depend(sink:i-1)} clause defines an \plc{i-1} +In the code below, the \code{doacross(sink:i-1)} clause defines an \plc{i-1} to \plc{i} cross-iteration dependence that specifies a wait point for the completion of computation from iteration \plc{i-1} before proceeding -to the subsequent statements. The \code{depend(source)} clause indicates +to the subsequent statements. The \scode{doacross(source:omp_cur_iteration)} +or \scode{doacross(source:)} clause indicates the completion of computation from the current iteration (\plc{i}) to satisfy the cross-iteration dependence that arises from the iteration. +The \scode{omp_cur_iteration} keyword is optional for the \scode{source} +dependence type. For this example the same sequential ordering could have been achieved with an \code{ordered} clause without a parameter, on the loop directive, -and a single \code{ordered} directive without the \code{depend} clause +and a single \code{ordered} directive without the \code{doacross} clause specified for the statement executing the \plc{bar} function. -\cexample[4.5]{doacross}{1} +\cexample[5.2]{doacross}{1} -\ffreeexample[4.5]{doacross}{1} +\ffreeexample[5.2]{doacross}{1} The following code is similar to the previous example but with \plc{doacross loop nest} extended to two nested loops, \plc{i} and \plc{j}, @@ -30,30 +40,30 @@ In the C/C++ code, the \plc{i} and \plc{j} loops are the first and second associated loops, respectively, whereas in the Fortran code, the \plc{j} and \plc{i} loops are the first and second associated loops, respectively. -The \code{depend(sink:i-1,j)} and \code{depend(sink:i,j-1)} clauses in +The \code{doacross(sink:i-1,j)} and \code{doacross(sink:i,j-1)} clauses in the C/C++ code define cross-iteration dependences in two dimensions from iterations (\plc{i-1, j}) and (\plc{i, j-1}) to iteration (\plc{i, j}). 
-Likewise, the \code{depend(sink:j-1,i)} and \code{depend(sink:j,i-1)} clauses +Likewise, the \code{doacross(sink:j-1,i)} and \code{doacross(sink:j,i-1)} clauses in the Fortran code define cross-iteration dependences from iterations (\plc{j-1, i}) and (\plc{j, i-1}) to iteration (\plc{j, i}). -\cexample[4.5]{doacross}{2} +\cexample[5.2]{doacross}{2} -\ffreeexample[4.5]{doacross}{2} +\ffreeexample[5.2]{doacross}{2} The following example shows the incorrect use of the \code{ordered} -directive with a \code{depend} clause. There are two issues with the code. -The first issue is a missing \code{ordered}~\code{depend(source)} directive, +directive with a \code{doacross} clause. There are two issues with the code. +The first issue is a missing \code{ordered}~\code{doacross(source:)} directive, which could cause a deadlock. -The second issue is the \code{depend(sink:i+1,j)} and \code{depend(sink:i,j+1)} +The second issue is the \code{doacross(sink:i+1,j)} and \code{doacross(sink:i,j+1)} clauses define dependences on lexicographically later source iterations (\plc{i+1, j}) and (\plc{i, j+1}), which could cause a deadlock as well since they may not start to execute until the current iteration completes. -\cexample[4.5]{doacross}{3} +\cexample[5.2]{doacross}{3} -\ffreeexample[4.5]{doacross}{3} +\ffreeexample[5.2]{doacross}{3} The following example illustrates the use of the \code{collapse} clause for @@ -63,6 +73,6 @@ The example also shows a compliant usage of the dependence source directive placed before the corresponding sink directive. Checking the completion of computation from previous iterations at the sink point can occur after the source statement. 
-\cexample[4.5]{doacross}{4} +\cexample[5.2]{doacross}{4} -\ffreeexample[4.5]{doacross}{4} +\ffreeexample[5.2]{doacross}{4} diff --git a/synchronization/flush_nolist.tex b/synchronization/flush_nolist.tex index e807fd0..f9e1d8f 100644 --- a/synchronization/flush_nolist.tex +++ b/synchronization/flush_nolist.tex @@ -1,6 +1,9 @@ \pagebreak \section{\code{flush} Construct without a List} \label{sec:flush_nolist} +\index{constructs!flush@\code{flush}} +\index{flush construct@\code{flush} construct} +\index{flushes!flush without a list} The following example distinguishes the shared variables affected by a \code{flush} construct with no list from the shared objects that are not affected: diff --git a/synchronization/init_lock.tex b/synchronization/init_lock.tex index 3f4151c..0836067 100644 --- a/synchronization/init_lock.tex +++ b/synchronization/init_lock.tex @@ -1,6 +1,8 @@ \subsection{\code{omp\_init\_lock} Routine} \label{subsec:init_lock} +\index{routines!omp_init_lock@\scode{omp_init_lock}} +\index{omp_init_lock routine@\scode{omp_init_lock} routine} The following example demonstrates how to initialize an array of locks in a \code{parallel} region by using \code{omp\_init\_lock}. diff --git a/synchronization/init_lock_with_hint.tex b/synchronization/init_lock_with_hint.tex index ac5a8de..9055c9d 100644 --- a/synchronization/init_lock_with_hint.tex +++ b/synchronization/init_lock_with_hint.tex @@ -2,9 +2,11 @@ \subsection{\code{omp\_init\_lock\_with\_hint} Routine} \label{subsec:init_lock_with_hint} +\index{routines!omp_init_lock_with_hint@\scode{omp_init_lock_with_hint}} +\index{omp_init_lock_with_hint routine@\scode{omp_init_lock_with_hint} routine} The following example demonstrates how to initialize an array of locks in a \code{parallel} region by using \code{omp\_init\_lock\_with\_hint}. Note, hints are combined with an \code{|} or \code{+} operator in C/C++ and a \code{+} operator in Fortran. 
-\cppexample[4.5]{init_lock_with_hint}{1} +\cppexample[5.0]{init_lock_with_hint}{1} -\fexample[4.5]{init_lock_with_hint}{1}[1] +\fexample[5.0]{init_lock_with_hint}{1} diff --git a/synchronization/lock_owner.tex b/synchronization/lock_owner.tex index df2297c..d96428d 100644 --- a/synchronization/lock_owner.tex +++ b/synchronization/lock_owner.tex @@ -1,6 +1,8 @@ \subsection{Ownership of Locks} \label{subsec:lock_owner} +\index{routines!omp_unset_lock@\scode{omp_unset_lock}} +\index{omp_unset_lock routine@\scode{omp_unset_lock} routine} Ownership of locks has changed since OpenMP 2.5. In OpenMP 2.5, locks are owned by threads; so a lock released by the \code{omp\_unset\_lock} routine must be owned by the same thread executing the routine. Beginning with OpenMP 3.0, locks are owned @@ -17,6 +19,6 @@ acquires the lock. \cexample[5.1]{lock_owner}{1} -\fexample[5.1]{lock_owner}{1}[1] +\fexample[5.1]{lock_owner}{1} diff --git a/synchronization/nestable_lock.tex b/synchronization/nestable_lock.tex index 991786f..f29034b 100644 --- a/synchronization/nestable_lock.tex +++ b/synchronization/nestable_lock.tex @@ -1,6 +1,7 @@ \subsection{Nestable Lock Routines} \label{subsec:nestable_lock} +\index{nestable lock} The following example demonstrates how a nestable lock can be used to synchronize updates both to a whole structure and to one of its members. diff --git a/synchronization/ordered.tex b/synchronization/ordered.tex index d0d6d92..2272f50 100644 --- a/synchronization/ordered.tex +++ b/synchronization/ordered.tex @@ -1,6 +1,10 @@ \pagebreak \section{\code{ordered} Clause and \code{ordered} Construct} \label{sec:ordered} +\index{clauses!ordered@\code{ordered}} +\index{ordered clause@\code{ordered} clause} +\index{constructs!ordered@\code{ordered}} +\index{ordered construct@\code{ordered} construct} Ordered constructs are useful for sequentially ordering the output from work that is done in parallel. 
The following program prints out the indices in sequential diff --git a/synchronization/simple_lock.tex b/synchronization/simple_lock.tex index bb5a303..bcd088a 100644 --- a/synchronization/simple_lock.tex +++ b/synchronization/simple_lock.tex @@ -1,6 +1,10 @@ \subsection{Simple Lock Routines} \label{subsec:simple_lock} +\index{routines!omp_set_lock@\scode{omp_set_lock}} +\index{omp_set_lock routine@\scode{omp_set_lock} routine} +\index{routines!omp_test_lock@\scode{omp_test_lock}} +\index{omp_test_lock routine@\scode{omp_test_lock} routine} In the following example, the lock routines cause the threads to be idle while waiting for entry to the first critical section, but to do other work while waiting for entry to the second. The \code{omp\_set\_lock} function blocks, but the \scode{omp_test_lock} diff --git a/synchronization/sources/acquire_release.1.c b/synchronization/sources/acquire_release.1.c index d7e1078..b6ca12d 100644 --- a/synchronization/sources/acquire_release.1.c +++ b/synchronization/sources/acquire_release.1.c @@ -1,5 +1,5 @@ /* -* @@name: acquire_release.1.c +* @@name: acquire_release.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/acquire_release.1.f90 b/synchronization/sources/acquire_release.1.f90 index 0b617bc..8ac677f 100644 --- a/synchronization/sources/acquire_release.1.f90 +++ b/synchronization/sources/acquire_release.1.f90 @@ -1,4 +1,4 @@ -! @@name: acquire_release.1.f90 +! @@name: acquire_release.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/synchronization/sources/acquire_release.2.c b/synchronization/sources/acquire_release.2.c index 2feedbb..60e4d4d 100644 --- a/synchronization/sources/acquire_release.2.c +++ b/synchronization/sources/acquire_release.2.c @@ -1,5 +1,5 @@ /* -* @@name: acquire_release.2.c +* @@name: acquire_release.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/acquire_release.2.f90 b/synchronization/sources/acquire_release.2.f90 index ed15571..2507aa5 100644 --- a/synchronization/sources/acquire_release.2.f90 +++ b/synchronization/sources/acquire_release.2.f90 @@ -1,4 +1,4 @@ -! @@name: acquire_release.2.f90 +! @@name: acquire_release.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/synchronization/sources/acquire_release.3.c b/synchronization/sources/acquire_release.3.c index be059ab..26aedfa 100644 --- a/synchronization/sources/acquire_release.3.c +++ b/synchronization/sources/acquire_release.3.c @@ -1,5 +1,5 @@ /* -* @@name: acquire_release.3.c +* @@name: acquire_release.3 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/acquire_release.3.f90 b/synchronization/sources/acquire_release.3.f90 index c0fe494..73dd7bf 100644 --- a/synchronization/sources/acquire_release.3.f90 +++ b/synchronization/sources/acquire_release.3.f90 @@ -1,4 +1,4 @@ -! @@name: acquire_release.3.f90 +! @@name: acquire_release.3 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/synchronization/sources/acquire_release_broke.4.c b/synchronization/sources/acquire_release_broke.4.c index b058ee7..4e575e3 100644 --- a/synchronization/sources/acquire_release_broke.4.c +++ b/synchronization/sources/acquire_release_broke.4.c @@ -1,5 +1,5 @@ /* -* @@name: acquire_release.4.c +* @@name: acquire_release_broke.4 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/acquire_release_broke.4.f90 b/synchronization/sources/acquire_release_broke.4.f90 index 3c17aa7..7c8a1f0 100644 --- a/synchronization/sources/acquire_release_broke.4.f90 +++ b/synchronization/sources/acquire_release_broke.4.f90 @@ -1,4 +1,4 @@ -! @@name: acquire_release.4.f90 +! @@name: acquire_release_broke.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/synchronization/sources/atomic.1.c b/synchronization/sources/atomic.1.c index e83dcec..0108f1d 100644 --- a/synchronization/sources/atomic.1.c +++ b/synchronization/sources/atomic.1.c @@ -1,5 +1,5 @@ /* -* @@name: atomic.1c +* @@name: atomic.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/atomic.1.f b/synchronization/sources/atomic.1.f index 20015a3..d281387 100644 --- a/synchronization/sources/atomic.1.f +++ b/synchronization/sources/atomic.1.f @@ -1,4 +1,4 @@ -! @@name: atomic.1f +! @@name: atomic.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/synchronization/sources/atomic.2.c b/synchronization/sources/atomic.2.c index 7f3797e..a85f25e 100644 --- a/synchronization/sources/atomic.2.c +++ b/synchronization/sources/atomic.2.c @@ -1,5 +1,5 @@ /* -* @@name: atomic.2c +* @@name: atomic.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/synchronization/sources/atomic.2.f b/synchronization/sources/atomic.2.f index 4e5dfab..03e956f 100644 --- a/synchronization/sources/atomic.2.f +++ b/synchronization/sources/atomic.2.f @@ -1,4 +1,4 @@ -! @@name: atomic.2f +! @@name: atomic.2 ! 
@@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/synchronization/sources/atomic.3.c b/synchronization/sources/atomic.3.c index 8cf0a6b..c557835 100644 --- a/synchronization/sources/atomic.3.c +++ b/synchronization/sources/atomic.3.c @@ -1,5 +1,5 @@ /* -* @@name: atomic.3c +* @@name: atomic.3 * @@type: C * @@compilable: yes * @@linkable: no @@ -8,38 +8,38 @@ */ int fetch_and_add(int *p) { -/* Atomically read the value of *p and then increment it. The previous value -is - * returned. This can be used to implement a simple lock as shown below. - */ - int old; + /* Atomically read the value of *p and then increment it. The + previous value is returned. This can be used to implement a + simple lock as shown below. + */ + int old; #pragma omp atomic capture - { old = *p; (*p)++; } - return old; + { old = *p; (*p)++; } + return old; } /* * Use fetch_and_add to implement a lock */ struct locktype { - int ticketnumber; - int turn; + int ticketnumber; + int turn; }; void do_locked_work(struct locktype *lock) { - int atomic_read(const int *p); - void work(); + int atomic_read(const int *p); + void work(); - // Obtain the lock - int myturn = fetch_and_add(&lock->ticketnumber); - while (atomic_read(&lock->turn) != myturn) - ; - // Do some work. The flush is needed to ensure visibility of - // variables not involved in atomic directives + // Obtain the lock + int myturn = fetch_and_add(&lock->ticketnumber); + while (atomic_read(&lock->turn) != myturn) + ; + // Do some work. 
The flush is needed to ensure visibility of + // variables not involved in atomic directives -#pragma omp flush - work(); -#pragma omp flush - // Release the lock - fetch_and_add(&lock->turn); + #pragma omp flush + work(); + #pragma omp flush + // Release the lock + fetch_and_add(&lock->turn); } diff --git a/synchronization/sources/atomic.3.f b/synchronization/sources/atomic.3.f index c805923..f2a8d6f 100644 --- a/synchronization/sources/atomic.3.f +++ b/synchronization/sources/atomic.3.f @@ -1,4 +1,4 @@ -! @@name: atomic.3f +! @@name: atomic.3 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no @@ -8,8 +8,8 @@ integer:: fetch_and_add integer, intent(inout) :: p -! Atomically read the value of p and then increment it. The previous value is -! returned. This can be used to implement a simple lock as shown below. +! Atomically read the value of p and then increment it. The previous value +! is returned. This can be used to implement a simple lock as shown below. !$omp atomic capture fetch_and_add = p p = p + 1 diff --git a/synchronization/sources/atomic_restrict.1.c b/synchronization/sources/atomic_restrict.1.c index 679a8a3..9c86168 100644 --- a/synchronization/sources/atomic_restrict.1.c +++ b/synchronization/sources/atomic_restrict.1.c @@ -1,5 +1,5 @@ /* -* @@name: atomic_restrict.1c +* @@name: atomic_restrict.1 * @@type: C * @@compilable: maybe * @@linkable: no diff --git a/synchronization/sources/atomic_restrict.1.f b/synchronization/sources/atomic_restrict.1.f index 5f2e28f..27cf690 100644 --- a/synchronization/sources/atomic_restrict.1.f +++ b/synchronization/sources/atomic_restrict.1.f @@ -1,4 +1,4 @@ -! @@name: atomic_restrict.1f +! @@name: atomic_restrict.1 ! @@type: F-fixed ! @@compilable: maybe ! 
@@linkable: no diff --git a/synchronization/sources/atomic_restrict.2.c b/synchronization/sources/atomic_restrict.2.c index 37fcce7..fa92390 100644 --- a/synchronization/sources/atomic_restrict.2.c +++ b/synchronization/sources/atomic_restrict.2.c @@ -1,5 +1,5 @@ /* -* @@name: atomic_restrict.2c +* @@name: atomic_restrict.2 * @@type: C * @@compilable: maybe * @@linkable: no diff --git a/synchronization/sources/atomic_restrict.2.f b/synchronization/sources/atomic_restrict.2.f index 71467c0..cdf2b91 100644 --- a/synchronization/sources/atomic_restrict.2.f +++ b/synchronization/sources/atomic_restrict.2.f @@ -1,4 +1,4 @@ -! @@name: atomic_restrict.2f +! @@name: atomic_restrict.2 ! @@type: F-fixed ! @@compilable: maybe ! @@linkable: no diff --git a/synchronization/sources/atomic_restrict.3.f b/synchronization/sources/atomic_restrict.3.f index 7ab0165..4cf08c7 100644 --- a/synchronization/sources/atomic_restrict.3.f +++ b/synchronization/sources/atomic_restrict.3.f @@ -1,4 +1,4 @@ -! @@name: atomic_restrict.3f +! @@name: atomic_restrict.3 ! @@type: F-fixed ! @@compilable: maybe ! @@linkable: no diff --git a/synchronization/sources/barrier_regions.1.c b/synchronization/sources/barrier_regions.1.c index 47d13a4..b0cf635 100644 --- a/synchronization/sources/barrier_regions.1.c +++ b/synchronization/sources/barrier_regions.1.c @@ -1,5 +1,5 @@ /* -* @@name: barrier_regions.1c +* @@name: barrier_regions.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/barrier_regions.1.f b/synchronization/sources/barrier_regions.1.f index e79f29a..c19d093 100644 --- a/synchronization/sources/barrier_regions.1.f +++ b/synchronization/sources/barrier_regions.1.f @@ -1,4 +1,4 @@ -! @@name: barrier_regions.1f +! @@name: barrier_regions.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: yes diff --git a/synchronization/sources/critical.1.c b/synchronization/sources/critical.1.c index 50b1d86..b9d8271 100644 --- a/synchronization/sources/critical.1.c +++ b/synchronization/sources/critical.1.c @@ -1,5 +1,5 @@ /* -* @@name: critical.1c +* @@name: critical.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/synchronization/sources/critical.1.f b/synchronization/sources/critical.1.f index 921ee2d..3db5e60 100644 --- a/synchronization/sources/critical.1.f +++ b/synchronization/sources/critical.1.f @@ -1,4 +1,4 @@ -! @@name: critical.1f +! @@name: critical.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/synchronization/sources/critical.2.c b/synchronization/sources/critical.2.c index e603dda..5abc7db 100644 --- a/synchronization/sources/critical.2.c +++ b/synchronization/sources/critical.2.c @@ -1,15 +1,11 @@ /* -* @@name: critical.1c +* @@name: critical.2 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success +* @@version: omp_5.0 */ -#if _OPENMP < 201811 -#define omp_sync_hint_contended omp_lock_hint_contended -#define omp_sync_hint_speculative omp_lock_hint_speculative -#endif - #include int dequeue(float *a); diff --git a/synchronization/sources/critical.2.f b/synchronization/sources/critical.2.f index 7454695..cff6aa6 100644 --- a/synchronization/sources/critical.2.f +++ b/synchronization/sources/critical.2.f @@ -1,14 +1,9 @@ -! @@name: critical.1f +! @@name: critical.2 ! @@type: F-fixed ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success -! @@version: omp_4.5 -#if _OPENMP < 201811 -#define OMP_SYNC_HINT_CONTENDED OMP_LOCK_HINT_CONTENDED -#endif - +! @@version: omp_5.0 SUBROUTINE CRITICAL_EXAMPLE(X, Y) USE OMP_LIB ! 
or INCLUDE "omp_lib.h" diff --git a/synchronization/sources/depobj.1.c b/synchronization/sources/depobj.1.c index bae1031..fa339ef 100644 --- a/synchronization/sources/depobj.1.c +++ b/synchronization/sources/depobj.1.c @@ -1,10 +1,10 @@ /* -* @@name: depobj.1c +* @@name: depobj.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success -* @@version: omp_5.0 +* @@version: omp_5.2 */ #include @@ -36,8 +36,8 @@ int main(){ driver(FALSE, a,b,N, &obj); // no updating of a - #pragma omp depobj(obj) destroy // obj is set to uninitilized state, - // resources are freed + #pragma omp depobj(obj) destroy(obj) // obj is set to uninitialized + // state, resources are freed return 0; } @@ -48,12 +48,11 @@ void driver(int update, float a[], float b[], int n, omp_depend_t *obj) #pragma omp single { - #pragma omp task depend(depobj: *obj) // Task 1, uses depend object - update_copy(update, a,b,n); // update a or not, always copy a to b - - #pragma omp task depend(in: a[:n]) // Task 2, only read a - checkpoint(a,n); + #pragma omp task depend(depobj: *obj) // Task 1, uses depend object + update_copy(update, a,b,n); // may update a, always copy a to b + #pragma omp task depend(in: a[:n]) // Task 2, only read a + checkpoint(a,n); } } diff --git a/synchronization/sources/depobj.1.f90 b/synchronization/sources/depobj.1.f90 index 1c03e40..3e6716c 100644 --- a/synchronization/sources/depobj.1.f90 +++ b/synchronization/sources/depobj.1.f90 @@ -1,9 +1,9 @@ -! @@name: depobj.1f90 +! @@name: depobj.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes ! @@expect: success -! @@version: omp_5.0 +! @@version: omp_5.2 program main use omp_lib @@ -23,8 +23,8 @@ program main call driver(.false., a,b,N, obj) !! no updating - !$omp depobj(obj) destroy !! obj is set to uninitilized state, - !! resources are freed + !$omp depobj(obj) destroy(obj) !! obj is set to uninitialized + !! 
state, resources are freed end program @@ -41,7 +41,8 @@ subroutine driver(update, a, b, n, obj) !$omp single !$omp task depend(depobj: obj) !! Task 1, uses depend object - call update_copy(update, a,b,n) !! update a or not, always copy a to b + call update_copy(update, a,b,n) + !! update a or not, always copy a to b !$omp end task !$omp task depend(in: a) !! Task 2, only read a diff --git a/synchronization/sources/doacross.1.c b/synchronization/sources/doacross.1.c index 3a0ca97..467687d 100644 --- a/synchronization/sources/doacross.1.c +++ b/synchronization/sources/doacross.1.c @@ -1,11 +1,12 @@ /* -* @@name: doacross.1c +* @@name: doacross.1 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.5 +* @@version: omp_5.2 */ + float foo(int i); float bar(float a, float b); float baz(float b); @@ -19,9 +20,9 @@ void work( int N, float *A, float *B, float *C ) { A[i] = foo(i); - #pragma omp ordered depend(sink: i-1) + #pragma omp ordered doacross(sink: i-1) B[i] = bar(A[i], B[i-1]); - #pragma omp ordered depend(source) + #pragma omp ordered doacross(source: omp_cur_iteration) C[i] = baz(B[i]); } diff --git a/synchronization/sources/doacross.1.f90 b/synchronization/sources/doacross.1.f90 index bce32db..5d8703d 100644 --- a/synchronization/sources/doacross.1.f90 +++ b/synchronization/sources/doacross.1.f90 @@ -1,9 +1,10 @@ -! @@name: doacross.1f +! @@name: doacross.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no ! @@expect: success -! @@version: omp_4.5 +! 
@@version: omp_5.2 + subroutine work( N, A, B, C ) integer :: N, i real, dimension(N) :: A, B, C @@ -13,9 +14,9 @@ subroutine work( N, A, B, C ) do i=2, N A(i) = foo(i) - !$omp ordered depend(sink: i-1) + !$omp ordered doacross(sink: i-1) B(i) = bar(A(i), B(i-1)) - !$omp ordered depend(source) + !$omp ordered doacross(source: omp_cur_iteration) C(i) = baz(B(i)) end do diff --git a/synchronization/sources/doacross.2.c b/synchronization/sources/doacross.2.c index f2613fc..c1d403e 100644 --- a/synchronization/sources/doacross.2.c +++ b/synchronization/sources/doacross.2.c @@ -1,11 +1,12 @@ /* -* @@name: doacross.2c +* @@name: doacross.2 * @@type: C * @@compilable: yes * @@linkable: no * @@expect: success -* @@version: omp_4.5 +* @@version: omp_5.2 */ + float foo(int i, int j); float bar(float a, float b, float c); float baz(float b); @@ -21,9 +22,9 @@ void work( int N, int M, float **A, float **B, float **C ) { A[i][j] = foo(i, j); - #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1) + #pragma omp ordered doacross(sink: i-1,j) doacross(sink: i,j-1) B[i][j] = bar(A[i][j], B[i-1][j], B[i][j-1]); - #pragma omp ordered depend(source) + #pragma omp ordered doacross(source:) C[i][j] = baz(B[i][j]); } diff --git a/synchronization/sources/doacross.2.f90 b/synchronization/sources/doacross.2.f90 index 75aefa0..f034a9b 100644 --- a/synchronization/sources/doacross.2.f90 +++ b/synchronization/sources/doacross.2.f90 @@ -1,9 +1,10 @@ -! @@name: doacross.2f +! @@name: doacross.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no ! @@expect: success -! @@version: omp_4.5 +! 
@@version: omp_5.2 + subroutine work( N, M, A, B, C ) integer :: N, M, i, j real, dimension(M,N) :: A, B, C @@ -14,9 +15,9 @@ subroutine work( N, M, A, B, C ) do i=2, M A(i,j) = foo(i, j) - !$omp ordered depend(sink: j-1,i) depend(sink: j,i-1) + !$omp ordered doacross(sink: j-1,i) doacross(sink: j,i-1) B(i,j) = bar(A(i,j), B(i-1,j), B(i,j-1)) - !$omp ordered depend(source) + !$omp ordered doacross(source:) C(i,j) = baz(B(i,j)) end do diff --git a/synchronization/sources/doacross.3.c b/synchronization/sources/doacross.3.c index 8b3bf44..b14b15c 100644 --- a/synchronization/sources/doacross.3.c +++ b/synchronization/sources/doacross.3.c @@ -1,24 +1,25 @@ /* -* @@name: doacross.3c +* @@name: doacross.3 * @@type: C * @@compilable: no * @@linkable: no * @@expect: failure -* @@version: omp_4.5 +* @@version: omp_5.2 */ + #define N 100 void work_wrong(double p[][N][N]) { int i, j, k; -#pragma omp parallel for ordered(2) private(i,j,k) + #pragma omp parallel for ordered(2) private(i,j,k) for (i=1; i omp_lock_t *new_locks() diff --git a/synchronization/sources/init_lock_with_hint.1.f b/synchronization/sources/init_lock_with_hint.1.f index f7af0aa..8d4ba4f 100644 --- a/synchronization/sources/init_lock_with_hint.1.f +++ b/synchronization/sources/init_lock_with_hint.1.f @@ -1,15 +1,9 @@ -! @@name: init_lock.1f +! @@name: init_lock_with_hint.1 ! @@type: F-fixed ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: no ! @@expect: success -! @@version: omp_4.5 -#if _OPENMP < 201811 -#define OMP_SYNC_HINT_CONTENDED OMP_LOCK_HINT_CONTENDED -#define OMP_SYNC_HINT_SPECULATIVE OMP_LOCK_HINT_SPECULATIVE -#endif - +! @@version: omp_5.0 FUNCTION NEW_LOCKS() USE OMP_LIB ! 
or INCLUDE "omp_lib.h" INTEGER(OMP_LOCK_KIND), DIMENSION(1000) :: NEW_LOCKS diff --git a/synchronization/sources/lock_owner.1.c b/synchronization/sources/lock_owner.1.c index 4bb90b6..c3fefea 100644 --- a/synchronization/sources/lock_owner.1.c +++ b/synchronization/sources/lock_owner.1.c @@ -1,15 +1,11 @@ /* -* @@name: lock_owner.1c +* @@name: lock_owner.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include #include #include diff --git a/synchronization/sources/lock_owner.1.f b/synchronization/sources/lock_owner.1.f index 75590b5..185e8d1 100644 --- a/synchronization/sources/lock_owner.1.f +++ b/synchronization/sources/lock_owner.1.f @@ -1,14 +1,9 @@ -! @@name: lock_owner.1f +! @@name: lock_owner.1 ! @@type: F-fixed ! @@compilable: yes -! @@requires: preprocessing ! @@linkable: yes ! @@expect: success ! @@version: omp_5.1 -#if _OPENMP < 202011 -#define masked master -#endif - program lock use omp_lib integer :: x diff --git a/synchronization/sources/nestable_lock.1.c b/synchronization/sources/nestable_lock.1.c index 4c939e0..71ec595 100644 --- a/synchronization/sources/nestable_lock.1.c +++ b/synchronization/sources/nestable_lock.1.c @@ -1,5 +1,5 @@ /* -* @@name: nestable_lock.1c +* @@name: nestable_lock.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/synchronization/sources/nestable_lock.1.f b/synchronization/sources/nestable_lock.1.f index 41a3198..6b468ff 100644 --- a/synchronization/sources/nestable_lock.1.f +++ b/synchronization/sources/nestable_lock.1.f @@ -1,4 +1,4 @@ -! @@name: nestable_lock.1f +! @@name: nestable_lock.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/synchronization/sources/ordered.1.c b/synchronization/sources/ordered.1.c index 15b85c7..b766382 100644 --- a/synchronization/sources/ordered.1.c +++ b/synchronization/sources/ordered.1.c @@ -1,5 +1,5 @@ /* -* @@name: ordered.1c +* @@name: ordered.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/ordered.1.f b/synchronization/sources/ordered.1.f index 6ff12c2..0e3c3ff 100644 --- a/synchronization/sources/ordered.1.f +++ b/synchronization/sources/ordered.1.f @@ -1,4 +1,4 @@ -! @@name: ordered.1f +! @@name: ordered.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/synchronization/sources/ordered.2.c b/synchronization/sources/ordered.2.c index 9c84d7e..d07af89 100644 --- a/synchronization/sources/ordered.2.c +++ b/synchronization/sources/ordered.2.c @@ -1,5 +1,5 @@ /* -* @@name: ordered.2c +* @@name: ordered.2 * @@type: C * @@compilable: no * @@linkable: no diff --git a/synchronization/sources/ordered.2.f b/synchronization/sources/ordered.2.f index d7a7b1d..e0bb30a 100644 --- a/synchronization/sources/ordered.2.f +++ b/synchronization/sources/ordered.2.f @@ -1,4 +1,4 @@ -! @@name: ordered.2f +! @@name: ordered.2 ! @@type: F-fixed ! @@compilable: no ! @@linkable: no diff --git a/synchronization/sources/ordered.3.c b/synchronization/sources/ordered.3.c index d478939..18b0de5 100644 --- a/synchronization/sources/ordered.3.c +++ b/synchronization/sources/ordered.3.c @@ -1,5 +1,5 @@ /* -* @@name: ordered.3c +* @@name: ordered.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/synchronization/sources/ordered.3.f b/synchronization/sources/ordered.3.f index 65429a8..4b75230 100644 --- a/synchronization/sources/ordered.3.f +++ b/synchronization/sources/ordered.3.f @@ -1,4 +1,4 @@ -! @@name: ordered.3f +! @@name: ordered.3 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/synchronization/sources/simple_lock.1.c b/synchronization/sources/simple_lock.1.c index 104ed63..26294d6 100644 --- a/synchronization/sources/simple_lock.1.c +++ b/synchronization/sources/simple_lock.1.c @@ -1,5 +1,5 @@ /* -* @@name: simple_lock.1c +* @@name: simple_lock.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/synchronization/sources/simple_lock.1.f b/synchronization/sources/simple_lock.1.f index e6e9b82..4dcd5bc 100644 --- a/synchronization/sources/simple_lock.1.f +++ b/synchronization/sources/simple_lock.1.f @@ -1,4 +1,4 @@ -! @@name: simple_lock.1f +! @@name: simple_lock.1 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: yes diff --git a/synchronization/sources/worksharing_critical.1.c b/synchronization/sources/worksharing_critical.1.c index 61bf0f4..2644060 100644 --- a/synchronization/sources/worksharing_critical.1.c +++ b/synchronization/sources/worksharing_critical.1.c @@ -1,5 +1,5 @@ /* -* @@name: worksharing_critical.1c +* @@name: worksharing_critical.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/synchronization/sources/worksharing_critical.1.f b/synchronization/sources/worksharing_critical.1.f index f57f2b7..d2b7335 100644 --- a/synchronization/sources/worksharing_critical.1.f +++ b/synchronization/sources/worksharing_critical.1.f @@ -1,4 +1,4 @@ -! @@name: worksharing_critical.1f +! @@name: worksharing_critical.1 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/synchronization/worksharing_critical.tex b/synchronization/worksharing_critical.tex index 04cabb5..b51bd4c 100644 --- a/synchronization/worksharing_critical.tex +++ b/synchronization/worksharing_critical.tex @@ -1,6 +1,9 @@ \pagebreak \section{Worksharing Constructs Inside a \code{critical} Construct} \label{sec:worksharing_critical} +\index{constructs!worksharing} +\index{constructs!critical@\code{critical}} +\index{critical construct@\code{critical} construct} The following example demonstrates using a worksharing construct inside a \code{critical} construct. This example is conforming because the worksharing \code{single} diff --git a/tasking/parallel_masked_taskloop.tex b/tasking/parallel_masked_taskloop.tex index 458815a..2549c97 100644 --- a/tasking/parallel_masked_taskloop.tex +++ b/tasking/parallel_masked_taskloop.tex @@ -1,6 +1,16 @@ \pagebreak \section{Combined \code{parallel} \code{masked} and \code{taskloop} Constructs} \label{sec:parallel_masked_taskloop} +\index{combined constructs!parallel masked taskloop@\code{parallel} \code{masked} \code{taskloop}} +\index{combined constructs!parallel masked taskloop simd@\code{parallel} \code{masked} \code{taskloop} \code{simd}} +\index{constructs!parallel@\code{parallel}} +\index{constructs!masked@\code{masked}} +\index{constructs!taskloop@\code{taskloop}} +\index{constructs!simd@\code{simd}} +\index{parallel construct@\code{parallel} construct} +\index{masked construct@\code{masked} construct} +\index{taskloop construct@\code{taskloop} construct} +\index{simd construct@\code{simd} construct} Just as the \code{for} and \code{do} constructs were combined with the \code{parallel} construct for convenience, so too, the combined @@ -27,5 +37,6 @@ with the \code{taskloop} or \code{taskloop}~\code{simd} construct produces no ad restrictions. 
\cexample[5.1]{parallel_masked_taskloop}{1} +\clearpage -\ffreeexample[5.1]{parallel_masked_taskloop}{1}[1] +\ffreeexample[5.1]{parallel_masked_taskloop}{1} diff --git a/tasking/sources/parallel_masked_taskloop.1.c b/tasking/sources/parallel_masked_taskloop.1.c index 2c12184..db91d8d 100644 --- a/tasking/sources/parallel_masked_taskloop.1.c +++ b/tasking/sources/parallel_masked_taskloop.1.c @@ -1,15 +1,11 @@ /* -* @@name: parallel_masked_taskloop.1c +* @@name: parallel_masked_taskloop.1 * @@type: C * @@compilable: yes * @@linkable: yes * @@expect: success * @@version: omp_5.1 */ -#if _OPENMP < 202011 -#define masked master -#endif - #include #define N 100 @@ -30,6 +26,5 @@ int main() #pragma omp parallel masked taskloop simd // taskloop 3 for(i=0;i + +int main(){ + int a=1, d=1; + + #pragma omp parallel masked num_threads(5) + { + #pragma omp task // Task 1 + { printf("T1\n"); } + + #pragma omp task depend(out: a) // Task 2 + { a++; + printf("T2 a=%i\n", a); } + + #pragma omp task depend(out: d) // Task 3 + { d++; + printf("T3 d=%i\n", d); } + + #pragma omp task depend(inout: omp_all_memory) // Task 4 + { a++; d++; + printf("T4 a=%i d=%i\n", a,d);} + + #pragma omp task // Task 5 + { printf("T5\n"); } + + #pragma omp task depend(in: a,d) // Task 6 + { a++; d++; + printf("T6 a=%i d=%i\n", a,d); } + } +} + +/* OUTPUT: ordered {T2,T3 any order}, {T4}, {T6} + T2 a=2 + T3 d=2 + T4 a=3 d=3 + T6 a=4 d=4 + + OUTPUT: unordered (can appear interspersed in ordered output) + T1 + T5 +*/ diff --git a/tasking/sources/task_dep.13.f90 b/tasking/sources/task_dep.13.f90 new file mode 100644 index 0000000..6ae6fed --- /dev/null +++ b/tasking/sources/task_dep.13.f90 @@ -0,0 +1,52 @@ +! @@name: task_dep.13 +! @@type: F-free +! @@compilable: yes +! @@linkable: yes +! @@expect: success +! @@version: omp_5.1 +program main + integer :: a=1, d=1 + + !$omp parallel masked num_threads(5) + + !$omp task !! Task 1 + write(*,'("T1")') + !$omp end task + + !$omp task depend(out: a) !! 
Task 2 + a=a+1 + write(*,'("T2 a=",i1)') a + !$omp end task + + !$omp task depend(out: d) !! Task 3 + d=d+1 + write(*,'("T3 d=",i1)') d + !$omp end task + + + !$omp task depend(inout: omp_all_memory) !! Task 4 + a=a+1; d=d+1 + write(*,'("T4 a=",i1," d=",i1)') a, d + !$omp end task + + !$omp task !! Task 5 + write(*,'("T5")') + !$omp end task + + !$omp task depend(in: a,d) !! Task 6 + a=a+1; d=d+1 + write(*,'("T6 a=",i1," d=",i1)') a, d + !$omp end task + + !$omp end parallel masked + +end program + +! OUTPUT: ordered {T2,T3 any order}, {T4}, {T6} +! T2 a=2 +! T3 d=2 +! T4 a=3 d=3 +! T6 a=4 d=4 +! OUTPUT: unordered (can appear interspersed in ordered output) +! T1 +! T5 diff --git a/tasking/sources/task_dep.2.c b/tasking/sources/task_dep.2.c index 595039b..73d56a0 100644 --- a/tasking/sources/task_dep.2.c +++ b/tasking/sources/task_dep.2.c @@ -1,5 +1,5 @@ /* -* @@name: task_dep.2c +* @@name: task_dep.2 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.2.f90 b/tasking/sources/task_dep.2.f90 index cb63c90..5efa05b 100644 --- a/tasking/sources/task_dep.2.f90 +++ b/tasking/sources/task_dep.2.f90 @@ -1,4 +1,4 @@ -! @@name: task_dep.2f +! @@name: task_dep.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/task_dep.3.c b/tasking/sources/task_dep.3.c index 6c8c5d2..8b26d41 100644 --- a/tasking/sources/task_dep.3.c +++ b/tasking/sources/task_dep.3.c @@ -1,5 +1,5 @@ /* -* @@name: task_dep.3c +* @@name: task_dep.3 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.3.f90 b/tasking/sources/task_dep.3.f90 index 3a2f9f7..1f3ca72 100644 --- a/tasking/sources/task_dep.3.f90 +++ b/tasking/sources/task_dep.3.f90 @@ -1,4 +1,4 @@ -! @@name: task_dep.3f +! @@name: task_dep.3 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/tasking/sources/task_dep.4.c b/tasking/sources/task_dep.4.c index 6170439..388b20b 100644 --- a/tasking/sources/task_dep.4.c +++ b/tasking/sources/task_dep.4.c @@ -1,5 +1,5 @@ /* -* @@name: task_dep.4c +* @@name: task_dep.4 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.4.f90 b/tasking/sources/task_dep.4.f90 index ad0ac58..0a297f8 100644 --- a/tasking/sources/task_dep.4.f90 +++ b/tasking/sources/task_dep.4.f90 @@ -1,4 +1,4 @@ -! @@name: task_dep.4f +! @@name: task_dep.4 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/task_dep.5.c b/tasking/sources/task_dep.5.c index 5df1dc8..3a2319a 100644 --- a/tasking/sources/task_dep.5.c +++ b/tasking/sources/task_dep.5.c @@ -1,5 +1,5 @@ /* -* @@name: task_dep.5c +* @@name: task_dep.5 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/task_dep.5.f90 b/tasking/sources/task_dep.5.f90 index edd31d5..21d8884 100644 --- a/tasking/sources/task_dep.5.f90 +++ b/tasking/sources/task_dep.5.f90 @@ -1,4 +1,4 @@ -! @@name: task_dep.5f +! @@name: task_dep.5 ! @@type: F-free ! @@compilable: yes ! @@linkable: no @@ -14,9 +14,10 @@ subroutine matmul_depend (N, BS, A, B, C) do i = 1, N, BS do j = 1, N, BS do k = 1, N, BS -!$omp task shared(A,B,C) private(ii,jj,kk) & ! I,J,K are firstprivate by default +!$omp task shared(A,B,C) private(ii,jj,kk) & !$omp depend ( in: A(i:i+BM, k:k+BM), B(k:k+BM, j:j+BM) ) & !$omp depend ( inout: C(i:i+BM, j:j+BM) ) +! 
I,J,K are firstprivate by default do ii = i, i+BM do jj = j, j+BM do kk = k, k+BM diff --git a/tasking/sources/task_dep.6.c b/tasking/sources/task_dep.6.c index 3a8483d..4e2ad32 100644 --- a/tasking/sources/task_dep.6.c +++ b/tasking/sources/task_dep.6.c @@ -1,5 +1,5 @@ /* -* @@name: task_depend.6.c +* @@name: task_dep.6 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.6.f90 b/tasking/sources/task_dep.6.f90 index 1ebc222..369ace9 100644 --- a/tasking/sources/task_dep.6.f90 +++ b/tasking/sources/task_dep.6.f90 @@ -1,4 +1,4 @@ -! @@name: task_depend.6.f90 +! @@name: task_dep.6 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/task_dep.7.c b/tasking/sources/task_dep.7.c index c9f7649..a18853c 100644 --- a/tasking/sources/task_dep.7.c +++ b/tasking/sources/task_dep.7.c @@ -1,5 +1,5 @@ /* -* @@name: task_depend.7.c +* @@name: task_dep.7 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.7.f90 b/tasking/sources/task_dep.7.f90 index db44628..5a48f17 100644 --- a/tasking/sources/task_dep.7.f90 +++ b/tasking/sources/task_dep.7.f90 @@ -1,4 +1,4 @@ -! @@name: task_depend.7.f90 +! @@name: task_dep.7 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/task_dep.8.c b/tasking/sources/task_dep.8.c index 92c4f09..25b2137 100644 --- a/tasking/sources/task_dep.8.c +++ b/tasking/sources/task_dep.8.c @@ -1,5 +1,5 @@ /* -* @@name: task_depend.8.c +* @@name: task_dep.8 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.8.f90 b/tasking/sources/task_dep.8.f90 index c6fc10d..41fdd69 100644 --- a/tasking/sources/task_dep.8.f90 +++ b/tasking/sources/task_dep.8.f90 @@ -1,4 +1,4 @@ -! @@name: task_depend.8.f90 +! @@name: task_dep.8 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/tasking/sources/task_dep.9.c b/tasking/sources/task_dep.9.c index 35a99e5..2caae08 100644 --- a/tasking/sources/task_dep.9.c +++ b/tasking/sources/task_dep.9.c @@ -1,5 +1,5 @@ /* -* @@name: task_dep.6c +* @@name: task_dep.9 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/task_dep.9.f90 b/tasking/sources/task_dep.9.f90 index f9c312a..9578ec0 100644 --- a/tasking/sources/task_dep.9.f90 +++ b/tasking/sources/task_dep.9.f90 @@ -1,4 +1,4 @@ -! @@name: task_dep.6f +! @@name: task_dep.9 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/task_detach.1.c b/tasking/sources/task_detach.1.c index 8e26450..5b13cac 100644 --- a/tasking/sources/task_detach.1.c +++ b/tasking/sources/task_detach.1.c @@ -1,5 +1,5 @@ /* -* @@name: task_detach.1c +* @@name: task_detach.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/task_detach.1.f90 b/tasking/sources/task_detach.1.f90 index 38d2326..32ad63c 100644 --- a/tasking/sources/task_detach.1.f90 +++ b/tasking/sources/task_detach.1.f90 @@ -1,36 +1,36 @@ -! @@name: task_detach.1f90 -! @@type: F-free -! @@compilable: yes -! @@linkable: no -! @@expect: success -! @@version: omp_5.0 -program main - use omp_lib - implicit none - - external :: async_work, work - - logical :: async=.true. - integer(omp_event_handle_kind) :: event - - !$omp parallel - !$omp masked - - !$omp task detach(event) - - if(async) then - call async_work(omp_fulfill_event, event) - else - call work() - call omp_fulfill_event(event) - endif - - !$omp end task - !! Other work - - !$omp taskwait - - !$omp end masked - !$omp end parallel - -end program +! @@name: task_detach.1 +! @@type: F-free +! @@compilable: yes +! @@linkable: no +! @@expect: success +! @@version: omp_5.0 +program main + use omp_lib + implicit none + + external :: async_work, work + + logical :: async=.true. 
+ integer(omp_event_handle_kind) :: event + + !$omp parallel + !$omp masked + + !$omp task detach(event) + + if(async) then + call async_work(omp_fulfill_event, event) + else + call work() + call omp_fulfill_event(event) + endif + + !$omp end task + !! Other work + + !$omp taskwait + + !$omp end masked + !$omp end parallel + +end program diff --git a/tasking/sources/task_detach.2.c b/tasking/sources/task_detach.2.c index b18d1fa..5282243 100644 --- a/tasking/sources/task_detach.2.c +++ b/tasking/sources/task_detach.2.c @@ -1,5 +1,5 @@ /* -* @@name: task_detach.2c +* @@name: task_detach.2 * @@type: C * @@compilable: yes * @@linkable: yes @@ -17,10 +17,11 @@ #include -#define IO_SIGNAL SIGUSR1 // Signal used to notify I/O completion +#define IO_SIGNAL SIGUSR1 // Signal used to notify I/O completion - // Handler for I/O completion signal -static void callback_aioSigHandler(int sig, siginfo_t *si, void *ucontext) { + // Handler for I/O completion signal +static void callback_aioSigHandler(int sig, siginfo_t *si, + void *ucontext) { if (si->si_code == SI_ASYNCIO){ printf( "OUT: I/O completion signal received.\n"); omp_fulfill_event( (omp_event_handle_t)(si->si_value.sival_ptr) ); @@ -30,8 +31,9 @@ static void callback_aioSigHandler(int sig, siginfo_t *si, void *ucontext) { void work(int i){ printf("OUT: Executing work(%d)\n", i);} int main() { - // Write "Written Asynchronously." to file data, using POSIX asynchronous IO - // Error checking not included for clarity and simplicity. + // Write "Written Asynchronously." to file data, using POSIX + // asynchronous IO. Error checking not included for clarity + // and simplicity. 
char data[] = "Written Asynchronously."; @@ -57,26 +59,27 @@ int main() { sa.sa_sigaction = callback_aioSigHandler; //callback sigaction(IO_SIGNAL, &sa, NULL); - #pragma omp parallel num_threads(2) - #pragma omp masked - { + #pragma omp parallel num_threads(2) + #pragma omp masked + { - #pragma omp task detach(event) if(0) // TASK1 - { - cb.aio_sigevent.sigev_value.sival_ptr = (void *) event; - aio_write(&cb); - } + #pragma omp task detach(event) if(0) // TASK1 + { + cb.aio_sigevent.sigev_value.sival_ptr = (void *) event; + aio_write(&cb); + } - #pragma omp task // TASK2 - work(1); - #pragma omp task // TASK3 - work(2); + #pragma omp task // TASK2 + work(1); + #pragma omp task // TASK3 + work(2); - } // Parallel region barrier ensures completion of detachable task. + } // Parallel region barrier ensures completion of detachable task. - // Making sure the aio operation completed. - // With OpenMP detachable task the condition will always be false: - while(aio_error(&cb) == EINPROGRESS){printf(" INPROGRESS\n");} //Safeguard + // Making sure the aio operation completed. + // With OpenMP detachable task the condition will always be false: + while(aio_error(&cb) == EINPROGRESS) { + printf(" INPROGRESS\n");} //Safeguard close(fd); return 0; diff --git a/tasking/sources/task_priority.1.c b/tasking/sources/task_priority.1.c index 1a90441..821c2e6 100644 --- a/tasking/sources/task_priority.1.c +++ b/tasking/sources/task_priority.1.c @@ -1,5 +1,5 @@ /* -* @@name: task_priority.1c +* @@name: task_priority.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/task_priority.1.f90 b/tasking/sources/task_priority.1.f90 index 3423684..2edda1a 100644 --- a/tasking/sources/task_priority.1.f90 +++ b/tasking/sources/task_priority.1.f90 @@ -1,4 +1,4 @@ -! @@name: task_priority.1f +! @@name: task_priority.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/taskgroup.1.c b/tasking/sources/taskgroup.1.c index 39ae457..98a373b 100644 --- a/tasking/sources/taskgroup.1.c +++ b/tasking/sources/taskgroup.1.c @@ -1,5 +1,5 @@ /* -* @@name: taskgroup.1c +* @@name: taskgroup.1 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/taskgroup.1.f90 b/tasking/sources/taskgroup.1.f90 index 52df9b2..5af9177 100644 --- a/tasking/sources/taskgroup.1.f90 +++ b/tasking/sources/taskgroup.1.f90 @@ -1,4 +1,4 @@ -! @@name: taskgroup.1f +! @@name: taskgroup.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/tasking.1.c b/tasking/sources/tasking.1.c index b74ce9f..b29efba 100644 --- a/tasking/sources/tasking.1.c +++ b/tasking/sources/tasking.1.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.1c +* @@name: tasking.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.1.f90 b/tasking/sources/tasking.1.f90 index 01d1c1b..9b7494c 100644 --- a/tasking/sources/tasking.1.f90 +++ b/tasking/sources/tasking.1.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.1f +! @@name: tasking.1 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.10.c b/tasking/sources/tasking.10.c index ee0ad58..e32ed6d 100644 --- a/tasking/sources/tasking.10.c +++ b/tasking/sources/tasking.10.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.10c +* @@name: tasking.10 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.10.f90 b/tasking/sources/tasking.10.f90 index 8270a97..ecf548d 100644 --- a/tasking/sources/tasking.10.f90 +++ b/tasking/sources/tasking.10.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.10f +! @@name: tasking.10 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/tasking.11.c b/tasking/sources/tasking.11.c index 9c49ff3..bf2b005 100644 --- a/tasking/sources/tasking.11.c +++ b/tasking/sources/tasking.11.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.11c +* @@name: tasking.11 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.11.f90 b/tasking/sources/tasking.11.f90 index 17d492d..92e9c12 100644 --- a/tasking/sources/tasking.11.f90 +++ b/tasking/sources/tasking.11.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.11f +! @@name: tasking.11 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.12.c b/tasking/sources/tasking.12.c index 1e4e10e..518d84c 100644 --- a/tasking/sources/tasking.12.c +++ b/tasking/sources/tasking.12.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.12c +* @@name: tasking.12 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.12.f90 b/tasking/sources/tasking.12.f90 index 5c9fa01..d3a0d97 100644 --- a/tasking/sources/tasking.12.f90 +++ b/tasking/sources/tasking.12.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.12f +! @@name: tasking.12 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.13.c b/tasking/sources/tasking.13.c index e8dff7e..e5df11c 100644 --- a/tasking/sources/tasking.13.c +++ b/tasking/sources/tasking.13.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.13c +* @@name: tasking.13 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.13.f90 b/tasking/sources/tasking.13.f90 index dac54a8..d46a9ec 100644 --- a/tasking/sources/tasking.13.f90 +++ b/tasking/sources/tasking.13.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.13f +! @@name: tasking.13 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/tasking.14.c b/tasking/sources/tasking.14.c index 2403b2a..df60436 100644 --- a/tasking/sources/tasking.14.c +++ b/tasking/sources/tasking.14.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.14c +* @@name: tasking.14 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.14.f90 b/tasking/sources/tasking.14.f90 index 15a61e9..cfeb913 100644 --- a/tasking/sources/tasking.14.f90 +++ b/tasking/sources/tasking.14.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.14f +! @@name: tasking.14 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.15.c b/tasking/sources/tasking.15.c index a0359d4..091af82 100644 --- a/tasking/sources/tasking.15.c +++ b/tasking/sources/tasking.15.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.15c +* @@name: tasking.15 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/tasking.15.f90 b/tasking/sources/tasking.15.f90 index f429279..5116076 100644 --- a/tasking/sources/tasking.15.f90 +++ b/tasking/sources/tasking.15.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.15f +! @@name: tasking.15 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/tasking.16.c b/tasking/sources/tasking.16.c index 5873fb2..36b9a5f 100644 --- a/tasking/sources/tasking.16.c +++ b/tasking/sources/tasking.16.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.16c +* @@name: tasking.16 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/tasking.16.f90 b/tasking/sources/tasking.16.f90 index 4903d82..449ccc0 100644 --- a/tasking/sources/tasking.16.f90 +++ b/tasking/sources/tasking.16.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.16f +! @@name: tasking.16 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: yes diff --git a/tasking/sources/tasking.17.c b/tasking/sources/tasking.17.c index 2e2e47b..f89ae91 100644 --- a/tasking/sources/tasking.17.c +++ b/tasking/sources/tasking.17.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.17c +* @@name: tasking.17 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/tasking.17.f90 b/tasking/sources/tasking.17.f90 index 55763fd..fa5418f 100644 --- a/tasking/sources/tasking.17.f90 +++ b/tasking/sources/tasking.17.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.17f +! @@name: tasking.17 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/tasking.18.c b/tasking/sources/tasking.18.c index ba757e7..3336740 100644 --- a/tasking/sources/tasking.18.c +++ b/tasking/sources/tasking.18.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.18c +* @@name: tasking.18 * @@type: C * @@compilable: yes * @@linkable: yes diff --git a/tasking/sources/tasking.18.f90 b/tasking/sources/tasking.18.f90 index 796982e..66c9676 100644 --- a/tasking/sources/tasking.18.f90 +++ b/tasking/sources/tasking.18.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.18f +! @@name: tasking.18 ! @@type: F-free ! @@compilable: yes ! @@linkable: yes diff --git a/tasking/sources/tasking.19.c b/tasking/sources/tasking.19.c index 8f4d9b1..04384c5 100644 --- a/tasking/sources/tasking.19.c +++ b/tasking/sources/tasking.19.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.19c +* @@name: tasking.19 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.19.f90 b/tasking/sources/tasking.19.f90 index 1afa1ea..3ee2edf 100644 --- a/tasking/sources/tasking.19.f90 +++ b/tasking/sources/tasking.19.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.19f +! @@name: tasking.19 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/tasking.2.c b/tasking/sources/tasking.2.c index b5c32ce..62f155c 100644 --- a/tasking/sources/tasking.2.c +++ b/tasking/sources/tasking.2.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.2c +* @@name: tasking.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.2.f90 b/tasking/sources/tasking.2.f90 index e80f946..4be6a0d 100644 --- a/tasking/sources/tasking.2.f90 +++ b/tasking/sources/tasking.2.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.2f +! @@name: tasking.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.3.c b/tasking/sources/tasking.3.c index 733cd8c..a3fcb75 100644 --- a/tasking/sources/tasking.3.c +++ b/tasking/sources/tasking.3.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.3c +* @@name: tasking.3 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.3.f90 b/tasking/sources/tasking.3.f90 index bab3183..1efbec5 100644 --- a/tasking/sources/tasking.3.f90 +++ b/tasking/sources/tasking.3.f90 @@ -1,4 +1,4 @@ -! @@name: tasking.3f +! @@name: tasking.3 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.4.c b/tasking/sources/tasking.4.c index f900bec..57fd6e4 100644 --- a/tasking/sources/tasking.4.c +++ b/tasking/sources/tasking.4.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.4c +* @@name: tasking.4 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.4.f b/tasking/sources/tasking.4.f index edfd66a..2054b5e 100644 --- a/tasking/sources/tasking.4.f +++ b/tasking/sources/tasking.4.f @@ -1,4 +1,4 @@ -! @@name: tasking.4f +! @@name: tasking.4 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/tasking.5.c b/tasking/sources/tasking.5.c index 72485bd..9fc2a77 100644 --- a/tasking/sources/tasking.5.c +++ b/tasking/sources/tasking.5.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.5c +* @@name: tasking.5 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.5.f b/tasking/sources/tasking.5.f index ed7a2a5..588fca1 100644 --- a/tasking/sources/tasking.5.f +++ b/tasking/sources/tasking.5.f @@ -1,4 +1,4 @@ -! @@name: tasking.5f +! @@name: tasking.5 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.6.c b/tasking/sources/tasking.6.c index 397b59e..a8769ed 100644 --- a/tasking/sources/tasking.6.c +++ b/tasking/sources/tasking.6.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.6c +* @@name: tasking.6 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.6.f b/tasking/sources/tasking.6.f index e65dac5..dbb1c4e 100644 --- a/tasking/sources/tasking.6.f +++ b/tasking/sources/tasking.6.f @@ -1,4 +1,4 @@ -! @@name: tasking.6f +! @@name: tasking.6 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.7.c b/tasking/sources/tasking.7.c index c1dfbd0..a13d723 100644 --- a/tasking/sources/tasking.7.c +++ b/tasking/sources/tasking.7.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.7c +* @@name: tasking.7 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.7.f b/tasking/sources/tasking.7.f index 2a8b3b9..ba094d1 100644 --- a/tasking/sources/tasking.7.f +++ b/tasking/sources/tasking.7.f @@ -1,4 +1,4 @@ -! @@name: tasking.7f +! @@name: tasking.7 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/tasking.8.c b/tasking/sources/tasking.8.c index a1b76ac..0101265 100644 --- a/tasking/sources/tasking.8.c +++ b/tasking/sources/tasking.8.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.8c +* @@name: tasking.8 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.8.f b/tasking/sources/tasking.8.f index 02d837d..38fc281 100644 --- a/tasking/sources/tasking.8.f +++ b/tasking/sources/tasking.8.f @@ -1,4 +1,4 @@ -! @@name: tasking.8f +! @@name: tasking.8 ! @@type: F-fixed ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/tasking.9.c b/tasking/sources/tasking.9.c index 68b7c52..7261dfd 100644 --- a/tasking/sources/tasking.9.c +++ b/tasking/sources/tasking.9.c @@ -1,5 +1,5 @@ /* -* @@name: tasking.9c +* @@name: tasking.9 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/tasking.9.f b/tasking/sources/tasking.9.f index 857b351..abfd4ee 100644 --- a/tasking/sources/tasking.9.f +++ b/tasking/sources/tasking.9.f @@ -1,4 +1,4 @@ -! @@name: tasking.9f +! @@name: tasking.9 ! @@type: F-fixed ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/sources/taskloop.1.c b/tasking/sources/taskloop.1.c index 355b0b6..afc63e4 100644 --- a/tasking/sources/taskloop.1.c +++ b/tasking/sources/taskloop.1.c @@ -1,26 +1,26 @@ -/* -* @@name: taskloop.c -* @@type: C -* @@compilable: yes -* @@linkable: no -* @@expect: success -* @@version: omp_4.5 -*/ -void long_running_task(void); -void loop_body(int i, int j); - -void parallel_work(void) { - int i, j; -#pragma omp taskgroup - { -#pragma omp task - long_running_task(); // can execute concurrently - -#pragma omp taskloop private(j) grainsize(500) nogroup - for (i = 0; i < 10000; i++) { // can execute concurrently - for (j = 0; j < i; j++) { - loop_body(i, j); - } - } - } -} +/* +* @@name: taskloop.1 +* @@type: C +* @@compilable: yes +* @@linkable: no +* @@expect: success +* @@version: omp_4.5 +*/ +void long_running_task(void); +void loop_body(int i, int j); + +void parallel_work(void) { + int i, j; +#pragma omp taskgroup + { +#pragma omp task + long_running_task(); // can execute concurrently + +#pragma omp taskloop private(j) grainsize(500) nogroup + for (i = 0; i < 10000; i++) { // can execute concurrently + for (j = 0; j < i; j++) { + loop_body(i, j); + } + } + } +} diff --git a/tasking/sources/taskloop.1.f90 b/tasking/sources/taskloop.1.f90 index ff1fb5e..50465f6 100644 --- a/tasking/sources/taskloop.1.f90 +++ b/tasking/sources/taskloop.1.f90 @@ -1,25 +1,25 @@ -! @@name: taskloop.1f -! @@type: F-free -! @@compilable: yes -! @@linkable: no -! @@expect: success -! @@version: omp_4.5 -subroutine parallel_work - integer i - integer j -!$omp taskgroup - -!$omp task - call long_running_task() -!$omp end task - -!$omp taskloop private(j) grainsize(500) nogroup - do i=1,10000 - do j=1,i - call loop_body(i, j) - end do - end do -!$omp end taskloop - -!$omp end taskgroup -end subroutine +! @@name: taskloop.1 +! @@type: F-free +! @@compilable: yes +! @@linkable: no +! @@expect: success +! 
@@version: omp_4.5 +subroutine parallel_work + integer i + integer j +!$omp taskgroup + +!$omp task + call long_running_task() +!$omp end task + +!$omp taskloop private(j) grainsize(500) nogroup + do i=1,10000 + do j=1,i + call loop_body(i, j) + end do + end do +!$omp end taskloop + +!$omp end taskgroup +end subroutine diff --git a/tasking/sources/taskloop.2.c b/tasking/sources/taskloop.2.c index 11f3c20..027df2b 100644 --- a/tasking/sources/taskloop.2.c +++ b/tasking/sources/taskloop.2.c @@ -1,5 +1,5 @@ /* -* @@name: taskloop.2c +* @@name: taskloop.2 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/taskloop.2.f90 b/tasking/sources/taskloop.2.f90 index 15cd08e..818314e 100644 --- a/tasking/sources/taskloop.2.f90 +++ b/tasking/sources/taskloop.2.f90 @@ -1,4 +1,4 @@ -! @@name: taskloop.2f +! @@name: taskloop.2 ! @@type: F-free ! @@compilable: yes ! @@linkable: no diff --git a/tasking/sources/taskyield.1.c b/tasking/sources/taskyield.1.c index 88a7fc6..1b2c1b4 100644 --- a/tasking/sources/taskyield.1.c +++ b/tasking/sources/taskyield.1.c @@ -1,5 +1,5 @@ /* -* @@name: taskyield.1c +* @@name: taskyield.1 * @@type: C * @@compilable: yes * @@linkable: no diff --git a/tasking/sources/taskyield.1.f90 b/tasking/sources/taskyield.1.f90 index 623a639..8c2b2bb 100644 --- a/tasking/sources/taskyield.1.f90 +++ b/tasking/sources/taskyield.1.f90 @@ -1,4 +1,4 @@ -! @@name: taskyield.1f +! @@name: taskyield.1 ! @@type: F-free ! @@compilable: yes ! 
@@linkable: no diff --git a/tasking/task_dep.tex b/tasking/task_dep.tex index 4339428..b7103a6 100644 --- a/tasking/task_dep.tex +++ b/tasking/task_dep.tex @@ -1,9 +1,16 @@ \pagebreak \section{Task Dependences} \label{sec:task_depend} +\index{dependences!task dependences} \subsection{Flow Dependence} \label{subsec:task_flow_depend} +\index{task dependences!flow dependence} +\index{task construct@\code{task} construct!depend clause@\code{depend} clause} +\index{task construct@\code{task} construct} +\index{constructs!task@\code{task}} +\index{depend clause@\code{depend} clause} +\index{clauses!depend@\code{depend}} This example shows a simple flow dependence using a \code{depend} clause on the \code{task} construct. @@ -19,6 +26,7 @@ would have a race condition. \subsection{Anti-dependence} \label{subsec:task_anti_depend} +\index{task dependences!anti dependence} This example shows an anti-dependence using the \code{depend} clause on the \code{task} construct. @@ -34,6 +42,7 @@ race condition. \subsection{Output Dependence} \label{subsec:task_out_depend} +\index{task dependences!output dependence} This example shows an output dependence using the \code{depend} clause on the \code{task} construct. @@ -50,6 +59,7 @@ race condition. \pagebreak \subsection{Concurrent Execution with Dependences} \label{subsec:task_concurrent_depend} +\index{task dependences!concurrent execution with} In this example we show potentially concurrent execution of tasks using multiple flow dependences expressed using the \code{depend} clause on the \code{task} @@ -59,7 +69,7 @@ construct. \ffreeexample[4.0]{task_dep}{4} -The last two tasks are dependent on the first task. However there is no dependence +The last two tasks are dependent on the first task. However, there is no dependence between the last two tasks, which may execute in any order (or concurrently if more than one thread is available). Thus, the possible outputs are \texttt{"}x + 1 = 3. x + 2 = 4. 
\texttt{"} and \texttt{"}x + 2 = 4. x + 1 = 3. \texttt{"}. @@ -68,6 +78,7 @@ in any order and the program would have a race condition. \subsection{Matrix multiplication} \label{subsec:task_matrix_mult} +\index{task dependences!matrix multiplication} This example shows a task-based blocked matrix multiplication. Matrices are of NxN elements, and the multiplication is implemented using blocks of BSxBS elements. @@ -78,6 +89,12 @@ NxN elements, and the multiplication is implemented using blocks of BSxBS elemen \subsection{\code{taskwait} with Dependences} \label{subsec:taskwait_depend} +\index{task dependences!taskwait construct with@\code{taskwait} construct with} +\index{taskwait construct@\code{taskwait} construct} +\index{constructs!taskwait@\code{taskwait}} +\index{taskwait construct@\code{taskwait} construct!depend clause@\code{depend} clause} +\index{depend clause@\code{depend} clause} +\index{clauses!depend@\code{depend}} In this subsection three examples illustrate how the \code{depend} clause can be applied to a \code{taskwait} construct to make the @@ -156,6 +173,7 @@ taskwait does not produce race conditions on variables accessed by non-completed \pagebreak \subsection{Mutually Exclusive Execution with Dependences} \label{subsec:task_dep_mutexinoutset} +\index{task dependences!mutually exclusive execution} In this example we show a series of tasks, including mutually exclusive tasks, expressing dependences using the \code{depend} clause on the @@ -183,6 +201,9 @@ scheduling \code{longTaskBC} before \code{shortTaskAC}. \subsection{Multidependences Using Iterators} \label{subsec:depend_iterator} +\index{task dependences!using iterators} +\index{depend clause@\code{depend} clause!iterator modifier@\code{iterator} modifier} +\index{iterator modifier@\code{iterator} modifier} The following example uses an iterator to define a dynamic number of dependences. @@ -207,7 +228,7 @@ must indicate identical storage locations or disjoint storage locations". 
In this case each of the loop tasks use a single disjoint (different storage) element in their \code{depend} clause; however, the array-section storage area prescribed in the commented directive is neither -identical nor disjoint to the storage prescibed by the elements of the +identical nor disjoint to the storage prescribed by the elements of the loop tasks. The iterator overcomes this restriction by effectively creating n disjoint storage areas. @@ -217,6 +238,7 @@ creating n disjoint storage areas. \subsection{Dependence for Undeferred Tasks} \label{subsec:depend_undefer_task} +\index{task dependences!undeferred tasks} In the following example, we show that even if a task is undeferred as specified by an \code{if} clause that evaluates to \plc{false}, task dependences are @@ -227,7 +249,7 @@ the first task is completed before the second task. The second explicit task has an \code{if} clause that evaluates to \plc{false}. This means that the execution of the generating task (the implicit task of -the \code{single} region) must be suspended until the second explict task +the \code{single} region) must be suspended until the second explicit task is completed. But, because of the dependence, the first explicit task must complete first, then the second explicit task can execute and complete, and only then @@ -239,3 +261,16 @@ Thus, the program will always print "\texttt{x = 2}". \ffreeexample[4.0]{task_dep}{12} + +In OpenMP 5.1 the \scode{omp_all_memory} \splc{reserved locator} was introduced +to specify storage of all objects in memory. In the following example, +it is used in Task 4 as a convenient way to specify that the locator +(list item) denotes the storage of all objects (locations) in memory, and +will therefore match the \splc{a} and \splc{d} locators of Task 2, Task 3 and Task 6. +The dependences guarantee the ordered execution of Tasks 2 and 3 before 4, and +Task 4 before Task 6. 
+Since there are no dependences imposed on Task 1 and Task 5, they can be +scheduled to execute at any time, with no ordering. + +\cexample[5.1]{task_dep}{13} +\ffreeexample[5.1]{task_dep}{13} diff --git a/tasking/task_detach.tex b/tasking/task_detach.tex index 5c6e802..cc21b5e 100644 --- a/tasking/task_detach.tex +++ b/tasking/task_detach.tex @@ -1,6 +1,11 @@ \pagebreak \section{Task Detachment} \label{sec:task_detachment} +\index{task construct@\code{task} construct!detach clause@\code{detach} clause} +\index{detach clause@\code{detach} clause} +\index{clauses!detach@\code{detach}} +\index{routines!omp_fulfill_event@\scode{omp_fulfill_event}} +\index{omp_fulfill_event routine@\scode{omp_fulfill_event} routine} % if used, then generated task must be completed. % No definition of a detachable task diff --git a/tasking/task_priority.tex b/tasking/task_priority.tex index 75dd262..bc3e7fa 100644 --- a/tasking/task_priority.tex +++ b/tasking/task_priority.tex @@ -1,7 +1,9 @@ \pagebreak \section{Task Priority} \label{sec:task_priority} - +\index{task construct@\code{task} construct!priority clause@\code{priority} clause} +\index{priority clause@\code{priority} clause} +\index{clauses!priority@\code{priority}} %\subsection{Task Priority} diff --git a/tasking/taskgroup.tex b/tasking/taskgroup.tex index aebea36..6fed0f6 100644 --- a/tasking/taskgroup.tex +++ b/tasking/taskgroup.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{taskgroup} Construct} \label{sec:taskgroup} +\index{constructs!taskgroup@\code{taskgroup}} +\index{taskgroup construct@\code{taskgroup} construct} In this example, tasks are grouped and synchronized using the \code{taskgroup} construct. @@ -10,7 +12,7 @@ call) is created in the \code{parallel} region, and later a parallel tree traver is started (the task executing the root of the recursive \code{compute\_tree()} calls). 
While synchronizing tasks at the end of each tree traversal, using the \code{taskgroup} construct ensures that the formerly started background task -does not participate in the synchronization, and is left free to execute in parallel. +does not participate in the synchronization and is left free to execute in parallel. This is opposed to the behavior of the \code{taskwait} construct, which would include the background tasks in the synchronization. diff --git a/tasking/tasking.tex b/tasking/tasking.tex index b575695..0f4f1ba 100644 --- a/tasking/tasking.tex +++ b/tasking/tasking.tex @@ -1,6 +1,10 @@ \pagebreak \section{\code{task} and \code{taskwait} Constructs} \label{sec:task_taskwait} +\index{constructs!task@\code{task}} +\index{task construct@\code{task} construct} +\index{constructs!taskwait@\code{taskwait}} +\index{taskwait construct@\code{taskwait} construct} The following example shows how to traverse a tree-like structure using explicit tasks. Note that the \code{traverse} function should be called from within a @@ -57,6 +61,10 @@ low, the thread may resume execution of the task generating loop. \fexample[3.0]{tasking}{5} +\index{task construct@\code{task} construct!untied clause@\code{untied} clause} +\index{untied clause@\code{untied} clause} +\index{clauses!untied@\code{untied}} +\index{task scheduling point} The following example is the same as the previous one, except that the tasks are generated in an untied task. While generating the tasks, the implementation may reach its limit on unassigned tasks. If it does, the implementation is allowed @@ -128,6 +136,9 @@ the task region is complete. Therefore, no deadlock is possible. \ffreeexample[3.0]{tasking}{10} \clearpage +\index{task construct@\code{task} construct!mergeable clause@\code{mergeable} clause} +\index{clauses!mergeable@\code{mergeable}} +\index{mergeable clause@\code{mergeable} clause} The following examples illustrate the use of the \code{mergeable} clause in the \code{task} construct. 
In this first example, the \code{task} construct has been annotated with the \code{mergeable} clause. The addition of this clause @@ -148,13 +159,18 @@ This second example shows an incorrect use of the \code{mergeable} clause. In this example, the created task will access different instances of the variable \code{x} if the task is not merged, as \code{x} is \code{firstprivate}, but it will access the same variable \code{x} if the task is merged. As a result, -the behavior of the program is unspecified and it can print two different values +the behavior of the program is unspecified, and it can print two different values for \code{x} depending on the decisions taken by the implementation. \cexample[3.1]{tasking}{12} \ffreeexample[3.1]{tasking}{12} +\index{task construct@\code{task} construct!final clause@\code{final} clause} +\index{clauses!final@\code{final}} +\index{final clause@\code{final} clause} +\index{routines!omp_in_final@\scode{omp_in_final}} +\index{omp_in_final routine@\scode{omp_in_final} routine} The following example shows the use of the \code{final} clause and the \code{omp\_in\_final} API call in a recursive binary search program. To reduce overhead, once a certain depth of recursion is reached the program uses the \code{final} clause to create @@ -175,6 +191,9 @@ that can be merged if the \code{mergeable} clause is present. \ffreeexample[3.1]{tasking}{13} +\index{task construct@\code{task} construct!if clause@\code{if} clause} +\index{clauses!if@\code{if}} +\index{if clause@\code{if} clause} The following example illustrates the difference between the \code{if} and the \code{final} clauses. The \code{if} clause has a local effect. 
In the first nest of tasks, the one that has the \code{if} clause will be undeferred but diff --git a/tasking/taskloop.tex b/tasking/taskloop.tex index 9b48990..aacaa7f 100644 --- a/tasking/taskloop.tex +++ b/tasking/taskloop.tex @@ -1,6 +1,14 @@ \pagebreak \section{\code{taskloop} Construct} \label{sec:taskloop} +\index{constructs!taskloop@\code{taskloop}} +\index{taskloop construct@\code{taskloop} construct} +\index{taskloop construct@\code{taskloop} construct!grainsize clause@\code{grainsize} clause} +\index{taskloop construct@\code{taskloop} construct!nogroup clause@\code{nogroup} clause} +\index{clauses!grainsize@\code{grainsize}} +\index{grainsize clause@\code{grainsize} clause} +\index{clauses!nogroup@\code{nogroup}} +\index{nogroup clause@\code{nogroup} clause} The following example illustrates how to execute a long running task concurrently with tasks created with a \code{taskloop} directive for a loop having unbalanced amounts of work for its iterations. diff --git a/tasking/taskyield.tex b/tasking/taskyield.tex index c687d1c..2cd2da5 100644 --- a/tasking/taskyield.tex +++ b/tasking/taskyield.tex @@ -1,6 +1,8 @@ \pagebreak \section{\code{taskyield} Construct} \label{sec:taskyield} +\index{constructs!taskyield@\code{taskyield}} +\index{taskyield construct@\code{taskyield} construct} The following example illustrates the use of the \code{taskyield} directive. The tasks in the example compute something useful and then do some computation diff --git a/util/Makefile b/util/Makefile new file mode 100644 index 0000000..4d28691 --- /dev/null +++ b/util/Makefile @@ -0,0 +1,10 @@ +CC=gcc +CFLAGS=-O + +default: chk_tags.x + +chk_tags.x: chk_tags.c + $(CC) -o $@ $(CFLAGS) $? + +clean: + -rm -f *.o *.x diff --git a/util/chk_tags.c b/util/chk_tags.c new file mode 100644 index 0000000..7feff74 --- /dev/null +++ b/util/chk_tags.c @@ -0,0 +1,405 @@ +/* + * A utility to check validity of @@ in source codes. + * The tool can also be used to fix issues when possible. 
+ * + * Version 0.1 checks only @@name and @@type. + * Version 0.2 added check for other tags. + * Version 0.3 added check for line length. + */ +#define _GNU_SOURCE +#include +#include +#include + +#define VERNO "0.3" + +#define BUF_SIZE 256 +#define F_VFILE 0x01 +#define F_STRICT 0x02 +#define F_FIXIT 0x04 +#define F_CHKLLN 0x08 +#define T_PNAME 0x02 +#define T_MTAGS 0x04 + +/* list of tags */ +static int max_tags = 7; +static struct tags_s { + char *name; /* tag name */ + char *tvals; /* possible values */ + int nc; /* size of name */ + int r, c; /* line number, column number */ +} tags[] = { + {"name", "", 4, 0, 0}, + {"type", "", 4, 0, 0}, + {"compilable", "yes|no|maybe|unknown", 10, 0, 0}, + {"linkable", "yes|no|maybe|unknown", 8, 0, 0}, + {"expect", "success|failure|nothing|compile-time-error|ct-error" + "|runtime-error|rt-error|undefined-behavior", 6, 0, 0}, + {"version", "omp_*", 7, 0, 0}, + {"requires", "preprocessing", 8, 0, 0} +}; + +/* supported file extensions and types */ +static int max_exts = 4; +static char *fexts[] = {"c", "cpp", "f", "f90"}; +static char *types[] = {"C", "C++", "F-fixed", "F-free"}; + +/* match filename (mname) with tname value (tvalue) + up to file extension (fext). + return 0 - OK, 1 - mismatched */ +int check_name(char *tvalue, char *mname, char *fext, int vflg) +{ + char *mp, *cp; + int s = 1; + + mp = mname; + cp = tvalue; + while (*mp && *cp) { + /* strict checks for exact match of . */ + if (vflg&F_STRICT) { + if (mp == fext) break; + } + else { + if (*mp == '.') mp++; + if (*cp == '.') cp++; + } + if (!*cp || !*mp) break; + if (*cp != *mp) break; + mp++; + cp++; + } + if (*cp == '\0') { + /* non-strict allows postfix in tname value */ + if (!(vflg&F_STRICT) && (strcmp(mp, "90") == 0 || + strcmp(mp, "pp") == 0)) mp += 2; + if (*mp == '\0' || mp == fext) s = 0; + } + return s; +} + +/* match ttype value (ttype) with file extension (fext). 
+ return 0 - OK, 1 - mismatched; iext contains matched ext index */ +int check_type(char *ttype, char *fext, int *iext) +{ + int mext, s = 1; + /* get ext index from ttype value */ + for (mext = max_exts-1; mext >= 0; mext--) { + if (strcasecmp(ttype, fexts[mext]) == 0 || + strcasecmp(ttype, types[mext]) == 0) break; + } + if (mext < 0) { + if (strcasecmp(ttype, "ffree") == 0) + mext = 3; + else if (strcasecmp(ttype, "ffixed") == 0) + mext = 2; + } + *iext = mext; + if (mext >= 0 && fext && strcasecmp(fexts[mext], fext+1) == 0) + s = 0; + return s; +} + +/* check validity of a tag value. + return 0 - OK, 1 - unexpected */ +int check_tvalue(char *tvalue, int itag) +{ + int s = 0; + if (itag == 5) { + s = strncasecmp(tvalue, tags[itag].tvals, 4); + } + else if (itag >= 2 && itag <= 6) { + if (!strcasestr(tags[itag].tvals, tvalue)) s = 1; + } + return s; +} + +/* fix tags in source file + new file written to "_fix" + mname - filename without directory name + fext - file extension + iext - extension index */ +int fix_tags(char *fname, char *mname, char *fext, int iext) +{ + FILE *fp, *fou; + char buff[BUF_SIZE], oname[BUF_SIZE], *cp; + int lineno, tcnt, ic, i; + + fp = fopen(fname, "r"); + if (!fp) { + perror(fname); + return -1; + } + + /* fixing name tag value using fname */ + sprintf(oname, "%s_fix", fname); + fou = fopen(oname, "w"); + if (!fou) { + perror(oname); + fclose(fp); + return -1; + } + fprintf(stderr, "*** fixing with - %s\n", oname); + tcnt = lineno = 0; + while (fgets(buff, BUF_SIZE, fp)) { + lineno++; + if (tcnt > 1) { + fputs(buff, fou); + continue; + } + cp = strstr(buff, "@@"); + if (!cp) { + if (tcnt == 1) tcnt = 2; + fputs(buff, fou); + continue; + } + if (tcnt == 0) tcnt = 1; + for (i = 0; i < max_tags; i++) + if (tags[i].r == lineno) break; + if (i >= max_tags || tags[i].c <= 0) { + fputs(buff, fou); + continue; + } + for (cp += 2; *cp == ' ' || *cp == '\t'; cp++); + ic = tags[i].c; + cp += ic; + fprintf(fou, "%.*s", (int)(cp-buff), buff); + if 
(ic == tags[i].nc) { + /* add ':' if missing */ + fputc(':', fou); + if (*cp == ' ') cp++; + } + if (i == 0 || i == 1) { + while (*cp == ' ' || *cp == '\t') { + fputc(*cp, fou); + cp++; + } + } + if (i == 0) { + /* use filename (no extension) for tname value */ + fprintf(fou, "%.*s\n", (int)(fext-mname), mname); + } + else if (i == 1) { + /* use supported type value */ + if (iext < 0) { + for (iext = max_exts-1; iext > 0; iext--) + if (strcasecmp(fexts[iext], fext+1) == 0) break; + } + fprintf(fou, "%s\n", types[iext]); + } + else { + fputs(cp, fou); + } + } + fclose(fou); + + fclose(fp); + + return 0; +} + +/* print filename */ +void prn_fname(int *tcnt, char *fname, int vflg) +{ + if ((vflg&F_VFILE) || ((*tcnt)&T_PNAME)) return; + *tcnt |= T_PNAME; + fprintf(stderr, "%s\n", fname); +} + +/* check line length in a file */ +int chk_file(char *fname, int vflg, int lln) +{ + FILE *fp; + char buff[BUF_SIZE]; + int lineno, tcnt, nc, ntags, ctag; + + fp = fopen(fname, "r"); + if (!fp) { + perror(fname); + return -1; + } + if (vflg&F_VFILE) + fprintf(stderr, "file = %s\n", fname); + tcnt = lineno = ntags = ctag = 0; + while (fgets(buff, BUF_SIZE, fp)) { + lineno++; + if (ctag < 2 && strstr(buff, "@@")) { + ntags++; + if (ntags == 1 && *buff != '!') ntags += 2; + if (ctag == 0) ctag = 1; + } + else if (ctag == 1) + ctag = 2; + nc = strlen(buff); + while (nc > 1 && strchr(" \t\n", buff[nc-1])) nc--; + buff[nc] = '\0'; + if (nc > lln) { + tcnt++; + if (tcnt == 1 && !(vflg&F_VFILE)) + printf(">> File = %s\n", fname); + printf(" line %d (S-%d): nchars = %d\n", lineno, lineno-ntags, nc); + } + } + fclose(fp); + return (tcnt+1); +} + +/* process one file at a time */ +int proc_file(char *fname, int vflg) +{ + FILE *fp; + char buff[BUF_SIZE], *cp; + char *mname, *fext; + int lineno, tcnt, ic, iext, i, s; + + fp = fopen(fname, "r"); + if (!fp) { + perror(fname); + return -1; + } + + /* mname points to the filename without directory */ + mname = strrchr(fname, '/'); + mname = 
mname? (mname+1) : fname; + fext = strrchr(mname, '.'); + for (i = 0; i < max_tags; i++) + tags[i].r = tags[i].c = 0; + + /* search for @@ */ + if (vflg&F_VFILE) + fprintf(stderr, "file = %s\n", fname); + tcnt = lineno = 0; + iext = -1; + while (fgets(buff, BUF_SIZE, fp)) { + lineno++; + cp = strstr(buff, "@@"); + if (!cp) { + if (tcnt) break; + continue; + } + if (tcnt == 0) tcnt = 1; + for (cp += 2; *cp == ' ' || *cp == '\t'; cp++); + for (ic = 0; !strchr(": \t\n", cp[ic]); ic++); + i = max_tags; + if (ic >= 4) { + for (i = 0; i < max_tags; i++) { + if (tags[i].nc != ic) continue; + if (strncasecmp(cp, tags[i].name, ic) == 0) break; + } + } + if (i >= max_tags || tags[i].c != 0) { + prn_fname(&tcnt, fname, vflg); + if (tags[i].c != 0) + fprintf(stderr, "\t*** duplicated tag - @@%.*s (lines %d, %d)\n", + ic, cp, tags[i].r, lineno); + else + fprintf(stderr, "\t*** unrecognized tag - @@%.*s\n", ic, cp); + } + else { + tags[i].r = lineno; + if (cp[ic] != ':') { + prn_fname(&tcnt, fname, vflg); + fprintf(stderr, "\tmissing (:) after @@%s\n", tags[i].name); + tags[i].c = ic; + tcnt |= T_MTAGS; + } + else + ic++; + while (cp[ic] == ' ' || cp[ic] == '\t') ic++; + cp += ic; + s = strlen(cp) - 1; + while (s >= 0 && strchr("\t \n", cp[s])) cp[s--] = '\0'; + if (i == 0 || i == 1) { + if (i == 0) + s = check_name(cp, mname, fext, vflg); + else + s = check_type(cp, fext, &iext); + if (s) { + if (tags[i].c == 0) tags[i].c = ic; + prn_fname(&tcnt, fname, vflg); + tcnt |= T_MTAGS; + fprintf(stderr, "\tmis-matched @@%s: %s\n", tags[i].name, cp); + } + } + else { + s = check_tvalue(cp, i); + if (s) { + prn_fname(&tcnt, fname, vflg); + fprintf(stderr, "\t*** unknown value for @@%s: %s\n", + tags[i].name, cp); + } + } + if (tags[i].c == 0) tags[i].c = -1; + } + } + fclose(fp); + + if (!tags[0].c || !tags[1].c) { + prn_fname(&tcnt, fname, vflg); + fprintf(stderr, "\t*** no"); + if (!tags[0].c) fprintf(stderr, " @@%s", tags[0].name); + if (!tags[0].c && !tags[1].c) fprintf(stderr, " 
or"); + if (!tags[1].c) fprintf(stderr, " @@%s", tags[1].name); + fprintf(stderr, " tag found\n"); + } + + /* fix tag values when required */ + if ((tcnt&T_MTAGS) && (vflg&F_FIXIT)) { + fix_tags(fname, mname, fext, iext); + } + return tcnt; +} + +/* print usage and exit */ +void usage(char *pgnam) +{ + printf("chk_tags - v%s\n%s [-options] filename ...\n", VERNO, pgnam); + printf("options:\n\ + -v ; view filenames\n\ + -sc ; strictly check for \"@@:\"\n\ + -fix ; apply tag fix if needed (to _fix)\n\ + -c ; check line length over a limit (def =75)\n"); + exit(0); +} + +/* the driver */ +int main(int argc, char *argv[]) +{ + int cnt, fcnt = -1, bcnt = 0, vflg = 0, lln = 75; + char *pgnam = strrchr(argv[0], '/'); + pgnam = pgnam? (pgnam+1) : argv[0]; + while (--argc > 0) { + char *cp = *++argv; + if (strcmp(cp, "-v") == 0) + vflg |= F_VFILE; + else if (strcmp(cp, "-sc") == 0) + vflg |= F_STRICT; + else if (strcmp(cp, "-fix") == 0) + vflg |= (F_STRICT|F_FIXIT); + else if (strncmp(cp, "-c", 2) == 0) { + vflg |= F_CHKLLN; + if (cp[2]) { + lln = atoi(cp+2); + if (lln < 1) lln = 75; + } + } + else if (strcmp(cp, "-h") == 0) + usage(pgnam); + else { + if (vflg & F_CHKLLN) + cnt = chk_file(cp, vflg, lln); + else + cnt = proc_file(cp, vflg); + if (fcnt < 0) fcnt = 0; + if (cnt < 0) break; + fcnt++; + if (cnt > 1) bcnt++; + } + } + if (fcnt < 0) usage(pgnam); + fflush(NULL); + fprintf(stderr, "Number of files scanned = %d\n", fcnt); + if ((vflg & F_CHKLLN) && fcnt > 0) + fprintf(stderr, "Line length limit = %d\n", lln); + fprintf(stderr, "Number of bad files = %d\n", bcnt); + return 0; +}