merge with examples-internal/v5.1

2025-04-03 13:21:33 +01:00 · 2021-08-17 09:11:55 -07:00 · 2021-08-17 09:11:55 -07:00 · fb0edc81e7
commit fb0edc81e7
parent 60e8ece384
656 changed files with 4513 additions and 887 deletions
--- a/Chap_SIMD.tex
+++ b/Chap_SIMD.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{SIMD}
+\cchapter{SIMD}{SIMD}
 \label{chap:simd}

 Single instruction, multiple data (SIMD) is a form of parallel execution 
@ -12,7 +11,7 @@ Many processors have SIMD (vector) units that can perform simultaneously
 Loops without loop-carried backward dependency (or with dependency preserved using 
 ordered simd) are candidates for vectorization by the compiler for 
 execution with SIMD units. In addition, with state-of-the-art vectorization 
-technology and \code{declare simd} construct extensions for function vectorization
+technology and \code{declare simd} directive extensions for function vectorization
 in the OpenMP 4.5 specification, loops with function calls can be vectorized as well. 
 The basic idea is that a scalar function call in a loop can be replaced by a vector version 
 of the function, and the loop can be vectorized simultaneously by combining a loop 
@ -46,3 +45,8 @@ execution in different SIMD units.
 %\code{parallel for simd}).


+%===== Examples Sections =====
+\input{SIMD/SIMD}
+\input{SIMD/linear_modifier}
+
+
--- a/Chap_affinity.tex
+++ b/Chap_affinity.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{OpenMP Affinity}
+\cchapter{OpenMP Affinity}{affinity}
 \label{chap:openmp_affinity}

 OpenMP Affinity consists of a \code{proc\_bind} policy (thread affinity policy) and a specification of
@ -53,21 +52,21 @@ variables for the MPI library.  %Forked threads within an MPI process
 %which sets \code{OMP\_PLACES} specifically for the MPI process. 

 Threads of a team are positioned onto places in a compact manner, a 
-scattered distribution, or onto the master's place, by setting the 
+scattered distribution, or onto the primary thread's place, by setting the 
 \code{OMP\_PROC\_BIND} environment variable or the \code{proc\_bind} clause  to 
-\plc{close}, \plc{spread}, or \plc{master}, respectively.  When 
+\code{close}, \code{spread}, or \code{primary} (\code{master} has been deprecated), respectively.  When 
 \code{OMP\_PROC\_BIND} is set to FALSE no binding is enforced; and 
 when the value is TRUE, the binding is implementation defined to 
 a set of places in the \code{OMP\_PLACES} variable or to places 
 defined by the implementation if the \code{OMP\_PLACES} variable 
-is not set.
+is not set. 

 The \code{OMP\_PLACES} variable can also be set to an abstract name 
-(\plc{threads}, \plc{cores}, \plc{sockets}) to specify that a place is
+(\code{threads}, \code{cores}, \code{sockets}) to specify that a place is
 either a single hardware thread, a core, or a socket, respectively. 
 This description of the \code{OMP\_PLACES} is most useful when the 
 number of threads is equal to the number of hardware threads, cores
-or sockets.  It can also be used with a \plc{close} or \plc{spread} 
+or sockets.  It can also be used with a \code{close} or \code{spread} 
 distribution policy when the equality doesn't hold.


@ -116,3 +115,11 @@ distribution policy when the equality doesn't hold.
 %     thread #     0  * * * *   _ _ _ _   _ _  _  _   #mask for thread 0
 %     thread #     0  _ _ _ _   * * * *   _ _  _  _   #mask for thread 1
 %     thread #     0  _ _ _ _   _ _ _ _   * *  *  *   #mask for thread 2
+
+
+%===== Examples Sections =====
+\input{affinity/affinity}
+\input{affinity/task_affinity}
+\input{affinity/affinity_display}
+\input{affinity/affinity_query}
+
--- a/Chap_data_environment.tex
+++ b/Chap_data_environment.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{Data Environment}
+\cchapter{Data Environment}{data_environment}
 \label{chap:data_environment}
 The OpenMP \plc{data environment} contains data attributes of variables and
 objects.  Many constructs (such as \code{parallel}, \code{simd}, \code{task}) 
@ -73,3 +72,22 @@ it has been referenced (+1 on entry and -1 on exited) in nested (structured)
 map regions and/or accumulative (unstructured) mappings, determines the operation.
 Details of the \code{map} clause and reference count operation are specified 
 in the \plc{map Clause} subsection of the OpenMP Specifications document.
+
+
+%===== Examples Sections =====
+\input{data_environment/threadprivate}
+\input{data_environment/default_none}
+\input{data_environment/private}
+\input{data_environment/fort_loopvar}
+\input{data_environment/fort_sp_common}
+\input{data_environment/fort_sa_private}
+\input{data_environment/carrays_fpriv}
+\input{data_environment/lastprivate}
+\input{data_environment/reduction}
+\input{data_environment/udr}
+\input{data_environment/scan}
+\input{data_environment/copyin}
+\input{data_environment/copyprivate}
+\input{data_environment/cpp_reference}
+\input{data_environment/associate}
+
--- a/Chap_devices.tex
+++ b/Chap_devices.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{Devices}
+\cchapter{Devices}{devices}
 \label{chap:devices}

 The \code{target} construct consists of a \code{target} directive 
@ -51,3 +50,26 @@ This new specification does not affect the execution of
 pre-4.5 code; it is a necessary element for asynchronous 
 execution of the \code{target} region when using the new \code{nowait} 
 clause introduced in OpenMP 4.5.
+
+
+%===== Examples Sections =====
+\input{devices/target}
+\input{devices/target_defaultmap}
+\input{devices/target_pointer_mapping}
+\input{devices/target_structure_mapping}
+\input{devices/target_fort_allocatable_array_mapping}
+\input{devices/array_sections}
+\input{devices/array_shaping}
+\input{devices/target_mapper}
+\input{devices/target_data}
+\input{devices/target_unstructured_data}
+\input{devices/target_update}
+\input{devices/target_associate_ptr}
+\input{devices/declare_target}
+\input{devices/teams}
+\input{devices/async_target_depend}
+\input{devices/async_target_with_tasks}
+\input{devices/async_target_nowait}
+\input{devices/async_target_nowait_depend}
+\input{devices/device}
+
--- a/Chap_directives.tex
+++ b/Chap_directives.tex
@ -0,0 +1,45 @@
+\cchapter{OpenMP Directive Syntax}{directives}
+\label{chap:directive_syntax}
+
+OpenMP \emph{directives} use base-language mechanisms to specify OpenMP program behavior.
+In C code, the directives are formed exclusively with pragmas, whereas in C++
+code, directives are formed from either pragmas or attributes.
+Fortran directives are formed with comments in free form and fixed form sources (codes).
+All of these mechanisms allow the compilation to ignore the OpenMP directives if
+OpenMP is not supported or enabled.
+
+
+The OpenMP directive is a combination of the base-language mechanism and a \plc{directive-specification},
+as shown below. The \plc{directive-specification} consists
+of the \plc{directive-name} which may seldom have arguments, 
+followed by optional \plc{clauses}. Full details of the syntax can be found in the OpenMP Specification.
+Illustrations of the syntax are given in the examples.
+
+The formats for combining a base-language mechanism and a \plc{directive-specification} are:
+
+C/C++ pragmas
+\begin{indentedcodelist}
+\code{\#pragma omp} \plc{directive-specification}
+\end{indentedcodelist}
+
+C++ attributes
+\begin{indentedcodelist}
+\code{[[omp :: directive(} \plc{directive-specification} \code{)]]}
+\code{[[using omp : directive(} \plc{directive-specification} \code{)]]}
+\end{indentedcodelist}
+
+Fortran comments
+\begin{indentedcodelist}
+\code{!\$omp} \plc{directive-specification}
+\end{indentedcodelist}
+
+where \code{c\$omp} and \code{*\$omp} may be used in Fortran fixed form sources.
+
+
+%===== Examples Sections =====
+\input{directives/pragmas}
+\input{directives/attributes}
+\input{directives/fixed_format_comments}
+\input{directives/free_format_comments}
+
+
--- a/Chap_loop_transformations.tex
+++ b/Chap_loop_transformations.tex
@ -0,0 +1,25 @@
+\cchapter{Loop Transformations}{loop_transformations}
+\label{chap:loop_transformations}
+
+To obtain better performance on a platform, code may need to be restructured 
+relative to the way it is written (which is often for best readability).
+User-directed loop transformations accomplish this goal by providing a means 
+to separate code semantics and its optimization.
+
+A loop transformation construct states that a transformation operation is to be 
+performed on a set of nested loops.  This directive approach can target specific loops
+for transformation, rather than applying more time-consuming general compiler 
+heuristics methods with compiler options that may not be able to discover 
+optimal transformations.
+
+Loop transformations can be augmented by preprocessor support or OpenMP \code{metadirective} 
+directives, to select optimal dimension and size parameters for specific platforms,
+facilitating a single code base for multiple platforms.
+Moreover, directive-based transformations make experimenting easier: 
+whereby specific hot spots can be affected by transformation directives.
+
+
+%===== Examples Sections =====
+\input{loop_transformations/tile}
+\input{loop_transformations/unroll}
+
--- a/Chap_memory_model.tex
+++ b/Chap_memory_model.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{Memory Model}
+\cchapter{Memory Model}{memory_model}
 \label{chap:memory_model}

 OpenMP provides a shared-memory model that allows all threads on a given
@ -129,3 +128,10 @@ section of the OpenMP Specifications document.
 % in \plc{atomic Construct} subsection of the OpenMP Specifications document).

 % Examples 1-3 show the difficulty of synchronizing threads through \code{flush} and \code{atomic} directives.
+
+
+%===== Examples Sections =====
+\input{memory_model/mem_model}
+\input{memory_model/allocators}
+\input{memory_model/fort_race}
+
--- a/Chap_ompt_interface.tex
+++ b/Chap_ompt_interface.tex
@ -0,0 +1,19 @@
+\cchapter{OMPT Interface}{ompt_interface}
+\label{chap:ompt_interface}
+OMPT defines mechanisms and an API for interfacing with tools in the OpenMP program.
+
+The OMPT API provides the following functionality:
+\begin{itemize}
+  \addtolength{\itemindent}{1cm}
+  \item  examines the state associated with an OpenMP thread
+  \item  interprets the call stack of an OpenMP thread
+  \item  receives notification about OpenMP events
+  \item  traces activity on OpenMP target devices
+  \item  assesses implementation-dependent details
+  \item  controls a tool from an OpenMP application
+\end{itemize}
+
+The following sections will illustrate basic mechanisms and operations of the OMPT API.
+
+
+\input{ompt_interface/ompt_start}
--- a/Chap_parallel_execution.tex
+++ b/Chap_parallel_execution.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{Parallel Execution}
+\cchapter{Parallel Execution}{parallel_execution}
 \label{chap:parallel_execution}

 A single thread, the \plc{initial thread}, begins sequential execution of 
@ -10,7 +9,7 @@ A \code{parallel} construct encloses code,
 forming a parallel region.  An \plc{initial thread} encountering a \code{parallel} 
 region forks (creates) a team of threads at the beginning of the 
 \code{parallel} region, and joins them (removes from execution) at the 
-end of the region.  The initial thread becomes the master thread of the team in a 
+end of the region.  The initial thread becomes the primary thread of the team in a 
 \code{parallel} region with a \plc{thread} number equal to zero, the other 
 threads are numbered from 1 to number of threads minus 1. 
 A team may be comprised of just a single thread.
@ -19,9 +18,9 @@ Each thread of a team is assigned an implicit task consisting of code within the
 parallel region. The task that creates a parallel region is suspended while the
 tasks of the team are executed.  A thread is tied to its task; that is,
 only the thread assigned to the task can execute that task.  After completion 
-of the \code{parallel} region, the master thread resumes execution of the generating task.  
+of the \code{parallel} region, the primary thread resumes execution of the generating task.  

-%After the \code{parallel} region the master thread becomes the initial 
+%After the \code{parallel} region the primary thread becomes the initial 
 %thread again, and continues to execute the \plc{sequential part}.  

 Any task within a \code{parallel} region is allowed to encounter another
@ -43,7 +42,8 @@ defined. When dynamic adjustment is on and the number of threads is specified,
 the number of threads becomes an upper limit for the number of threads to be
 provided by the OpenMP runtime.

-\pagebreak
+%\pagebreak
+\bigskip
 WORKSHARING CONSTRUCTS

 A worksharing construct distributes the execution of the associated region
@ -96,9 +96,33 @@ region with a single structure block (section of code). Statements in the
 by threads of the team.  

 \bigskip
-MASTER CONSTRUCT
+MASKED CONSTRUCT
+
+The \code{masked} construct is not a worksharing construct.  The \code{masked} region is
+executed only by the primary thread. There is no implicit barrier (and flush) 
+at the end of the \code{masked} region; hence the other threads of the team continue
+execution of code statements beyond the \code{masked} region.
+The \code{master} construct, which has been deprecated in OpenMP 5.1, has identical semantics
+to the \code{masked} construct with no \code{filter} clause.
+
+
+%===== Examples Sections =====
+\input{parallel_execution/ploop}
+\input{parallel_execution/parallel}
+\input{parallel_execution/host_teams}
+\input{parallel_execution/nthrs_nesting}
+\input{parallel_execution/nthrs_dynamic}
+\input{parallel_execution/fort_do}
+\input{parallel_execution/nowait}
+\input{parallel_execution/collapse}
+\input{parallel_execution/linear_in_loop}
+\input{parallel_execution/psections}
+\input{parallel_execution/fpriv_sections}
+\input{parallel_execution/single}
+\input{parallel_execution/workshare}
+\input{parallel_execution/masked}
+\input{parallel_execution/loop}
+\input{parallel_execution/pra_iterator}
+\input{parallel_execution/set_dynamic_nthrs}
+\input{parallel_execution/get_nthrs}

-The \code{master} construct is not a worksharing construct.  The master region is
-is executed only by the master thread. There is no implicit barrier (and flush) 
-at the end of the \code{master} region; hence the other threads of the team continue
-execution beyond code statements beyond the \code{master} region.
--- a/Chap_program_control.tex
+++ b/Chap_program_control.tex
@ -1,17 +1,26 @@
-\pagebreak
-\chapter{Program Control}
-\label{sec:program_control}
+\cchapter{Program Control}{program_control}
+\label{chap:program_control}

-Some specific and elementary concepts of controlling program execution are
-illustrated in the examples of this chapter.  Control can be directly
-managed with conditional control code (ifdef's with the \code{\_OPENMP} 
-macro, and the Fortran sentinel (\code{!\$}) 
-for conditionally compiling). The \code{if} clause on some constructs
+Basic concepts and mechanisms for directing and controlling a program compilation and execution
+are provided in this introduction and illustrated in subsequent examples.
+
+\bigskip
+CONDITIONAL COMPILATION and EXECUTION
+
+Conditional compilation can be performed with conventional \#ifdef directives
+in C, C++, and Fortran, and additionally with OpenMP sentinel (\code{!\$}) in Fortran. 
+The \code{if} clause on some directives
 can direct the runtime to ignore or alter the behavior of the construct.
-Of course, the base-language \code{if} statements can be used to control the "execution" 
+Of course, the base-language \code{if} statements can be used to control the execution
 of stand-alone directives (such as \code{flush}, \code{barrier}, \code{taskwait}, 
 and  \code{taskyield}).
-However, the directives must appear in a block structure, and not as a substatement as shown in examples 1 and 2 of this chapter.
+However, the directives must appear in a block structure, and not as a substatement.
+The \code{metadirective} and \code{declare}~\code{variant} directives provide conditional 
+selection of directives and routines for compilation (and use), respectively.
+The \code{assume} and \code{requires} directives provide invariants
+for optimizing compilation, and essential features for compilation 
+and correct execution, respectively.
+

 \bigskip
 CANCELLATION
@ -28,15 +37,15 @@ The \code{cancel} construct is also a cancellation point for any other thread of
 to also continue execution at the end of the named region.  

 Also, once the specified region has been activated for cancellation any thread that encounters 
-a \code{cancellation point} construct with the same named region (\plc{construct-type-clause}),
+a \code{cancellation}~\code{point} construct with the same named region (\plc{construct-type-clause}),
 continues execution at the end of the region.

 For an activated \code{cancel taskgroup} construct, the tasks that
 belong to the taskgroup set of the innermost enclosing taskgroup region will be canceled. 

-A task that encounters the cancel taskgroup construct continues execution at the end of its
+A task that encounters a \code{cancel}~\code{taskgroup} construct continues execution at the end of its
 task region. Any task of the taskgroup that has already begun execution will run to completion,
-unless it encounters a \code{cancellation point}; tasks that have not begun execution "may" be
+unless it encounters a \code{cancellation}~\code{point}; tasks that have not begun execution may be
 discarded as completed tasks.

 \bigskip
@ -44,9 +53,10 @@ CONTROL VARIABLES

  Internal control variables (ICV) are used by implementations to hold values which control the execution
  of OpenMP regions.  Control (and hence the ICVs) may be set as implementation defaults, 
-  or set and adjusted through environment variables, clauses, and API functions.  Many of the ICV control
-  values are accessible through API function calls.  Also, initial ICV values are reported by the runtime
-  if the \code{OMP\_DISPLAY\_ENV} environment variable has been set to \code{TRUE}. 
+  or set and adjusted through environment variables, clauses, and API functions.  
+ %Many of the ICV control values are accessible through API function calls.  
+  Initial ICV values are reported by the runtime
+  if the \code{OMP\_DISPLAY\_ENV} environment variable has been set to \code{TRUE} or \code{VERBOSE}. 

 %As an example, the \plc{nthreads-var} is the ICV that holds the number of threads
 %to be used in a \code{parallel} region.  It can be set with the \code{OMP\_NUM\_THREADS} environment variable, 
@ -59,9 +69,9 @@ CONTROL VARIABLES
 \bigskip
 NESTED CONSTRUCTS

-Certain combinations of nested constructs are permitted, giving rise to a \plc{combined} construct
-consisting of two or more constructs.  These can be used when the two (or several) constructs would be used
-immediately in succession (closely nested). A combined construct can use the clauses of the component
+Certain combinations of nested constructs are permitted, giving rise to \plc{combined} constructs
+consisting of two or more directives.  These can be used when the two (or several) constructs would be used
+immediately in succession (closely nested). A \plc{combined} construct can use the clauses of the component
 constructs without restrictions.
 A \plc{composite} construct is a combined construct which has one or more clauses with (an often obviously)
 modified or restricted meaning, relative to when the constructs are uncombined. %%[appear separately (singly).
@ -72,14 +82,32 @@ modified or restricted meaning, relative to when the constructs are uncombined.
 %the parallel loop constructs and the \code{SIMD} construct), because the \code{collapse} clause must
 %explicitly address the ordering of loop chunking \plc{and} SIMD "combined" execution.

-Certain nestings are forbidden, and often the reasoning is obvious.  Worksharing constructs cannot be nested, and
+Certain nestings are forbidden, and often the reasoning is obvious.  For example, worksharing constructs cannot be nested, and
 the \code{barrier} construct cannot be nested inside a worksharing construct, or a \code{critical} construct. 
-Also, \code{target} constructs cannot be nested.  
+Also, \code{target} constructs cannot be nested, unless the nested target is a reverse offload.

-The \code{parallel} construct can be nested, as well as the \code{task} construct.  The parallel
-execution in the nested \code{parallel} construct(s) is control by the \code{OMP\_NESTED} and 
-\code{OMP\_MAX\_ACTIVE\_LEVELS} environment variables, and the \code{omp\_set\_nested()} and 
-\code{omp\_set\_max\_active\_levels()} functions.
+The \code{parallel} construct can be nested, as well as the \code{task} construct.  
+The parallel execution in the nested parallel construct(s) is controlled by the 
+\code{OMP\_MAX\_ACTIVE\_LEVELS} environment variable, and the \code{omp\_set\_max\_active\_levels} routine. 
+Use the \code{omp\_get\_max\_active\_levels} routine to determine the maximum levels provided by an implementation.
+As of OpenMP 5.0, use of the \code{OMP\_NESTED} environment variable and the \code{omp\_set\_nested} routine 
+has been deprecated.

 More details on nesting can be found in the \plc{Nesting of Regions} of the \plc{Directives} 
 chapter in the OpenMP Specifications document.
+
+
+%===== Examples Sections =====
+\input{program_control/cond_comp}
+\input{program_control/icv}
+\input{program_control/standalone}
+\input{program_control/cancellation}
+\input{program_control/requires}
+\input{program_control/variant}
+\input{program_control/metadirective}
+\input{program_control/nested_loop}
+\input{program_control/nesting_restrict}
+\input{program_control/target_offload}
+\input{program_control/interop}
+\input{program_control/utilities}
+
--- a/Chap_synchronization.tex
+++ b/Chap_synchronization.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{Synchronization}
+\cchapter{Synchronization}{synchronization}
 \label{chap:synchronization}

 The \code{barrier} construct is a stand-alone directive that requires all threads
@ -79,3 +78,23 @@ Scheduling constraints on task execution can be prescribed by the \code{depend}
 clause to enforce dependence on previously generated tasks.
 More details on controlling task executions can be found in the \plc{Tasking} Chapter
 in the OpenMP Specifications document. %(DO REF. RIGHT.)
+
+
+%===== Examples Sections =====
+\input{synchronization/critical}
+\input{synchronization/worksharing_critical}
+\input{synchronization/barrier_regions}
+\input{synchronization/atomic}
+\input{synchronization/atomic_restrict}
+\input{synchronization/flush_nolist}
+\input{synchronization/acquire_release}
+\input{synchronization/ordered}
+\input{synchronization/depobj}
+\input{synchronization/doacross}
+\input{synchronization/locks}
+\input{synchronization/init_lock}
+\input{synchronization/init_lock_with_hint} 
+\input{synchronization/lock_owner}
+\input{synchronization/simple_lock}
+\input{synchronization/nestable_lock}
+
--- a/Chap_tasking.tex
+++ b/Chap_tasking.tex
@ -1,5 +1,4 @@
-\pagebreak
-\chapter{Tasking}
+\cchapter{Tasking}{tasking}
 \label{chap:tasking}

 Tasking constructs provide units of work to a thread for execution.  
@ -50,3 +49,14 @@ A complete list of the tasking constructs and details of their clauses
 can be found in the \plc{Tasking Constructs} chapter of the OpenMP Specifications,
 in the \plc{OpenMP Application Programming Interface} section.

+
+%===== Examples Sections =====
+\input{tasking/tasking}
+\input{tasking/task_priority}
+\input{tasking/task_dep}
+\input{tasking/task_detach}
+\input{tasking/taskgroup}
+\input{tasking/taskyield}
+\input{tasking/taskloop}
+\input{tasking/parallel_masked_taskloop}
+
--- a/Contributions.md
+++ b/Contributions.md
@ -0,0 +1,153 @@
+# Contributing
+
+The usual process for adding new examples, making changes or adding corrections 
+is to submit an issue for discussion and initial evaluation of changes or example additions. 
+When there is a consensus at a meeting about the contribution, 
+you will be asked to submit a pull request.
+
+Of course, if your contribution is an obvious correction, clarification, or note, you
+may want to submit a pull request directly.
+
+-----------------------------------------------------------
+
+## The OpenMP Examples document
+
+The OpenMP Examples document is in LaTeX format.
+Please see the master LaTeX file, `openmp-examples.tex`, for more information.
+
+## Maintainer
+
+[OpenMP Examples Subcommittee](http://twiki.openmp.org/twiki/bin/view/OpenMPLang/OpenMPExamplesSubCommittee)
+For a brief revision history, see `Changes.log` in the repo.
+
+## Git procedure
+
+ * Fork your own branch of the OpenMP [examples-internal repo](https://github.com/openmp/examples-internal)
+ * Clone your fork locally
+ * If you are working on generic or old-version updates, create a branch off master.
+ * If you are working on an example for a release candidate for version #.#, create a branch off work_#.#.
+   1.) `git clone --branch <master|work_#.#> https://github.com/<my_account>/examples-internal`
+   2.) `git checkout  -b <branch_name>`
+   3.) ...  `add`, `commit`
+   4.) `git push -u origin <branch_name>`
+   5.) `make` or `make diff` will create a full-document pdf or just a pdf with differences (do this at any point).
+ * `git status` and `git branch -a` are your friends
+ * Submit an issue for your work (usually with a diff pdf), and then you will be asked to submit a pull request
+   * Create an issue by selecting the [issue tab](https://github.com/openmp/examples-internal/issues) and clicking on `new issue`.
+   * Use this MarkDown Cheatsheet for [issue formatting](https://wordpress.com/support/markdown-quick-reference/)
+   * More MarkDown details are available [here](https://markdown-it.github.io)
+   * You can cut and paste markdown formatted text in a [reader](https://dillinger.io) to see formatting effects.
+   * Forced spaces are available in Markdown.  On a Mac it is "option+space".
+   * Polling is available.  Go to [gh-poll](https://app.gh-polls.com/).  Type an option on each line, then click `copy markdown`, and paste the contents into the issue.  (Use preview to check your poll, and then submit it.)
+ * Create a pull request
+
+
+## Processing source code
+
+   * Prepare source code (C/C++ and Fortran) and a text description (use similar styles found in recent examples)
+   * Determine the *example* name `<ename>`, *sequence* number `<seq-no>` and *compiler* suffix `<csuffix>` for the example
+      * The syntax is:   `<ename>.<seq-no>.<csuffix>`   (e.g. `affinity_display.1.f90`)
+      * The example name may be a Section name (e.g. affinity), or a Subsection name (affinity_display)
+      * If you are creating a new Chapter, it may be the chapter name.
+   * New examples are usually added at the end of a Section or Subsection. Number it as the next number in the sequence numbers for examples in that Section or Subsection.
+   * The compiler suffix `<csuffix>` is one of `c`, `cpp`, `f`, or `f90` for C, C++, fixed-form Fortran, and free-form Fortran codes, respectively.
+   * Insert the code in the sources directory for each chapter, and include the following metadata:
+   * Metadata Tags for example sources:
+     ```
+       @@name:        <ename>.<seq-no>[c|cpp|f|f90]
+       @@type:        C|C++|F-fixed|F-free
+       @@compilable:  yes|no|maybe
+       @@linkable:    yes|no|maybe
+       @@expect:      success|failure|nothing|rt-error
+       @@version:     omp_<verno>
+     ```
+      * **name**
+       is the name of an example
+      * **type**
+       is the source code type, which can be translated into or from proper file extension (c,cpp,f,f90)
+      * **compilable**
+       indicates whether the source code is compilable
+      * **linkable**
+       indicates whether the source code is linkable
+      * **expect**
+       indicates an expected result for testing purposes. "`success|failure|nothing`" applies
+       to the result of code compilation; "`rt-error`" is for a case where compilation may be
+       successful, but the code contains potential runtime issues (such as a race condition).
+       An alternative would be to just use "`conforming`" or "`non-conforming`".
+      * **version**
+       indicates features for a specific OpenMP version, such as "`omp_5.0`"
+
+
+## Process for text
+   * Create or update the description text in a Section/Subsection file under each chapter directory, usually `<chap_directory>/<ename>.tex`
+   * If adding a new Subsection, just include it in the appropriate subsection file (`<subsection>.tex`)
+   * If adding a new Section, create a `<section>.tex` file and add an entry in the corresponding chapter file, such as `Chap_affinity.tex`
+   * If adding a new Chapter, create a `Chap_<chap_name>.tex` file with introductory text, and add a new `<section>.tex` file with text and links to the code. Update `Makefile` and `openmp-examples.tex` to include the new chapter file.
+   * Commit your changes into your fork of examples-internal
+   * Submit your issue at [OpenMP Examples internal repo](https://github.com/openmp/examples-internal/issues), and include a PDF when ready.
+   * Examples subcommittee members can view [meeting schedule and notes](http://twiki.openmp.org/twiki/bin/view/OpenMPLang/ExamplesSchedules)
+   * Shepherd your issue to acceptance (discussed at weekly Examples meeting and in issue comments)
+   * When it is in a ready state, you should then submit a pull request.
+   * It will be reviewed and voted on, and changes will be requested.
+   * Once the last changes are made, it will be verified and merged into an appropriate branch (either the `master` branch or a working branch).
+
+
+
+
+# LaTeX macros for examples
+
+* Source code with language h-rules
+```
+   \cexample[<verno>]{<ename>}{<seq-no>}     % for C/C++ examples
+   \cppexample[<verno>]{<ename>}{<seq-no>}   % for C++ examples
+   \fexample[<verno>]{<ename>}{<seq-no>}     % for fixed-form Fortran examples
+   \ffreeexample[<verno>]{<ename>}{<seq-no>} % for free-form Fortran examples
+```
+
+* Source code without language h-rules
+```
+   \cnexample[<verno>]{<ename>}{<seq-no>}
+   \cppnexample[<verno>]{<ename>}{<seq-no>}
+   \fnexample[<verno>]{<ename>}{<seq-no>}
+   \ffreenexample[<verno>]{<ename>}{<seq-no>}
+   \srcnexample[<verno>]{<ename>}{<seq-no>}{<ext>}
+```
+
+   Optional `<verno>` can be supplied in a macro to include a specific OpenMP
+   version in the example header.  This option also suggests one additional
+   tag (`@@version`) line is included in the corresponding source code. 
+   If this is not the case (i.e., no `@@version` tag line), one needs to 
+   prefix `<verno>` with an underscore '\_' symbol in the macro.
+
+   The exception is macro `\srcnexample`, for which the corresponding
+   source code should not contain any `@@` metadata tags. The `ext` argument
+   to this macro is the file extension (such as `h`, `hpp`, `inc`).
+
+* Language h-rules
+```
+   \cspecificstart, \cspecificend
+   \cppspecificstart, \cppspecificend
+   \ccppspecificstart, \ccppspecificend
+   \fortranspecificstart, \fortranspecificend
+```
+
+* Chapter and section macros
+```
+   \cchapter{<Chapter Name>}{<chap_directory>}
+```
+
+The `\cchapter` macro is used for starting a chapter with proper page spacing.
+`<Chapter Name>` is the name of a chapter and `<chap_directory>` is the name 
+of the chapter directory.  All section and subsection files for the chapter 
+should be placed under `<chap_directory>`. The corresponding example sources 
+should be placed under the `sources` directory inside `<chap_directory>`.
+
+A previously-defined macro `\sinput{<section_file>}` to import a section
+file from `<chap_directory>` is no longer supported.  Please use
+`\input{<chap_directory>/<section_file>}` explicitly.
+
+* See `openmp.sty` for more information
+
+### License
+
+For copyright information, please see `omp_copyright.txt`.
--- a/Deprecated_Features_Chapt.tex
+++ b/Deprecated_Features_Chapt.tex
@ -0,0 +1,21 @@
+\bchapter{Deprecated Features}
+\label{chap:deprecated_features}
+
+Deprecation of features began in OpenMP 5.0. 
+Examples that use a deprecated feature have been updated with an equivalent replacement feature.  
+
+Deprecations affecting examples are the following:
+\begin{description}[labelindent=5mm,font=\normalfont]
+\item[5.1] -- \ \scode{masked} construct          replaces \scode{master} construct.
+\item[5.1] -- \ \scode{primary} affinity policy   replaces \scode{master} affinity policy.
+\item[5.0] -- \ \scode{omp_sync_hint_*} constants replace  \scode{omp_lock_hint_*} constants.
+\end{description}
+
+These replacements appear in examples that otherwise illustrate earlier features.
+When using a compiler that is compliant with a version prior to 
+the indicated version, the earlier form of
+an example is restored by a C-style conditional compilation using the \scode{_OPENMP} macro.
+
+Since Fortran compilers do not preprocess codes by default, a Fortran preprocessor
+flag will be required to compile Fortran examples with the C-style conditional 
+compilation statements.
--- a/Examples_Chapt.tex
+++ b/Examples_Chapt.tex
@ -1,7 +1,6 @@
-
-\chapter*{Examples}
+\bchapter{Examples}
 \label{chap:examples}
-\addcontentsline{toc}{chapter}{\protect\numberline{}Examples}
+
 The following are examples of the OpenMP API directives, constructs, and routines.
 \ccppspecificstart
 A statement following a directive is compound only when necessary, and a 
@ -12,15 +11,14 @@ Each example is labeled as \plc{ename.seqno.ext}, where \plc{ename} is
 the example name, \plc{seqno} is the sequence number in a section, and 
 \plc{ext} is the source file extension to indicate the code type and 
 source form.  \plc{ext} is one of the following:
-\begin{compactitem}
-\item \plc{c} -- C code,
-\item \plc{cpp} -- C++ code,
-\item \plc{f} -- Fortran code in fixed form, and
-\item \plc{f90} -- Fortran code in free form.
-\end{compactitem}
+\begin{description}[noitemsep,labelindent=5mm,widest=f90]
+\item[\plc{c}] -- \ C code,
+\item[\plc{cpp}] -- \ C++ code,
+\item[\plc{f}] -- \ Fortran code in fixed form, and
+\item[\plc{f90}] -- \ Fortran code in free form.
+\end{description}

 Some of the example labels may include version information 
 (\code{\small{}omp\_\plc{verno}}) to indicate features that are illustrated
 by an example for a specific OpenMP version, such as ``\plc{scan.1.c} 
 \;(\code{\small{}omp\_5.0}).''
-
--- a/Examples_master.tex
+++ b/Examples_master.tex
@ -1,13 +0,0 @@
-\pagebreak
-\section{The \code{master} Construct}
-\label{sec:master}
-
-The following example demonstrates the master construct . In the example, the master 
-keeps track of how many iterations have been executed and prints out a progress 
-report. The other threads skip the master region without waiting.
-
-\cexample{master}{1}
-
-\fexample{master}{1}
-
-
--- a/Examples_parallel_master_taskloop.tex
+++ b/Examples_parallel_master_taskloop.tex
@ -1,33 +0,0 @@
-\pagebreak
-\section{The \code{parallel master taskloop} Construct}
-\label{sec:parallel_master_taskloop}
-
-In the OpenMP 5.0 Specification several combined constructs containing
-the \code{taskloop} construct were added.
-   
-Just as the \code{for} and \code{do} constructs have been combined
-with the \code{parallel} construct for convenience, so too, the combined
-\code{parallel}~\code{master}~\code{taskloop} and 
-\code{parallel}~\code{master}~\code{taskloop}~\code{simd}
-constructs have been created for convenience.
-  
-In the following example the first \code{taskloop} construct is enclosed
-by the usual \code{parallel} and \code{master} constructs to form
-a team of threads, and a single task generator (master thread) for
-the \code{taskloop} construct.
-
-The same OpenMP operations for the first taskloop are accomplished by the second
-taskloop with the \code{parallel}~\code{master}~\code{taskloop} 
-combined construct. 
-The third taskloop uses the combined \code{parallel}~\code{master}~\code{taskloop}~\code{simd} 
-construct to accomplish the same behavior as closely nested \code{parallel master},
-and \code{taskloop simd} constructs.
-
-As with any combined construct the clauses of the components may be used
-with appropriate restrictions. The combination of the \code{parallel}~\code{master} construct
-with the \code{taskloop} or \code{taskloop}~\code{simd} construct produces no additional 
-restrictions.
-
-\cexample[5.0]{parallel_master_taskloop}{1}
-
-\ffreeexample[5.0]{parallel_master_taskloop}{1}
--- a/Examples_target_structure_mapping.tex
+++ b/Examples_target_structure_mapping.tex
@ -1,54 +0,0 @@
-\pagebreak
-\section{Structure mapping}
-\label{sec:structure_mapping}
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-In the example below, only structure elements \plc{S.a}, \plc{S.b} and \plc{S.p} 
-of the \plc{S} structure appear in \code{map} clauses of a \code{target} construct.
-Only these components have corresponding variables and storage on the device.  
-Hence, the large arrays, \plc{S.buffera} and \plc{S.bufferb}, and the \plc{S.x} component have no storage 
-on the device and cannot be accessed.  
-
-Also, since the pointer member \plc{S.p} is used in an array section of a 
-\code{map} clause, the array storage of the array section on the device, 
-\plc{S.p[:N]}, is \emph{attached} to the pointer member \plc{S.p} on the device.
-Explicitly mapping the pointer member \plc{S.p} is optional in this case.
-
-Note: The buffer arrays and the \plc{x} variable have been grouped together, so that
-the components that will reside on the device are all together (without gaps).
-This allows the runtime to optimize the transfer and the storage footprint on the device.
-
-\cexample[5.0]{target_struct_map}{1}
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-The following example is a slight modification of the above example for 
-a C++ class.  In the member function \plc{SAXPY::driver} 
-the array section \plc{p[:N]} is \emph{attached} to the pointer member \plc{p}
-on the device.
- 
-\cppexample[5.0]{target_struct_map}{2}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-%In this example a pointer, \plc{p}, is mapped in a 
-%\code{target}~\code{data} construct (\code{map(p)}) and remains 
-%persistent throughout the \code{target}~\code{data} region. The address stored
-%on the host is not assigned to the device pointer variable, and 
-%the device value is not copied back to the host at the end of the
-%region (for a pointer, it is as though \code{map(alloc:p}) is effectively
-%used).  The array section, \plc{p[:N]}, is mapped on both \code{target}
-%constructs, and the pointer \plc{p} on the device is attached at the
-%beginning and detached at the end of the regions to the newly created
-%array section on the device.
-%
-%Also, in the following example the global variable, \plc{a}, becomes 
-%allocated when it is first used on the device in a \code{target} region, 
-%and persists on the device for all target regions.  The value on the
-%device and host may be different, as shown by the print statements.
-%The values may be made consistent with the \code{update} construct,
-%as shown in the \plc{declare\_target.3.c} and \plc{declare\_target.3.f90} 
-%examples.
-%
-%\cexample{target_struct_map}{2}
--- a/Foreword_Chapt.tex
+++ b/Foreword_Chapt.tex
@ -1,19 +1,17 @@
-\pagebreak
-\chapter*{Foreword}
+\bchapter{Foreword}
 \label{chap:foreword}
-\addcontentsline{toc}{chapter}{\protect\numberline{}Foreword}

 The OpenMP Examples document has been updated with new features
-found in the OpenMP 5.0 Specification. The additional examples and updates
+found in the OpenMP 5.1 Specification. The additional examples and updates
 are referenced in the Document Revision History of the Appendix on page~\pageref{chap:history}.

-Text describing an example with a 5.0 feature specifically states
-that the feature support begins in the OpenMP 5.0 Specification.  Also,
-an \code{\small omp\_5.0} keyword has been added to metadata in the source code.
-These distinctions are presented to remind readers that a 5.0 compliant 
+Text describing an example with a 5.1 feature specifically states
+that the feature support begins in the OpenMP 5.1 Specification.  Also,
+an \code{\small omp\_5.1} keyword is included in the metadata of the source code.
+These distinctions are presented to remind readers that a 5.1 compliant 
 OpenMP implementation is necessary to use these features in codes.

-Examples for most of the 5.0 features are included in this document,
+Examples for most of the 5.1 features are included in this document,
 and incremental releases will become available as more feature examples
 and updates are submitted, and approved by the OpenMP Examples Subcommittee.

@ -21,3 +19,5 @@ and updates are submitted, and approved by the OpenMP Examples Subcommittee.
 Examples Subcommittee Co-chairs: \smallskip\linebreak
 Henry Jin (\textsc{NASA} Ames Research Center) \linebreak
 Kent Milfeld (\textsc{TACC}, Texas Advanced Computing Center)
+
+
--- a/History.tex
+++ b/History.tex
@ -1,6 +1,73 @@
-\chapter{Document Revision History}
+\cchapter{Document Revision History}{history}
 \label{chap:history}

+%=====================================
+\section{Changes from 5.0.1 to 5.1}
+\label{sec:history_501_to_51}
+
+\begin{itemize}
+\item General changes:
+\begin{itemize}
+  \item Replaced \code{master} construct example with equivalent \code{masked} construct example (\specref{sec:masked})
+  \item Primary thread is now used to describe thread number 0 in the current team
+  \item \code{primary} thread affinity policy is now used to specify that every 
+      thread in the team is assigned to the same place as the primary thread (\specref{subsec:affinity_primary})
+  \item The \scode{omp_lock_hint_*} constants have been renamed \scode{omp_sync_hint_*} (\specref{sec:critical}, \specref{sec:locks})
+\end{itemize}
+
+\item Added the following new chapters:
+\begin{itemize}
+  \item Deprecated Features (on page~\pageref{chap:deprecated_features})
+  \item Directive Syntax (\specref{chap:directive_syntax})
+  \item Loop Transformations (\specref{chap:loop_transformations})
+  \item OMPT Interface (\specref{chap:ompt_interface})
+\end{itemize}
+
+\item Added the following examples for the 5.1 features:
+\begin{itemize}
+  \item OpenMP directives in C++ \plc{attribute} specifiers
+    (\specref{sec:attributes})
+  \item Directive syntax adjustment to allow Fortran \code{BLOCK} ... 
+    \code{END}~\code{BLOCK} as a structured block 
+      (\specref{sec:fortran_free_format_comments})
+  \item \code{omp\_target\_is\_accessible} API routine
+      (\specref{sec:pointer_mapping})
+  \item Fortran allocatable array mapping in \code{target} regions (\specref{sec:fort_allocatable_array_mapping})
+  \item \code{begin}~\code{declare}~\code{target} (with
+        \code{end}~\code{declare}~\code{target}) directive
+      (\specref{subsec:declare_target_class})
+  \item \code{tile} construct             (\specref{sec:tile})
+  \item \code{unroll} construct           (\specref{sec:unroll})
+  \item Reduction with the \code{scope} construct
+      (\specref{subsec:reduction_scope})
+  \item  \code{metadirective} directive with dynamic \code{condition} selector
+      (\specref{sec:metadirective})
+  \item \code{interop} construct  (\specref{sec:interop})
+  \item Environment display with the \scode{omp_display_env} routine
+      (\specref{subsec:display_env})
+  \item \code{error} directive  (\specref{subsec:error})
+\end{itemize}
+
+\item Included additional examples for the 5.0 features:
+\begin{itemize}
+  \item \code{collapse} clause for non-rectangular loop nest
+      (\specref{sec:collapse})
+  \item \code{detach} clause for tasks (\specref{sec:task_detachment})
+  \item Pointer attachment for a structure member (\specref{sec:structure_mapping})
+  \item Host and device pointer association with the \scode{omp_target_associate_ptr} routine (\specref{sec:target_associate_ptr})
+  
+  \item Sample code on activating the tool interface 
+      (\specref{sec:ompt_start})
+\end{itemize}
+
+\item Added other examples:
+\begin{itemize}
+  \item The \scode{omp_get_wtime} routine (\specref{subsec:get_wtime})
+\end{itemize}
+\end{itemize}
+
+
+%=====================================
 \section{Changes from 5.0.0 to 5.0.1}
 \label{sec:history_50_to_501}

@ -18,7 +85,7 @@ OpenMP 3.0 and later.
 \item \code{conditional} modifier for the \code{lastprivate} clause                             (\specref{sec:lastprivate})
 \item \code{task} modifier for the \code{reduction} clause                                      (\specref{subsec:task_reduction})
 \item Reduction on combined target constructs                                                     (\specref{subsec:target_reduction})
-\item Task reduction with target constructs 
+\item Task reduction with \code{target} constructs 
   (\specref{subsec:target_task_reduction})
 \item \code{scan} directive for returning the \emph{prefix sum} of a reduction                  (\specref{sec:scan})

@ -59,12 +126,12 @@ in \specref{sec:mem_model}.
 \item \code{mutexinoutset} task dependences                  (\specref{subsec:task_dep_mutexinoutset})
 \item Multidependence Iterators (in \code{depend} clauses)   (\specref{subsec:depend_iterator})
 \item Combined constructs: \code{parallel}~\code{master}~\code{taskloop} and \code{parallel}~\code{master}~\code{taskloop}~\code{simd}
-                                                                                  (\specref{sec:parallel_master_taskloop})
+                                                                                  (\specref{sec:parallel_masked_taskloop})
 \item Reverse Offload through \plc{ancestor} modifier of \code{device} clause.    (\specref{subsec:target_reverse_offload})
 \item Pointer Mapping  - behavior of mapped pointers                              (\specref{sec:pointer_mapping})   %Example_target_ptr_map*
 \item Structure Mapping  - behavior of mapped structures                          (\specref{sec:structure_mapping}) %Examples_target_structure_mapping.tex target_struct_map*
 \item Array Shaping with the \plc{shape-operator}                                 (\specref{sec:array-shaping})
-\item The \code{declare}~\code{mapper} construct                                  (\specref{sec:declare_mapper})
+\item The \code{declare}~\code{mapper} directive                                  (\specref{sec:declare_mapper})
 \item Acquire and Release Semantics Synchronization: Memory ordering 
      clauses \code{acquire}, \code{release}, and \code{acq\_rel} were added
      to flush and atomic constructs
@ -150,7 +217,7 @@ Added the following new examples:
 \item array sections in device constructs    (\specref{sec:array_sections})
 \item \code{target}~\code{data} construct    (\specref{sec:target_data})
 \item \code{target}~\code{update} construct  (\specref{sec:target_update})
-\item \code{declare}~\code{target} construct (\specref{sec:declare_target})
+\item \code{declare}~\code{target} directive (\specref{sec:declare_target})
 \item \code{teams} constructs                (\specref{sec:teams})
 \item asynchronous execution of a \code{target} region using tasks (\specref{subsec:async_target_with_tasks})
 \item device runtime routines                (\specref{sec:device})
--- a/Introduction_Chapt.tex
+++ b/Introduction_Chapt.tex
@ -1,5 +1,5 @@
 % This is the introduction for the OpenMP Examples document.
-% This is an included file. See the master file (openmp-examples.tex) for more information.
+% This is an included file. See the main file (openmp-examples.tex) for more information.
 %
 % When editing this file:
 %
@ -32,9 +32,9 @@
 %             This is a \plc{var-name}.
 %

-\chapter*{Introduction}
+\bchapter{Introduction}
 \label{chap:introduction}
-\addcontentsline{toc}{chapter}{\protect\numberline{}Introduction}
+
 This collection of programming examples supplements the OpenMP API for Shared
 Memory Parallelization specifications, and is not part of the formal specifications. It
 assumes familiarity with the OpenMP specifications, and shares the typographical
@ -59,7 +59,7 @@ directory at
 \href{https://github.com/OpenMP/Examples}{https://github.com/OpenMP/Examples}.
 The codes for this OpenMP \VER{} Examples document have the tag \plc{v\VER}.

-%\href{https://github.com/OpenMP/Examples/tree/master/sources}{https://github.com/OpenMP/Examples/sources}.
+%\href{https://github.com/OpenMP/Examples/tree/main/sources}{https://github.com/OpenMP/Examples/sources}.

 Complete information about the OpenMP API and a list of the compilers that support
 the OpenMP API can be found at the OpenMP.org web site
--- a/27
+++ b/27
@ -1,7 +1,7 @@
 # Makefile for the OpenMP Examples document in LaTeX format. 
-# For more information, see the master document, openmp-examples.tex.
+# For more information, see the main document, openmp-examples.tex.

-version=5.0.1
+version=5.1
 default: openmp-examples.pdf
 diff: openmp-diff-abridged.pdf

@ -9,13 +9,16 @@ diff: openmp-diff-abridged.pdf
 CHAPTERS=Title_Page.tex \
 	Foreword_Chapt.tex \
 	Introduction_Chapt.tex \
-	Examples_*.tex \
-	History.tex
+	Examples_Chapt.tex \
+	Deprecated_Features_Chapt.tex \
+	Chap_*.tex \
+	History.tex \
+	*/*.tex

-SOURCES=sources/*.c \
-	sources/*.cpp \
-	sources/*.f90 \
-	sources/*.f 
+SOURCES=*/sources/*.c \
+	*/sources/*.cpp \
+	*/sources/*.f90 \
+	*/sources/*.f 

 INTERMEDIATE_FILES=openmp-examples.pdf \
 		openmp-examples.toc \
@ -49,11 +52,11 @@ endif
 ifdef DIFF_FROM
    VC_DIFF_FROM := -r ${DIFF_FROM}
 else
-    VC_DIFF_FROM := -r master
+    VC_DIFF_FROM := -r work_5.1
 endif

 DIFF_TO:=HEAD
-DIFF_FROM:=master
+DIFF_FROM:=work_5.1
 DIFF_TYPE:=UNDERLINE

 COMMON_DIFF_OPTS:=--math-markup=whole  \
@ -66,6 +69,10 @@ VC_DIFF_MINIMAL_OPTS:= --only-changes --force

 %.tmpdir: $(wildcard *.sty) $(wildcard *.png) $(wildcard *.aux) openmp-examples.pdf
 	mkdir -p $@/sources
+	for i in affinity devices loop_transformations parallel_execution SIMD tasking \
+		 data_environment memory_model program_control synchronization \
+		 directives ompt_interface;  do \
+	  mkdir -p $@/$$i; ln -sf "$$PWD"/$$i/sources $@/$$i/sources; done
 	mkdir -p $@/figs
 	cp -f $^ "$@/"
 	cp -f sources/* "$@/sources"
--- a/76
+++ b/76
@ -1,76 +0,0 @@
-This is the OpenMP Examples document in LaTeX format.
-Please see the master file, openmp-examples.tex, for more information.
-
-For a brief revision history, please see Changes.log.
-
-For copyright information, please see omp_copyright.txt.
-
-
-1) Process for adding an example
-
-   - Prepare source code and text description
-   - Give a high level description in a trac ticket
-   - Determine a name (ename) for the example
-      - Propose a new name if creating a new chapter
-      - Use the existing name if adding to an existing chapter
-   - Number the example within the chapter (seq-no)
-   - Create files for the source code with proper tags in
-      sources/Example_<ename>.<seq-no>c.c
-      sources/Example_<ename>.<seq-no>f.f
-   - Create or update the description text in the chapter file
-      Examples_<ename>,tex
-   - If needed, add the new chapter file name in 
-      Makefile
-      openmp-examples.tex
-   - Commit the changes in git and push to the GitHub repo
-   - Discuss and vote in committee
-
-2) Tags (meta data) for example sources
-
-  @@name:        <ename>.<seq-no>[c|cpp|f|f90]
-  @@type:        C|C++|F-fixed|F-free
-  @@compilable:  yes|no|maybe
-  @@linkable:    yes|no|maybe
-  @@expect:      success|failure|nothing|rt-error
-  @@version:     omp_<verno>
-
-  "name" is the name of an example
-  "type" is the source code type, which can be translated into or from
-          proper file extension (c,cpp,f,f90)
-  "compilable" indicates whether the source code is compilable
-  "linkable" indicates whether the source code is linkable
-  "expect" indicates some expected result for testing purpose
-          "success|failure|nothing" applies to the result of code compilation
-          "rt-error" is for a case where compilation may be successful, 
-          but the code contains potential runtime issues (such as race condition).
-          Alternative would be to just use "conforming" or "non-conforming".
-  "version" indicates features for a specific OpenMP version, such as "omp_5.0"
-
-3) LaTeX macros for examples
-
- Source code with language h-rules
-   \cexample[<verno>]{<ename>}{<seq-no>}     % for C/C++ examples
-   \cppexample[<verno>]{<ename>}{<seq-no>}   % for C++ examples
-   \fexample[<verno>]{<ename>}{<seq-no>}     % for fixed-form Fortran examples
-   \ffreeexample[<verno>]{<ename>}{<seq-no>} % for free-form Fortran examples
-
- Source code without language h-rules
-   \cnexample[<verno>]{<ename>}{<seq-no>}
-   \cppnexample[<verno>]{<ename>}{<seq-no>}
-   \fnexample[<verno>]{<ename>}{<seq-no>}
-   \ffreenexample[<verno>]{<ename>}{<seq-no>}
-
-   Optional <verno> can be supplied in a macro to include a specific OpenMP
-   version in the example header.  This option also suggests one additional
-   tag (@@version) line is included in the corresponding source code. 
-   If this is not the case (i.e., no @@version tag line), one needs to 
-   prefix <verno> with an underscore '_' symbol in the macro.
-
- Language h-rules
-   \cspecificstart, \cspecificend
-   \cppspecificstart, \cppspecificend
-   \ccppspecificstart, \ccppspecificend
-   \fortranspecificstart, \fortranspecificend
-
- See openmp.sty for more information
-
--- a/README.md
+++ b/README.md
@ -0,0 +1,10 @@
+# OpenMP Examples Document
+
+This is the OpenMP Examples document in LaTeX format.
+
+Please see [Contributions.md](Contributions.md) for how to contribute new examples.
+
+For a brief revision history, please see [Changes.log](Changes.log).
+
+For copyright information, please see [omp_copyright.txt](omp_copyright.txt).
+
--- a/Examples_SIMD.tex
+++ b/Examples_SIMD.tex
@ -1,5 +1,5 @@
 %\pagebreak
-\section{\code{simd} and \code{declare} \code{simd} Constructs}
+\section{\code{simd} and \code{declare} \code{simd} Directives}
 \label{sec:SIMD}

 The following example illustrates the basic use of the \code{simd} construct 
@ -8,29 +8,27 @@ to assure the compiler that the loop can be vectorized.
 \cexample[4.0]{SIMD}{1}

 \ffreeexample[4.0]{SIMD}{1}
-
-\clearpage
 

 When a function can be inlined within a loop the compiler has an opportunity to 
 vectorize the loop. By guaranteeing SIMD behavior of a function's operations, 
 characterizing the arguments of the function and privatizing temporary 
 variables of the loop, the compiler can often create faster, vector code for 
-the loop. In the examples below the \code{declare} \code{simd} construct is 
+the loop. In the examples below the \code{declare} \code{simd} directive is 
 used on the \plc{add1} and \plc{add2} functions to enable creation of their 
 corresponding SIMD function versions for execution within the associated SIMD 
 loop. The functions characterize two different approaches of accessing data 
 within the function: by a single variable and as an element in a data array, 
 respectively. The \plc{add3} C function uses dereferencing.

-The \code{declare} \code{simd} constructs also illustrate the use of 
+The \code{declare} \code{simd} directives also illustrate the use of 
 \code{uniform} and \code{linear} clauses.  The \code{uniform(fact)} clause 
 indicates that the variable \plc{fact} is invariant across the SIMD lanes. In 
 the \plc{add2} function \plc{a} and \plc{b} are included in the \code{uniform} 
 list because the C pointer and the Fortran array references are constant.  The 
 \plc{i} index used in the \plc{add2} function is included in a \code{linear} 
 clause with a constant-linear-step of 1, to guarantee a unity increment of the 
-associated loop. In the \code{declare} \code{simd} construct for the \plc{add3} 
+associated loop. In the \code{declare} \code{simd} directive for the \plc{add3} 
 C function the  \code{linear(a,b:1)} clause instructs the compiler to generate 
 unit-stride loads across the SIMD lanes; otherwise,  costly \emph{gather} 
 instructions would be generated for the unknown sequence of access of the 
@ -44,7 +42,7 @@ variable.

 \ffreeexample[4.0]{SIMD}{2}

-\pagebreak
+%\pagebreak
 A thread that encounters a SIMD construct executes vectorized code for the 
 iterations. Similar to the concerns of a worksharing loop, a loop vectorized 
 with a SIMD construct must assure that temporary and reduction variables are 
@ -57,7 +55,7 @@ construct.
 \ffreeexample[4.0]{SIMD}{3}


-\pagebreak
+%\pagebreak
 A \code{safelen(N)} clause in a \code{simd} construct assures the compiler that 
 there are no loop-carried dependencies for vectors of size \plc{N} or below. If 
 the \code{safelen} clause is not specified, then the default safelen value is 
@ -72,7 +70,7 @@ than 16, the behavior is undefined.

 \ffreeexample[4.0]{SIMD}{4}

-\pagebreak
+%\pagebreak
 The following SIMD construct instructs the compiler to collapse the \plc{i} and 
 \plc{j} loops into a single SIMD loop in which SIMD chunks are executed by 
 threads of the team. Within the workshared loop chunks of a thread, the SIMD 
@ -88,7 +86,7 @@ chunks are executed in the lanes of the vector units.
 \label{sec:SIMD_branch}

 The following examples illustrate the use of the \code{declare} \code{simd} 
-construct with the \code{inbranch} and \code{notinbranch} clauses. The 
+directive with the \code{inbranch} and \code{notinbranch} clauses. The 
 \code{notinbranch} clause informs the compiler that the function \plc{foo} is 
 never called conditionally in the SIMD loop of the function \plc{myaddint}. On 
 the other hand, the \code{inbranch} clause for the function \plc{goo} indicates that 
--- a/Examples_linear_modifier.tex
+++ b/Examples_linear_modifier.tex
--- a/sources/Example_SIMD.1.c
+++ b/sources/Example_SIMD.1.c
--- a/sources/Example_SIMD.1.f90
+++ b/sources/Example_SIMD.1.f90
--- a/sources/Example_SIMD.2.c
+++ b/sources/Example_SIMD.2.c
--- a/sources/Example_SIMD.2.f90
+++ b/sources/Example_SIMD.2.f90
--- a/sources/Example_SIMD.3.c
+++ b/sources/Example_SIMD.3.c
--- a/sources/Example_SIMD.3.f90
+++ b/sources/Example_SIMD.3.f90
--- a/sources/Example_SIMD.4.c
+++ b/sources/Example_SIMD.4.c
--- a/sources/Example_SIMD.4.f90
+++ b/sources/Example_SIMD.4.f90
--- a/sources/Example_SIMD.5.c
+++ b/sources/Example_SIMD.5.c
--- a/sources/Example_SIMD.5.f90
+++ b/sources/Example_SIMD.5.f90
--- a/sources/Example_SIMD.6.c
+++ b/sources/Example_SIMD.6.c
--- a/sources/Example_SIMD.6.f90
+++ b/sources/Example_SIMD.6.f90
--- a/sources/Example_SIMD.7.c
+++ b/sources/Example_SIMD.7.c
--- a/sources/Example_SIMD.7.f90
+++ b/sources/Example_SIMD.7.f90
--- a/sources/Example_SIMD.8.c
+++ b/sources/Example_SIMD.8.c
--- a/sources/Example_SIMD.8.f90
+++ b/sources/Example_SIMD.8.f90
--- a/sources/Example_linear_modifier.1.cpp
+++ b/sources/Example_linear_modifier.1.cpp
--- a/sources/Example_linear_modifier.1.f90
+++ b/sources/Example_linear_modifier.1.f90
--- a/sources/Example_linear_modifier.2.cpp
+++ b/sources/Example_linear_modifier.2.cpp
--- a/sources/Example_linear_modifier.2.f90
+++ b/sources/Example_linear_modifier.2.f90
--- a/sources/Example_linear_modifier.3.c
+++ b/sources/Example_linear_modifier.3.c
--- a/sources/Example_linear_modifier.3.f90
+++ b/sources/Example_linear_modifier.3.f90
--- a/Title_Page.tex
+++ b/Title_Page.tex
@ -27,7 +27,7 @@ Source codes for OpenMP \PVER{} Examples can be downloaded from
 \href{https://github.com/OpenMP/Examples/tree/v\VER}{github}.\\

 \begin{adjustwidth}{0pt}{1em}\setlength{\parskip}{0.25\baselineskip}%
-Copyright © 1997-2020 OpenMP Architecture Review Board.\\
+Copyright \copyright{} 1997-2021 OpenMP Architecture Review Board.\\
 Permission to copy without fee all or part of this material is granted,
 provided the OpenMP Architecture Review Board copyright notice and
 the title of this document appear. Notice is given that copying is by
@ -37,14 +37,11 @@ permission of OpenMP Architecture Review Board.\end{adjustwidth}

 % Blank page

-\clearpage
-\thispagestyle{empty}
-\phantom{a}
-\emph{This page intentionally left blank}
+\cleardoublepage

 %For final version, uncomment the line above, comment out the lines below
 %This working version enacted the following tickets: 287, 519, 550, 593, 
 %674, 688, 689, 
 %and a few other editorial changes.
-\vfill
+%\vfill

--- a/affinity/affinity.tex
+++ b/affinity/affinity.tex
@ -1,5 +1,5 @@
 \pagebreak
-\section{The \code{proc\_bind} Clause}
+\section{\code{proc\_bind} Clause}
 \label{sec:affinity}

 The following examples demonstrate how to use the \code{proc\_bind} clause to 
@ -38,8 +38,8 @@ above. Note that the threads are bound to the first place of each subpartition.

 \fexample[4.0]{affinity}{1}

-It is unspecified on which place the master thread is initially started. If the 
-master thread is initially started on p0, the following placement of threads will 
+It is unspecified on which place the primary thread is initially started. If the 
+primary thread is initially started on p0, the following placement of threads will 
 be applied in the parallel region:

 \begin{compactitem}
@ -53,7 +53,7 @@ be applied in the parallel region:
 \end{compactitem}


-If the master thread would initially be started on p2, the placement of threads 
+If the primary thread would initially be started on p2, the placement of threads 
 and distribution of the place partition would be as follows:

 \begin{compactitem}
@ -71,7 +71,7 @@ the number of threads is greater than the number of places in the parent's place
 partition.

 Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the 
-parent's place partition. The first \plc{T/P} threads of the team (including the master 
+parent's place partition. The first \plc{T/P} threads of the team (including the primary
 thread) execute on the parent's place. The next \plc{T/P} threads execute on the next 
 place in the place partition, and so on, with wrap around. 

@ -79,8 +79,8 @@ place in the place partition, and so on, with wrap around.

 \ffreeexample[4.0]{affinity}{2}

-It is unspecified on which place the master thread is initially started. If the 
-master thread is initially started on p0, the following placement of threads will 
+It is unspecified on which place the primary thread is initially started. If the 
+primary thread is initially started on p0, the following placement of threads will 
 be applied in the parallel region:

 \begin{compactitem}
@ -101,7 +101,7 @@ be applied in the parallel region:
 \item threads 14,15 execute on p7 with the place partition p7
 \end{compactitem}

-If the master thread would initially be started on p2, the placement of threads 
+If the primary thread would initially be started on p2, the placement of threads 
 and distribution of the place partition would be as follows:

 \begin{compactitem}
@ -134,8 +134,8 @@ The place partition is not changed by the \code{close} policy.

 \fexample[4.0]{affinity}{3}

-It is unspecified on which place the master thread is initially started. If the 
-master thread is initially started on p0, the following placement of threads will 
+It is unspecified on which place the primary thread is initially started. If the 
+primary thread is initially started on p0, the following placement of threads will 
 be applied in the \code{parallel} region:

 \begin{compactitem}
@ -148,7 +148,7 @@ be applied in the \code{parallel} region:
 \item thread 3 executes on p3 with the place partition p0-p7
 \end{compactitem}

-If the master thread would initially be started on p2, the placement of threads 
+If the primary thread would initially be started on p2, the placement of threads 
 and distribution of the place partition would be as follows:

 \begin{compactitem}
@ -166,7 +166,7 @@ the number of threads is greater than the number of places in the parent's place
 partition.

 Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the 
-parent's place partition. The first \plc{T/P} threads of the team (including the master 
+parent's place partition. The first \plc{T/P} threads of the team (including the primary
 thread) execute on the parent's place. The next \plc{T/P} threads execute on the next 
 place in the place partition, and so on, with wrap around. The place partition 
 is not changed by the \code{close} policy.
@ -175,8 +175,8 @@ is not changed by the \code{close} policy.

 \ffreeexample[4.0]{affinity}{4}

-It is unspecified on which place the master thread is initially started. If the 
-master thread is initially running on p0, the following placement of threads will 
+It is unspecified on which place the primary thread is initially started. If the 
+primary thread is initially running on p0, the following placement of threads will 
 be applied in the parallel region:

 \begin{compactitem}
@ -197,7 +197,7 @@ be applied in the parallel region:
 \item threads 14,15 execute on p7 with the place partition p0-p7
 \end{compactitem}

-If the master thread would initially be started on p2, the placement of threads 
+If the primary thread would initially be started on p2, the placement of threads 
 and distribution of the place partition would be as follows:

 \begin{compactitem}
@ -218,26 +218,27 @@ and distribution of the place partition would be as follows:
 \item threads 14,15 execute on p1 with the place partition p0-p7
 \end{compactitem}

-\subsection{Master Affinity Policy}
-\label{subsec:affinity_master}
+\subsection{Primary Affinity Policy}
+\label{subsec:affinity_primary}

-The following example shows the result of the \code{master} affinity policy on 
+The following example shows the result of the \code{primary} affinity policy on 
 the partition list for the machine architecture depicted above. The place partition 
-is not changed by the master policy.
+is not changed by the primary policy.

 \cexample[4.0]{affinity}{5}

-\fexample[4.0]{affinity}{5}
+\fexample[4.0]{affinity}{5}[1]
+\clearpage

-It is unspecified on which place the master thread is initially started. If the 
-master thread is initially running on p0, the following placement of threads will 
+It is unspecified on which place the primary thread is initially started. If the 
+primary thread is initially running on p0, the following placement of threads will 
 be applied in the parallel region:

 \begin{compactitem}
 \item threads 0-3 execute on p0 with the place partition p0-p7
 \end{compactitem}

-If the master thread would initially be started on p2, the placement of threads 
+If the primary thread would initially be started on p2, the placement of threads 
 and distribution of the place partition would be as follows:

 \begin{compactitem}
--- a/affinity/affinity_display.tex
+++ b/affinity/affinity_display.tex
@ -12,9 +12,9 @@ at selected locations within code.
 For the first example the environment variable \code{OMP\_DISPLAY\_AFFINITY} has been
 set to \code{TRUE}, and execution occurs on an 8-core system with \code{OMP\_NUM\_THREADS} set to 8.

-The affinity for the master thread is reported through a call to the API
+The affinity for the primary thread is reported through a call to the API
 \code{omp\_display\_affinity()} routine. For default affinity settings
-the report shows that the master thread can execute on any of the cores. 
+the report shows that the primary thread can execute on any of the cores. 
 In the following parallel region the affinity for each of the team threads is reported
 automatically since the \code{OMP\_DISPLAY\_AFFINITY} environment variable has been set
 to \code{TRUE}.
--- a/affinity/affinity_query.tex
+++ b/affinity/affinity_query.tex
--- a/affinity/sources/affinity.1.c
+++ b/affinity/sources/affinity.1.c
@ -2,7 +2,7 @@
 * @@name:	affinity.1c
 * @@type:	C
 * @@compilable:	yes
-* @@linkable:	yes
+* @@linkable:	no
 * @@expect:	success
 * @@version:	omp_4.0
 */
--- a/affinity/sources/affinity.1.f
+++ b/affinity/sources/affinity.1.f
@ -1,7 +1,7 @@
 ! @@name:	affinity.1f
 ! @@type:	F-fixed
 ! @@compilable:	yes
-! @@linkable:	yes
+! @@linkable:	no
 ! @@expect:	success
 ! @@version:	omp_4.0
      PROGRAM EXAMPLE
--- a/affinity/sources/affinity.2.c
+++ b/affinity/sources/affinity.2.c
--- a/affinity/sources/affinity.2.f90
+++ b/affinity/sources/affinity.2.f90
--- a/affinity/sources/affinity.3.c
+++ b/affinity/sources/affinity.3.c
@ -2,7 +2,7 @@
 * @@name:	affinity.3c
 * @@type:	C
 * @@compilable:	yes
-* @@linkable:	yes
+* @@linkable:	no
 * @@expect:	success
 * @@version:	omp_4.0
 */
--- a/affinity/sources/affinity.3.f
+++ b/affinity/sources/affinity.3.f
@ -1,7 +1,7 @@
 ! @@name:	affinity.3f
 ! @@type:	F-fixed
 ! @@compilable:	yes
-! @@linkable:	yes
+! @@linkable:	no
 ! @@expect:	success
 ! @@version:	omp_4.0
      PROGRAM EXAMPLE
--- a/affinity/sources/affinity.4.c
+++ b/affinity/sources/affinity.4.c
--- a/affinity/sources/affinity.4.f90
+++ b/affinity/sources/affinity.4.f90
--- a/affinity/sources/affinity.5.c
+++ b/affinity/sources/affinity.5.c
@ -0,0 +1,21 @@
+/*
+* @@name:	affinity.5c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+* @@version:	omp_5.1
+*/
+#if _OPENMP  < 202011
+#define primary master
+#endif
+
+void work();
+int main()
+{
+#pragma omp parallel proc_bind(primary) num_threads(4)
+   {
+      work();
+   }
+   return 0;
+}
--- a/affinity/sources/affinity.5.f
+++ b/affinity/sources/affinity.5.f
@ -0,0 +1,16 @@
+! @@name:	affinity.5f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@requires:	preprocessing
+! @@linkable:	no
+! @@expect:	success
+! @@version:	omp_5.1
+#if _OPENMP  < 202011
+#define primary master
+#endif
+
+      PROGRAM EXAMPLE
+!$OMP PARALLEL PROC_BIND(primary) NUM_THREADS(4)
+      CALL WORK()
+!$OMP END PARALLEL
+      END PROGRAM EXAMPLE
--- a/affinity/sources/affinity.6.c
+++ b/affinity/sources/affinity.6.c
@ -6,7 +6,6 @@
 * @@expect: success
 * @@version: omp_5.0
 */
-
 double * alloc_init_B(double *A, int N);
 void     compute_on_B(double *B, int N);

--- a/affinity/sources/affinity.6.f90
+++ b/affinity/sources/affinity.6.f90
@ -4,7 +4,6 @@
 ! @@linkable:   no
 ! @@expect:     success
 ! @@version:	omp_5.0
-
 subroutine task_affinity(A, N)

  external alloc_init_B
--- a/affinity/sources/affinity_display.1.c
+++ b/affinity/sources/affinity_display.1.c
@ -11,7 +11,7 @@

 int main(void){                     //MAX threads = 8, single socket system

-   omp_display_affinity(NULL);  //API call-- Displays Affinity of Master Thread
+   omp_display_affinity(NULL);  //API call-- Displays Affinity of Primary Thread

 // API CALL OUTPUT (default format): 
 //team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7
--- a/affinity/sources/affinity_display.1.f90
+++ b/affinity/sources/affinity_display.1.f90
@ -4,17 +4,16 @@
 ! @@linkable: yes
 ! @@expect: success
 ! @@version: omp_5.0
-
 program affinity_display        ! MAX threads = 8, single socket system

   use omp_lib
   implicit none
   character(len=0) :: null

-   call omp_display_affinity(null) !API call- Displays Affinity of Master Thread
+   call omp_display_affinity(null) !API call- Displays Affinity of Primary Thrd

 ! API CALL OUTPUT (default format): 
-! team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7
+!team_num= 0, nesting_level= 0, thread_num= 0, thread_affinity= 0,1,2,3,4,5,6,7


                       ! OMP_DISPLAY_AFFINITY=TRUE, OMP_NUM_THREADS=8
--- a/affinity/sources/affinity_display.2.c
+++ b/affinity/sources/affinity_display.2.c
--- a/affinity/sources/affinity_display.2.f90
+++ b/affinity/sources/affinity_display.2.f90
@ -4,7 +4,6 @@
 ! @@linkable: yes
 ! @@expect: success
 ! @@version: omp_5.0
-
 program affinity_display

   use omp_lib
--- a/affinity/sources/affinity_display.3.c
+++ b/affinity/sources/affinity_display.3.c
--- a/affinity/sources/affinity_display.3.f90
+++ b/affinity/sources/affinity_display.3.f90
@ -4,7 +4,6 @@
 ! @@linkable: yes
 ! @@expect: success
 ! @@version: omp_5.0
-
 program affinity_display
   use omp_lib
   implicit none
--- a/affinity/sources/affinity_query.1.c
+++ b/affinity/sources/affinity_query.1.c
--- a/affinity/sources/affinity_query.1.f90
+++ b/affinity/sources/affinity_query.1.f90
@ -4,7 +4,6 @@
 ! @@linkable:	no
 ! @@expect:	success
 ! @@version:	omp_4.5
-
 subroutine socket_init(socket_num)
   use omp_lib
   integer  :: socket_num, n_procs
--- a/affinity/task_affinity.tex
+++ b/affinity/task_affinity.tex
--- a/data_environment/associate.tex
+++ b/data_environment/associate.tex
@ -27,6 +27,7 @@ The association between \plc{u} and the original \plc{v} is retained (see the Da
 Attribute Rules section in the OpenMP 4.0 API Specifications). Inside the \code{parallel} 
 region, \plc{v} has the value of -1 and \plc{u} has the value of the original \plc{v}.

+\pagebreak
 \ffreenexample[4.0]{associate}{3}
 \fortranspecificend

--- a/data_environment/carrays_fpriv.tex
+++ b/data_environment/carrays_fpriv.tex
--- a/data_environment/copyin.tex
+++ b/data_environment/copyin.tex
@ -1,9 +1,9 @@
 \pagebreak
-\section{The \code{copyin} Clause}
+\section{\code{copyin} Clause}
 \label{sec:copyin}

 The \code{copyin} clause is used to initialize threadprivate data upon entry 
-to a \code{parallel} region. The value of the threadprivate variable in the master 
+to a \code{parallel} region. The value of the threadprivate variable in the primary
 thread is copied to the threadprivate variable of each other team member.

 \cexample{copyin}{1}
--- a/data_environment/copyprivate.tex
+++ b/data_environment/copyprivate.tex
@ -1,5 +1,5 @@
 \pagebreak
-\section{The \code{copyprivate} Clause}
+\section{\code{copyprivate} Clause}
 \label{sec:copyprivate}

 The \code{copyprivate} clause can be used to broadcast values acquired by a single 
@ -20,14 +20,14 @@ any of the threads have left the barrier at the end of the construct.

 \fexample{copyprivate}{1}

-In this example, assume that the input must be performed by the master thread. 
-Since the \code{master} construct does not support the \code{copyprivate} clause, 
+In this example, assume that the input must be performed by the primary thread. 
+Since the \code{masked} construct does not support the \code{copyprivate} clause, 
 it cannot broadcast the input value that is read. However, \code{copyprivate} 
-is used to broadcast an address where the input value is stored.
+is used to broadcast an address where the input value is stored. 

-\cexample{copyprivate}{2}
+\cexample[5.1]{copyprivate}{2}

-\fexample{copyprivate}{2}
+\fexample[5.1]{copyprivate}{2}[1]

 Suppose that the number of lock variables required within a \code{parallel} region 
 cannot easily be determined prior to entering it. The \code{copyprivate} clause 
--- a/data_environment/cpp_reference.tex
+++ b/data_environment/cpp_reference.tex
--- a/data_environment/default_none.tex
+++ b/data_environment/default_none.tex
@ -1,5 +1,5 @@
 \pagebreak
-\section{The \code{default(none)} Clause}
+\section{\code{default(none)} Clause}
 \label{sec:default_none}

 The following example distinguishes the variables that are affected by the \code{default(none)} 
--- a/data_environment/fort_loopvar.tex
+++ b/data_environment/fort_loopvar.tex
--- a/data_environment/fort_sa_private.tex
+++ b/data_environment/fort_sa_private.tex
@ -9,6 +9,7 @@ clause rules with regard to storage association.
 \fnexample{fort_sa_private}{1}

 \fnexample{fort_sa_private}{2}
+\clearpage

 \fnexample{fort_sa_private}{3}
 % blue line floater at top of this page for "Fortran, cont."
@ -18,6 +19,6 @@ clause rules with regard to storage association.

 \fnexample{fort_sa_private}{4}

-\fnexample{fort_sa_private}{5}
+\fnexample[5.1]{fort_sa_private}{5}
 \fortranspecificend

--- a/data_environment/fort_sp_common.tex
+++ b/data_environment/fort_sp_common.tex
@ -19,6 +19,7 @@ The following example is also conforming:
 %\begin{figure}[t!]
 %\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
 %\end{figure}
+\clearpage

 The following example is conforming:

--- a/data_environment/lastprivate.tex
+++ b/data_environment/lastprivate.tex
@ -1,5 +1,5 @@
 \pagebreak
-\section{The \code{lastprivate} Clause}
+\section{\code{lastprivate} Clause}
 \label{sec:lastprivate}

 Correct execution sometimes depends on the value that the last iteration of a loop 
--- a/data_environment/private.tex
+++ b/data_environment/private.tex
@ -1,5 +1,5 @@
 \pagebreak
-\section{The \code{private} Clause}
+\section{\code{private} Clause}
 \label{sec:private}

 In the following example, the values of original list items \plc{i} and \plc{j} 
--- a/data_environment/reduction.tex
+++ b/data_environment/reduction.tex
@ -5,7 +5,7 @@

 This section covers ways to perform reductions in parallel, task, taskloop, and SIMD regions.

-\subsection{The \code{reduction} Clause}
+\subsection{\code{reduction} Clause}
 \label{subsec:reduction}

 The following example demonstrates the \code{reduction} clause; note that some 
@ -49,7 +49,7 @@ to \code{MIN}.
 \ffreenexample{reduction}{5}
 \fortranspecificend

-\pagebreak
+%\pagebreak
 The following example is non-conforming because the initialization (\code{a = 
 0}) of the original list item \code{a} is not synchronized with the update of 
 \code{a} as a result of the reduction computation in the \code{for} loop. Therefore, 
@ -62,9 +62,9 @@ clause. This can be achieved by adding an explicit barrier after the assignment
 directive (which has an implied barrier), or by initializing \code{a} before 
 the start of the \code{parallel} region.

-\cexample{reduction}{6}
+\cexample[5.1]{reduction}{6}

-\fexample{reduction}{6}
+\fexample[5.1]{reduction}{6}[1]

 The following example demonstrates the reduction of array \plc{a}.  In C/C++ this is illustrated by the explicit use of an array section \plc{a[0:N]} in the \code{reduction} clause.  The corresponding Fortran example uses array syntax supported in the base language.  As of the OpenMP 4.5 specification the explicit use of an array section in the \code{reduction} clause in Fortran is not permitted.  But this oversight has been fixed in the OpenMP 5.0 specification.

@ -154,7 +154,7 @@ second \code{target} construct.
 \cexample[5.0]{target_reduction}{1}

 \ffreeexample[5.0]{target_reduction}{1}
-\clearpage
+%\clearpage

 In the next example, the variables \code{sum1} and \code{sum2} remain on the
 device for the duration of the \code{target}~\code{data} region so that it is
@ -184,9 +184,9 @@ task reduction will be combined (in some order) into the original variable
 listed in the \code{task\_reduction} clause before exiting the \code{taskgroup}
 region. 

-\cexample[5.0]{target_task_reduction}{1}
+\cexample[5.1]{target_task_reduction}{1}

-\ffreeexample[5.0]{target_task_reduction}{1}
+\ffreeexample[5.1]{target_task_reduction}{1}[1]

 In the next pair of examples, the task reduction is defined by a
 \code{reduction} clause with the \code{task} modifier, rather than a
@ -201,13 +201,13 @@ into the original reduction variable, \code{sum}.
 Next, the \code{task} modifier is again used to define a task reduction over
 participating tasks. This time, the participating tasks are a target task
 resulting from a \code{target} construct with the \code{in\_reduction} clause,
-and the implicit task (executing on the master thread) that calls
+and the implicit task (executing on the primary thread) that calls
 \code{host\_compute}. As before, the partial results from these participating
 tasks are combined in some order into the original reduction variable.

-\cexample[5.0]{target_task_reduction}{2b}
+\cexample[5.1]{target_task_reduction}{2b}

-\ffreeexample[5.0]{target_task_reduction}{2b}
+\ffreeexample[5.1]{target_task_reduction}{2b}[1]


 \subsection{Taskloop Reduction}
@ -266,7 +266,7 @@ by the taskloop will participate on it.

 \cexample[5.0]{taskloop_reduction}{2}
 \ffreeexample[5.0]{taskloop_reduction}{2}
-\clearpage
+%\clearpage

 In the OpenMP 5.0 Specification, \code{reduction} clauses for the
 \code{taskloop}~\code{simd} construct were also added. 
@ -339,8 +339,21 @@ At the end of the parallel region \plc{asum} contains the combined result of all
 %At the end of the parallel region \plc{asum} contains the combined result of all reductions.


-\cexample[5.0]{taskloop_simd_reduction}{1}
+\cexample[5.1]{taskloop_simd_reduction}{1}

-\ffreeexample[5.0]{taskloop_simd_reduction}{1}
+\ffreeexample[5.1]{taskloop_simd_reduction}{1}[1]


+\subsection{Reduction with the \code{scope} Construct}
+\label{subsec:reduction_scope}
+
+The following example illustrates the use of the \code{scope} construct 
+to perform a reduction in a \code{parallel} region. The case is useful for 
+producing a reduction and accessing reduction variables inside a \code{parallel} region 
+without using a worksharing-loop construct.
+
+\cppexample[5.1]{scope_reduction}{1}
+\clearpage
+
+\ffreeexample[5.1]{scope_reduction}{1}
+
--- a/data_environment/scan.tex
+++ b/data_environment/scan.tex
@ -1,5 +1,5 @@
 \pagebreak
-\section{The \code{scan} Directive}
+\section{\code{scan} Directive}
 \label{sec:scan}

 The following examples illustrate how to parallelize a loop that saves 
--- a/data_environment/sources/associate.1.f
+++ b/data_environment/sources/associate.1.f
--- a/data_environment/sources/associate.2.f
+++ b/data_environment/sources/associate.2.f
--- a/data_environment/sources/associate.3.f90
+++ b/data_environment/sources/associate.3.f90
--- a/data_environment/sources/carrays_fpriv.1.c
+++ b/data_environment/sources/carrays_fpriv.1.c
--- a/data_environment/sources/copyin.1.c
+++ b/data_environment/sources/copyin.1.c
--- a/data_environment/sources/copyin.1.f
+++ b/data_environment/sources/copyin.1.f
--- a/data_environment/sources/copyprivate.1.c
+++ b/data_environment/sources/copyprivate.1.c
--- a/data_environment/sources/copyprivate.1.f
+++ b/data_environment/sources/copyprivate.1.f
--- a/data_environment/sources/copyprivate.2.c
+++ b/data_environment/sources/copyprivate.2.c
@ -4,7 +4,12 @@
 * @@compilable:	yes
 * @@linkable:	no
 * @@expect:	success
+* @@version:    omp_5.1
 */
+#if _OPENMP  < 202011
+#define masked master
+#endif
+
 #include <stdio.h>
 #include <stdlib.h>

@ -18,7 +23,7 @@ float read_next( ) {
  }  /* copies the pointer only */


-  #pragma omp master
+  #pragma omp masked
  {
    scanf("%f", tmp);
  }
--- a/data_environment/sources/copyprivate.2.f
+++ b/data_environment/sources/copyprivate.2.f
@ -1,8 +1,14 @@
 ! @@name:	copyprivate.2f
 ! @@type:	F-fixed
 ! @@compilable:	yes
+! @@requires:	preprocessing
 ! @@linkable:	no
 ! @@expect:	success
+! @@version:    omp_5.1
+#if _OPENMP  < 202011
+#define MASKED MASTER
+#endif
+
        REAL FUNCTION READ_NEXT()
        REAL, POINTER :: TMP

@ -10,9 +16,9 @@
          ALLOCATE (TMP)
 !$OMP   END SINGLE COPYPRIVATE (TMP)  ! copies the pointer only

-!$OMP   MASTER
+!$OMP   MASKED
          READ (11) TMP
-!$OMP   END MASTER
+!$OMP   END MASKED

 !$OMP   BARRIER
          READ_NEXT = TMP
--- a/data_environment/sources/copyprivate.3.c
+++ b/data_environment/sources/copyprivate.3.c
--- a/data_environment/sources/copyprivate.3.f
+++ b/data_environment/sources/copyprivate.3.f
--- a/data_environment/sources/copyprivate.4.f
+++ b/data_environment/sources/copyprivate.4.f
--- a/data_environment/sources/cpp_reference.1.cpp
+++ b/data_environment/sources/cpp_reference.1.cpp
--- a/Show More
+++ b/Show More