Initial add of all files

2025-04-03 13:21:33 +01:00 · 2015-01-13 11:38:24 -08:00 · 2015-01-13 11:38:24 -08:00 · 542c10d074
commit 542c10d074
357 changed files with 10840 additions and 0 deletions
--- a/Changes.log
+++ b/Changes.log
@ -0,0 +1,80 @@
+[6-Jan-2015] Version 4.0.1ltx
+Changes from 4.0.1ltx-21Nov-2014
+
+1. openmp.sty and openmp-examples.tex
+   enable source line numbering
+
+2. Split chapters in the main file (Examples_Sects.tex) into individual files
+   Makefile and openmp-examples.tex were modified to use the new list.
+
+3. Additional changes related to fixing fonts and language markers
+
+Below is a summary.
+
+Page 2: "non- compound" -> "non-compound"
+Page 10: fixed mis-placed language markers
+Chap-8, page 24: fixed variable fonts for T, P, T/P
+Chap-19, page 79-80: added missing Fortran cont. marker
+Chap-25, page 100: combined 25.2f & 25.3f into one Fortran marker
+Chap-30, page 120: combined 30.2c & 30.3c into one C/C++ marker
+Chap-30, page 122-123: added missing Fortran cont. marker
+Chap-32, page 127: added missing Fortran cont. marker
+Chap-36, page 138-139: added missing Fortran cont. marker
+Chap-39, page 147: added missing Fortran cont. marker
+Chap-50, page 182: fixed variables p, v1, v2 fonts
+Chap-51, page 189: fixed variables p, v1, v2 fonts
+Chap-52, page 201: fixed variable fonts, function fonts
+Chap-53, page 205: fixed variable fonts, function fonts
+Chap-54, page 215: fixed variable fonts
+Chap-58, page 237: fixed variable fonts
+Chap-58, page 237: Minor wording change to reflect the new placement of the Example header.
+
+Modification applied to the following files:
+   Examples_Chapt.tex
+   Examples_affinity.tex
+   Examples_associate.tex
+   Examples_atomic_restrict.tex
+   Examples_cond_comp.tex
+   Examples_declare_target.tex
+   Examples_fort_sa_private.tex
+   Examples_fort_sp_common.tex
+   Examples_reduction.tex
+   Examples_target.tex
+   Examples_target_data.tex
+   Examples_target_update.tex
+   Examples_teams.tex
+   Examples_threadprivate.tex
+   Examples_workshare.tex
+
+4. Other notes
+
+Chap-12, page 37: placement of C/C++ marker changed, but OK
+Chap-29, page 114: marker moved, but OK.
+Chap-50, page 187: Example 50.4bf header added.
+   Fortran marker changed, but OK
+Chap-51, page 192: "Example 51.3c" added, and example numbering 
+   shifted thereafter.
+
+
+[21-Nov-2014] Initial 4.0.1ltx
+Changes from 4.0.1ltx-3Jun2014
+
+1. openmp.sty
+   change from using mnemonics
+      \def\ename{\escstr{#1}.#2}
+   to sequential numbering
+      \def\ename{\thechapter.#2}
+
+   For "fnexample()" definition, firstline=6, not 8
+
+2. source file changes
+   sources - use the "original" sources from 4.0.1
+
+3. Version Number
+   openmp-examples.tex:
+      change footnote "Version 4.0 - July 2013"
+         to "Version 4.0.1 - February 2014"
+   Title_Page.tex:
+      change from "November, 2013" to "February, 2014"
+         "1997-2013" -> "1997-2014"
+
--- a/Examples_Chapt.tex
+++ b/Examples_Chapt.tex
@ -0,0 +1,9 @@
+
+\chapter*{Examples}
+\label{chap:examples}
+The following are examples of the OpenMP API directives, constructs, and routines.
+\ccppspecificstart
+A statement following a directive is compound only when necessary, and a 
+non-compound statement is indented with respect to a directive preceding it.
+\ccppspecificend
+
--- a/Examples_affinity.tex
+++ b/Examples_affinity.tex
@ -0,0 +1,243 @@
+\pagebreak
+\chapter{The \code{proc\_bind} Clause}
+\label{chap:affinity}
+
+The following examples demonstrate how to use the \code{proc\_bind} clause to 
+control the thread binding for a team of threads in a \code{parallel} region. 
+The machine architecture is depicted in the figure below. It consists of two sockets, 
+each equipped with a quad-core processor and configured to execute two hardware 
+threads simultaneously on each core. These examples assume a contiguous core numbering 
+starting from 0, such that the hardware threads 0,1 form the first physical core.
+
+\ifpdf
+%\begin{figure}[htbp]
+\centerline{\includegraphics[width=3.8in,keepaspectratio=true]%
+{figs/proc_bind_fig.pdf}}
+%\end{figure}
+\fi
+
+The following equivalent place list declarations consist of eight places (which 
+we designate as p0 to p7):
+
+\code{OMP\_PLACES=\texttt{"}\{0,1\},\{2,3\},\{4,5\},\{6,7\},\{8,9\},\{10,11\},\{12,13\},\{14,15\}\texttt{"}}
+
+or
+
+\code{OMP\_PLACES=\texttt{"}\{0:2\}:8:2\texttt{"}}
+
+\section{Spread Affinity Policy}
+
+The following example shows the result of the \code{spread} affinity policy on 
+the partition list when the number of threads is less than or equal to the number 
+of places in the parent's place partition, for the machine architecture depicted 
+above. Note that the threads are bound to the first place of each subpartition.
+
+\cexample{affinity}{1c}
+
+\fexample{affinity}{1f}
+
+It is unspecified on which place the master thread is initially started. If the 
+master thread is initially started on p0, the following placement of threads will 
+be applied in the parallel region:
+
+\begin{compactitem}
+\item thread 0 executes on p0 with the place partition p0,p1
+
+\item thread 1 executes on p2 with the place partition p2,p3
+
+\item thread 2 executes on p4 with the place partition p4,p5
+
+\item thread 3 executes on p6 with the place partition p6,p7
+\end{compactitem}
+
+
+If the master thread would initially be started on p2, the placement of threads 
+and distribution of the place partition would be as follows:
+
+\begin{compactitem}
+\item thread 0 executes on p2 with the place partition p2,p3
+
+\item thread 1 executes on p4 with the place partition p4,p5
+
+\item thread 2 executes on p6 with the place partition p6,p7
+
+\item thread 3 executes on p0 with the place partition p0,p1
+\end{compactitem}
+
+The following example illustrates the \code{spread} thread affinity policy when 
+the number of threads is greater than the number of places in the parent's place 
+partition.
+
+Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the 
+parent's place partition. The first \plc{T/P} threads of the team (including the master 
+thread) execute on the parent's place. The next \plc{T/P} threads execute on the next 
+place in the place partition, and so on, with wrap around. 
+
+\cexample{affinity}{2c}
+
+\fexample{affinity}{2f}
+
+It is unspecified on which place the master thread is initially started. If the 
+master thread is initially started on p0, the following placement of threads will 
+be applied in the parallel region:
+
+\begin{compactitem}
+\item threads 0,1 execute on p0 with the place partition p0
+
+\item threads 2,3 execute on p1 with the place partition p1
+
+\item threads 4,5 execute on p2 with the place partition p2
+
+\item threads 6,7 execute on p3 with the place partition p3
+
+\item threads 8,9 execute on p4 with the place partition p4
+
+\item threads 10,11 execute on p5 with the place partition p5
+
+\item threads 12,13 execute on p6 with the place partition p6
+
+\item threads 14,15 execute on p7 with the place partition p7
+\end{compactitem}
+
+If the master thread would initially be started on p2, the placement of threads 
+and distribution of the place partition would be as follows:
+
+\begin{compactitem}
+\item threads 0,1 execute on p2 with the place partition p2
+
+\item threads 2,3 execute on p3 with the place partition p3
+
+\item threads 4,5 execute on p4 with the place partition p4
+
+\item threads 6,7 execute on p5 with the place partition p5
+
+\item threads 8,9 execute on p6 with the place partition p6
+
+\item threads 10,11 execute on p7 with the place partition p7
+
+\item threads 12,13 execute on p0 with the place partition p0
+
+\item threads 14,15 execute on p1 with the place partition p1
+\end{compactitem}
+
+\section{Close Affinity Policy}
+
+The following example shows the result of the \code{close} affinity policy on 
+the partition list when the number of threads is less than or equal to the number 
+of places in the parent's place partition, for the machine architecture depicted above. 
+The place partition is not changed by the \code{close} policy.
+
+\cexample{affinity}{3c}
+
+\fexample{affinity}{3f}
+
+It is unspecified on which place the master thread is initially started. If the 
+master thread is initially started on p0, the following placement of threads will 
+be applied in the \code{parallel} region:
+
+\begin{compactitem}
+\item thread 0 executes on p0 with the place partition p0-p7
+
+\item thread 1 executes on p1 with the place partition p0-p7
+
+\item thread 2 executes on p2 with the place partition p0-p7
+
+\item thread 3 executes on p3 with the place partition p0-p7
+\end{compactitem}
+
+If the master thread would initially be started on p2, the placement of threads 
+and distribution of the place partition would be as follows:
+
+\begin{compactitem}
+\item thread 0 executes on p2 with the place partition p0-p7
+
+\item thread 1 executes on p3 with the place partition p0-p7
+
+\item thread 2 executes on p4 with the place partition p0-p7
+
+\item thread 3 executes on p5 with the place partition p0-p7
+\end{compactitem}
+
+The following example illustrates the \code{close} thread affinity policy when 
+the number of threads is greater than the number of places in the parent's place 
+partition.
+
+Let \plc{T} be the number of threads in the team, and \plc{P} be the number of places in the 
+parent's place partition. The first \plc{T/P} threads of the team (including the master 
+thread) execute on the parent's place. The next \plc{T/P} threads execute on the next 
+place in the place partition, and so on, with wrap around. The place partition 
+is not changed by the \code{close} policy.
+
+\cexample{affinity}{4c}
+
+\fexample{affinity}{4f}
+
+It is unspecified on which place the master thread is initially started. If the 
+master thread is initially running on p0, the following placement of threads will 
+be applied in the parallel region:
+
+\begin{compactitem}
+\item threads 0,1 execute on p0 with the place partition p0-p7
+
+\item threads 2,3 execute on p1 with the place partition p0-p7
+
+\item threads 4,5 execute on p2 with the place partition p0-p7
+
+\item threads 6,7 execute on p3 with the place partition p0-p7
+
+\item threads 8,9 execute on p4 with the place partition p0-p7
+
+\item threads 10,11 execute on p5 with the place partition p0-p7
+
+\item threads 12,13 execute on p6 with the place partition p0-p7
+
+\item threads 14,15 execute on p7 with the place partition p0-p7
+\end{compactitem}
+
+If the master thread would initially be started on p2, the placement of threads 
+and distribution of the place partition would be as follows:
+
+\begin{compactitem}
+\item threads 0,1 execute on p2 with the place partition p0-p7
+
+\item threads 2,3 execute on p3 with the place partition p0-p7
+
+\item threads 4,5 execute on p4 with the place partition p0-p7
+
+\item threads 6,7 execute on p5 with the place partition p0-p7
+
+\item threads 8,9 execute on p6 with the place partition p0-p7
+
+\item threads 10,11 execute on p7 with the place partition p0-p7
+
+\item threads 12,13 execute on p0 with the place partition p0-p7
+
+\item threads 14,15 execute on p1 with the place partition p0-p7
+\end{compactitem}
+
+\section{Master Affinity Policy}
+
+The following example shows the result of the \code{master} affinity policy on 
+the partition list for the machine architecture depicted above. The place partition 
+is not changed by the master policy.
+
+\cexample{affinity}{5c}
+
+\fexample{affinity}{5f}
+
+It is unspecified on which place the master thread is initially started. If the 
+master thread is initially running on p0, the following placement of threads will 
+be applied in the parallel region:
+
+\begin{compactitem}
+\item threads 0-3 execute on p0 with the place partition p0-p7
+\end{compactitem}
+
+If the master thread would initially be started on p2, the placement of threads 
+and distribution of the place partition would be as follows:
+
+\begin{compactitem}
+\item threads 0-3 execute on p2 with the place partition p0-p7
+\end{compactitem}
+
+
--- a/Examples_array_sections.tex
+++ b/Examples_array_sections.tex
@ -0,0 +1,35 @@
+\pagebreak
+\chapter{Array Sections in Device Constructs}
+\label{chap:array_sections}
+
+The following examples show the usage of array sections in \code{map} clauses 
+on \code{target} and \code{target} \code{data} constructs.
+
+This example shows the invalid usage of two separate sections of the same array 
+inside of a \code{target} construct.
+
+\cexample{array_sections}{1c}
+
+\fexample{array_sections}{1f}
+
+This example shows the invalid usage of two separate sections of the same array 
+inside of a \code{target} construct.
+
+\cexample{array_sections}{2c}
+
+\fexample{array_sections}{2f}
+
+This example shows the valid usage of two separate sections of the same array inside 
+of a \code{target} construct.
+
+\cexample{array_sections}{3c}
+
+\fexample{array_sections}{3f}
+
+This example shows the valid usage of a wholly contained array section of an already 
+mapped array section inside of a \code{target} construct.
+
+\cexample{array_sections}{4c}
+
+\fexample{array_sections}{4f}
+
--- a/Examples_associate.tex
+++ b/Examples_associate.tex
@ -0,0 +1,32 @@
+\pagebreak
+\chapter{Fortran \code{ASSOCIATE} Construct}
+\fortranspecificstart
+\label{chap:associate}
+
+The following is an invalid example of specifying an associate name on a data-sharing attribute 
+clause. The constraint in the Data Sharing Attribute Rules section in the OpenMP 
+4.0 API Specifications states that an associate name preserves the association 
+with the selector established at the \code{ASSOCIATE} statement. The associate 
+name \plc{b} is associated with the shared variable \plc{a}. With the predetermined data-sharing 
+attribute rule, the associate name \plc{b} is not allowed to be specified on the \code{private} 
+clause.
+
+\fnexample{associate}{1f}
+
+In the next example, within the \code{parallel} construct, the associate name \plc{thread\_id} 
+is associated with the private copy of \plc{i}. The print statement should output the 
+unique thread number.
+
+\fnexample{associate}{2f}
+
+The following example illustrates the effect of specifying a selector name on a data-sharing 
+attribute clause. The associate name \plc{u} is associated with \plc{v} and the variable \plc{v} 
+is specified on the \code{private} clause of the \code{parallel} construct. 
+The construct association is established prior to the \code{parallel} region. 
+The association between \plc{u} and the original \plc{v} is retained (see the Data Sharing 
+Attribute Rules section in the OpenMP 4.0 API Specifications). Inside the \code{parallel} 
+region, \plc{v} has the value of -1 and \plc{u} has the value of the original \plc{v}.
+
+\fnexample{associate}{3f}
+\fortranspecificend
+
--- a/Examples_async_target.tex
+++ b/Examples_async_target.tex
@ -0,0 +1,31 @@
+\pagebreak
+\chapter{Asynchronous Execution of a \code{target} Region Using Tasks}
+\label{chap:async_target}
+
+The following example shows how the \code{task} and \code{target} constructs 
+are used to execute multiple \code{target} regions asynchronously. The task that 
+encounters the \code{task} construct generates an explicit task that contains 
+a \code{target} region. The thread executing the explicit task encounters a task 
+scheduling point while waiting for the execution of the \code{target} region 
+to complete, allowing the thread to switch back to the execution of the encountering 
+task or one of the previously generated explicit tasks.
+
+\cexample{async_target}{1c}
+
+The Fortran version has an interface block that contains the \code{declare} \code{target}. 
+An identical statement exists in the function declaration (not shown here).
+
+\fexample{async_target}{1f}
+
+The following example shows how the \code{task} and \code{target} constructs 
+are used to execute multiple \code{target} regions asynchronously. The task dependence 
+ensures that the storage is allocated and initialized on the device before it is 
+accessed.
+
+\cexample{async_target}{2c}
+
+The Fortran example uses allocatable arrays for dynamic memory on the device. 
+
+\fexample{async_target}{2f}
+
+
--- a/Examples_atomic.tex
+++ b/Examples_atomic.tex
@ -0,0 +1,44 @@
+\pagebreak
+\chapter{The \code{atomic} Construct}
+\label{chap:atomic}
+
+The following example avoids race conditions (simultaneous updates of an element 
+of \plc{x} by multiple threads) by using the \code{atomic} construct.
+
+The advantage of using the \code{atomic} construct in this example is that it 
+allows updates of two different elements of \plc{x} to occur in parallel. If 
+a \code{critical} construct were used instead, then all updates to elements of 
+\plc{x} would be executed serially (though not in any guaranteed order).
+
+Note that the \code{atomic} directive applies only to the statement immediately 
+following it. As a result, elements of \plc{y} are not updated atomically in 
+this example.
+
+\cexample{atomic}{1c}
+
+\fexample{atomic}{1f}
+
+The following example illustrates the \code{read} and \code{write}  clauses 
+for the \code{atomic} directive. These clauses ensure that the given variable 
+is read or written, respectively, as a whole. Otherwise, some other thread might 
+read or write part of the variable while the current thread was reading or writing 
+another part of the variable. Note that most hardware provides atomic reads and 
+writes for some set of properly aligned variables of specific sizes, but not necessarily 
+for all the variable types supported by the OpenMP API.
+
+\cexample{atomic}{2c}
+
+\fexample{atomic}{2f}
+
+The following example illustrates the \code{capture} clause for the \code{atomic} 
+directive. In this case the value of a variable is captured, and then the variable 
+is incremented. These operations occur atomically. This particular example could 
+be implemented using the fetch-and-add instruction available on many kinds of hardware. 
+The example also shows a way to implement a spin lock using the \code{capture} 
+ and \code{read} clauses.
+
+\cexample{atomic}{3c}
+
+\fexample{atomic}{3f}
+
+
--- a/Examples_atomic_restrict.tex
+++ b/Examples_atomic_restrict.tex
@ -0,0 +1,25 @@
+\pagebreak
+\chapter{Restrictions on the \code{atomic} Construct}
+\label{chap:atomic_restrict}
+
+The following non-conforming examples illustrate the restrictions on the \code{atomic} 
+construct. 
+
+\cexample{atomic_restrict}{1c}
+
+\fexample{atomic_restrict}{1f}
+
+\cexample{atomic_restrict}{2c}
+
+\fortranspecificstart
+The following example is non-conforming because \code{I} and \code{R} reference 
+the same location but have different types.
+
+\fnexample{atomic_restrict}{2f}
+
+Although the following example might work on some implementations, this is also 
+non-conforming:
+
+\fnexample{atomic_restrict}{3f}
+\fortranspecificend
+
--- a/Examples_barrier_regions.tex
+++ b/Examples_barrier_regions.tex
@ -0,0 +1,24 @@
+\pagebreak
+\chapter{Binding of \code{barrier} Regions}
+\label{chap:barrier_regions}
+
+The binding rules call for a \code{barrier} region to bind to the closest enclosing 
+\code{parallel} region. 
+
+In the following example, the call from the main program to \plc{sub2} is conforming 
+because the \code{barrier} region (in \plc{sub3}) binds to the \code{parallel} 
+region in \plc{sub2}. The call from the main program to \plc{sub1} is conforming 
+because the \code{barrier} region binds to the \code{parallel} region in subroutine 
+\plc{sub2}.
+
+The call from the main program to \plc{sub3} is conforming because the \code{barrier} 
+region binds to the implicit inactive \code{parallel} region enclosing the sequential 
+part. Also note that the \code{barrier} region in \plc{sub3} when called from 
+\plc{sub2} only synchronizes the team of threads in the enclosing \code{parallel} 
+region and not all the threads created in \plc{sub1}.
+
+\cexample{barrier_regions}{1c}
+
+\fexample{barrier_regions}{1f}
+
+
--- a/Examples_cancellation.tex
+++ b/Examples_cancellation.tex
@ -0,0 +1,42 @@
+\pagebreak
+\chapter{Cancellation Constructs}
+\label{chap:cancellation}
+
+The following example shows how the \code{cancel} directive can be used to terminate 
+an OpenMP region. Although the \code{cancel} construct terminates the OpenMP 
+worksharing region, programmers must still track the exception through the pointer 
+ex and issue a cancellation for the \code{parallel} region if an exception has 
+been raised. The master thread checks the exception pointer to make sure that the 
+exception is properly handled in the sequential part. If cancellation of the \code{parallel} 
+region has been requested, some threads might have executed \code{phase\_1()}. 
+However, it is guaranteed that none of the threads executed \code{phase\_2()}.
+
+\cexample{cancellation}{1c}
+
+
+The following example illustrates the use of the \code{cancel} construct in error 
+handling. If there is an error condition from the \code{allocate} statement, 
+the cancellation is activated. The encountering thread sets the shared variable 
+\code{err} and other threads of the binding thread set proceed to the end of 
+the worksharing construct after the cancellation has been activated. 
+
+\fexample{cancellation}{1f}
+
+The following example shows how to cancel a parallel search on a binary tree as 
+soon as the search value has been detected. The code creates a task to descend 
+into the child nodes of the current tree node. If the search value has been found, 
+the code remembers the tree node with the found value through an \code{atomic} 
+write to the result variable and then cancels execution of all search tasks. The 
+function \code{search\_tree\_parallel} groups all search tasks into a single 
+task group to control the effect of the \code{cancel taskgroup} directive. The 
+\plc{level} argument is used to create undeferred tasks after the first ten 
+levels of the tree.
+
+\cexample{cancellation}{2c}
+
+
+The following is the equivalent parallel search example in Fortran.
+
+\fexample{cancellation}{2f}
+
+
--- a/Examples_carrays_fpriv.tex
+++ b/Examples_carrays_fpriv.tex
@ -0,0 +1,37 @@
+\pagebreak
+\chapter{C/C++ Arrays in a \code{firstprivate} Clause}
+\ccppspecificstart
+\label{chap:carrays_fpriv}
+
+The following example illustrates the size and value of list items of array or 
+pointer type in a \code{firstprivate} clause. The size of new list items is 
+based on the type of the corresponding original list item, as determined by the 
+base language.
+
+In this example:
+
+\begin{compactitem}
+\item The type of \code{A} is array of two arrays of two ints.
+
+\item  The type of \code{B} is adjusted to pointer to array of \code{n} 
+ints, because it is a function parameter.
+
+\item  The type of \code{C} is adjusted to pointer to int, because 
+it is a function parameter.
+
+\item  The type of \code{D} is array of two arrays of two ints.
+
+\item  The type of \code{E} is array of \code{n} arrays of \code{n} 
+ints.
+\end{compactitem}
+
+Note that  \code{B} and \code{E} involve variable length array types.
+
+The new items of array type are initialized as if each integer element of the original 
+array is assigned to the corresponding element of the new array. Those of pointer 
+type are initialized as if by assignment from the original item to the new item.
+
+\cnexample{carrays_fpriv}{1c}
+\ccppspecificend
+
+
--- a/Examples_collapse.tex
+++ b/Examples_collapse.tex
@ -0,0 +1,77 @@
+\pagebreak
+\chapter{The \code{collapse} Clause}
+\label{chap:collapse}
+
+In the following example, the \code{k} and \code{j} loops are associated with 
+the loop construct. So the iterations of the \code{k} and \code{j} loops are 
+collapsed into one loop with a larger iteration space, and that loop is then divided 
+among the threads in the current team. Since the \code{i} loop is not associated 
+with the loop construct, it is not collapsed, and the \code{i} loop is executed 
+sequentially in its entirety in every iteration of the collapsed \code{k} and 
+\code{j} loop. 
+
+The variable \code{j} can be omitted from the \code{private}  clause when the 
+\code{collapse} clause is used since it is implicitly private. However, if the 
+\code{collapse} clause is omitted then \code{j} will be shared if it is omitted 
+from the \code{private} clause. In either case, \code{k} is implicitly private 
+and could be omitted from the \code{private}  clause.
+
+\cexample{collapse}{1c}
+
+\fexample{collapse}{1f}
+
+In the next example, the \code{k} and \code{j} loops are associated with the 
+loop construct. So the iterations of the \code{k} and \code{j} loops are collapsed 
+into one loop with a larger iteration space, and that loop is then divided among 
+the threads in the current team.
+
+The sequential execution of the iterations in the \code{k} and \code{j} loops 
+determines the order of the iterations in the collapsed iteration space. This implies 
+that in the sequentially last iteration of the collapsed iteration space, \code{k} 
+will have the value \code{2} and \code{j} will have the value \code{3}. Since 
+\code{klast} and \code{jlast} are \code{lastprivate}, their values are assigned 
+by the sequentially last iteration of the collapsed \code{k} and \code{j} loop. 
+This example prints: \code{2 3}.
+
+\cexample{collapse}{2c}
+
+\fexample{collapse}{2f}
+
+The next example illustrates the interaction of the \code{collapse} and \code{ordered} 
+ clauses.
+
+In the example, the loop construct has both a \code{collapse} clause and an \code{ordered} 
+clause. The \code{collapse} clause causes the iterations of the \code{k} and 
+\code{j} loops to be collapsed into one loop with a larger iteration space, and 
+that loop is divided among the threads in the current team. An \code{ordered} 
+clause is added to the loop construct, because an ordered region binds to the loop 
+region arising from the loop construct.
+
+According to the restrictions on the \code{ordered} construct in the OpenMP 4.0 API Specifications, a thread must not execute more than one ordered region that binds 
+to the same loop region. So the \code{collapse} clause is required for the example 
+to be conforming. With the \code{collapse} clause, the iterations of the \code{k} 
+and \code{j} loops are collapsed into one loop, and therefore only one ordered 
+region will bind to the collapsed \code{k} and \code{j} loop. Without the \code{collapse} 
+clause, there would be two ordered regions that bind to each iteration of the \code{k} 
+loop (one arising from the first iteration of the \code{j} loop, and the other 
+arising from the second iteration of the \code{j} loop).
+
+The code prints
+
+\code{0 1 1}
+\\
+\code{0 1 2}
+\\
+\code{0 2 1}
+\\
+\code{1 2 2}
+\\
+\code{1 3 1}
+\\
+\code{1 3 2}
+
+\cexample{collapse}{3c}
+
+\fexample{collapse}{3f}
+
+
--- a/Examples_cond_comp.tex
+++ b/Examples_cond_comp.tex
@ -0,0 +1,21 @@
+\pagebreak
+\chapter{Conditional Compilation}
+\label{chap:cond_comp}
+
+\ccppspecificstart
+The following example illustrates the use of conditional compilation using the 
+OpenMP macro \code{\_OPENMP}. With OpenMP compilation, the \code{\_OPENMP} 
+macro becomes defined.
+
+\cnexample{cond_comp}{1c}
+\ccppspecificend
+
+\fortranspecificstart
+The following example illustrates the use of the conditional compilation sentinel. 
+With OpenMP compilation, the conditional compilation sentinel \code{!\$} is recognized 
+and treated as two spaces. In fixed form source, statements guarded by the sentinel 
+must start after column 6.
+
+\fnexample{cond_comp}{1f}
+\fortranspecificend
+
--- a/Examples_copyin.tex
+++ b/Examples_copyin.tex
@ -0,0 +1,13 @@
+\pagebreak
+\chapter{The \code{copyin} Clause}
+\label{chap:copyin}
+
+The \code{copyin} clause is used to initialize threadprivate data upon entry 
+to a \code{parallel} region. The value of the threadprivate variable in the master 
+thread is copied to the threadprivate variable of each other team member.
+
+\cexample{copyin}{1c}
+
+\fexample{copyin}{1f}
+
+
--- a/Examples_copyprivate.tex
+++ b/Examples_copyprivate.tex
@ -0,0 +1,51 @@
+\pagebreak
+\chapter{The \code{copyprivate} Clause}
+\label{chap:copyprivate}
+
+The \code{copyprivate} clause can be used to broadcast values acquired by a single 
+thread directly to all instances of the private variables in the other threads. 
+In this example, if the routine is called from the sequential part, its behavior 
+is not affected by the presence of the directives. If it is called from a \code{parallel} 
+region, then the actual arguments with which \code{a} and \code{b} are associated 
+must be private. 
+
+The thread that executes the structured block associated with the \code{single} 
+ construct broadcasts the values of the private variables \code{a}, \code{b}, 
+\code{x}, and 
+\code{y} from its implicit task's data environment to the data environments 
+of the other implicit tasks in the thread team. The broadcast completes before 
+any of the threads have left the barrier at the end of the construct.
+
+\cexample{copyprivate}{1c}
+
+\fexample{copyprivate}{1f}
+
+In this example, assume that the input must be performed by the master thread. 
+Since the \code{master} construct does not support the \code{copyprivate} clause, 
+it cannot broadcast the input value that is read. However, \code{copyprivate} 
+is used to broadcast an address where the input value is stored.
+
+\cexample{copyprivate}{2c}
+
+\fexample{copyprivate}{2f}
+
+Suppose that the number of lock variables required within a \code{parallel} region 
+cannot easily be determined prior to entering it. The \code{copyprivate} clause 
+can be used to provide access to shared lock variables that are allocated within 
+that \code{parallel} region.
+
+\cexample{copyprivate}{3c}
+
+\fortranspecificstart
+\fnexample{copyprivate}{3f}
+
+Note that the effect of the \code{copyprivate} clause on a variable with the 
+\code{allocatable} attribute is different than on a variable with the \code{pointer} 
+attribute. The value of \code{A} is copied (as if by intrinsic assignment) and 
+the pointer \code{B} is copied (as if by pointer assignment) to the corresponding 
+list items in the other implicit tasks belonging to the \code{parallel} region. 
+
+\fnexample{copyprivate}{4f}
+\fortranspecificend
+
+
--- a/Examples_critical.tex
+++ b/Examples_critical.tex
@ -0,0 +1,16 @@
+\pagebreak
+\chapter{The \code{critical} Construct}
+\label{chap:critical}
+
+The following example includes several \code{critical} constructs. The example 
+illustrates a queuing model in which a task is dequeued and worked on. To guard 
+against multiple threads dequeuing the same task, the dequeuing operation must 
+be in a \code{critical} region. Because the two queues in this example are independent, 
+they are protected by \code{critical} constructs with different names, \plc{xaxis} 
+and \plc{yaxis}.
+
+\cexample{critical}{1c}
+
+\fexample{critical}{1f}
+
+
--- a/Examples_declare_target.tex
+++ b/Examples_declare_target.tex
@ -0,0 +1,111 @@
+\pagebreak
+\chapter{\code{declare} \code{target} Construct}
+\label{chap:declare_target}
+
+\section{\code{declare} \code{target} and \code{end} \code{declare} \code{target} for a Function}
+
+The following example shows how the \code{declare} \code{target} directive 
+is used to indicate that the corresponding call inside a \code{target} region 
+is to a \code{fib} function that can execute on the default target device.
+
+A version of the function is also available on the host device. When the \code{if} 
+clause conditional expression on the \code{target} construct evaluates to \plc{false}, 
+the \code{target} region (thus \code{fib}) will execute on the host device.
+
+For C/C++ codes the declaration of the function \code{fib} appears between the \code{declare} 
+\code{target} and \code{end} \code{declare} \code{target} directives.
+
+\cexample{declare_target}{1c}
+
+The Fortran \code{fib} subroutine contains a \code{declare} \code{target} declaration 
+to indicate to the compiler to create a device executable version of the procedure. 
+The subroutine name has not been included on the \code{declare} \code{target} 
+directive and is, therefore, implicitly assumed.
+
+The program uses the \code{module\_fib} module, which presents an explicit interface to 
+the compiler with the \code{declare} \code{target} declarations for processing 
+the \code{fib} call.
+
+\fexample{declare_target}{1f}
+
+The next Fortran example shows the use of an external subroutine. Without an explicit 
+interface (through module use or an interface block) the \code{declare} \code{target} 
+declarations within an external subroutine are unknown to the main program unit; 
+therefore, a \code{declare} \code{target} must be provided within the program 
+scope for the compiler to determine that a target binary should be available.
+
+\fexample{declare_target}{2f}
+
+\section{\code{declare} \code{target} Construct for Class Type}
+
+The following example shows how the \code{declare} \code{target} and \code{end} 
+\code{declare} \code{target} directives are used to enclose the declaration 
+of a variable \plc{varY} with a class type \code{typeY}. The member function \code{typeY::foo()} cannot 
+be accessed on a target device because its declaration did not appear between \code{declare} 
+\code{target} and \code{end} \code{declare} \code{target} directives.
+
+\cexample{declare_target}{2c}
+
+\section{\code{declare} \code{target} and \code{end} \code{declare} \code{target} for Variables}
+
+The following examples show how the \code{declare} \code{target} and \code{end} 
+\code{declare} \code{target} directives are used to indicate that global variables 
+are mapped to the implicit device data environment of each target device.
+
+In the following example, the declarations of the variables \plc{p}, \plc{v1}, and \plc{v2} appear 
+between \code{declare} \code{target} and \code{end} \code{declare} \code{target} 
+directives indicating that the variables are mapped to the implicit device data 
+environment of each target device. The \code{target} \code{update} directive 
+is then used to manage the consistency of the variables \plc{p}, \plc{v1}, and \plc{v2} between the 
+data environment of the encountering host device task and the implicit device data 
+environment of the default target device.
+
+\cexample{declare_target}{3c}
+
+The Fortran version of the above C code uses a different syntax. Fortran modules 
+use a list syntax on the \code{declare} \code{target} directive to declare 
+mapped variables.
+
+\fexample{declare_target}{3f}
+
+The following example also indicates that the function \code{Pfun()} is available on the 
+target device, as well as the variable \plc{Q}, which is mapped to the implicit device 
+data environment of each target device. The \code{target} \code{update} directive 
+is then used to manage the consistency of the variable \plc{Q} between the data environment 
+of the encountering host device task and the implicit device data environment of 
+the default target device.
+
+In the following example, the function and variable declarations appear between 
+the \code{declare} \code{target} and \code{end} \code{declare} \code{target} 
+directives.
+
+\cexample{declare_target}{4c}
+
+The Fortran version of the above C code uses a different syntax. In Fortran modules 
+a list syntax on the \code{declare} \code{target} directive is used to declare 
+mapped variables and procedures. The \plc{N} and \plc{Q} variables are declared as a comma 
+separated list. When the \code{declare} \code{target} directive is used to 
+declare just the procedure, the procedure name need not be listed -- it is implicitly 
+assumed, as illustrated in the \code{Pfun()} function.
+
+\fexample{declare_target}{4f}
+
+\section{\code{declare} \code{target} and \code{end} \code{declare} \code{target} with \code{declare} \code{simd}}
+
+The following example shows how the \code{declare} \code{target} and \code{end} 
+\code{declare} \code{target} directives are used to indicate that a function 
+is available on a target device. The \code{declare} \code{simd} directive indicates 
+that there is a SIMD version of the function \code{P()} that is available on the target 
+device as well as one that is available on the host device.
+
+\cexample{declare_target}{5c}
+
+The Fortran version of the above C code uses a different syntax. Fortran modules 
+use a list syntax of the \code{declare} \code{target} declaration for the mapping. 
+Here the \plc{N} and \plc{Q} variables are declared in the list form as a comma separated list. 
+The function declaration does not use a list and implicitly assumes the function 
+name. In this Fortran example row and column indices are reversed relative to the 
+C/C++ example, as is usual for codes optimized for memory access.
+
+\fexample{declare_target}{5f}
+
--- a/Examples_default_none.tex
+++ b/Examples_default_none.tex
@ -0,0 +1,12 @@
+\pagebreak
+\chapter{The \code{default(none)} Clause}
+\label{chap:default_none}
+
+The following example distinguishes the variables that are affected by the \code{default(none)} 
+clause from those that are not. 
+
+\cexample{default_none}{1c}
+
+\fexample{default_none}{1f}
+
+
--- a/Examples_device.tex
+++ b/Examples_device.tex
@ -0,0 +1,35 @@
+\pagebreak
+\chapter{Device Routines}
+\label{chap:device}
+
+\section{\code{omp\_is\_initial\_device} Routine}
+
+The following example shows how the \code{omp\_is\_initial\_device} runtime library routine 
+can be used to query if a code is executing on the initial host device or on a 
+target device. The example then sets the number of threads in the \code{parallel} 
+region based on where the code is executing.
+
+\cexample{device}{1c}
+
+\fexample{device}{1f}
+
+\section{\code{omp\_get\_num\_devices} Routine}
+
+The following example shows how the \code{omp\_get\_num\_devices} runtime library routine 
+can be used to determine the number of devices.
+
+\cexample{device}{2c}
+
+\fexample{device}{2f}
+
+\section{\code{omp\_set\_default\_device} and \\
+\code{omp\_get\_default\_device} Routines}
+
+The following example shows how the \code{omp\_set\_default\_device} and \code{omp\_get\_default\_device} 
+runtime library routines can be used to set the default device and determine the 
+default device respectively.
+
+\cexample{device}{3c}
+
+\fexample{device}{3f}
+
--- a/Examples_flush_nolist.tex
+++ b/Examples_flush_nolist.tex
@ -0,0 +1,12 @@
+\pagebreak
+\chapter{The \code{flush} Construct without a List}
+\label{chap:flush_nolist}
+
+The following example distinguishes the shared variables affected by a \code{flush} 
+construct with no list from the shared objects that are not affected:
+
+\cexample{flush_nolist}{1c}
+
+\fexample{flush_nolist}{1f}
+
+
--- a/Examples_fort_do.tex
+++ b/Examples_fort_do.tex
@ -0,0 +1,19 @@
+\pagebreak
+\chapter{Fortran Restrictions on the \code{do} Construct}
+\label{chap:fort_do}
+\fortranspecificstart
+
+If an \code{end do} directive follows a \plc{do-construct}  in which several 
+\code{DO} statements share a \code{DO} termination statement, then a  \code{do} 
+directive can only be specified for the outermost of these \code{DO} statements. 
+The following example contains correct usages of loop constructs:
+
+\fnexample{fort_do}{1f}
+
+The following example is non-conforming because the matching \code{do} directive 
+for the \code{end do} does not precede the outermost loop:
+
+\fnexample{fort_do}{2f}
+\fortranspecificend
+
+
--- a/Examples_fort_loopvar.tex
+++ b/Examples_fort_loopvar.tex
@ -0,0 +1,22 @@
+\pagebreak
+\chapter{Fortran Private Loop Iteration Variables}
+\label{chap:fort_loopvar}
+\fortranspecificstart
+
+In general loop iteration variables will be private, when used in the \plc{do-loop} 
+of a \code{do} and \code{parallel do} construct or in sequential loops in a 
+\code{parallel} construct (see Section 2.7.1 and Section 2.14.1 of the OpenMP 4.0 
+specification). In the following example of a sequential 
+loop in a \code{parallel} construct the loop iteration variable \plc{I} will 
+be private.
+
+\fnexample{fort_loopvar}{1f}
+
+In exceptional cases, loop iteration variables can be made shared, as in the following 
+example:
+
+\fnexample{fort_loopvar}{2f}
+
+Note however that the use of shared loop iteration variables can easily lead to 
+race conditions.
+\fortranspecificend
+
--- a/Examples_fort_race.tex
+++ b/Examples_fort_race.tex
@ -0,0 +1,16 @@
+\pagebreak
+\chapter{Race Conditions Caused by Implied Copies of Shared Variables in Fortran}
+\fortranspecificstart
+\label{chap:fort_race}
+
+The following example contains a race condition, because the shared variable, which 
+is an array section, is passed as an actual argument to a routine that has an assumed-size 
+array as its dummy argument. The subroutine call passing an array section argument 
+may cause the compiler to copy the argument into a temporary location prior to 
+the call and copy from the temporary location into the original variable when the 
+subroutine returns. This copying would cause races in the \code{parallel} region.
+
+\fnexample{fort_race}{1f}
+\fortranspecificend
+
+
--- a/Examples_fort_sa_private.tex
+++ b/Examples_fort_sa_private.tex
@ -0,0 +1,23 @@
+\pagebreak
+\chapter{Fortran Restrictions on Storage Association with the \code{private} Clause}
+\fortranspecificstart
+\label{chap:fort_sa_private}
+
+The following non-conforming examples illustrate the implications of the \code{private} 
+clause rules with regard to storage association. 
+
+\fnexample{fort_sa_private}{1f}
+
+\fnexample{fort_sa_private}{2f}
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+\fnexample{fort_sa_private}{3f}
+
+\fnexample{fort_sa_private}{4f}
+
+\fnexample{fort_sa_private}{5f}
+\fortranspecificend
+
--- a/Examples_fort_sp_common.tex
+++ b/Examples_fort_sp_common.tex
@ -0,0 +1,38 @@
+\pagebreak
+\chapter{Fortran Restrictions on \code{shared} and \code{private} Clauses with Common Blocks}
+\fortranspecificstart
+\label{chap:fort_sp_common}
+
+When a named common block is specified in a \code{private}, \code{firstprivate}, 
+or \code{lastprivate} clause of a construct, none of its members may be declared 
+in another data-sharing attribute clause on that construct. The following examples 
+illustrate this point. 
+
+The following example is conforming:
+
+\fnexample{fort_sp_common}{1f}
+
+The following example is also conforming:
+
+\fnexample{fort_sp_common}{2f}
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+The following example is conforming:
+
+\fnexample{fort_sp_common}{3f}
+
+The following example is non-conforming because \code{x} is a constituent element 
+of \code{c}:
+
+\fnexample{fort_sp_common}{4f}
+
+The following example is non-conforming because a common block may not be declared 
+both shared and private:
+
+\fnexample{fort_sp_common}{5f}
+\fortranspecificend
+
+
--- a/Examples_fpriv_sections.tex
+++ b/Examples_fpriv_sections.tex
@ -0,0 +1,18 @@
+\pagebreak
+\chapter{The \code{firstprivate} Clause and the \code{sections} Construct}
+\label{chap:fpriv_sections}
+
+In the following example of the \code{sections} construct, the \code{firstprivate} 
+clause is used to initialize the private copy of \code{section\_count} of each 
+thread. The problem is that the \code{section} constructs modify \code{section\_count}, 
+which breaks the independence of the \code{section} constructs. When different 
+threads execute each section, both sections will print the value 1. When the same 
+thread executes the two sections, one section will print the value 1 and the other 
+will print the value 2. Since the order of execution of the two sections in this 
+case is unspecified, it is unspecified which section prints which value. 
+
+\cexample{fpriv_sections}{1c}
+
+\fexample{fpriv_sections}{1f}
+
+
--- a/Examples_get_nthrs.tex
+++ b/Examples_get_nthrs.tex
@ -0,0 +1,21 @@
+\pagebreak
+\chapter{The \code{omp\_get\_num\_threads} Routine}
+\label{chap:get_nthrs}
+
+In the following example, the \code{omp\_get\_num\_threads} call returns 1 in 
+the sequential part of the code, so \code{np} will always be equal to 1. To determine 
+the number of threads that will be deployed for the \code{parallel} region, the 
+call should be inside the \code{parallel} region.
+
+\cexample{get_nthrs}{1c}
+
+\fexample{get_nthrs}{1f}
+
+The following example shows how to rewrite this program without including a query 
+for the number of threads:
+
+\cexample{get_nthrs}{2c}
+
+\fexample{get_nthrs}{2f}
+
+
--- a/Examples_icv.tex
+++ b/Examples_icv.tex
@ -0,0 +1,56 @@
+\pagebreak
+\chapter{Internal Control Variables (ICVs)}
+\label{chap:icv}
+
+According to Section 2.3 of the OpenMP 4.0 specification, an OpenMP implementation 
+must act as if there are ICVs that control 
+the behavior of the program.  This example illustrates two ICVs, \plc{nthreads-var} 
+and \plc{max-active-levels-var}. The \plc{nthreads-var} ICV controls the 
+number of threads requested for encountered parallel regions; there is one copy 
+of this ICV per task. The \plc{max-active-levels-var} ICV controls the maximum 
+number of nested active parallel regions; there is one copy of this ICV for the 
+whole program.
+
+In the following example, the \plc{nest-var}, \plc{max-active-levels-var}, 
+\plc{dyn-var}, and \plc{nthreads-var} ICVs are modified through calls to 
+the runtime library routines \code{omp\_set\_nested},\\ \code{omp\_set\_max\_active\_levels}, 
+\code{omp\_set\_dynamic}, and \code{omp\_set\_num\_threads} respectively. These ICVs 
+affect the operation of \code{parallel} regions. Each implicit task generated 
+by a \code{parallel} region has its own copy of the \plc{nest-var}, \plc{dyn-var}, 
+and \plc{nthreads-var} ICVs.
+
+In the following example, the new value of \plc{nthreads-var} applies only to 
+the implicit tasks that execute the call to \code{omp\_set\_num\_threads}. There 
+is one copy of the \plc{max-active-levels-var} ICV for the whole program and 
+its value is the same for all tasks. This example assumes that nested parallelism 
+is supported.
+
+The outer \code{parallel} region creates a team of two threads; each of the threads 
+will execute one of the two implicit tasks generated by the outer \code{parallel} 
+region.
+
+Each implicit task generated by the outer \code{parallel} region calls \code{omp\_set\_num\_threads(3)}, 
+assigning the value 3 to its respective copy of \plc{nthreads-var}. Then each 
+implicit task encounters an inner \code{parallel} region that creates a team 
+of three threads; each of the threads will execute one of the three implicit tasks 
+generated by that inner \code{parallel} region.
+
+Since the outer \code{parallel} region is executed by 2 threads, and the inner 
+by 3, there will be a total of 6 implicit tasks generated by the two inner \code{parallel} 
+regions.
+
+Each implicit task generated by an inner \code{parallel} region will execute 
+the call to\\ \code{omp\_set\_num\_threads(4)}, assigning the value 4 to its respective 
+copy of \plc{nthreads-var}.
+
+The print statement in the outer \code{parallel} region is executed by only one 
+of the threads in the team. So it will be executed only once.
+
+The print statement in an inner \code{parallel} region is also executed by only 
+one of the threads in the team. Since we have a total of two inner \code{parallel} 
+regions, the print statement will be executed twice -- once per inner \code{parallel} 
+region.
+
+\cexample{icv}{1c}
+
+\fexample{icv}{1f}
+
--- a/Examples_init_lock.tex
+++ b/Examples_init_lock.tex
@ -0,0 +1,11 @@
+\pagebreak
+\chapter{The \code{omp\_init\_lock} Routine}
+\label{chap:init_lock}
+
+The following example demonstrates how to initialize an array of locks in a \code{parallel} 
+region by using \code{omp\_init\_lock}.
+
+\cexample{init_lock}{1c}
+
+\fexample{init_lock}{1f}
+
--- a/Examples_lastprivate.tex
+++ b/Examples_lastprivate.tex
@ -0,0 +1,14 @@
+\pagebreak
+\chapter{The \code{lastprivate} Clause}
+\label{chap:lastprivate}
+
+Correct execution sometimes depends on the value that the last iteration of a loop 
+assigns to a variable. Such programs must list all such variables in a \code{lastprivate} 
+clause  so that the values of the variables are the same as when the loop is executed 
+sequentially.
+
+\cexample{lastprivate}{1c}
+
+\fexample{lastprivate}{1f}
+
+
--- a/Examples_lock_owner.tex
+++ b/Examples_lock_owner.tex
@ -0,0 +1,23 @@
+\pagebreak
+\chapter{Ownership of Locks}
+\label{chap:lock_owner}
+
+Ownership of locks has changed since OpenMP 2.5. In OpenMP 2.5, locks are owned 
+by threads; so a lock released by the \code{omp\_unset\_lock} routine must be 
+owned by the same thread executing the routine.  With OpenMP 3.0, locks are owned 
+by task regions; so a lock released by the \code{omp\_unset\_lock} routine in 
+a task region must be owned by the same task region.
+
+This change in ownership requires extra care when using locks. The following program 
+is conforming in OpenMP 2.5 because the thread that releases the lock \code{lck} 
+in the parallel region is the same thread that acquired the lock in the sequential 
+part of the program (master thread of parallel region and the initial thread are 
+the same). However, it is not conforming in OpenMP 3.0 and 3.1, because the task 
+region that releases the lock \code{lck} is different from the task region that 
+acquires the lock.
+
+\cexample{lock_owner}{1c}
+
+\fexample{lock_owner}{1f}
+
+
--- a/Examples_master.tex
+++ b/Examples_master.tex
@ -0,0 +1,13 @@
+\pagebreak
+\chapter{The \code{master} Construct}
+\label{chap:master}
+
+The following example demonstrates the \code{master} construct. In the example, the master 
+keeps track of how many iterations have been executed and prints out a progress 
+report. The other threads skip the master region without waiting.
+
+\cexample{master}{1c}
+
+\fexample{master}{1f}
+
+
--- a/Examples_mem_model.tex
+++ b/Examples_mem_model.tex
@ -0,0 +1,38 @@
+\pagebreak
+\chapter{The OpenMP Memory Model}
+\label{chap:mem_model}
+
+In the following example, at Print 1, the value of \plc{x} could be either 2 
+or 5, depending on the timing of the threads, and the implementation of the assignment 
+to \plc{x}. There are two reasons that the value at Print 1 might not be 5. 
+First, Print 1 might be executed before the assignment to \plc{x} is executed. 
+Second, even if Print 1 is executed after the assignment, the value 5 is not guaranteed 
+to be seen by thread 1 because a flush may not have been executed by thread 0 since 
+the assignment.
+
+The barrier after Print 1 contains implicit flushes on all threads, as well as 
+a thread synchronization, so the programmer is guaranteed that the value 5 will 
+be printed by both Print 2 and Print 3.
+
+\cexample{mem_model}{1c}
+
+\fexample{mem_model}{1f}
+
+The following example demonstrates why synchronization is difficult to perform 
+correctly through variables. The value of flag is undefined in both prints on thread 
+1 and the value of data is only well-defined in the second print.
+
+\cexample{mem_model}{2c}
+
+\fexample{mem_model}{2f}
+
+The next example demonstrates why synchronization is difficult to perform correctly 
+through variables. Because the \plc{write}(1)-\plc{flush}(1)-\plc{flush}(2)-\plc{read}(2) 
+sequence cannot be guaranteed in the example, the statements on thread 0 and thread 
+1 may execute in either order.
+
+\cexample{mem_model}{3c}
+
+\fexample{mem_model}{3f}
+
+
--- a/Examples_nestable_lock.tex
+++ b/Examples_nestable_lock.tex
@ -0,0 +1,11 @@
+\pagebreak
+\chapter{Nestable Lock Routines}
+\label{chap:nestable_lock}
+
+The following example demonstrates how a nestable lock can be used to synchronize 
+updates both to a whole structure and to one of its members.
+
+\cexample{nestable_lock}{1c}
+
+\fexample{nestable_lock}{1f}
+
--- a/Examples_nested_loop.tex
+++ b/Examples_nested_loop.tex
@ -0,0 +1,18 @@
+\pagebreak
+\chapter{Nested Loop Constructs}
+\label{chap:nested_loop}
+
+The following example of loop construct nesting is conforming because the inner 
+and outer loop regions bind to different \code{parallel} regions:
+
+\cexample{nested_loop}{1c}
+
+\fexample{nested_loop}{1f}
+
+The following variation of the preceding example is also conforming:
+
+\cexample{nested_loop}{2c}
+
+\fexample{nested_loop}{2f}
+
+
--- a/Examples_nesting_restrict.tex
+++ b/Examples_nesting_restrict.tex
@ -0,0 +1,52 @@
+\pagebreak
+\chapter{Restrictions on Nesting of Regions}
+\label{chap:nesting_restrict}
+
+The examples in this section illustrate the region nesting rules. 
+
+The following example is non-conforming because the inner and outer loop regions 
+are closely nested:
+
+\cexample{nesting_restrict}{1c}
+
+\fexample{nesting_restrict}{1f}
+
+The following orphaned version of the preceding example is also non-conforming:
+
+\cexample{nesting_restrict}{2c}
+
+\fexample{nesting_restrict}{2f}
+
+The following example is non-conforming because the loop and \code{single} regions 
+are closely nested:
+
+\cexample{nesting_restrict}{3c}
+
+\fexample{nesting_restrict}{3f}
+
+The following example is non-conforming because a \code{barrier} region cannot 
+be closely nested inside a loop region:
+
+\cexample{nesting_restrict}{4c}
+
+\fexample{nesting_restrict}{4f}
+
+The following example is non-conforming because the \code{barrier} region cannot 
+be closely nested inside the \code{critical} region. If this were permitted, 
+it would result in deadlock due to the fact that only one thread at a time can 
+enter the \code{critical} region:
+
+\cexample{nesting_restrict}{5c}
+
+\fexample{nesting_restrict}{5f}
+
+The following example is non-conforming because the \code{barrier} region cannot 
+be closely nested inside the \code{single} region. If this were permitted, it 
+would result in deadlock due to the fact that only one thread executes the \code{single} 
+region:
+
+\cexample{nesting_restrict}{6c}
+
+\fexample{nesting_restrict}{6f}
+
+
--- a/Examples_nowait.tex
+++ b/Examples_nowait.tex
@ -0,0 +1,28 @@
+\pagebreak
+\chapter{The \code{nowait} Clause}
+\label{chap:nowait}
+
+If there are multiple independent loops within a \code{parallel} region, you 
+can use the \code{nowait} clause to avoid the implied barrier at the end of the 
+loop construct, as follows:
+
+\cexample{nowait}{1c}
+
+\fexample{nowait}{1f}
+
+In the following example, static scheduling distributes the same logical iteration 
+numbers to the threads that execute the three loop regions. This allows the \code{nowait} 
+clause to be used, even though there is a data dependence between the loops. The 
+dependence is satisfied as long as the same thread executes the same logical iteration 
+numbers in each loop.
+
+Note that the iteration count of the loops must be the same. The example satisfies 
+this requirement, since the iteration space of the first two loops is from \code{0} 
+to \code{n-1} (from \code{1} to \code{N} in the Fortran version), while the 
+iteration space of the last loop is from \code{1} to \code{n} (\code{2} to 
+\code{N+1} in the Fortran version).
+
+\cexample{nowait}{2c}
+
+\fexample{nowait}{2f}
+
--- a/Examples_nthrs_dynamic.tex
+++ b/Examples_nthrs_dynamic.tex
@ -0,0 +1,30 @@
+\pagebreak
+\chapter{Interaction Between the \code{num\_threads} Clause and \code{omp\_set\_dynamic}}
+\label{chap:nthrs_dynamic}
+
+The following example demonstrates the \code{num\_threads} clause  and the effect 
+of the \\
+\code{omp\_set\_dynamic} routine  on it.
+
+The call to the \code{omp\_set\_dynamic} routine with argument \code{0} in 
+C/C++, or \code{.FALSE.} in Fortran, disables the dynamic adjustment of the number 
+of threads in OpenMP implementations that support it. In this case, 10 threads 
+are provided. Note that in case of an error the OpenMP implementation is free to 
+abort the program or to supply any number of threads available.
+
+\cexample{nthrs_dynamic}{1c}
+
+\fexample{nthrs_dynamic}{1f}
+
+The call to the \code{omp\_set\_dynamic} routine with a non-zero argument in 
+C/C++, or \code{.TRUE.} in Fortran, allows the OpenMP implementation to choose 
+any number of threads between 1 and 10.
+
+\cexample{nthrs_dynamic}{2c}
+
+\fexample{nthrs_dynamic}{2f}
+
+It is good practice to set the \plc{dyn-var} ICV explicitly by calling the \code{omp\_set\_dynamic} 
+routine, as its default setting is implementation defined.
+
+
--- a/Examples_nthrs_nesting.tex
+++ b/Examples_nthrs_nesting.tex
@ -0,0 +1,12 @@
+\pagebreak
+\chapter{Controlling the Number of Threads on Multiple Nesting Levels}
+\label{chap:nthrs_nesting}
+
+The following examples demonstrate how to use the \code{OMP\_NUM\_THREADS} environment 
+variable  to control the number of threads on multiple nesting levels:
+
+\cexample{nthrs_nesting}{1c}
+
+\fexample{nthrs_nesting}{1f}
+
+
--- a/Examples_ordered.tex
+++ b/Examples_ordered.tex
@ -0,0 +1,28 @@
+\pagebreak
+\chapter{The \code{ordered} Clause and the \code{ordered} Construct}
+\label{chap:ordered}
+
+Ordered constructs  are useful for sequentially ordering the output from work that 
+is done in parallel. The following program prints out the indices in sequential 
+order:
+
+\cexample{ordered}{1c}
+
+\fexample{ordered}{1f}
+
+It is possible to have multiple \code{ordered} constructs within a loop region 
+with the \code{ordered} clause specified. The first example is non-conforming 
+because all iterations execute two \code{ordered} regions. An iteration of a 
+loop must not execute more than one \code{ordered} region:
+
+\cexample{ordered}{2c}
+
+\fexample{ordered}{2f}
+
+The following is a conforming example with more than one \code{ordered} construct. 
+Each iteration will execute only one \code{ordered} region:
+
+\cexample{ordered}{3c}
+
+\fexample{ordered}{3f}
+
--- a/Examples_parallel.tex
+++ b/Examples_parallel.tex
@ -0,0 +1,12 @@
+\pagebreak
+\chapter{The \code{parallel} Construct}
+\label{chap:parallel}
+
+The \code{parallel} construct  can be used in coarse-grain parallel programs. 
+In the following example, each thread in the \code{parallel} region decides what 
+part of the global array \plc{x} to work on, based on the thread number:
+
+\cexample{parallel}{1c}
+
+\fexample{parallel}{1f}
+
--- a/Examples_ploop.tex
+++ b/Examples_ploop.tex
@ -0,0 +1,11 @@
+\chapter{A Simple Parallel Loop}
+\label{chap:ploop}
+
+The following example demonstrates how to parallelize a simple loop using the parallel 
+loop construct. The loop iteration variable is private by default, so it is not 
+necessary to specify it explicitly in a \code{private} clause.
+
+\cexample{ploop}{1c}
+
+\fexample{ploop}{1f}
+
--- a/Examples_pra_iterator.tex
+++ b/Examples_pra_iterator.tex
@ -0,0 +1,11 @@
+\pagebreak
+\chapter{Parallel Random Access Iterator Loop}
+\ccppspecificstart
+\label{chap:pra_iterator}
+
+The following example shows a parallel random access iterator loop.
+
+\cnexample{pra_iterator}{1c}
+\ccppspecificend
+
+
--- a/Examples_private.tex
+++ b/Examples_private.tex
@ -0,0 +1,31 @@
+\pagebreak
+\chapter{The \code{private} Clause}
+\label{chap:private}
+
+In the following example, the values of original list items \plc{i} and \plc{j} 
+are retained on exit from the \code{parallel} region, while the private list 
+items \plc{i} and \plc{j} are modified within the \code{parallel} construct. 
+
+\cexample{private}{1c}
+
+\fexample{private}{1f}
+
+In the following example, all uses of the variable \plc{a} within the loop construct 
+in the routine \plc{f} refer to a private list item \plc{a}, while it is 
+unspecified whether references to \plc{a} in the routine \plc{g} are to a 
+private list item or the original list item.
+
+\cexample{private}{2c}
+
+\fexample{private}{2f}
+
+The following example demonstrates that a list item that appears in a \code{private} 
+ clause in a \code{parallel} construct may also appear in a \code{private} 
+ clause in an enclosed worksharing construct, which results in an additional private 
+copy.
+
+\cexample{private}{3c}
+
+\fexample{private}{3f}
+
+
--- a/Examples_psections.tex
+++ b/Examples_psections.tex
@ -0,0 +1,13 @@
+\pagebreak
+\chapter{The \code{parallel} \code{sections} Construct}
+\label{chap:psections}
+
+In the following example routines \code{XAXIS}, \code{YAXIS}, and \code{ZAXIS} can 
+be executed concurrently. The first \code{section} directive is optional. Note 
+that all \code{section} directives need to appear in the \code{parallel sections} 
+construct.
+
+\cexample{psections}{1c}
+
+\fexample{psections}{1f}
+
--- a/Examples_reduction.tex
+++ b/Examples_reduction.tex
@ -0,0 +1,60 @@
+\pagebreak
+\chapter{The \code{reduction} Clause}
+\label{chap:reduction}
+
+The following example demonstrates the \code{reduction} clause; note that some 
+reductions can be expressed in the loop in several ways, as shown for the \code{max} 
+and \code{min} reductions below:
+
+\cexample{reduction}{1c}
+
+\fexample{reduction}{1f}
+
+A common implementation of the preceding example is to treat it as if it had been 
+written as follows:
+
+\cexample{reduction}{2c}
+
+\fortranspecificstart
+\fnexample{reduction}{2f}
+
+The following program is non-conforming because the reduction is on the 
+\emph{intrinsic procedure name} \code{MAX} but that name has been redefined to be the variable 
+named \code{MAX}.
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+\fnexample{reduction}{3f}
+
+The following conforming program performs the reduction using the 
+\emph{intrinsic procedure name} \code{MAX} even though the intrinsic \code{MAX} has been renamed 
+to \code{REN}.
+
+\fnexample{reduction}{4f}
+
+The following conforming program performs the reduction using the 
+\emph{intrinsic procedure name} \code{MAX} even though the intrinsic \code{MAX} has been renamed 
+to \code{MIN}.
+
+\fnexample{reduction}{5f}
+\fortranspecificend
+
+The following example is non-conforming because the initialization (\code{a = 
+0}) of the original list item \code{a} is not synchronized with the update of 
+\code{a} as a result of the reduction computation in the \code{for} loop. Therefore, 
+the example may print an incorrect value for \code{a}.
+
+To avoid this problem, the initialization of the original list item \code{a} 
+should complete before any update of \code{a} as a result of the \code{reduction} 
+clause. This can be achieved by adding an explicit barrier after the assignment 
+\code{a = 0}, or by enclosing the assignment \code{a = 0} in a \code{single} 
+directive (which has an implied barrier), or by initializing \code{a} before 
+the start of the \code{parallel} region.
+
+\cexample{reduction}{3c}
+
+\fexample{reduction}{6f}
+
+
--- a/Examples_set_dynamic_nthrs.tex
+++ b/Examples_set_dynamic_nthrs.tex
@ -0,0 +1,24 @@
+\pagebreak
+\chapter{The \code{omp\_set\_dynamic} and \\
+\code{omp\_set\_num\_threads} Routines}
+\label{chap:set_dynamic_nthrs}
+
+Some programs rely on a fixed, prespecified number of threads to execute correctly. 
+Because the default setting for the dynamic adjustment of the number of threads 
+is implementation defined, such programs can choose to turn off the dynamic threads 
+capability and set the number of threads explicitly to ensure portability. The 
+following example shows how to do this using \code{omp\_set\_dynamic} and \code{omp\_set\_num\_threads}.
+
+In this example, the program executes correctly only if it is executed by 16 threads. 
+If the implementation is not capable of supporting 16 threads, the behavior of 
+this example is implementation defined. Note that the number of threads executing 
+a \code{parallel} region remains constant during the region, regardless of the 
+dynamic threads setting. The dynamic threads mechanism determines the number of 
+threads to use at the start of the \code{parallel} region and keeps it constant 
+for the duration of the region.
+
+\cexample{set_dynamic_nthrs}{1c}
+
+\fexample{set_dynamic_nthrs}{1f}
+
+
--- a/Examples_simple_lock.tex
+++ b/Examples_simple_lock.tex
@ -0,0 +1,19 @@
+\pagebreak
+\chapter{Simple Lock Routines}
+\label{chap:simple_lock}
+
+In the following example, the lock routines cause the threads to be idle while 
+waiting for entry to the first critical section, but to do other work while waiting 
+for entry to the second. The \code{omp\_set\_lock} function blocks, but the \code{omp\_test\_lock} 
+function does not, allowing the work in \code{skip} to be done. 
+
+Note that the argument to the lock routines should have type \code{omp\_lock\_t}, 
+and that there is no need to flush it. 
+
+\cexample{simple_lock}{1c}
+
+Note that there is no need to flush the lock variable. 
+
+\fexample{simple_lock}{1f}
+
+
--- a/Examples_single.tex
+++ b/Examples_single.tex
@ -0,0 +1,18 @@
+\pagebreak
+\chapter{The \code{single} Construct}
+\label{chap:single}
+
+The following example demonstrates the \code{single} construct. In the example, 
+only one thread prints each of the progress messages. All other threads will skip 
+the \code{single} region and stop at the barrier at the end of the \code{single} 
+construct until all threads in the team have reached the barrier. If other threads 
+can proceed without waiting for the thread executing the \code{single} region, 
+a \code{nowait} clause can be specified, as is done in the third \code{single} 
+construct in this example. The user must not make any assumptions as to which thread 
+will execute a \code{single} region.
+
+\cexample{single}{1c}
+
+\fexample{single}{1f}
+
+
--- a/Examples_standalone.tex
+++ b/Examples_standalone.tex
@ -0,0 +1,31 @@
+\pagebreak
+\chapter{Placement of \code{flush}, \code{barrier}, \code{taskwait} 
+and \code{taskyield} Directives}
+\label{chap:standalone}
+
+The following example is non-conforming, because the \code{flush}, \code{barrier}, 
+\code{taskwait}, and \code{taskyield}  directives are stand-alone directives 
+and cannot be the immediate substatement of an \code{if} statement. 
+
+\cexample{standalone}{1c}
+
+The following example is non-conforming, because the \code{flush}, \code{barrier}, 
+\code{taskwait}, and \code{taskyield}  directives are stand-alone directives 
+and cannot be the action statement of an \code{if} statement or a labeled branch 
+target.
+
+\fexample{standalone}{1f}
+
+The following version of the above example is conforming because the \code{flush}, 
+\code{barrier}, \code{taskwait}, and \code{taskyield} directives are enclosed 
+in a compound statement. 
+
+\cexample{standalone}{2c}
+
+The following example is conforming because the \code{flush}, \code{barrier}, 
+\code{taskwait}, and \code{taskyield} directives are enclosed in an \code{if} 
+construct or follow the labeled branch target.
+
+\fexample{standalone}{2f}
+
+
--- a/Examples_target.tex
+++ b/Examples_target.tex
@ -0,0 +1,96 @@
+\pagebreak
+\chapter{\code{target} Construct}
+\label{chap:target}
+
+\section{\code{target} Construct on \code{parallel} Construct}
+
+The following example shows how the \code{target} construct offloads a code 
+region to a target device. The variables \plc{p}, \plc{v1}, \plc{v2}, and \plc{N} are implicitly mapped 
+to the target device.
+
+\cexample{target}{1c}
+
+\fexample{target}{1f}
+
+\section{\code{target} Construct with \code{map} Clause}
+
+The following example shows how the \code{target} construct offloads a code 
+region to a target device. The variables \plc{p}, \plc{v1} and \plc{v2} are explicitly mapped to 
+the target device using the \code{map} clause. The variable \plc{N} is implicitly mapped to 
+the target device.
+
+\cexample{target}{2c}
+
+\fexample{target}{2f}
+
+\section{\code{map} Clause with \code{to}/\code{from} map-types}
+
+The following example shows how the \code{target} construct offloads a code region 
+to a target device. In the \code{map} clause, the \code{to} and \code{from} 
+map-types define the mapping between the original (host) data and the target (device) 
+data. The \code{to} map-type specifies that the data will only be read on the 
+device, and the \code{from} map-type specifies that the data will only be written 
+to on the device. By specifying a guaranteed access on the device, data transfers 
+can be reduced for the \code{target} region.
+
+The \code{to} map-type indicates that at the start of the \code{target} region 
+the variables \plc{v1} and \plc{v2} are initialized with the values of the corresponding variables 
+on the host device, and at the end of the \code{target} region the variables 
+\plc{v1} and \plc{v2} are not assigned to their corresponding variables on the host device.
+
+The \code{from} map-type indicates that at the start of the \code{target} region 
+the variable \plc{p} is not initialized with the value of the corresponding variable 
+on the host device, and at the end of the \code{target} region the variable \plc{p} 
+is assigned to the corresponding variable on the host device.
+
+\cexample{target}{3c}
+
+The \code{to} and \code{from} map-types allow programmers to optimize data 
+motion. Since data for the \plc{v} arrays are not returned, and data for the \plc{p} array 
+are not transferred to the device, only one-half of the data is moved, compared 
+to the default behavior of an implicit mapping.
+
+\fexample{target}{3f}
+
+\section{\code{map} Clause with Array Sections}
+
+The following example shows how the \code{target} construct offloads a code region 
+to a target device. In the \code{map} clause, map-types are used to optimize 
+the mapping of variables to the target device. Because variables \plc{p}, \plc{v1} and \plc{v2} are 
+pointers, array section notation must be used to map the arrays. The notation \code{:N} 
+is equivalent to \code{0:N}.
+
+\cexample{target}{4c}
+
+In C, the length of the pointed-to array must be specified. In Fortran the extent 
+of the array is known and the length need not be specified. A section of the array 
+can be specified with the usual Fortran syntax, as shown in the following example. 
+The value 1 is assumed for the lower bound for array section \plc{v2(:N)}.
+
+\fexample{target}{4f}
+
+A more realistic situation in which an assumed-size array is passed to \code{vec\_mult} 
+requires that the length of the arrays be specified, because the compiler does 
+not know the size of the storage. A section of the array must be specified with 
+the usual Fortran syntax, as shown in the following example. The value 1 is assumed 
+for the lower bound for array section \plc{v2(:N)}.
+
+\fexample{target}{4bf}
+
+\section{\code{target} Construct with \code{if} Clause}
+
+The following example shows how the \code{target} construct offloads a code region 
+to a target device.
+
+The \code{if} clause on the \code{target} construct indicates that if the variable 
+\plc{N} is smaller than a given threshold, then the \code{target} region will be executed 
+by the host device.
+
+The \code{if} clause on the \code{parallel} construct indicates that if the 
+variable \plc{N} is smaller than a second threshold then the \code{parallel} region 
+is inactive.
+
+\cexample{target}{5c}
+
+\fexample{target}{5f}
+
--- a/Examples_target_data.tex
+++ b/Examples_target_data.tex
@ -0,0 +1,175 @@
+\pagebreak
+\chapter{\code{target} \code{data} Construct}
+\label{chap:target_data}
+
+\section{Simple \code{target} \code{data} Construct}
+
+This example shows how the \code{target} \code{data} construct maps variables 
+to a device data environment. The \code{target} \code{data} construct creates 
+a new device data environment and maps the variables \plc{v1}, \plc{v2}, and \plc{p} to the new device 
+data environment. The \code{target} construct enclosed in the \code{target} 
+\code{data} region creates a new device data environment, which inherits the 
+variables \plc{v1}, \plc{v2}, and \plc{p} from the enclosing device data environment. The variable 
+\plc{N} is mapped into the new device data environment from the encountering task's data 
+environment.
+
+\cexample{target_data}{1c}
+
+The Fortran code passes a reference and specifies the extent of the arrays in the 
+declaration. No length information is necessary in the map clause, as is required 
+with C/C++ pointers.
+
+\fexample{target_data}{1f}
+
+\section{\code{target} \code{data} Region Enclosing Multiple \code{target} Regions}
+
+The following examples show how the \code{target} \code{data} construct maps 
+variables to a device data environment of a \code{target} region. The \code{target} 
+\code{data} construct creates a device data environment and encloses \code{target} 
+regions, which have their own device data environments. The device data environment 
+of the \code{target} \code{data} region is inherited by the device data environment 
+of an enclosed \code{target} region. The \code{target} \code{data} construct 
+is used to create variables that will persist throughout the \code{target} \code{data} 
+region.
+
+In the following example the variables \plc{v1} and \plc{v2} are mapped at each \code{target} 
+construct. Instead of mapping the variable \plc{p} twice, once at each \code{target} 
+construct, \plc{p} is mapped once by the \code{target} \code{data} construct.
+
+\cexample{target_data}{2c}
+
+
+The Fortran code passes a reference and specifies the extent of the \plc{p}, \plc{v1} and \plc{v2} arrays. 
+No length information is necessary in the \code{map} clause, as is required with 
+C/C++ pointers. The arrays \plc{v1} and \plc{v2} are mapped at each \code{target} construct. 
+Instead of mapping the array \plc{p} twice, once at each target construct, \plc{p} is mapped 
+once by the \code{target} \code{data} construct.
+
+\fexample{target_data}{2f}
+
+In the following example, the variable \plc{tmp} defaults to \code{tofrom} map-type 
+and is mapped at each \code{target} construct. The array \plc{Q} is mapped once at 
+the enclosing \code{target} \code{data} region instead of at each \code{target} 
+construct. 
+
+\cexample{target_data}{3c}
+
+In the following example the arrays \plc{v1} and \plc{v2} are mapped at each \code{target} 
+construct. Instead of mapping the array \plc{Q} twice, once at each \code{target} construct, 
+\plc{Q} is mapped once by the \code{target} \code{data} construct. Note, the \plc{tmp} 
+variable is implicitly remapped for each \code{target} region, mapping the value 
+from the device to the host at the end of the first \code{target} region, and 
+from the host to the device for the second \code{target} region.
+
+\fexample{target_data}{3f}
+
+\section{\code{target} \code{data} Construct with Orphaned Call}
+
+The following two examples show how the \code{target} \code{data} construct 
+maps variables to a device data environment. The \code{target} \code{data} 
+construct's device data environment encloses the \code{target} construct's device 
+data environment in the function \code{vec\_mult()}.
+
+When the type of the variable appearing in an array section is pointer, the pointer 
+variable and the storage location of the corresponding array section are mapped 
+to the device data environment. The pointer variable is treated as if it had appeared 
+in a \code{map} clause with a map-type of \code{alloc}. The array section's 
+storage location is mapped according to the map-type in the \code{map} clause 
+(the default map-type is \code{tofrom}).
+
+The \code{target} construct's device data environment inherits the storage locations 
+of the array sections \plc{v1[0:N]}, \plc{v2[:N]}, and \plc{p0[0:N]} from the enclosing target data 
+construct's device data environment. Neither initialization nor assignment is performed 
+for the array sections in the new device data environment.
+
+The pointer variables \plc{p1}, \plc{v3}, and \plc{v4} are mapped into the target construct's device 
+data environment with an implicit map-type of \code{alloc} and they are assigned the address 
+of the storage location associated with their corresponding array sections. Note 
+that the following pairs of array section storage locations are equivalent (\plc{p0[:N]}, 
+\plc{p1[:N]}), (\plc{v1[:N]},\plc{v3[:N]}), and (\plc{v2[:N]},\plc{v4[:N]}).
+
+\cexample{target_data}{4c}
+
+The Fortran code maps the pointers and storage in an identical manner (same extent, 
+but uses indices from 1 to \plc{N}).
+
+The \code{target} construct's device data environment inherits the storage locations 
+of the arrays \plc{v1}, \plc{v2} and \plc{p0} from the enclosing \code{target} \code{data} construct's 
+device data environment. However, in Fortran the associated data of the pointer 
+is known, and the shape is not required.
+
+The pointer variables \plc{p1}, \plc{v3}, and \plc{v4} are mapped into the \code{target} construct's 
+device data environment with an implicit map-type of \code{alloc} and they are 
+assigned the address of the storage location associated with their corresponding 
+array sections. Note that the following pairs of array storage locations are equivalent 
+(\plc{p0},\plc{p1}), (\plc{v1},\plc{v3}), and (\plc{v2},\plc{v4}).
+
+\fexample{target_data}{4f}
+
+
+In the following example, the variables \plc{p1}, \plc{v3}, and \plc{v4} are references to the pointer 
+variables \plc{p0}, \plc{v1} and \plc{v2} respectively. The \code{target} construct's device data 
+environment inherits the pointer variables \plc{p0}, \plc{v1}, and \plc{v2} from the enclosing \code{target} 
+\code{data} construct's device data environment. Thus, \plc{p1}, \plc{v3}, and \plc{v4} are already 
+present in the device data environment.
+
+\cexample{target_data}{5c}
+
+In the following example, the usual Fortran approach is used for dynamic memory. 
+The \plc{p0}, \plc{v1}, and \plc{v2} arrays are allocated in the main program and passed as references 
+from one routine to another. In \code{vec\_mult}, \plc{p1}, \plc{v3} and \plc{v4} are references to the 
+\plc{p0}, \plc{v1}, and \plc{v2} arrays, respectively. The \code{target} construct's device data 
+environment inherits the arrays \plc{p0}, \plc{v1}, and \plc{v2} from the enclosing target data construct's 
+device data environment. Thus, \plc{p1}, \plc{v3}, and \plc{v4} are already present in the device 
+data environment.
+
+\fexample{target_data}{5f}
+
+\section{\code{target} \code{data} Construct with \code{if} Clause}
+
+The following two examples show how the \code{target} \code{data} construct 
+maps variables to a device data environment.
+
+In the following example, the \code{if} clause on the \code{target} \code{data} construct 
+indicates that if the variable \plc{N} is smaller than a given threshold, then the \code{target} 
+\code{data} construct will not create a device data environment.
+
+The \code{target} constructs enclosed in the \code{target} \code{data} region 
+must also use an \code{if} clause on the same condition, otherwise the pointer 
+variable \plc{p} is implicitly mapped with a map-type of \code{tofrom}, but the storage 
+location for the array section \plc{p[0:N]} will not be mapped in the device data environments 
+of the \code{target} constructs.
+
+\cexample{target_data}{6c}
+
+The \code{if} clauses work the same way for the following Fortran code. The \code{target} 
+constructs enclosed in the \code{target} \code{data} region should also use 
+an \code{if} clause with the same condition, so that the \code{target} \code{data} 
+region and the \code{target} region are either both created for the device, or 
+are both ignored.
+
+\fexample{target_data}{6f}
+
+In the following example, when the \code{if} clause conditional expression on 
+the \code{target} construct evaluates to \plc{false}, the target region will 
+execute on the host device. However, the \code{target} \code{data} construct 
+created an enclosing device data environment that mapped \plc{p[0:N]} to a device data 
+environment on the default device. At the end of the \code{target} \code{data} 
+region the array section \plc{p[0:N]} will be assigned from the device data environment 
+to the corresponding variable in the data environment of the task that encountered 
+the \code{target} \code{data} construct, resulting in undefined values in \plc{p[0:N]}.
+
+\cexample{target_data}{7c}
+
+The \code{if} clauses work the same way for the following Fortran code. When 
+the \code{if} clause conditional expression on the \code{target} construct 
+evaluates to \plc{false}, the \code{target} region will execute on the host 
+device. However, the \code{target} \code{data} construct created an enclosing 
+device data environment that mapped the \plc{p} array (and \plc{v1} and \plc{v2}) to a device data 
+environment on the default target device. At the end of the \code{target} \code{data} 
+region the \plc{p} array will be assigned from the device data environment to the corresponding 
+variable in the data environment of the task that encountered the \code{target} 
+\code{data} construct, resulting in undefined values in \plc{p}.
+
+\fexample{target_data}{7f}
+
--- a/Examples_target_update.tex
+++ b/Examples_target_update.tex
@ -0,0 +1,53 @@
+\pagebreak
+\chapter{\code{target} \code{update} Construct}
+\label{chap:target_update}
+
+\section{Simple \code{target} \code{data} and \code{target} \code{update} Constructs}
+
+The following example shows how the \code{target} \code{update} construct updates 
+variables in a device data environment.
+
+The \code{target} \code{data} construct maps array sections \plc{v1[:N]} and \plc{v2[:N]} 
+(arrays \plc{v1} and \plc{v2} in the Fortran code) into a device data environment.
+
+The task executing on the host device encounters the first \code{target} region 
+and waits for the completion of the region.
+
+After the execution of the first \code{target} region, the task executing on 
+the host device then assigns new values to \plc{v1[:N]} and \plc{v2[:N]} (\plc{v1} and \plc{v2} arrays 
+in Fortran code) in the task's data environment by calling the function \code{init\_again()}.
+
+The \code{target} \code{update} construct assigns the new values of \plc{v1} and 
+\plc{v2} from the task's data environment to the corresponding mapped array sections 
+in the device data environment of the \code{target} \code{data} construct.
+
+The task executing on the host device then encounters the second \code{target} 
+region and waits for the completion of the region.
+
+The second \code{target} region uses the updated values of \plc{v1[:N]} and \plc{v2[:N]}.
+
+\cexample{target_update}{1c}
+
+\fexample{target_update}{1f}
+
+\section{\code{target} \code{update} Construct with \code{if} Clause}
+
+The following example shows how the \code{target} \code{update} construct updates 
+variables in a device data environment.
+
+The \code{target} \code{data} construct maps array sections \plc{v1[:N]} and \plc{v2[:N]} 
+(arrays \plc{v1} and \plc{v2} in the Fortran code) into a device data environment. In between 
+the two \code{target} regions, the task executing on the host device conditionally 
+assigns new values to \plc{v1} and \plc{v2} in the task's data environment. The function \code{maybe\_init\_again()} 
+returns \plc{true} if new data is written.
+
+When the conditional expression (the return value of \code{maybe\_init\_again()}) in the 
+\code{if} clause is \plc{true}, the \code{target} \code{update} construct 
+assigns the new values of \plc{v1} and \plc{v2} from the task's data environment to the corresponding 
+mapped array sections in the \code{target} \code{data} construct's device data 
+environment.
+
+\cexample{target_update}{2c}
+
+\fexample{target_update}{2f}
+
--- a/Examples_taskgroup.tex
+++ b/Examples_taskgroup.tex
@ -0,0 +1,20 @@
+\pagebreak
+\chapter{The \code{taskgroup} Construct}
+\label{chap:taskgroup}
+
+In this example, tasks are grouped and synchronized using the \code{taskgroup} 
+construct.
+
+Initially, one task (the task executing the \code{start\_background\_work()} 
+call) is created in the \code{parallel} region, and later a parallel tree traversal 
+is started (the task executing the root of the recursive \code{compute\_tree()} 
+calls). While synchronizing tasks at the end of each tree traversal, using the 
+\code{taskgroup} construct ensures that the formerly started background task 
+does not participate in the synchronization, and is left free to execute in parallel. 
+This is opposed to the behaviour of the \code{taskwait} construct, which would 
+include the background tasks in the synchronization.
+
+\cexample{taskgroup}{1c}
+
+\fexample{taskgroup}{1f}
+
--- a/Examples_tasking.tex
+++ b/Examples_tasking.tex
@ -0,0 +1,258 @@
+\pagebreak
+\chapter{Tasking Constructs}
+\label{chap:tasking}
+
+The following example shows how to traverse a tree-like structure using explicit 
+tasks. Note that the \code{traverse} function should be called from within a 
+parallel region for the different specified tasks to be executed in parallel. Also 
+note that the tasks will be executed in no specified order because there are no 
+synchronization directives. Thus, assuming that the traversal will be done in post 
+order, as in the sequential code, is wrong.
+
+\cexample{tasking}{1c}
+
+\fexample{tasking}{1f}
+
+In the next example, we force a postorder traversal of the tree by adding a \code{taskwait} 
+directive. Now, we can safely assume that the left and right sons have been executed 
+before we process the current node.
+
+\cexample{tasking}{2c}
+
+\fexample{tasking}{2f}
+
+The following example demonstrates how to use the \code{task} construct to process 
+elements of a linked list in parallel. The thread executing the \code{single} 
+region generates all of the explicit tasks, which are then executed by the threads 
+in the current team. The pointer \plc{p} is \code{firstprivate} by default 
+on the \code{task} construct so it is not necessary to specify it in a \code{firstprivate} 
+clause.
+
+\cexample{tasking}{3c}
+
+\fexample{tasking}{3f}
+
+The \code{fib()} function should be called from within a \code{parallel}  region 
+for the different specified tasks to be executed in parallel. Also, only one thread 
+of the \code{parallel} region should call \code{fib()} unless multiple concurrent 
+Fibonacci computations are desired. 
+
+\cexample{tasking}{4c}
+
+\fexample{tasking}{4f}
+
+Note: There are more efficient algorithms for computing Fibonacci numbers. This 
+classic recursion algorithm is for illustrative purposes.
+
+The following example demonstrates a way to generate a large number of tasks with 
+one thread and execute them with the threads in the team. While generating these 
+tasks, the implementation may reach its limit on unassigned tasks.  If it does, 
+the implementation is allowed to cause the thread executing the task generating 
+loop to suspend its task at the task scheduling point in the \code{task} directive, 
+and start executing unassigned tasks.  Once the number of unassigned tasks is sufficiently 
+low, the thread may resume execution of the task generating loop.
+
+\cexample{tasking}{5c}
+\pagebreak
+\fexample{tasking}{5f}
+
+The following example is the same as the previous one, except that the tasks are 
+generated in an untied task. While generating the tasks, the implementation may 
+reach its limit on unassigned tasks. If it does, the implementation is allowed 
+to cause the thread executing the task generating loop to suspend its task at the 
+task scheduling point in the \code{task} directive, and start executing unassigned 
+tasks.  If that thread begins execution of a task that takes a long time to complete, 
+the other threads may complete all the other tasks before it is finished.
+
+In this case, since the loop is in an untied task, any other thread is eligible 
+to resume the task generating loop. In the previous examples, the other threads 
+would be forced to idle until the generating thread finishes its long task, since 
+the task generating loop was in a tied task.
+
+\cexample{tasking}{6c}
+
+\fexample{tasking}{6f}
+
+The following two examples demonstrate how the scheduling rules illustrated in 
+the Task Scheduling section (Section 2.11.3) of the OpenMP 4.0 specification 
+affect the usage of \code{threadprivate} variables in tasks. A \code{threadprivate} 
+variable can be modified by another task that is executed by the same thread. Thus, 
+the value of a \code{threadprivate} variable cannot be assumed to be unchanged 
+across a task scheduling point. In untied tasks, task scheduling points may be 
+added in any place by the implementation.
+
+A task switch may occur at a task scheduling point. A single thread may execute 
+both of the task regions that modify \code{tp}. The parts of these task regions 
+in which \code{tp} is modified may be executed in any order so the resulting 
+value of \code{var} can be either 1 or 2.
+
+\cexample{tasking}{7c}
+
+
+\fexample{tasking}{7f}
+
+In this example, scheduling constraints prohibit a thread in the team from executing 
+a new task that modifies \code{tp}  while another such task region tied to the 
+same thread is suspended. Therefore, the value written will persist across the 
+task scheduling point.
+
+\cexample{tasking}{8c}
+
+
+\fexample{tasking}{8f}
+
+The following two examples demonstrate how the scheduling rules illustrated in 
+the Task Scheduling section (Section 2.11.3) of the OpenMP 4.0 specification 
+affect the usage of locks and critical sections in tasks.  If a lock is held 
+across a task scheduling point, no attempt should be made to acquire the same lock 
+in any code that may be interleaved.  Otherwise, a deadlock is possible.
+
+In the example below, suppose the thread executing task 1 defers task 2.  When 
+it encounters the task scheduling point at task 3, it could suspend task 1 and 
+begin task 2 which will result in a deadlock when it tries to enter critical region 
+1.
+
+\cexample{tasking}{9c}
+
+
+\fexample{tasking}{9f}
+
+In the following example, \code{lock} is held across a task scheduling point. 
+ However, according to the scheduling restrictions, the executing thread can't 
+begin executing one of the non-descendant tasks that also acquires \code{lock} before 
+the task region is complete.  Therefore, no deadlock is possible.
+
+\cexample{tasking}{10c}
+
+
+\fexample{tasking}{10f}
+
+The following examples illustrate the use of the \code{mergeable} clause in the 
+\code{task} construct. In this first example, the \code{task} construct has 
+been annotated with the \code{mergeable}  clause. The addition of this clause 
+allows the implementation to reuse the data environment (including the ICVs) of 
+the parent task for the task inside \code{foo} if the task is included or undeferred. 
+Thus, the result of the execution may differ depending on whether the task is merged 
+or not. Therefore the mergeable clause needs to be used with caution. In this example, 
+the use of the mergeable clause is safe. As \code{x} is a shared variable the 
+outcome does not depend on whether or not the task is merged (that is, the task 
+will always increment the same variable and will always compute the same value 
+for \code{x}).
+
+\cexample{tasking}{11c}
+
+\fexample{tasking}{11f}
+
+This second example shows an incorrect use of the \code{mergeable} clause. In 
+this example, the created task will access different instances of the variable 
+\code{x} if the task is not merged, as \code{x} is \code{firstprivate}, but 
+it will access the same variable \code{x} if the task is merged. As a result, 
+the behavior of the program is unspecified and it can print two different values 
+for \code{x} depending on the decisions taken by the implementation.
+
+\cexample{tasking}{12c}
+
+\fexample{tasking}{12f}
+
+The following example shows the use of the \code{final} clause and the \code{omp\_in\_final} 
+API call in a recursive binary search program. To reduce overhead, once a certain 
+depth of recursion is reached the program uses the \code{final} clause to create 
+only included tasks, which allow additional optimizations.
+
+The use of the \code{omp\_in\_final} API call allows programmers to optimize 
+their code by specifying which parts of the program are not necessary when a task 
+can create only included tasks (that is, the code is inside a \code{final} task). 
+In this example, the use of a different state variable is not necessary once 
+the program reaches the part of the computation that is finalized, so copying from 
+the parent state to the new state is eliminated. The allocation of \code{new\_state} 
+in the stack could also be avoided but it would make this example less clear. The 
+\code{final} clause is most effective when used in conjunction with the \code{mergeable} 
+clause since all tasks created in a \code{final} task region are included tasks 
+that can be merged if the \code{mergeable} clause is present.
+
+\cexample{tasking}{13c}
+
+\fexample{tasking}{13f}
+
+The following example illustrates the difference between the \code{if}  and the 
+\code{final} clauses. The \code{if} clause has a local effect. In the first 
+nest of tasks, the one that has the \code{if}  clause will be undeferred but 
+the task nested inside that task will not be affected by the \code{if} clause 
+and will be created as usual. Alternatively, the \code{final} clause affects 
+all \code{task} constructs in the \code{final} task region but not the \code{final} 
+task itself. In the second nest of tasks, the nested tasks will be created as included 
+tasks. Note also that the conditions for the \code{if} and \code{final} clauses 
+are usually the opposite.
+
+\cexample{tasking}{14c}
+
+\fexample{tasking}{14f}
+
+\section*{Task Dependences}
+
+\section{Flow Dependence}
+
+In this example we show a simple flow dependence expressed using the \code{depend} 
+clause on the \code{task} construct.
+
+\cexample{tasking}{15c}
+
+\fexample{tasking}{15f}
+
+The program will always print \texttt{"}x = 2\texttt{"}, because the \code{depend} 
+clauses enforce the ordering of the tasks. If the \code{depend} clauses had been 
+omitted, then the tasks could execute in any order and the program 
+would have a race condition.
+
+\section{Anti-dependence}
+
+In this example we show an anti-dependence expressed using the \code{depend} 
+clause on the \code{task} construct.
+
+\cexample{tasking}{16c}
+
+\fexample{tasking}{16f}
+
+The program will always print \texttt{"}x = 1\texttt{"}, because the \code{depend} 
+clauses enforce the ordering of the tasks. If the \code{depend} clauses had been 
+omitted, then the tasks could execute in any order and the program would have a 
+race condition.
+
+\section{Output Dependence}
+
+In this example we show an output dependence expressed using the \code{depend} 
+clause on the \code{task} construct.
+
+\cexample{tasking}{17c}
+
+\fexample{tasking}{17f}
+
+The program will always print \texttt{"}x = 2\texttt{"}, because the \code{depend} 
+clauses enforce the ordering of the tasks. If the \code{depend} clauses had been 
+omitted, then the tasks could execute in any order and the program would have a 
+race condition.
+
+\section{Concurrent Execution with Dependences}
+
+In this example we show potentially concurrent execution of tasks using multiple 
+flow dependences expressed using the \code{depend} clause on the \code{task} 
+construct.
+
+\cexample{tasking}{18c}
+
+\fexample{tasking}{18f}
+
+The last two tasks are dependent on the first task. However there is no dependence 
+between the last two tasks, which may execute in any order (or concurrently if 
+more than one thread is available). Thus, the possible outputs are 
+\texttt{"}x + 1 = 3. x + 2 = 4. \texttt{"} and \texttt{"}x + 2 = 4. x + 1 = 3. \texttt{"}. 
+If the \code{depend} clauses had been omitted, then all of the tasks could execute 
+in any order and the program would have a race condition.
+
+\section{Matrix multiplication}
+
+This example shows a task-based blocked matrix multiplication. Matrices are of 
+NxN elements, and the multiplication is implemented using blocks of BSxBS elements.
+
+\cexample{tasking}{19c}
+
+\fexample{tasking}{19f}
+
--- a/Examples_taskyield.tex
+++ b/Examples_taskyield.tex
@ -0,0 +1,14 @@
+\pagebreak
+\chapter{The \code{taskyield} Directive}
+\label{chap:taskyield}
+
+The following example illustrates the use of the \code{taskyield}  directive. 
+The tasks in the example compute something useful and then do some computation 
+that must be done in a critical region. By using \code{taskyield} when a task 
+cannot get access to the \code{critical} region, the implementation can suspend 
+the current task and schedule some other task that can do something useful. 
+
+\cexample{taskyield}{1c}
+
+\fexample{taskyield}{1f}
+
--- a/Examples_teams.tex
+++ b/Examples_teams.tex
@ -0,0 +1,118 @@
+\pagebreak
+\chapter{\code{teams} Constructs}
+\label{chap:teams}
+
+\section{\code{target} and \code{teams} Constructs with \code{omp\_get\_num\_teams}\\
+and \code{omp\_get\_team\_num} Routines}
+
+The following example shows how the \code{target} and \code{teams} constructs 
+are used to create a league of thread teams that execute a region. The \code{teams} 
+construct creates a league of at most two teams where the master thread of each 
+team executes the \code{teams} region.
+
+The \code{omp\_get\_num\_teams} routine returns the number of teams executing in a \code{teams} 
+region. The \code{omp\_get\_team\_num} routine returns the team number, which is an integer 
+between 0 and one less than the value returned by \code{omp\_get\_num\_teams}. The following 
+example manually distributes a loop across two teams.
+
+\cexample{teams}{1c}
+
+\fexample{teams}{1f}
+
+\section{\code{target}, \code{teams}, and \code{distribute} Constructs}
+
+The following example shows how the \code{target}, \code{teams}, and \code{distribute} 
+constructs are used to execute a loop nest in a \code{target} region. The \code{teams} 
+construct creates a league and the master thread of each team executes the \code{teams} 
+region. The \code{distribute} construct schedules the subsequent loop iterations 
+across the master threads of each team.
+
+The number of teams in the league is less than or equal to the variable \plc{num\_blocks}. 
+Each team in the league has a number of threads less than or equal to the variable 
+\plc{block\_threads}. The iterations in the outer loop are distributed among the master 
+threads of each team.
+
+When a team's master thread encounters the parallel loop construct before the inner 
+loop, the other threads in its team are activated. The team executes the \code{parallel} 
+region and then workshares the execution of the loop.
+
+Each master thread executing the \code{teams} region has a private copy of the 
+variable \plc{sum} that is created by the \code{reduction} clause on the \code{teams} construct. 
+The master thread and all threads in its team have a private copy of the variable 
+\plc{sum} that is created by the \code{reduction} clause on the parallel loop construct. 
+The second private \plc{sum} is reduced into the master thread's private copy of \plc{sum} 
+created by the \code{teams} construct. At the end of the \code{teams} region, 
+each master thread's private copy of \plc{sum} is reduced into the final \plc{sum} that is 
+implicitly mapped into the \code{target} region.
+
+\cexample{teams}{2c}
+
+\fexample{teams}{2f}
+
+\section{\code{target} \code{teams} and Distribute Parallel Loop Constructs}
+
+The following example shows how the \code{target} \code{teams} and distribute 
+parallel loop constructs are used to execute a \code{target} region. The \code{target} 
+\code{teams} construct creates a league of teams where the master thread of each 
+team executes the \code{teams} region.
+
+The distribute parallel loop construct schedules the loop iterations across the 
+master threads of each team and then across the threads of each team.
+
+\cexample{teams}{3c}
+
+\fexample{teams}{3f}
+
+\section{\code{target} \code{teams} and Distribute Parallel Loop 
+Constructs with Scheduling Clauses}
+
+The following example shows how the \code{target} \code{teams} and distribute 
+parallel loop constructs are used to execute a \code{target} region. The \code{teams} 
+construct creates a league of at most eight teams where the master thread of each 
+team executes the \code{teams} region. The number of threads in each team is 
+less than or equal to 16.
+
+The distribute parallel loop construct schedules the subsequent loop iterations 
+across the master threads of each team and then across the threads of each team.
+
+The \code{dist\_schedule} clause on the distribute parallel loop construct indicates 
+that loop iterations are distributed to the master thread of each team in chunks 
+of 1024 iterations.
+
+The \code{schedule} clause indicates that the 1024 iterations distributed to 
+a master thread are then assigned to the threads in its associated team in chunks 
+of 64 iterations.
+
+\cexample{teams}{4c}
+
+\fexample{teams}{4f}
+
+\section{\code{target} \code{teams} and \code{distribute} \code{simd} Constructs}
+
+The following example shows how the \code{target} \code{teams} and \code{distribute} 
+\code{simd} constructs are used to execute a loop in a \code{target} region. 
+The \code{target} \code{teams} construct creates a league of teams where the 
+master thread of each team executes the \code{teams} region.
+
+The \code{distribute} \code{simd} construct schedules the loop iterations across 
+the master thread of each team and then uses SIMD parallelism to execute the iterations.
+
+\cexample{teams}{5c}
+
+\fexample{teams}{5f}
+
+\section{\code{target} \code{teams} and Distribute Parallel Loop SIMD Constructs}
+
+The following example shows how the \code{target} \code{teams} and the distribute 
+parallel loop SIMD constructs are used to execute a loop in a \code{target} \code{teams} 
+region. The \code{target} \code{teams} construct creates a league of teams 
+where the master thread of each team executes the \code{teams} region.
+
+The distribute parallel loop SIMD construct schedules the loop iterations across 
+the master thread of each team and then across the threads of each team where each 
+thread uses SIMD parallelism.
+
+\cexample{teams}{6c}
+
+\fexample{teams}{6f}
+
--- a/Examples_threadprivate.tex
+++ b/Examples_threadprivate.tex
@ -0,0 +1,106 @@
+\pagebreak
+\chapter{The \code{threadprivate} Directive}
+\label{chap:threadprivate}
+
+The following examples demonstrate how to use the \code{threadprivate} directive 
+ to give each thread a separate counter.
+
+\cexample{threadprivate}{1c}
+
+\fexample{threadprivate}{1f}
+
+\ccppspecificstart
+The following example uses \code{threadprivate} on a static variable:
+
+\cnexample{threadprivate}{2c}
+
+The following example demonstrates unspecified behavior for the initialization 
+of a \code{threadprivate} variable. A \code{threadprivate}  variable is initialized 
+once at an unspecified point before its first reference. Because \code{a} is 
+constructed using the value of \code{x}  (which is modified by the statement 
+\code{x++}), the value of \code{a.val}  at the start of the \code{parallel} 
+region could be either 1 or 2. This problem is avoided for \code{b}, which uses 
+an auxiliary \code{const} variable and a copy-constructor.
+
+\cnexample{threadprivate}{3c}
+\ccppspecificend
+
+The following examples show non-conforming uses and correct uses of the \code{threadprivate} 
+directive. 
+
+\fortranspecificstart
+The following example is non-conforming because the common block is not declared 
+local to the subroutine that refers to it:
+
+\fnexample{threadprivate}{2f}
+
+The following example is also non-conforming because the common block is not declared 
+local to the subroutine that refers to it:
+
+\fnexample{threadprivate}{3f}
+
+The following example is a correct rewrite of the previous example:
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+\fnexample{threadprivate}{4f}
+
+The following is an example of the use of \code{threadprivate} for local variables:
+
+\fnexample{threadprivate}{5f}
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+The above program, if executed by two threads, will print one of the following 
+two sets of output: 
+
+\code{a = 11 12 13}
+\\
+\code{ptr = 4}
+\\
+\code{i = 15}
+
+\code{A is not allocated}
+\\
+\code{ptr = 4}
+\\
+\code{i = 5}
+
+or
+
+\code{A is not allocated}
+\\
+\code{ptr = 4}
+\\
+\code{i = 15}
+
+\code{a = 1 2 3}
+\\
+\code{ptr = 4}
+\\
+\code{i = 5}
+
+The following is an example of the use of \code{threadprivate} for module variables:
+
+\fnexample{threadprivate}{6f}
+\fortranspecificend
+
+\ccppspecificstart
+The following example illustrates initialization of \code{threadprivate} variables 
+for class-type \code{T}. \code{t1} is default constructed, \code{t2} is constructed 
+taking a constructor accepting one argument of integer type, \code{t3} is copy 
+constructed with argument \code{f()}:
+
+\cnexample{threadprivate}{4c}
+
+The following example illustrates the use of \code{threadprivate} for static 
+class members. The \code{threadprivate} directive for a static class member must 
+be placed inside the class definition.
+
+\cnexample{threadprivate}{5c}
+\ccppspecificend
+
--- a/Examples_workshare.tex
+++ b/Examples_workshare.tex
@ -0,0 +1,76 @@
+\pagebreak
+\chapter{The \code{workshare} Construct}
+\fortranspecificstart
+\label{chap:workshare}
+
+The following are examples of the \code{workshare} construct. 
+
+In the following example, \code{workshare} spreads work across the threads executing 
+the \code{parallel} region, and there is a barrier after the last statement. 
+Implementations must enforce Fortran execution rules inside of the \code{workshare} 
+block.
+
+\fnexample{workshare}{1f}
+
+In the following example, the barrier at the end of the first \code{workshare} 
+region is eliminated with a \code{nowait} clause. Threads doing \code{CC = 
+DD} immediately begin work on \code{EE = FF} when they are done with \code{CC 
+= DD}.
+
+\fnexample{workshare}{2f}
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+The following example shows the use of an \code{atomic} directive inside a \code{workshare} 
+construct. The computation of \code{SUM(AA)} is workshared, but the update to 
+\code{R} is atomic.
+
+\fnexample{workshare}{3f}
+
+Fortran \code{WHERE} and \code{FORALL} statements are \emph{compound statements}, 
+made up of a \emph{control} part and a \emph{statement} part. When \code{workshare} 
+is applied to one of these compound statements, both the control and the statement 
+parts are workshared. The following example shows the use of a \code{WHERE} statement 
+in a \code{workshare} construct.
+
+Each task gets worked on in order by the threads:
+
+\code{AA = BB} then
+\\
+\code{CC = DD} then
+\\
+\code{EE .ne. 0} then
+\\
+\code{FF = 1 / EE} then
+\\
+\code{GG = HH}
+
+\fnexample{workshare}{4f}
+% blue line floater at top of this page for "Fortran, cont."
+\begin{figure}[t!]
+\linewitharrows{-1}{dashed}{Fortran (cont.)}{8em}
+\end{figure}
+
+In the following example, an assignment to a shared scalar variable is performed 
+by one thread in a \code{workshare} while all other threads in the team wait.
+
+\fnexample{workshare}{5f}
+
+The following example contains an assignment to a private scalar variable, which 
+is performed by one thread in a \code{workshare} while all other threads wait. 
+It is non-conforming because the private scalar variable is undefined after the 
+assignment statement. 
+
+\fnexample{workshare}{6f}
+
+Fortran execution rules must be enforced inside a \code{workshare} construct. 
+The following program fragment produces the same result 
+regardless of whether the code is executed sequentially or inside an OpenMP 
+program with multiple threads:
+
+\fnexample{workshare}{7f}
+\fortranspecificend
+
+
--- a/Examples_worksharing_critical.tex
+++ b/Examples_worksharing_critical.tex
@ -0,0 +1,18 @@
+\pagebreak
+\chapter{Worksharing Constructs Inside a \code{critical} Construct}
+\label{chap:worksharing_critical}
+
+The following example demonstrates using a worksharing construct inside a \code{critical} 
+construct. This example is conforming because the worksharing \code{single}  
+region is not closely nested inside the \code{critical} region. A single thread 
+executes the one and only section in the \code{sections} region, and executes 
+the \code{critical} region. The same thread encounters the nested \code{parallel} 
+region, creates a new team of threads, and becomes the master of the new team. 
+One of the threads in the new team enters the \code{single} region and increments 
+\code{i} by \code{1}. At the end of this example \code{i} is equal to \code{2}.
+
+\cexample{worksharing_critical}{1c}
+
+\fexample{worksharing_critical}{1f}
+
+
--- a/Introduction_Chapt.tex
+++ b/Introduction_Chapt.tex
@ -0,0 +1,69 @@
+% This is the introduction for the OpenMP Examples document.
+% This is an included file. See the master file (openmp-examples.tex) for more information.
+%
+% When editing this file:
+%
+%    1. To change formatting, appearance, or style, please edit openmp.sty.
+%
+%    2. Custom commands and macros are defined in openmp.sty.
+%
+%    3. Be kind to other editors -- keep a consistent style by copying-and-pasting to
+%       create new content.
+%
+%    4. We use semantic markup, e.g. (see openmp.sty for a full list):
+%         \code{}     % for bold monospace keywords, code, operators, etc.
+%         \plc{}      % for italic placeholder names, grammar, etc.
+%
+%    5. Other recommendations:
+%         Use the convenience macros defined in openmp.sty for the minor headers
+%         such as Comments, Syntax, etc.
+%
+%         To keep items together on the same page, prefer the use of 
+%         \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering.
+%         When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's
+%         what you mean. Use \needspace{} cautiously for troublesome paragraphs.
+%
+%         Avoid absolute lengths and measures in this file; use relative units when possible.
+%         Vertical space can be relative to \baselineskip or ex units. Horizontal space
+%         can be relative to \linewidth or em units.
+%
+%         Prefer \emph{} to italicize terminology, e.g.:
+%             This is a \emph{definition}, not a placeholder.
+%             This is a \plc{var-name}.
+%
+
+\chapter*{Introduction}
+\label{chap:introduction}
+This collection of programming examples supplements the OpenMP API for Shared
+Memory Parallelization specifications, and is not part of the formal specifications. It
+assumes familiarity with the OpenMP specifications, and shares the typographical
+conventions used in that document.
+
+\notestart
+\noteheader – This first release of the OpenMP Examples reflects the OpenMP Version 4.0
+specifications. Additional examples are being developed and will be published in future
+releases of this document.
+\noteend
+
+The OpenMP API specification provides a model for parallel programming that is
+portable across shared memory architectures from different vendors. Compilers from
+numerous vendors support the OpenMP API.
+
+The directives, library routines, and environment variables demonstrated in this
+document allow users to create and manage parallel programs while permitting
+portability. The directives extend the C, C++ and Fortran base languages with single
+program multiple data (SPMD) constructs, tasking constructs, device constructs,
+worksharing constructs, and synchronization constructs, and they provide support for
+sharing and privatizing data. The functionality to control the runtime environment is
+provided by library routines and environment variables. Compilers that support the
+OpenMP API often include a command line option to the compiler that activates and
+allows interpretation of all OpenMP directives.
+
+Complete information about the OpenMP API and a list of the compilers that support
+the OpenMP API can be found at the OpenMP.org web site:
+
+\code{http://www.openmp.org}
+
+
+% This is the end of introduction.tex of the OpenMP Examples document.
+
--- a/88
+++ b/88
@ -0,0 +1,88 @@
+# Makefile for the OpenMP Examples document in LaTeX format. 
+# For more information, see the master document, openmp-examples.tex.
+
+version=4.0.1ltx
+default: openmp-examples.pdf
+
+
+CHAPTERS=Title_Page.tex \
+	Introduction_Chapt.tex \
+	Examples_Chapt.tex \
+	Examples_ploop.tex \
+	Examples_mem_model.tex \
+	Examples_cond_comp.tex \
+	Examples_icv.tex \
+	Examples_parallel.tex \
+	Examples_nthrs_nesting.tex \
+	Examples_nthrs_dynamic.tex \
+	Examples_affinity.tex \
+	Examples_fort_do.tex \
+	Examples_fort_loopvar.tex \
+	Examples_nowait.tex \
+	Examples_collapse.tex \
+	Examples_psections.tex \
+	Examples_fpriv_sections.tex \
+	Examples_single.tex \
+	Examples_tasking.tex \
+	Examples_taskgroup.tex \
+	Examples_taskyield.tex \
+	Examples_workshare.tex \
+	Examples_master.tex \
+	Examples_critical.tex \
+	Examples_worksharing_critical.tex \
+	Examples_barrier_regions.tex \
+	Examples_atomic.tex \
+	Examples_atomic_restrict.tex \
+	Examples_flush_nolist.tex \
+	Examples_standalone.tex \
+	Examples_ordered.tex \
+	Examples_cancellation.tex \
+	Examples_threadprivate.tex \
+	Examples_pra_iterator.tex \
+	Examples_fort_sp_common.tex \
+	Examples_default_none.tex \
+	Examples_fort_race.tex \
+	Examples_private.tex \
+	Examples_fort_sa_private.tex \
+	Examples_carrays_fpriv.tex \
+	Examples_lastprivate.tex \
+	Examples_reduction.tex \
+	Examples_copyin.tex \
+	Examples_copyprivate.tex \
+	Examples_nested_loop.tex \
+	Examples_nesting_restrict.tex \
+	Examples_set_dynamic_nthrs.tex \
+	Examples_get_nthrs.tex \
+	Examples_init_lock.tex \
+	Examples_lock_owner.tex \
+	Examples_simple_lock.tex \
+	Examples_nestable_lock.tex \
+	Examples_target.tex \
+	Examples_target_data.tex \
+	Examples_target_update.tex \
+	Examples_declare_target.tex \
+	Examples_teams.tex \
+	Examples_async_target.tex \
+	Examples_array_sections.tex \
+	Examples_device.tex \
+	Examples_associate.tex
+
+INTERMEDIATE_FILES=openmp-examples.pdf \
+		openmp-examples.toc \
+		openmp-examples.idx \
+		openmp-examples.aux \
+		openmp-examples.ilg \
+		openmp-examples.ind \
+		openmp-examples.out \
+		openmp-examples.log
+
+openmp-examples.pdf: $(CHAPTERS) openmp.sty openmp-examples.tex openmp-logo.png
+	rm -f $(INTERMEDIATE_FILES)
+	pdflatex -interaction=batchmode -file-line-error openmp-examples.tex
+	pdflatex -interaction=batchmode -file-line-error openmp-examples.tex
+	pdflatex -interaction=batchmode -file-line-error openmp-examples.tex
+	cp openmp-examples.pdf openmp-examples-${version}.pdf
+
+clean:
+	rm -f $(INTERMEDIATE_FILES)
+
--- a/3
+++ b/3
@ -0,0 +1,3 @@
+This is the OpenMP Examples document in LaTeX format.
+Please see the master file, openmp-examples.tex, for more information.
+
--- a/Title_Page.tex
+++ b/Title_Page.tex
@ -0,0 +1,42 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Title page
+
+  \begin{titlepage}
+    \begin{flushleft}
+     \hspace{-6em} \includegraphics[width=0.4\textwidth]{openmp-logo.png}
+    \end{flushleft}
+
+    \begin{adjustwidth}{-0.75in}{0in}
+    \begin{center}
+      \Huge
+      \textsf{OpenMP\\Application Programming\\Interface}
+
+      % An optional subtitle can go here:
+      \vspace{0.5in}\textsf{Examples}\vspace{-0.7in}
+      \normalsize
+
+      \vspace{1.0in}
+
+      \textbf{Version 4.0.1.ltx -- February, 2014}
+    \end{center}
+    \end{adjustwidth}
+
+    \vspace{3.0in}
+
+\begin{adjustwidth}{0pt}{1em}\setlength{\parskip}{0.25\baselineskip}%
+Copyright © 1997-2014 OpenMP Architecture Review Board.\\
+Permission to copy without fee all or part of this material is granted,
+provided the OpenMP Architecture Review Board copyright notice and
+the title of this document appear. Notice is given that copying is by
+permission of OpenMP Architecture Review Board.\end{adjustwidth}
+
+  \end{titlepage}
+
+% Blank page
+
+\clearpage
+\thispagestyle{empty}
+\phantom{a}
+\emph{This page intentionally left blank}
+\vfill
+
--- a/figs/proc_bind_fig.pdf
+++ b/figs/proc_bind_fig.pdf
--- a/figs/proc_bind_fig.png
+++ b/figs/proc_bind_fig.png
--- a/openmp-examples.tex
+++ b/openmp-examples.tex
@ -0,0 +1,137 @@
+% Welcome to openmp-examples.tex.
+% This is the master LaTeX file for the OpenMP Examples document.
+%
+% The files in this set include:
+%
+%    openmp-examples.tex              - this file, the master file
+%    Makefile                         - makes the document
+%    openmp.sty                       - the main style file
+%    Title_Page.tex                   - the title page
+%    openmp-logo.png                  - the logo
+%    Introduction_Chapt.tex           - unnumbered introductory chapter
+%    Examples_Chapt.tex               - unnumbered chapter
+%    Examples_*.tex                   - example chapters, one file per chapter
+%    sources/*.c, *.f                 - C/C++/Fortran example source files
+%
+% When editing this file:
+%
+%    1. To change formatting, appearance, or style, please edit openmp.sty.
+%
+%    2. Custom commands and macros are defined in openmp.sty.
+%
+%    3. Be kind to other editors -- keep a consistent style by copying-and-pasting to
+%       create new content.
+%
+%    4. We use semantic markup, e.g. (see openmp.sty for a full list):
+%         \code{}     % for bold monospace keywords, code, operators, etc.
+%         \plc{}      % for italic placeholder names, grammar, etc.
+%
+%    5. Other recommendations:
+%         Use the convenience macros defined in openmp.sty for the minor headers
+%         such as Comments, Syntax, etc.
+%
+%         To keep items together on the same page, prefer the use of 
+%         \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering.
+%         When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's
+%         what you mean. Use \needspace{} cautiously for troublesome paragraphs.
+%
+%         Avoid absolute lengths and measures in this file; use relative units when possible.
+%         Vertical space can be relative to \baselineskip or ex units. Horizontal space
+%         can be relative to \linewidth or em units.
+%
+%         Prefer \emph{} to italicize terminology, e.g.:
+%             This is a \emph{definition}, not a placeholder.
+%             This is a \plc{var-name}.
+%
+
+% The following says letter size, but the style sheet may change the size
+\documentclass[10pt,letterpaper,twoside,makeidx,hidelinks]{scrreprt}
+
+% Text to appear in the footer on even-numbered pages:
+\newcommand{\footerText}{OpenMP Examples Version 4.0.1 - February 2014}
+
+% Unified style sheet for OpenMP documents:
+\input{openmp.sty}
+
+
+\begin{document}
+    \pagenumbering{roman}
+    \input{Title_Page}
+
+    \setcounter{page}{0}
+    \setcounter{tocdepth}{2}
+
+    \begin{spacing}{1.3}
+        \tableofcontents
+    \end{spacing}
+
+    % Comment out the next line to disable line numbering on the main body text:
+    \linenumbers\pagewiselinenumbers
+
+    \newpage\pagenumbering{arabic}
+
+    \input{Introduction_Chapt}
+    \input{Examples_Chapt}
+
+    \setcounter{chapter}{0}  % start chapter numbering here
+
+    \input{Examples_ploop}
+    \input{Examples_mem_model}
+    \input{Examples_cond_comp}
+    \input{Examples_icv}
+    \input{Examples_parallel}
+    \input{Examples_nthrs_nesting}
+    \input{Examples_nthrs_dynamic}
+    \input{Examples_affinity}
+    \input{Examples_fort_do}
+    \input{Examples_fort_loopvar}
+    \input{Examples_nowait}
+    \input{Examples_collapse}
+    \input{Examples_psections}
+    \input{Examples_fpriv_sections}
+    \input{Examples_single}
+    \input{Examples_tasking}
+    \input{Examples_taskgroup}
+    \input{Examples_taskyield}
+    \input{Examples_workshare}
+    \input{Examples_master}
+    \input{Examples_critical}
+    \input{Examples_worksharing_critical}
+    \input{Examples_barrier_regions}
+    \input{Examples_atomic}
+    \input{Examples_atomic_restrict}
+    \input{Examples_flush_nolist}
+    \input{Examples_standalone}
+    \input{Examples_ordered}
+    \input{Examples_cancellation}
+    \input{Examples_threadprivate}
+    \input{Examples_pra_iterator}
+    \input{Examples_fort_sp_common}
+    \input{Examples_default_none}
+    \input{Examples_fort_race}
+    \input{Examples_private}
+    \input{Examples_fort_sa_private}
+    \input{Examples_carrays_fpriv}
+    \input{Examples_lastprivate}
+    \input{Examples_reduction}
+    \input{Examples_copyin}
+    \input{Examples_copyprivate}
+    \input{Examples_nested_loop}
+    \input{Examples_nesting_restrict}
+    \input{Examples_set_dynamic_nthrs}
+    \input{Examples_get_nthrs}
+    \input{Examples_init_lock}
+    \input{Examples_lock_owner}
+    \input{Examples_simple_lock}
+    \input{Examples_nestable_lock}
+    \input{Examples_target}
+    \input{Examples_target_data}
+    \input{Examples_target_update}
+    \input{Examples_declare_target}
+    \input{Examples_teams}
+    \input{Examples_async_target}
+    \input{Examples_array_sections}
+    \input{Examples_device}
+    \input{Examples_associate}
+\end{document}
+
--- a/openmp-logo.png
+++ b/openmp-logo.png
--- a/openmp.sty
+++ b/openmp.sty
@ -0,0 +1,484 @@
+% This is openmp.sty, the preamble and style definitions for the OpenMP specification.
+% This is an include file. Please see the master file for more information.
+%
+% When editing this file:
+%
+%    1. To change formatting, appearance, or style, please edit openmp.sty.
+%
+%    2. Custom commands and macros are defined in openmp.sty.
+%
+%    3. Be kind to other editors -- keep a consistent style by copying-and-pasting to
+%       create new content.
+%
+%    4. We use semantic markup, e.g. (see openmp.sty for a full list):
+%         \code{}     % for bold monospace keywords, code, operators, etc.
+%         \plc{}      % for italic placeholder names, grammar, etc.
+%
+%    5. Other recommendations:
+%         Use the convenience macros defined in openmp.sty for the minor headers
+%         such as Comments, Syntax, etc.
+%
+%         To keep items together on the same page, prefer the use of 
+%         \begin{samepage}.... Avoid \parbox for text blocks as it interrupts line numbering.
+%         When possible, avoid \filbreak, \pagebreak, \newpage, \clearpage unless that's
+%         what you mean. Use \needspace{} cautiously for troublesome paragraphs.
+%
+%         Avoid absolute lengths and measures in this file; use relative units when possible.
+%         Vertical space can be relative to \baselineskip or ex units. Horizontal space
+%         can be relative to \linewidth or em units.
+%
+%         Prefer \emph{} to italicize terminology, e.g.:
+%             This is a \emph{definition}, not a placeholder.
+%             This is a \plc{var-name}.
+%
+% Quick list of the environments, commands and macros supported. Search below for more details.
+%
+%    \binding                   % makes header of the same name
+%    \comments
+%    \constraints
+%    \crossreferences
+%    \descr
+%    \effect
+%    \format
+%    \restrictions
+%    \summary
+%    \syntax
+%    
+%    \code{}                    % monospace, bold
+%    \plc{}                     % for any kind of placeholder: italic
+%    \begin{codepar}            % for blocks of verbatim code: monospace, bold
+%    \begin{boxedcode}          % outlined verbatim code for syntax definitions, prototypes, etc.
+%    \begin{indentedcodelist}   % used with, e.g., "where clause is one of the following:"
+%
+%    \specref{}                 % formats the cross-reference "Section X on page Y"
+%    
+%    \notestart                 % black horizontal rule for Notes
+%    \noteend
+%    
+%    \cspecificstart            % blue horizontal rule for C-specific text
+%    \cspecificend
+%    
+%    \cppspecificstart          % blue horizontal rule for C++ -specific text
+%    \cppspecificend
+%    
+%    \ccppspecificstart         % blue horizontal rule for C / C++ -specific text
+%    \ccppspecificend
+%    
+%    \fortranspecificstart      % blue horizontal rule for Fortran-specific text
+%    \fortranspecificend
+%    
+%    \glossaryterm              % for use in formatting glossary entries
+%    \glossarydefstart
+%    \glossarydefend
+%
+%    \compactitem               % single-spaced itemized lists for the Examples doc
+%    \cexample                  % C/C++ code example for the Examples doc
+%    \fexample                  % Fortran code example for the Examples doc
+
+
+\usepackage{comment}            % allow use of \begin{comment}
+\usepackage{ifpdf,ifthen}       % allow conditional tests in LaTeX definitions
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Document data
+%
+\author{}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Fonts
+
+\usepackage{amsmath}
+\usepackage{amsfonts}
+\usepackage{amssymb}
+\usepackage{courier}
+\usepackage{helvet}
+\usepackage[utf8]{inputenc}
+
+% Main body serif font:
+\usepackage{tgtermes}
+\usepackage[T1]{fontenc}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Graphic elements
+
+\usepackage{graphicx}
+\usepackage{framed}    % for making boxes with \begin{framed}
+\usepackage{tikz}      % for flow charts, diagrams, arrows
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Page formatting
+
+\usepackage[paperwidth=7.5in, paperheight=9in, 
+            top=0.75in, bottom=1.0in, left=1.4in, right=0.6in]{geometry}
+
+\usepackage{changepage}   % allows left/right-page margin readjustments
+
+\setlength{\oddsidemargin}{0.45in}
+\setlength{\evensidemargin}{0.185in}
+\raggedbottom
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Paragraph formatting
+
+\usepackage{setspace}     % allows use of \singlespacing, \onehalfspacing
+\usepackage{needspace}    % allows use of \needspace to keep lines together
+\usepackage{parskip}      % removes paragraph indenting
+
+\raggedright
+\usepackage[raggedrightboxes]{ragged2e}  % is this needed?
+
+\lefthyphenmin=60         % only hyphenate if the left part is >= this many chars
+\righthyphenmin=60        % only hyphenate if the right part is >= this many chars
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Bulleted (itemized) lists
+%    Align bullets with section header
+%    Align text left
+%    Small bullets
+%    \compactitem for single-spaced lists (used in the Examples doc)
+
+\usepackage{enumitem}     % for setting margins on lists
+\setlist{leftmargin=*}    % don't indent bullet items
+\renewcommand{\labelitemi}{{\normalsize$\bullet$}} % bullet size
+
+% A compactitem environment is also provided by package paralist (and perhaps others);
+% however, we define our own version in terms of package enumitem, which is already
+% loaded above: an itemize list whose negative itemsep tightens the space between items.
+\newenvironment{compactitem}
+{\begin{itemize}[itemsep=-1.2ex]}
+{\end{itemize}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Tables
+
+% This allows tables to flow across page breaks, headers on each new page, etc.
+\usepackage{supertabular}
+\usepackage{caption}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Line numbering
+
+\usepackage[pagewise]{lineno}       % for line numbers on left side of the page
+\pagewiselinenumbers
+\setlength\linenumbersep{6em}
+\renewcommand\linenumberfont{\normalfont\small\sffamily}
+\nolinenumbers            % start with line numbers off
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Footers
+
+\usepackage{fancyhdr}     % makes right/left footers
+\pagestyle{fancy}
+\fancyhead{} % clear all header fields
+\cfoot{}
+\renewcommand{\headrulewidth}{0pt}
+
+% Left side on even pages:
+% This requires that \footerText be defined in the master document:
+\fancyfoot[LE]{\bfseries \thepage \mdseries \hspace{2em} \footerText}
+\fancyhfoffset[E]{4em}
+
+% Right side on odd pages:
+\fancyfoot[RO]{\mdseries  \leftmark \hspace{2em} \bfseries \thepage}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Section header format - we use four levels: \chapter \section \subsection \subsubsection.
+
+\usepackage{titlesec}     % format headers with \titleformat{}
+
+% Format and spacing for chapter, section, subsection, and subsubsection headers:
+
+\setcounter{secnumdepth}{4}          % show numbers down to subsubsection level
+
+\titleformat{\chapter}[display]%
+{\normalfont\sffamily\upshape\Huge\bfseries\fontsize{20}{20}\selectfont}%
+{\normalfont\sffamily\scshape\large\bfseries \hspace{-0.7in} \MakeUppercase%
+    {\chaptertitlename} \thechapter}%
+{0.8in}{}[\vspace{2ex}\hrule]
+\titlespacing{\chapter}{0ex}{0em plus 1em minus 1em}{3em plus 1em minus 1em}[10em]
+
+\titleformat{\section}[hang]{\huge\bfseries\sffamily\fontsize{16}{16}\selectfont}{\thesection}{1.0em}{}
+\titlespacing{\section}{-5em}{5em plus 1em minus 1em}{1em plus 0.5em minus 0em}[10em]
+
+\titleformat{\subsection}[hang]{\LARGE\bfseries\sffamily\fontsize{14}{14}\selectfont}{\thesubsection}{1.0em}{}
+\titlespacing{\subsection}{-5em}{4em plus 1em minus 2.0em}{0.75em plus 0.5em minus 0em}[10em]
+
+\titleformat{\subsubsection}[hang]{\needspace{1\baselineskip}%
+\Large\bfseries\sffamily\fontsize{12}{12}\selectfont}{\thesubsubsection}{1.0em}{}
+\titlespacing{\subsubsection}{-5em}{3em plus 1em minus 1em}{0.5em plus 0.5em minus 0em}[10em]
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Macros for minor headers: Summary, Syntax, Description, etc.
+% These headers are defined in terms of \paragraph
+
+\titleformat{\paragraph}[block]{\large\bfseries\sffamily\fontsize{11}{11}\selectfont}{}{}{}
+\titlespacing{\paragraph}{0em}{1.5em plus 0.55em minus 0.5em}{0.0em plus 0.55em minus 0.0em}
+
+% Use one of the convenience macros below, or \littleheader{text} for an arbitrary header; each one expands to an unnumbered \paragraph* header:
+\newcommand{\littleheader}[1] {\paragraph*{#1}}
+
+\newcommand{\binding} {\littleheader{Binding}}
+\newcommand{\comments} {\littleheader{Comments}}
+\newcommand{\constraints} {\littleheader{Constraints on Arguments}}
+\newcommand{\crossreferences} {\littleheader{Cross References}}
+\newcommand{\descr} {\littleheader{Description}}
+\newcommand{\effect} {\littleheader{Effect}}
+\newcommand{\format} {\littleheader{Format}}
+\newcommand{\restrictions} {\littleheader{Restrictions}}
+\newcommand{\summary} {\littleheader{Summary}}
+\newcommand{\syntax} {\littleheader{Syntax}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Code and placeholder semantic tagging.
+%
+% When possible, prefer semantic tags instead of typographic tags. The
+% following semantics tags are defined here:
+%
+%     \code{}     % for bold monospace keywords, code, operators, etc.
+%     \plc{}      % for italic placeholder names, grammar, etc.
+%
+% For function prototypes or other code snippets, you can use \code{} as
+% the outer wrapper, and use \plc{} inside. Example:
+%
+%     \code{\#pragma omp directive ( \plc{some-placeholder-identifier} :}
+%
+% To format text in italics for emphasis (rather than text as a placeholder),
+% use the generic \emph{} command. Example:
+%
+%     This sentence \emph{emphasizes some non-placeholder words}.
+
+% Enable \alltt{} for formatting blocks of code:
+\usepackage{alltt}
+
+% This sets the default \code{} font to tt (monospace) and bold:
+\newcommand{\code}[1]{{\texttt{\textbf{#1}}}}
+\newcommand{\nspace}[1]{{\textrm{\textmd{ }}}}   % NOTE(review): argument #1 is accepted but not used in the expansion
+
+% This defines the \plc{} placeholder font to be roman (serif), medium weight, italic:
+\newcommand{\plc}[1] {{\textrm{\textmd{\itshape{#1}}}}}
+
+% Environment for a paragraph of literal code (bold alltt), single-spaced, no outline, no indenting; the mandatory argument #1 is inserted at the start of the block:
+\newenvironment{codepar}[1]
+{\begin{alltt}\bfseries #1}
+{\end{alltt}}
+
+% For blocks of code inside a box frame: framed + \textwidth minipage + bold alltt, with stretchable space above and below; #1 is inserted at the start:
+\newenvironment{boxedcode}[1]
+{\vspace{0.25em plus 5em minus 0.25em}\begin{framed}\begin{minipage}[t]{\textwidth}\begin{alltt}\bfseries #1}
+{\end{alltt}\end{minipage}\end{framed}\vspace{0.25em plus 5em minus 0.25em}}
+
+% This sets the margins in the framed box:
+\setlength{\FrameSep}{0.6em}
+
+% For indented lists of verbatim code at a relaxed line spacing (0.25in left indent,
+% spacing factor 1.5, bold alltt), e.g., for use after "where clause is one of the following:"
+\usepackage{setspace}   % provides the spacing environment; NOTE(review): also loaded earlier in this file, so this repeat is redundant
+\newenvironment{indentedcodelist}{%
+    \begin{adjustwidth}{0.25in}{}\begin{spacing}{1.5}\begin{alltt}\bfseries}
+    {\end{alltt}\end{spacing}\vspace{-0.25\baselineskip}\end{adjustwidth}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Macros for the black and blue lines and arrows delineating language-specific
+% and notes sections. Example:
+%
+%   \fortranspecificstart
+%   This is text that applies to Fortran.
+%   \fortranspecificend
+
+% local parameters for use by \linewitharrows and \notelinewitharrows:
+\newlength{\sbsz}\setlength{\sbsz}{0.05in}  % size of arrows
+\newlength{\sblw}\setlength{\sblw}{1.35pt}  % line width (thickness)
+\newlength{\sbtw}                           % text width
+\newlength{\sblen}                          % total width of horizontal rule
+\newlength{\sbht}                           % height of arrows
+\newlength{\sbhadj}                         % vertical adjustment for aligning arrows with the line
+\newlength{\sbns}\setlength{\sbns}{7\baselineskip}       % arg for \needspace for downward arrows
+
+% \notelinewitharrows is a helper command that makes a black Note marker: a \linewidth-wide rule with a filled arrowhead at each end.
+%     arg 1 = 1 or -1 for up or down arrows (used as a sign multiplier on the arrow height and the rule offset)
+%     arg 2 = TikZ line style for the rule: solid or dashed or loosely dashed, etc.
+\newcommand{\notelinewitharrows}[2]{%
+    \needspace{0.1\baselineskip}%
+    \vbox{\begin{tikzpicture}%
+        \setlength{\sblen}{\linewidth}%
+        \setlength{\sbht}{#1\sbsz}\setlength{\sbht}{1.4\sbht}%
+        \setlength{\sbhadj}{#1\sblw}\setlength{\sbhadj}{0.25\sbhadj}%
+        \filldraw (\sblen, 0) -- (\sblen - \sbsz, \sbht) -- (\sblen - 2\sbsz, 0) -- (\sblen, 0);
+        \draw[line width=\sblw, #2] (2\sbsz - \sblw, \sbhadj) -- (\sblen - 2\sbsz + \sblw, \sbhadj);
+        \filldraw (0, 0) -- (\sbsz, \sbht) -- (0 + 2\sbsz, 0) -- (0, 0);
+    \end{tikzpicture}}}
+
+% \linewitharrows is a helper command that makes a blue horizontal line, up or down arrows, and some centered text:
+% arg 1 = 1 or -1 for up or down arrows (used as a sign multiplier on the arrow height and the rule offset)
+% arg 2 = TikZ line style for the rule: solid or dashed or loosely dashed, etc.
+% arg 3 = text, placed at the horizontal middle of the line
+% arg 4 = text width, i.e. the gap left in the middle of the rule (the rule is drawn as two segments around it)
+\newcommand{\linewitharrows}[4]{%
+    \needspace{0.1\baselineskip}%
+    \vbox to 1\baselineskip {\begin{tikzpicture}%
+        \setlength{\sbtw}{#4}%
+        \setlength{\sblen}{\linewidth}%
+        \setlength{\sbht}{#1\sbsz}\setlength{\sbht}{1.4\sbht}%
+        \setlength{\sbhadj}{#1\sblw}\setlength{\sbhadj}{0.25\sbhadj}%
+        \filldraw[color=blue!40] (\sblen, 0) -- (\sblen - \sbsz, \sbht) -- (\sblen - 2\sbsz, 0) -- (\sblen, 0);
+        \draw[line width=\sblw, color=blue!40, #2] (2\sbsz - \sblw, \sbhadj) -- (0.5\sblen - 0.5\sbtw, \sbhadj);
+        \draw[line width=\sblw, color=blue!40, #2] (0.5\sblen + 0.5\sbtw, \sbhadj) -- (\sblen - 2\sbsz + \sblw, \sbhadj);
+        \filldraw[color=blue!40] (0, 0) -- (\sbsz, \sbht) -- (0 + 2\sbsz, 0) -- (0, 0);
+        \node[color=blue!90] at (0.5\sblen, 0) {\large  \textsf{\textup{#3}}};
+    \end{tikzpicture}}}
+
+\newcommand{\VSPb}{\vspace{0.5ex plus 5ex minus 0.25ex}}   % stretchable space placed before a marker line
+\newcommand{\VSPa}{\vspace{0.25ex plus 5ex minus 0.25ex}}   % stretchable space placed after a marker line
+% In every pair below the start marker passes -1 (down arrows) and the end marker passes 1 (up arrows).
+% C  (the start marker first reserves \sbns of vertical space; the end marker is followed by \bigskip)
+\newcommand{\cspecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C}{3em}}
+\newcommand{\cspecificend}{\linewitharrows{1}{solid}{C}{3em}\bigskip}
+
+% C/C++  (both markers are wrapped in the stretchable \VSPb / \VSPa spacing instead)
+\newcommand{\ccppspecificstart}{\VSPb\linewitharrows{-1}{solid}{C / C++}{6em}\VSPa}
+\newcommand{\ccppspecificend}{\VSPb\linewitharrows{1}{solid}{C / C++}{6em}\VSPa}
+
+% C++
+\newcommand{\cppspecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C++}{6em}}
+\newcommand{\cppspecificend}{\linewitharrows{1}{solid}{C++}{6em}\bigskip}
+
+% C90
+\newcommand{\cNinetyspecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C90}{4em}}
+\newcommand{\cNinetyspecificend}{\linewitharrows{1}{solid}{C90}{4em}\bigskip}
+
+% C99
+\newcommand{\cNinetyNinespecificstart}{\needspace{\sbns}\linewitharrows{-1}{solid}{C99}{4em}}
+\newcommand{\cNinetyNinespecificend}{\linewitharrows{1}{solid}{C99}{4em}\bigskip}
+
+% Fortran
+\newcommand{\fortranspecificstart}{\VSPb\linewitharrows{-1}{solid}{Fortran}{6em}\VSPa}
+\newcommand{\fortranspecificend}{\VSPb\linewitharrows{1}{solid}{Fortran}{6em}\VSPa}
+
+% Note  (black marker without text, see \notelinewitharrows)
+\newcommand{\notestart}{\VSPb\notelinewitharrows{-1}{solid}\VSPa}
+\newcommand{\noteend}{\VSPb\notelinewitharrows{1}{solid}\VSPa}
+
+% convenience macro for formatting the word "Note" (followed by a space, no colon) at the beginning of note blocks:
+\newcommand{\noteheader}{{\textrm{\textsf{\textbf\textup\normalsize{{{{Note }}}}}}}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Glossary formatting
+% \glossaryterm{term}: bold, right-flushed term in a 1.4in-wide box shifted 0.75in to the left; line numbers are switched off for the box and back on after it.
+\newcommand{\glossaryterm}[1]{\needspace{1ex}
+\begin{adjustwidth}{-0.75in}{0.0in}
+\nolinenumbers\parbox[b][-0.95\baselineskip][t]{1.4in}{\flushright \textbf{#1}}
+\end{adjustwidth}\linenumbers}
+% \glossarydefstart: opens the block (left margin moved in by 0.79in) that holds the definition text.
+\newcommand{\glossarydefstart}{
+\begin{adjustwidth}{0.79in}{0.0in}}
+% \glossarydefend: closes that block and then removes 1.5 baselines of vertical space.
+\newcommand{\glossarydefend}{
+\end{adjustwidth}\vspace{-1.5\baselineskip}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Indexing and Table of Contents
+
+\usepackage{makeidx}
+\usepackage[nodotinlabels]{titletoc}   % required for its [nodotinlabels] option
+
+% Clickable links in TOC and index:
+\usepackage[hyperindex=true,linktocpage=true]{hyperref}
+\hypersetup{
+  colorlinks  = true, % Colors links instead of red boxes
+  urlcolor    = blue, % Color for external links
+  linkcolor   = blue  % Color for internal links
+}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Formats a cross reference label as "Section X on page Y".
+%     arg 1 = the \label key; it is passed to both \ref and \pageref
+\newcommand{\specref}[1]{Section~\ref{#1} on page~\pageref{#1}}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Code example formatting for the Examples document
+% This defines:
+%     \cexample       formats blue markers, caption, and code for C/C++ examples
+%     \fexample       formats blue markers, caption, and code for Fortran examples
+% Thanks to Jin, Haoqiang H. for the original definitions of the following:
+
+\usepackage{color,fancyvrb}  % for \VerbatimInput
+\usepackage{toolbox}         % for \toolboxMakeSplit
+
+\renewcommand\theFancyVerbLine{\normalfont\footnotesize\sffamily S-\arabic{FancyVerbLine}}
+% \myreplace{old}{new}{text}: emits text with old replaced by new. It splits text at old (via \toolboxMakeSplit), emits the left part, then new, and recurses on the right part until \righttext is undefined.
+\newcommand{\myreplace}[3]{\bgroup\toolboxMakeSplit*{#1}{DoSplit}%
+   \long\def\DoReplace##1{\DoSplit{##1}\lefttext\righttext
+   \lefttext
+   \toolboxIfElse{\ifx\righttext\undefined}{}%
+      {#2\expandafter\DoReplace\expandafter{\righttext}}}%
+   \DoReplace{#3}\egroup}
+% \escstr{text}: text with each "_" replaced by "\_" (only referenced from the commented-out mnemonic numbering below).
+\newcommand{\escstr}[1]{\myreplace{_}{\_}{#1}}
+% \exampleheader{name}{suffix}: defines \cname (source-file stem) and \ename (printed label), then typesets "Example \ename" in italics.
+\def\exampleheader#1#2{%
+   \ifthenelse{ \equal{#1}{} }{
+      \def\cname{#2}
+      \def\ename{\cname}
+   }{
+      \def\cname{#1.#2}
+% Use following line for old numbering
+      \def\ename{\thechapter.#2}
+% Use following for mnemonics
+%      \def\ename{\escstr{#1}.#2}
+   }
+   \noindent
+   \textit{Example \ename}
+   %\vspace*{-3mm}
+}
+% \cnexample{name}{suffix}: prints the example header, then a line-numbered bold listing of sources/Example_\cname.c from file line 8 on (skipping the 7-line @@ metadata comment).
+\def\cnexample#1#2{%
+   \exampleheader{#1}{#2}
+   \code{\VerbatimInput[numbers=left,numbersep=10ex,firstnumber=1,firstline=8,fontsize=\small]%
+   %\code{\VerbatimInput[numbers=left,firstnumber=1,firstline=8,fontsize=\small]%
+   %\code{\VerbatimInput[firstline=8,fontsize=\small]%
+      {sources/Example_\cname.c}} 
+}
+% \fnexample{name}{suffix}: prints the example header, then a line-numbered bold listing of sources/Example_\cname.f from file line 6 on (skipping the 5-line @@ metadata comment).
+\def\fnexample#1#2{%
+   \exampleheader{#1}{#2}
+   \code{\VerbatimInput[numbers=left,numbersep=10ex,firstnumber=1,firstline=6,fontsize=\small]%
+   %\code{\VerbatimInput[numbers=left,firstnumber=1,firstline=6,fontsize=\small]%
+   %\code{\VerbatimInput[firstline=6,fontsize=\small]%
+      {sources/Example_\cname.f}}
+}
+% \cexample{name}{suffix}: a C/C++ example (header and listing via \cnexample) between the "C / C++" start and end markers; asks for 5 baselines of room first.
+\newcommand\cexample[2]{%
+\needspace{5\baselineskip}\ccppspecificstart
+\cnexample{#1}{#2}
+\ccppspecificend
+}
+% \fexample{name}{suffix}: a Fortran example (header and listing via \fnexample) between the "Fortran" start and end markers; asks for 5 baselines of room first.
+\newcommand\fexample[2]{%
+\needspace{5\baselineskip}\fortranspecificstart
+\fnexample{#1}{#2}
+\fortranspecificend
+}
+
+
+% Set default fonts:
+\rmfamily\mdseries\upshape\normalsize
+
+
+% This is the end of openmp.sty of the OpenMP specification.
--- a/sources/Example_affinity.1c.c
+++ b/sources/Example_affinity.1c.c
@ -0,0 +1,16 @
+/*
+* @@name:	affinity.1c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	yes
+* @@expect:	success
+*/
+void work();   /* per-thread work routine (declaration only) */
+int main()     /* main must return int in a hosted C program */
+{
+#pragma omp parallel proc_bind(spread) num_threads(4)
+   {
+      work();   /* called by each of the 4 requested threads */
+   }
+   return 0;
+}
--- a/sources/Example_affinity.1f.f
+++ b/sources/Example_affinity.1f.f
@ -0,0 +1,10 @@
+! @@name:	affinity.1f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	yes
+! @@expect:	success
+      program example
+!$omp parallel proc_bind(spread) num_threads(4)
+      call work()   ! each thread of the team calls work
+!$omp end parallel
+      end program example
--- a/sources/Example_affinity.2c.c
+++ b/sources/Example_affinity.2c.c
@ -0,0 +1,15 @@
+/*
+* @@name:	affinity.2c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+void work();      /* per-thread work routine (declaration only) */
+void foo()
+{
+   /* Ask for a team of 16 threads with the spread affinity policy; */
+   /* each thread of the team then calls work().                    */
+#pragma omp parallel num_threads(16) proc_bind(spread)
+   work();
+}
--- a/sources/Example_affinity.2f.f
+++ b/sources/Example_affinity.2f.f
@ -0,0 +1,10 @@
+! @@name:	affinity.2f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+subroutine foo
+   !$omp parallel num_threads(16) proc_bind(spread)
+   call work()   ! each thread of the team calls work
+   !$omp end parallel
+end subroutine foo
--- a/sources/Example_affinity.3c.c
+++ b/sources/Example_affinity.3c.c
@ -0,0 +1,16 @
+/*
+* @@name:	affinity.3c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	yes
+* @@expect:	success
+*/
+void work();   /* per-thread work routine (declaration only) */
+int main()     /* main must return int in a hosted C program */
+{
+#pragma omp parallel proc_bind(close) num_threads(4)
+   {
+      work();   /* called by each of the 4 requested threads */
+   }
+   return 0;
+}
--- a/sources/Example_affinity.3f.f
+++ b/sources/Example_affinity.3f.f
@ -0,0 +1,10 @@
+! @@name:	affinity.3f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	yes
+! @@expect:	success
+      program example
+!$omp parallel proc_bind(close) num_threads(4)
+      call work()   ! each thread of the team calls work
+!$omp end parallel
+      end program example
--- a/sources/Example_affinity.4c.c
+++ b/sources/Example_affinity.4c.c
@ -0,0 +1,15 @@
+/*
+* @@name:	affinity.4c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+void work();      /* per-thread work routine (declaration only) */
+void foo()
+{
+   /* Ask for a team of 16 threads with the close affinity policy; */
+   /* each thread of the team then calls work().                   */
+#pragma omp parallel num_threads(16) proc_bind(close)
+   work();
+}
--- a/sources/Example_affinity.4f.f
+++ b/sources/Example_affinity.4f.f
@ -0,0 +1,10 @@
+! @@name:	affinity.4f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+subroutine foo
+   !$omp parallel num_threads(16) proc_bind(close)
+   call work()   ! each thread of the team calls work
+   !$omp end parallel
+end subroutine foo
--- a/sources/Example_affinity.5c.c
+++ b/sources/Example_affinity.5c.c
@ -0,0 +1,16 @
+/*
+* @@name:	affinity.5c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	yes
+* @@expect:	success
+*/
+void work();   /* per-thread work routine (declaration only) */
+int main()     /* main must return int in a hosted C program */
+{
+#pragma omp parallel proc_bind(master) num_threads(4)
+   {
+      work();   /* called by each of the 4 requested threads */
+   }
+   return 0;
+}
--- a/sources/Example_affinity.5f.f
+++ b/sources/Example_affinity.5f.f
@ -0,0 +1,10 @@
+! @@name:	affinity.5f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	yes
+! @@expect:	success
+      program example
+!$omp parallel proc_bind(master) num_threads(4)
+      call work()   ! each thread of the team calls work
+!$omp end parallel
+      end program example
--- a/sources/Example_array_sections.1c.c
+++ b/sources/Example_array_sections.1c.c
@ -0,0 +1,19 @@
+/*
+* @@name:	array_sections.1c
+* @@type:	C
+* @@compilable:	no
+* @@linkable:	no
+* @@expect:	failure
+*/
+void foo ()
+{
+   int A[30];
+   /* the enclosing data region maps A[0] through A[3] only */
+#pragma omp target data map( A[0:4] )
+   {
+      /* Cannot map distinct parts of the same array: A[7] through */
+      /* A[26] do not belong to the section that is already mapped */
+#pragma omp target map( A[7:20] )
+      A[2] = 0;
+   }
+}
--- a/sources/Example_array_sections.1f.f
+++ b/sources/Example_array_sections.1f.f
@ -0,0 +1,15 @@
+! @@name:	array_sections.1f
+! @@type:	F-free
+! @@compilable:	no
+! @@linkable:	no
+! @@expect:	failure
+subroutine foo()
+integer :: A(30)
+   A = 1
+   !$omp target data map( A(1:4) )
+     ! Cannot map distinct parts of the same array: A(8:27) lies outside A(1:4)
+     !$omp target map( A(8:27) )
+        A(3) = 0
+     !$omp end target
+   !$omp end target data
+end subroutine
--- a/sources/Example_array_sections.2c.c
+++ b/sources/Example_array_sections.2c.c
@ -0,0 +1,23 @@
+/*
+* @@name:	array_sections.2c
+* @@type:	C
+* @@compilable:	no
+* @@linkable:	no
+* @@expect:	failure
+*/
+void foo ()
+{
+   int A[30];
+   int *p;
+#pragma omp target data map( A[0:4] )
+   {
+      p = A;   /* p aliases the first element of A */
+      /* Invalid: p[3] and A[3] name the same host location, yet the
+       * section given through p[...] is not a subset of A[0:4]. */
+#pragma omp target map( p[3:20] )
+      {
+         A[2] = 0;
+         p[8] = 0;
+      }
+   }
+}
--- a/sources/Example_array_sections.2f.f
+++ b/sources/Example_array_sections.2f.f
@ -0,0 +1,20 @@
+! @@name:	array_sections.2f
+! @@type:	F-free
+! @@compilable:	no
+! @@linkable:	no
+! @@expect:	failure
+subroutine foo()
+   integer, target  :: A(30)
+   integer, pointer :: p(:)
+   A = 1
+   !$omp target data map( A(1:4) )
+      p => A
+      ! Invalid: p(4) and A(4) are the same host location,
+      ! yet the array section specified through p(...)
+      ! is not a subset of A(1:4).
+      !$omp target map( p(4:23) )
+         A(3) = 0
+         p(9) = 0
+      !$omp end target
+   !$omp end target data
+end subroutine foo
--- a/sources/Example_array_sections.3c.c
+++ b/sources/Example_array_sections.3c.c
@ -0,0 +1,20 @@
+/*
+* @@name:	array_sections.3c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+void foo ()
+{
+   int A[30], *p;
+#pragma omp target data map( A[0:4] )
+   {
+      p = A;   /* p aliases the first element of A */
+#pragma omp target map( p[7:20] )
+      {
+         A[2] = 0;
+         p[8] = 0;
+      }
+   }
+}
--- a/sources/Example_array_sections.3f.f
+++ b/sources/Example_array_sections.3f.f
@ -0,0 +1,16 @@
+! @@name:	array_sections.3f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+subroutine foo()
+integer,target  :: A(30)
+integer,pointer :: p(:)
+   !$omp target data map( A(1:4) )
+     p=>A            ! p is pointer-associated with the whole of A
+     !$omp target map( p(8:27) )
+        A(3) = 0
+        p(9) = 0
+     !$omp end target
+   !$omp end target data
+end subroutine
--- a/sources/Example_array_sections.4c.c
+++ b/sources/Example_array_sections.4c.c
@ -0,0 +1,21 @@
+/*
+* @@name:	array_sections.4c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+void foo ()
+{
+   int A[30], *p;   /* p was used below without a declaration */
+#pragma omp target data map( A[0:10] )
+{
+   p = &A[0];
+   #pragma omp target map( p[3:7] )
+   {
+      A[2] = 0;
+      p[8] = 0;
+      A[8] = 1;
+   }
+}
+}
--- a/sources/Example_array_sections.4f.f
+++ b/sources/Example_array_sections.4f.f
@ -0,0 +1,17 @@
+! @@name:	array_sections.4f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+subroutine foo()
+   integer, target  :: A(30)
+   integer, pointer :: p(:)
+   !$omp target data map( A(1:10) )
+      p => A
+      !$omp target map( p(4:10) )
+         A(3) = 0
+         p(9) = 0
+         A(9) = 1
+      !$omp end target
+   !$omp end target data
+end subroutine foo
--- a/sources/Example_associate.1f.f
+++ b/sources/Example_associate.1f.f
@ -0,0 +1,13 @@
+! @@name:	associate.1f
+! @@type:	F-fixed
+! @@compilable:	no
+! @@linkable:	no
+! @@expect:	failure
+      program example
+      real :: c, a
+      associate (b => a)
+!$omp parallel private(c, b)        ! invalid to privatize b
+         c = 2.0 * b
+!$omp end parallel
+      end associate
+      end program example
--- a/sources/Example_associate.2f.f
+++ b/sources/Example_associate.2f.f
@ -0,0 +1,15 @@
+! @@name:	associate.2f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	yes
+! @@expect:	success
+      program example
+      use omp_lib
+      integer :: i
+!$omp parallel private(i)
+         i = omp_get_thread_num()
+         associate (thread_id => i)
+            print *, thread_id   ! shows this thread's private i
+         end associate
+!$omp end parallel
+      end program example
--- a/sources/Example_associate.3f.f
+++ b/sources/Example_associate.3f.f
@ -0,0 +1,16 @@
+! @@name:	associate.3f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	yes
+! @@expect:	success
+program example
+   integer :: v
+   v = 15
+   associate (u => v)
+   !$omp parallel private(v)
+      v = -1
+      print *, v   ! private v=-1
+      print *, u   ! original v=15
+   !$omp end parallel
+   end associate
+end program example
--- a/sources/Example_async_target.1c.c
+++ b/sources/Example_async_target.1c.c
@ -0,0 +1,28 @@
+/*
+* @@name:	async_target.1c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+#pragma omp declare target
+float F(float);
+#pragma omp end declare target
+#define N 1000000000
+#define CHUNKSZ 1000000
+void init(float *, int);
+float Z[N];
+void pipedF()
+{
+   int C, i;
+   init(Z, N);
+   for (C=0; C<N; C+=CHUNKSZ)   /* one task (and one offload) per chunk */
+   {
+      #pragma omp task
+      #pragma omp target map(Z[C:CHUNKSZ])
+      #pragma omp parallel for
+      for (i=C; i<C+CHUNKSZ; i++)   /* stay inside the mapped slice */
+         Z[i] = F(Z[i]);
+   }
+   #pragma omp taskwait
+}
--- a/sources/Example_async_target.1f.f
+++ b/sources/Example_async_target.1f.f
@ -0,0 +1,38 @@
+! @@name:	async_target.1f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+module parameters
+integer, parameter :: N=1000000000, CHUNKSZ=1000000
+end module
+subroutine pipedF()
+use parameters, ONLY: N, CHUNKSZ
+integer            :: C, i
+real               :: z(N)
+
+interface
+   function F(z)
+   !$omp declare target
+     real, intent(IN) ::z
+     real             ::F
+   end function F
+end interface
+
+   call init(z,N)
+
+   do C=1,N,CHUNKSZ
+      ! z is a local array: share it so every task updates this copy
+      !$omp task shared(z)
+      !$omp target map(z(C:C+CHUNKSZ-1))
+      !$omp parallel do
+         do i=C,C+CHUNKSZ-1
+            z(i) = F(z(i))
+         end do
+      !$omp end target
+      !$omp end task
+
+   end do
+   !$omp taskwait      ! all chunks must be finished before z is read
+   print*, z
+end subroutine pipedF
--- a/sources/Example_async_target.2c.c
+++ b/sources/Example_async_target.2c.c
@ -0,0 +1,43 @
+/*
+* @@name:	async_target.2c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+#include <stdlib.h>
+#include <omp.h>
+#pragma omp declare target
+extern void init(float *, float *, int);
+#pragma omp end declare target
+extern void foo();
+extern void output(float *, int);
+void vec_mult(float *p, float *v1, float *v2, int N, int dev)
+{
+   int i;
+   #pragma omp task shared(v1, v2) depend(out: v1, v2)
+   #pragma omp target device(dev) map(v1, v2)
+   {
+       // check whether on device dev
+       if (omp_is_initial_device())
+	  abort();
+       v1 = malloc(N*sizeof(float));
+       v2 = malloc(N*sizeof(float));
+       init(v1, v2, N);
+   }
+   foo(); // execute other work asynchronously
+   #pragma omp task shared(v1, v2, p) depend(in: v1, v2)
+   #pragma omp target device(dev) map(to: v1, v2) map(from: p[0:N])
+   {
+       // check whether on device dev
+       if (omp_is_initial_device())
+	  abort();
+       #pragma omp parallel for
+       for (i=0; i<N; i++)
+	 p[i] = v1[i] * v2[i];
+       free(v1);
+       free(v2);
+   }
+   #pragma omp taskwait
+   output(p, N); // on the host, once both tasks have completed
+}
--- a/sources/Example_async_target.2f.f
+++ b/sources/Example_async_target.2f.f
@ -0,0 +1,39 @@
+! @@name:	async_target.2f
+! @@type:	F-free
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+ subroutine mult(p,  N, idev)
+   use omp_lib, ONLY: omp_is_initial_device
+   real             :: p(N)
+   real,allocatable :: v1(:), v2(:)
+   integer ::  i, idev
+   !$omp declare target (init)
+
+   !$omp task shared(v1,v2) depend(out: v1,v2)
+      !$omp target device(idev) map(v1,v2)
+         if( omp_is_initial_device() ) &
+            stop "not executing on target device"
+         allocate(v1(N), v2(N))
+         call init(v1,v2,N)
+      !$omp end target
+   !$omp end task
+
+   call foo()  ! execute other work asynchronously
+
+   !$omp task shared(v1,v2,p) depend(in: v1,v2)
+      !$omp target device(idev) map(to: v1,v2) map(from: p)
+         if( omp_is_initial_device() ) &
+            stop "not executing on target device"
+         !$omp parallel do
+            do i = 1,N
+               p(i) = v1(i) * v2(i)
+            end do
+         deallocate(v1,v2)
+
+      !$omp end target
+   !$omp end task
+   !$omp taskwait      ! both tasks must finish before p is used
+   call output(p, N)
+
+end subroutine
--- a/sources/Example_atomic.1c.c
+++ b/sources/Example_atomic.1c.c
@ -0,0 +1,45 @@
+/*
+* @@name:	atomic.1c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	yes
+* @@expect:	success
+*/
+float work1(int i)
+{
+  return i * 1.0;   /* the index itself, as a floating-point value */
+}
+
+float work2(int i)
+{
+  return i * 2.0;   /* twice the index, as a floating-point value */
+}
+
+void atomic_example(float *x, float *y, int *index, int n)
+{
+  int k;   /* index of the parallel loop */
+#pragma omp parallel for shared(x, y, index, n)
+  for (k = 0; k < n; k++) {
+    /* different iterations can reach the same x element through index */
+#pragma omp atomic update
+    x[index[k]] += work1(k);
+    y[k] += work2(k);   /* y[k] is updated by iteration k only */
+  }
+}
+
+int main()
+{
+  float x[1000], y[10000];
+  int index[10000];
+  int j;
+
+  /* clear the accumulators first */
+  for (j = 0; j < 1000; j++)
+    x[j] = 0.0;
+  for (j = 0; j < 10000; j++) {
+    y[j] = 0.0;
+    index[j] = j % 1000;   /* each x element is the target of 10 iterations */
+  }
+  atomic_example(x, y, index, 10000);
+  return 0;
+}
--- a/sources/Example_atomic.1f.f
+++ b/sources/Example_atomic.1f.f
@ -0,0 +1,49 @@
+! @@name:	atomic.1f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	yes
+! @@expect:	success
+! Returns 1.0 * I as a default REAL.
+      FUNCTION WORK1(I) RESULT(W)
+        IMPLICIT NONE
+        INTEGER, INTENT(IN) :: I
+        REAL :: W
+        W = 1.0 * I
+      END FUNCTION WORK1
+
+! Returns 2.0 * I as a default REAL.
+      FUNCTION WORK2(I) RESULT(W)
+        IMPLICIT NONE
+        INTEGER, INTENT(IN) :: I
+        REAL :: W
+        W = 2.0 * I
+      END FUNCTION WORK2
+
+! Adds WORK1(K) to X(INDEX(K)) and WORK2(K) to Y(K) for K = 1..N, with
+! the iterations distributed by a PARALLEL DO.
+      SUBROUTINE SUB(X, Y, INDEX, N)
+        IMPLICIT NONE
+        REAL, INTENT(INOUT) :: X(*), Y(*)
+        INTEGER, INTENT(IN) :: INDEX(*), N
+        REAL, EXTERNAL :: WORK1, WORK2
+
+        INTEGER K
+
+!$OMP   PARALLEL DO SHARED(X, Y, INDEX, N)
+          DO K = 1, N
+! INDEX may send several iterations to the same element of X (the
+! driver program does exactly that), so this update is made atomic.
+!$OMP       ATOMIC UPDATE
+            X(INDEX(K)) = X(INDEX(K)) + WORK1(K)
+! Y is subscripted by the loop index itself, so each iteration updates
+! its own element without an atomic.
+            Y(K) = Y(K) + WORK2(K)
+          END DO
+!$OMP   END PARALLEL DO
+
+      END SUBROUTINE SUB
+
+! Driver: clears X and Y, fills INDEX with values in 1..NX, calls SUB.
+      PROGRAM ATOMIC_EXAMPLE
+        IMPLICIT NONE
+        INTEGER, PARAMETER :: NX = 1000, NY = 10000
+        REAL X(NX), Y(NY)
+        INTEGER INDEX(NY)
+        INTEGER I
+
+        X = 0.0
+        Y = 0.0
+
+! MOD(I, NX) + 1 wraps the NY iterations onto elements 1..NX of X
+        DO I = 1, NY
+          INDEX(I) = MOD(I, NX) + 1
+        END DO
+
+        CALL SUB(X, Y, INDEX, NY)
+
+      END PROGRAM ATOMIC_EXAMPLE
--- a/sources/Example_atomic.2c.c
+++ b/sources/Example_atomic.2c.c
@ -0,0 +1,27 @@
+/*
+* @@name:	atomic.2c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+/*
+ * Atomically reads *p and returns the value read.
+ */
+int atomic_read(const int *p)
+{
+    int snapshot;
+
+    /* Guarantee that the entire value of *p is read atomically. No part
+     * of *p can change during the read operation. */
+#pragma omp atomic read
+    snapshot = *p;
+
+    return snapshot;
+}
+
+/*
+ * Stores value into *p using an atomic write.
+ */
+void atomic_write(int *p, int value)
+{
+/* Guarantee that value is stored atomically into *p. No part of *p can
+ * change until after the entire write operation is completed.
+ */
+#pragma omp atomic write
+    *p = value;
+}
--- a/sources/Example_atomic.2f.f
+++ b/sources/Example_atomic.2f.f
@ -0,0 +1,24 @@
+! @@name:	atomic.2f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+! Atomically reads p and returns the value read.
+       function atomic_read(p) result(snapshot)
+       implicit none
+       integer, intent(in) :: p
+       integer :: snapshot
+
+! Guarantee that the entire value of p is read atomically. No part of
+! p can change during the read operation.
+!$omp atomic read
+       snapshot = p
+       end function atomic_read
+
+! Stores value into p using an atomic write.
+       subroutine atomic_write(p, value)
+       integer, intent(out) :: p
+       integer, intent(in) :: value
+! Guarantee that value is stored atomically into p. No part of p can
+! change until after the entire write operation is completed.
+!$omp atomic write
+       p = value
+       end subroutine atomic_write
--- a/sources/Example_atomic.3c.c
+++ b/sources/Example_atomic.3c.c
@ -0,0 +1,44 @@
+/*
+* @@name:	atomic.3c
+* @@type:	C
+* @@compilable:	yes
+* @@linkable:	no
+* @@expect:	success
+*/
+/*
+ * Atomically read the value of *p and then increment it. The previous
+ * value is returned. This can be used to implement a simple lock as
+ * shown below.
+ */
+int fetch_and_add(int *p)
+{
+    int previous;
+
+    /* Both statements of the block form one atomic capture of *p. */
+#pragma omp atomic capture
+    {
+        previous = *p;
+        *p += 1;
+    }
+
+    return previous;
+}
+
+/*
+ * Use fetch_and_add to implement a lock
+ */
+/* Lock state used by do_locked_work. */
+struct locktype {
+    int ticketnumber;   /* fetched and incremented by each caller to obtain its turn value */
+    int turn;           /* compared with the caller's turn value; incremented on release */
+};
+/*
+ * Calls work() while holding the lock: takes a value from ticketnumber,
+ * spins until turn equals it, runs work() between two flushes, then
+ * increments turn.
+ */
+void do_locked_work(struct locktype *lock)
+{
+    int atomic_read(const int *p);
+    void work();
+    int ticket;
+
+    // Obtain the lock
+    ticket = fetch_and_add(&lock->ticketnumber);
+    while (atomic_read(&lock->turn) != ticket) {
+        // busy-wait until turn reaches our ticket
+    }
+
+    // Do some work. The flush is needed to ensure visibility of
+    // variables not involved in atomic directives
+#pragma omp flush
+    work();
+#pragma omp flush
+
+    // Release the lock
+    fetch_and_add(&lock->turn);
+}
--- a/sources/Example_atomic.3f.f
+++ b/sources/Example_atomic.3f.f
@ -0,0 +1,50 @@
+! @@name:	atomic.3f
+! @@type:	F-fixed
+! @@compilable:	yes
+! @@linkable:	no
+! @@expect:	success
+! Atomically read the value of p and then increment it. The previous
+! value is returned. This can be used to implement a simple lock as
+! shown below.
+       function fetch_and_add(p) result(previous)
+       implicit none
+       integer, intent(inout) :: p
+       integer :: previous
+
+! Both statements up to END ATOMIC form one atomic capture of p.
+!$omp atomic capture
+       previous = p
+       p = p + 1
+!$omp end atomic
+       end function fetch_and_add
+! Module m: the locktype derived type and do_locked_work, built on the
+! external functions fetch_and_add and atomic_read.
+       module m
+       implicit none
+
+! Explicit interfaces for the two external functions used below
+       interface
+         function fetch_and_add(p)
+           integer :: fetch_and_add
+           integer, intent(inout) :: p
+         end function fetch_and_add
+         function atomic_read(p)
+           integer :: atomic_read
+           integer, intent(in) :: p
+         end function atomic_read
+       end interface
+
+! Lock state: ticketnumber hands out turn values, turn is the value
+! currently allowed to proceed
+       type locktype
+          integer :: ticketnumber
+          integer :: turn
+       end type locktype
+
+       contains
+
+! Calls work while holding the lock
+       subroutine do_locked_work(lock)
+       type(locktype), intent(inout) :: lock
+       integer :: ticket
+       integer :: ignored
+
+! Obtain the lock: take a ticket, then busy-wait until turn reaches it
+       ticket = fetch_and_add(lock%ticketnumber)
+       do while (atomic_read(lock%turn) /= ticket)
+       end do
+
+! Do some work. The flush is needed to ensure visibility of variables
+! not involved in atomic directives
+!$omp flush
+       call work
+!$omp flush
+
+! Release the lock; the value returned by fetch_and_add is not needed
+       ignored = fetch_and_add(lock%turn)
+       end subroutine do_locked_work
+       end module m
--- a/Show More
+++ b/Show More