pr #2233 Merge branch 'master' into 1772_modify_adjoint_metadata

stfc · Sep 12, 2023 · c59096e · c59096e
2 parents 3288a85 + 3fc7ae0
commit c59096e
Show file tree

Hide file tree

Showing 101 changed files with 4,018 additions and 1,017 deletions.
diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml
@@ -38,14 +38,24 @@
 # Modified by N. M. Nobre, STFC Daresbury Lab
 
 # This workflow will use a self-hosted runner to perform the more expensive
-# integrations tests that are not run on GHA systems. At the moment it only
-# runs the test suite with compilation enabled (using gfortran).
+# compilation tests that are not run on GHA systems. At the moment it:
+#
+#  * Runs the test suite with compilation with gfortran and nvfortran.
+#  * Builds the examples with gfortran and nvfortran (the latter with
+#    OpenACC enabled).
+#  * Builds the tutorials with gfortran and nvfortran.
 
 name: Compilation tests
 
 on:
   push
 
+env:
+  PYTHON_VERSION: 3.11.4
+  GFORTRAN_VERSION: 13.2.0
+  NVFORTRAN_VERSION: 23.7
+  CUDA_VERSION: 12.2
+
 jobs:
   run_if_on_mirror:
     # Only PSyclone-mirror has the necessary self-hosted runner.
@@ -66,17 +76,60 @@ jobs:
         commit-filter: '[skip ci]'
     - name: Install dependencies
       run: |
-        module load python/3.11.3
+        module load python/${PYTHON_VERSION}
         python -m venv .runner_venv
         . .runner_venv/bin/activate
         python -m pip install --upgrade pip
         # If you wish to install the version of fparser pointed to by the
         # submodule instead of the released version (from PyPI) then
         # uncomment the following line:
         pip install external/fparser
-        pip install .[test]
-    - name: Test with pytest and compilation
+        pip install .[test,psydata]
+    - name: Unit tests with compilation - gfortran
+      run: |
+        module load python/${PYTHON_VERSION}
+        . .runner_venv/bin/activate
+        module load gcc/${GFORTRAN_VERSION} openmpi netcdf_fortran
+        pytest -n 2 --f90=gfortran --compile --compileopencl src/psyclone/tests
+        module rm netcdf_fortran gcc
+    - name: Unit tests with compilation - nvfortran
+      run: |
+        module load python/${PYTHON_VERSION}
+        . .runner_venv/bin/activate
+        module load nvidia-hpcsdk/${NVFORTRAN_VERSION} netcdf_fortran
+        # We have to tell nvfortran where to find the OpenCL library.
+        pytest -n 2 --f90=nvfortran --f90flags="-L/apps/packages/compilers/nvidia-hpcsdk/Linux_x86_64/${NVFORTRAN_VERSION}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib" --compile --compileopencl src/psyclone/tests
+        module rm netcdf_fortran nvidia-hpcsdk
+    - name: Examples with compilation - gfortran
+      run: |
+        module load python/${PYTHON_VERSION}
+        . .runner_venv/bin/activate
+        module load gcc/${GFORTRAN_VERSION} openmpi netcdf_fortran
+        # Although we're using gfortran, we link with the OpenCL lib that comes
+        # with CUDA.
+        make -C examples allclean
+        F90=gfortran F90FLAGS="-L/apps/packages/compilers/nvidia-hpcsdk/Linux_x86_64/${NVFORTRAN_VERSION}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib" make -C examples compile
+    - name: Tutorials with compilation - gfortran
+      run: |
+        module load python/${PYTHON_VERSION}
+        . .runner_venv/bin/activate
+        make -C tutorial/practicals allclean
+        module load gcc/${GFORTRAN_VERSION} openmpi netcdf_fortran
+        make -C tutorial/practicals compile
+    - name: Examples with compilation - nvfortran
+      run: |
+        module load python/${PYTHON_VERSION}
+        . .runner_venv/bin/activate
+        make -C examples allclean
+        module load nvidia-hpcsdk/${NVFORTRAN_VERSION} netcdf_fortran
+        # We have to tell nvfortran where to find the OpenCL library.
+        F90=nvfortran F90FLAGS="-acc -Minfo=all -L/apps/packages/compilers/nvidia-hpcsdk/Linux_x86_64/${NVFORTRAN_VERSION}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib" make -C examples compile
+    - name: Tutorials with compilation - nvfortran
       run: |
-        module load python/3.11.3
+        module load python/${PYTHON_VERSION}
         . .runner_venv/bin/activate
-        pytest -n 2 --f90=gfortran --compile src/psyclone/tests
+        make -C tutorial/practicals allclean
+        module load nvidia-hpcsdk/${NVFORTRAN_VERSION} netcdf_fortran
+        # TODO #2251. Cannot build the LFRic practicals with 23.5/7 of nvfortran
+        # because the compilation of nan_test.f90 gives an ICE.
+        F90=nvfortran F90FLAGS="-acc -Minfo=all" make -C tutorial/practicals/nemo compile
diff --git a/.github/workflows/nemo_tests.yml b/.github/workflows/nemo_tests.yml
@@ -100,7 +100,28 @@ jobs:
         tail -n 1 output.txt | grep -q "S_min:  0.482686"
         tail -n 1 output.txt | grep -q "S_max:  0.407622"
         grep -A 1 "Elapsed Time" output.txt
-        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history
+        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history_openmp_gpu
+
+    # PSyclone, compile and run MetOffice NEMO with OpenACC kernels for GPUs
+    - name: NEMO MetOffice OpenACC kernels for GPU
+      run: |
+        . .runner_venv/bin/activate
+        export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
+        export NEMO_DIR=${HOME}/NEMO
+        cd examples/nemo/scripts
+        make -j 4 openacc_kernels
+        module load nvidia-hpcsdk netcdf_fortran
+        COMPILER_ARCH=linux_nvidia_acc_gpu make -j 4 compile-openacc_kernels
+        export NV_ACC_POOL_THRESHOLD=75
+        make run-openacc_kernels | tee output.txt
+        # Check the output is as expected for the first 6 digits
+        tail -n 1 output.txt | grep -q " it :      10"
+        tail -n 1 output.txt | grep -q "|ssh|_max:  0.259483"
+        tail -n 1 output.txt | grep -q "|U|_max:  0.458515"
+        tail -n 1 output.txt | grep -q "S_min:  0.482686"
+        tail -n 1 output.txt | grep -q "S_max:  0.407622"
+        grep -A 1 "Elapsed Time" output.txt
+        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history_openacc_kernels_gpu
 
     # PSyclone, compile and run ECMWF NEMO with OpenMP for CPUs
     - name: NEMO ECMWF OpenMP for CPU
@@ -129,4 +150,4 @@ jobs:
         tail -n 1 output.txt | grep -q "S_min:  0.108530"
         tail -n 1 output.txt | grep -q "S_max:  0.404045"
         grep -A 1 "Elapsed Time" output.txt
-        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history
+        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history_openmp_cpu
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -93,7 +93,14 @@ jobs:
     - uses: actions/setup-python@v4
     - run: python -m pip install --upgrade pip
     - run: pip install .[doc]
-    - run: cd doc/developer_guide; make doctest
+    # Sphinx since version 7.2 (7.2.0/1/2) aborts with
+    #   exec('from sympy import *', global_dict)
+    #   File "<string>", line 1, in <module>
+    #  AttributeError: module 'sympy' has no attribute 'external'
+    # when it is run without creating the documentation first.
+    # So until this is fixed in Sphinx, we trigger the creation
+    # of the html documents before actually running doctest.
+    - run: cd doc/developer_guide; make html; make doctest
   build:
     if: ${{ github.repository != 'stfc/PSyclone-mirror' }}
     runs-on: ubuntu-latest

diff --git a/changelog b/changelog
@@ -541,6 +541,55 @@
 	182) PR #2240 for #2234. Fix WhileLoop issue in the NEMO OpenACC kernels
 	script.
 
+	183) PR #2182 for #2179. Fix LFRic eg14 nvfortran compilation and add
+	nvfortran examples and tutorials steps in the Integration tests. 
+
+	184) PR #2250 for #2248 and #1575. Fixes bug in line-length limiter
+	to prevent it breaking a line before the first non-whitespace
+	character.
+
+	185) PR #2262 towards #446. Implement reference_access for Call nodes.
+
+	186) PR #2184 for #2166. Adds Sympy support for user-defined types.
+
+	187) PR #2270 for #1575. Fixes the line-length limiter for code that
+	has an indent greater than the max. line length.
+
+	188) PR #2259 for #1932. Adds support for the 'gang' and 'vector'
+	clauses on OpenACC Loop directives.
+
+	189) PR #2276 for #2274. Small fixes for various complaints
+	produced by latest version of pycodestyle.
+
+	190) PR #2291 for #2290. Bug fix to KernelModuleInlineTrans -
+	ensures that the new RoutineSymbol added to the Container is given
+	a DefaultModuleInterface.
+
+	191) PR #2238 for #2234. Adds support for a generic interface to
+	perform variable comparison when the datatype is unknown. (Required
+	for canonicalising SELECT CASE constructs.)
+
+	192) PR #2292 for #2288. Workaround for Sphinx-doctest bug by
+	building html dev guide before running the doctests.
+
+	193) PR #2260 for #2245. Splits LFRicLoopBounds class from
+	dynamo0.3.py.
+
+	194) PR #2241 for #2215. Adds support for Fortran names being the
+	same as Python keywords when using sympy within PSyclone, e.g. in
+	comparisons between expressions.
+
+	195) PR #2295 for #2294. Extends the PSyData extraction library
+	to support character variables.
+
+	196) PR #2157 for #1396. Add tree-update mechanism and new ACCClause
+	nodes for data movement.
+
+	197) PR #2281 for #2258. Adds a PSyData wrapper for the TAU profiler.
+
+	198) PR #2283 for #2282 and #2286. Adds a get_ubound_expression
+	method to ArrayMixin (analogous to get_lbound_expression).
+
 release 2.3.1 17th of June 2022
 
 	1) PR #1747 for #1720. Adds support for If blocks to PSyAD.

diff --git a/doc/developer_guide/psy_data.rst b/doc/developer_guide/psy_data.rst
@@ -72,7 +72,7 @@ the same time as checking that read-only values are indeed not modified,
 different module names and data types must be used.
 
 PSyData divides its application into different classes. For example,
-the class "profile" is used for all profiling tools (e.g. DrHook or the
+the class "profile" is used for all profiling tools (e.g. TAU, DrHook or the
 NVIDIA profiling tools). This class name is used as a prefix for
 the module name, the ``PSyDataType`` and functions. So if a profiling application
 is linked the above code will actually look like this::
@@ -100,8 +100,8 @@ The class prefixes supported at the moment are:
 ======================= =======================================================
 Class Prefix            Description
 ======================= =======================================================
-profile                 All libraries related to profiling tools like DrHook,
-                        NVIDIA's profiling tools etc. See
+profile                 All libraries related to profiling tools like TAU,
+                        DrHook, NVIDIA's profiling tools etc. See
                         :ref:`user_guide:profiling` for details.
 extract                 For libraries used for kernel data extraction. See
                         :ref:`user_guide:psyke` for details.
@@ -654,6 +654,8 @@ takes the following parameters:
         64-bit integer value
     ``logical``:
         32-bit logical value
+    ``char``:
+        A default string value
 
     Default value is ``real,double,int``.
 
@@ -967,7 +969,7 @@ an index used by the profiling tool in ``profile_PSyDataType``, or
 by storing pointers to the profiling data to be able to
 print all results in a ProfileFinalise() subroutine.
 Some of the wrapper libraries use the PSyData base class (e.g. dl_timer,
-simple_timing, template), others do not (e.g. NVIDIA profiling,
+simple_timing, template), others do not (e.g. NVIDIA profiling, TAU,
 DrHook wrapper).
 
 

diff --git a/doc/developer_guide/psyir.rst b/doc/developer_guide/psyir.rst
@@ -192,7 +192,8 @@ For a full list of methods available in any PSyIR node see the
     To achieve this, we sub-classed the Python list and redefined all
     methods that modify the list by calling first the PSyIR provided
     validation method and subsequently, if valid, calling the associated
-    list method.
+    list method and triggering an 'update' signal (see
+    :ref:`update_signals_label`).
 
 .. _nodesinfo-label:
 
@@ -256,6 +257,26 @@ relationship.
 Methods like ``node.detach()``, ``node.copy()`` and ``node.pop_all_children()``
 can be used to move or replicate existing children into different nodes. 
 
+.. _update_signals_label:
+
+Dynamic Tree Updates
+====================
+
+Certain modifications to a PSyIR tree will require that parent nodes
+also be updated. For instance, if nodes are added to or removed from
+an OpenACC data region, then the clauses describing the
+necessary data movement (to/from the accelerator device) may have to
+change. To support such use cases, the PSyIR Node has the
+``update_signal`` method which is used to signal that the tree has
+been modified. This signal is propagated up the tree (i.e. from parent
+to parent). The default handler for this signal, ``Node._update_node``, does
+nothing. If a sub-class must take action when the tree below it is
+modified then it must override the ``_update_node`` method as appropriate.
+
+Note that the signalling mechanism is fully contained within the ``Node``
+class and takes care of avoiding recursive updates to the same Node instance.
+It should therefore only be necessary for a class to implement the
+``_update_node`` handler.
 
 Selected Node Descriptions
 ==========================
@@ -507,6 +528,16 @@ var2, var3)` would be represented by a
 The PSyIR supports the concept of named arguments for operation
 nodes, see the :ref:`named_arguments-label` section for more details.
 
+.. note:: Similar to Fortran, the PSyIR has two comparison operators, one for
+        booleans (EQV) and one for non-booleans (EQ). These are not
+        interchangeable because they have different precedence priorities and
+        some compilers will not compile with the wrong operator. In some cases
+        we need to insert a comparison of two expressions and we don't know the
+        datatype of the operands (e.g. in the select-case canonicalisation).
+        A solution to this is to create an abstract interface with appropriate
+        implementations for each possible datatype.
+
+
 IntrinsicCall Nodes
 -------------------
 

diff --git a/doc/developer_guide/sympy.rst b/doc/developer_guide/sympy.rst
@@ -153,63 +153,27 @@ is case sensitive.
 
 User-defined Types
 ~~~~~~~~~~~~~~~~~~
-SymPy has no concept of user-defined types like
-``a(i)%b`` in Fortran. But this case is not handled especially, the
-PSyIR is converted to Fortran syntax and is provided unmodified to SymPy.
-SymPy interprets the ``%`` symbol
-as modulo function, so the expression above is read as ``Mod(a(i), b)``.
-This interpretation achieves the expected outcome when comparing structures
-and array references.
-For example, ``a(i+2*j-1)%b(k-i)`` and ``a(j*2-1+i)%b(-i+k)`` will be
-considered to be equal:
-
-1. Converting the two expressions to SymPy internally results in
-   ``Mod(a(i+2*j-1), b(k-i))`` and ``Mod(a(j*2-1+i, b(-i+k))``.
-2. Since nothing is known about the arguments of any of the ``Mod``
-   functions, SymPy will first detect that the same function is called
-   in both expression, and then continue to compare the arguments of
-   this function.
-3. The first arguments are ``a(i+2*j-1)`` and ``a(j*2-1+i)``.
-   The name ``a`` is considered an unknown function. SymPy detects
-   that both expressions appear to call the same function, and it
-   will therefore compare the arguments.
-4. SymPy compares ``i+2*j-1`` and ``j*2-1+i`` symbolically, and
-   evaluate these expressions to be identical. Therefore, the
-   two expressions ``a(...)`` are identical, so the first arguments
-   of the ``Mod`` function are identical.
-5. Similarly, it will then continue to evaluate the second argument
-   of the ``Mod`` function (``b(...)``), and evaluate them to be
-   identical.
-6. Since all arguments of the ``Mod`` function are identical,
-   SymPy will report these two functions to be the same, which
-   is the expected outcome.
-
-A member of a structure in Fortran becomes a stand-alone symbol (or
-function if it is an array) in SymPy. The SymPy
-writer will rename members to better indicate that they are members:
-an expression like ``a%b%c`` will be written as ``a%a_b%a_b_c``, which
-SymPy then parses as ``MOD(a, MOD(a_b, a_b_c))``. This convention
-makes it easier to identify what the various expressions in SymPy are.
-
-This handling of member variables can result in name clashes. Consider
-the expression ``a%b + a_b + b``. The structure access will be using
-two symbols ``a`` and ``a_b`` - but now there are two different symbols
-with the same name. Note that the renaming of the member from ``b`` to
-``a_b`` is not the reason for this - without renaming the same clash would
-happen with the symbol ``b``.
-
-The SymPy writer uses a symbol table to make sure it creates unique symbols.
+SymPy has no concept of user-defined types like ``a(i)%b`` in Fortran.
+A structure reference like this is converted to a single new symbol
+(scalar) or function (if an array index is involved). The default name
+will be the name of the reference and members concatenated using ``_``,
+e.g. ``a%b%c`` becomes ``a_b_c``, which will be declared as a new SymPy
+symbol or function (if it is an array access). The SymPy writer uses a
+symbol table to make sure it creates unique symbols.
 It first adds all References in the expression to the symbol table, which
-guarantees that no Reference to an existing symbol is renamed. The writer
-then renames all members and makes sure it uses a unique name. In the case of
-``a%b + a_b + b``, it would create ``a%a_b_1 + a_b + b``, using the name
-``a_b_1`` for the member to avoid the name clash with the reference
-``a_b`` - so an existing Reference will not be renamed, only members.
-
-.. note:: At this stage an expression using user-defined types cannot be
-    converted back to a PSyIR (which is what
-    `psyclone.core.SymbolicMaths.expand` does as a final step). This is
-    tracked as issue #2166.
+guarantees that no Reference to an existing symbol is renamed. In the case of
+``a%b + a_b + b``, it would create ``a_b_1 + a_b + b``, using the name
+``a_b_1`` for the structure reference to avoid the name clash with the
+reference ``a_b``.
+
+Any array indices are converted into arguments of this new function. So an
+expression like ``a(i)%b%c(j,k)`` becomes ``a_b_c(i,i,1,j,j,1,k,k,1)``
+(see :ref:`array_expressions`). The ``SymPyWriter`` creates a custom SymPy
+function, which keeps a list of which reference/member contained how many
+indices. In the example this would be ``[1, 0, 2]``, indicating that the
+first reference had one index, the second one none (i.e. it is not an
+array access), and the last reference had two indices. This allows the
+function to properly re-create the Fortran string.
 
 
 Documentation for SymPyWriter Functions