From 555314bd25ba79cbce4cd0c1b4a8b7e9f82df5b6 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Fri, 23 Aug 2024 14:36:50 +0100 Subject: [PATCH] #2671 Improve the parallel array privatisation implementation --- examples/nemo/scripts/omp_cpu_trans.py | 2 + examples/nemo/scripts/utils.py | 6 +- src/psyclone/psyir/nodes/node.py | 23 +++++++ src/psyclone/psyir/nodes/omp_directives.py | 10 ++- src/psyclone/psyir/symbols/datasymbol.py | 2 +- .../transformations/parallel_loop_trans.py | 43 +++++++++---- .../parallel_loop_trans_test.py | 62 +++++++++++++------ 7 files changed, 112 insertions(+), 36 deletions(-) diff --git a/examples/nemo/scripts/omp_cpu_trans.py b/examples/nemo/scripts/omp_cpu_trans.py index 818f8788c7..26ea19f1c5 100755 --- a/examples/nemo/scripts/omp_cpu_trans.py +++ b/examples/nemo/scripts/omp_cpu_trans.py @@ -96,4 +96,6 @@ def trans(psyir): loop_directive_trans=omp_loop_trans, # Collapse may be useful in some architecture/compiler collapse=False, + privatise_arrays=True + # privatise_arrays=True if psyir.name == "zdftke.f90" else False ) diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index 051e3d1e8c..f471393770 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -291,6 +291,7 @@ def insert_explicit_loop_parallelism( region_directive_trans=None, loop_directive_trans=None, collapse: bool = True, + privatise_arrays: bool = True, ): ''' For each loop in the schedule that doesn't already have a Directive as an ancestor, attempt to insert the given region and loop directives. @@ -307,6 +308,8 @@ def insert_explicit_loop_parallelism( :py:class:`psyclone.transformation.Transformation` :param collapse: whether to attempt to insert the collapse clause to as many nested loops as possible. + :param privatise_arrays: whether to attempt to privatise arrays that cause + write-write race conditions. ''' # Add the parallel directives in each loop @@ -314,7 +317,8 @@ def insert_explicit_loop_parallelism( if loop.ancestor(Directive): continue # Skip if an outer loop is already parallelised - opts = {"collapse": collapse, "verbose": True} + opts = {"collapse": collapse, "privatise_arrays": privatise_arrays, + "verbose": True} routine_name = loop.ancestor(Routine).name diff --git a/src/psyclone/psyir/nodes/node.py b/src/psyclone/psyir/nodes/node.py index 7b838f28f6..f3d08970da 100644 --- a/src/psyclone/psyir/nodes/node.py +++ b/src/psyclone/psyir/nodes/node.py @@ -1371,6 +1371,29 @@ def immediately_follows(self, node_1): and self.position == node_1.position + 1 ) + def is_inside_of(self, node): + ''' + :param node: the node to compare it to. + + :returns: whether this node is a decendant of the given node. + :rtype: bool + ''' + if self.root != node.root: + return False + + if self.abs_position < node.abs_position: + # It is before node, not inside + return False + + if node.parent and len(node.parent.children) > node.position + 1: + next_node = node.parent.children[node.position + 1] + if self.abs_position >= next_node.abs_position: + # It is after node, not inside + return False + + # If its not before or after, and root is the same, it must be inside + return True + def coded_kernels(self): ''' Returns a list of all of the user-supplied kernels (as opposed to diff --git a/src/psyclone/psyir/nodes/omp_directives.py b/src/psyclone/psyir/nodes/omp_directives.py index 6bb9f48de4..b7ecdb7301 100644 --- a/src/psyclone/psyir/nodes/omp_directives.py +++ b/src/psyclone/psyir/nodes/omp_directives.py @@ -74,7 +74,7 @@ from psyclone.psyir.nodes.schedule import Schedule from psyclone.psyir.nodes.structure_reference import StructureReference from psyclone.psyir.nodes.while_loop import WhileLoop -from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType +from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType, DataSymbol # OMP_OPERATOR_MAPPING is used to determine the operator to use in the # reduction clause of an OpenMP directive. @@ -1598,9 +1598,13 @@ def infer_sharing_attributes(self): # only apply to a sub-component, this won't be captured # appropriately. name = signature.var_name - symbol = accesses[0].node.scope.symbol_table.lookup(name) + try: + symbol = accesses[0].node.scope.symbol_table.lookup(name) + except KeyError: + symbol = None + # If it is manually marked as threadprivate, add it to private - if symbol.is_thread_private: + if isinstance(symbol, DataSymbol) and symbol.is_thread_private: private.add(symbol) continue diff --git a/src/psyclone/psyir/symbols/datasymbol.py b/src/psyclone/psyir/symbols/datasymbol.py index efea8b4390..fe913b80ff 100644 --- a/src/psyclone/psyir/symbols/datasymbol.py +++ b/src/psyclone/psyir/symbols/datasymbol.py @@ -131,7 +131,7 @@ def _process_arguments(self, **kwargs): if "is_thread_private" in kwargs: new_is_thread_private = kwargs.pop("is_thread_private") - if not hasattr(self, '_is_constant'): + if not hasattr(self, '_is_thread_private'): # At least initialise it if we reach this point and it doesn't # exist (which may happen if this symbol has been specialised from # a Symbol). diff --git a/src/psyclone/psyir/transformations/parallel_loop_trans.py b/src/psyclone/psyir/transformations/parallel_loop_trans.py index 1dccac4b37..9c5b8b50df 100644 --- a/src/psyclone/psyir/transformations/parallel_loop_trans.py +++ b/src/psyclone/psyir/transformations/parallel_loop_trans.py @@ -124,8 +124,7 @@ def validate(self, node, options=None): force = options.get("force", False) ignore_dependencies_for = options.get("ignore_dependencies_for", []) sequential = options.get("sequential", False) - array_privatisation = options.get("array_privatisation", - False) + privatise_arrays = options.get("privatise_arrays", False) # Check we are not a sequential loop if (not sequential and isinstance(node, PSyLoop) and @@ -142,10 +141,10 @@ def validate(self, node, options=None): f"The 'collapse' argument must be an integer or a bool but" f" got an object of type {type(collapse)}") - if not isinstance(array_privatisation, bool): + if not isinstance(privatise_arrays, bool): raise TypeError( - f"The 'array_privatisation' option must be a bool" - f"but got an object of type {type(array_privatisation)}") + f"The 'privatise_arrays' option must be a bool" + f"but got an object of type {type(privatise_arrays)}") # If it's sequential or we 'force' the transformation, the validations # below this point are skipped @@ -184,23 +183,43 @@ def validate(self, node, options=None): signatures_to_ignore=signatures): # The DependencyTools also returns False for things that are # not an issue, so we ignore specific messages. + errors = [] for message in dep_tools.get_all_messages(): if message.code == DTCode.WARN_SCALAR_WRITTEN_ONCE: continue - if (array_privatisation and + if (privatise_arrays and message.code == DTCode.ERROR_WRITE_WRITE_RACE): + privatisable = True for var_name in message.var_names: symbol = node.scope.symbol_table.lookup(var_name) + + # It must ONLY be referenced in this loop + symbol_scope = symbol.find_symbol_table(node).node + + if any(reference.symbol is symbol and + not reference.is_inside_of(node) for + reference in symbol_scope.walk(Reference)): + privatisable = False + break + symbol.is_thread_private = True + if not privatisable: + errors.append( + f"The write-write dependency in '{var_name}'" + f" cannot be solved by array privatisation because" + f" the variable is used outside the loop") continue - all_msg_str = "\n".join([str(m) for m in - dep_tools.get_all_messages()]) - messages = (f"Loop cannot be parallelised because the " - f"dependency analysis reported:\n{all_msg_str}\n" + errors.append(str(message)) + + if errors: + error_lines = "\n".join(errors) + messages = (f"Loop cannot be parallelised because:\n" + f"{error_lines}\n" f"Consider using the \"ignore_dependencies_for\"" f" transformation option if this is a false " - f"dependency or the \"array_privatisation\" (if " - f"this is an applicable write-write dependency).") + f"dependency\nConsider using the \"array_" + f"privatisation\" transformation option if " + f"this is a write-write dependency") if verbose: # This message can get quite long, we will skip it if an # ancestor loop already has the exact same message diff --git a/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py b/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py index 8ec30bc040..2b9af2993a 100644 --- a/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py @@ -82,7 +82,7 @@ def test_paralooptrans_validate_force(fortran_reader): trans = ParaTrans() with pytest.raises(TransformationError) as err: trans.validate(loop) - assert "because the dependency analysis reported" in str(err.value) + assert "Loop cannot be parallelised because:" in str(err.value) # Set the 'force' option to True - no exception should be raised. trans.validate(loop, {"force": True}) @@ -132,12 +132,12 @@ def test_paralooptrans_validate_ignore_dependencies_for(fortran_reader, trans = ParaTrans() with pytest.raises(TransformationError) as err: trans.validate(loop, options={"verbose": True}) - assert ("Loop cannot be parallelised because the dependency analysis " - "reported:\nWarning: Variable 'sum' is read first, which indicates" - " a reduction. Variable: 'sum'.") in str(err.value) + assert ("Loop cannot be parallelised because:\nWarning: Variable 'sum' is " + "read first, which indicates a reduction. Variable: 'sum'." + in str(err.value)) # With the verbose option, the dependency will be reported in a comment - assert ("PSyclone: Loop cannot be parallelised because the dependency" - " analysis reported:" in loop.preceding_comment) + assert ("PSyclone: Loop cannot be parallelised because:" + in loop.preceding_comment) # Test that the inner loop does not log again the same error message with pytest.raises(TransformationError) as err: @@ -149,7 +149,7 @@ def test_paralooptrans_validate_ignore_dependencies_for(fortran_reader, loop.preceding_comment = "" with pytest.raises(TransformationError) as err: trans.validate(loop.loop_body[0], options={"verbose": True}) - assert ("PSyclone: Loop cannot be parallelised because the dependency" + assert ("PSyclone: Loop cannot be parallelised because:" in loop.loop_body[0].preceding_comment) # Now use the 'ignore_dependencies_for' option @@ -321,7 +321,7 @@ def test_paralooptrans_validate_sequential(fortran_reader): trans = ParaTrans() with pytest.raises(TransformationError) as err: trans.validate(loop) - assert "because the dependency analysis reported" in str(err.value) + assert "Loop cannot be parallelised because" in str(err.value) # Set the 'sequential' option to True - no exception should be raised. trans.validate(loop, {"sequential": True}) @@ -443,9 +443,6 @@ def test_paralooptrans_validate_all_vars(fortran_reader): with pytest.raises(TransformationError) as err: trans.validate(loop) err_text = str(err.value) - # Should have two messages (the first being the one that is ignored by - # validate). - assert "'ipivot' is only written once" in err_text assert ("Variable 'zval1' is read first, which indicates a reduction" in err_text) @@ -481,12 +478,12 @@ def test_paralooptrans_apply(fortran_reader): assert isinstance(loop.parent.parent, OMPParallelDoDirective) -def test_paralooptranas_apply_with_array_privatisation(fortran_reader, - fortran_writer): +def test_paralooptranas_with_array_privatisation(fortran_reader, + fortran_writer): ''' - Check that the apply() method works as expected, including passing - `options` down to validate(). - + Check that the 'privatise_arrays' transformation option allows to ignore + write-write dependencies by setting the associated variable as 'private' + (only if it's never used outside the loop). ''' psyir = fortran_reader.psyir_from_source(''' subroutine my_sub() @@ -508,13 +505,40 @@ def test_paralooptranas_apply_with_array_privatisation(fortran_reader, loop = psyir.walk(Loop)[0] trans = ParaTrans() - # By default this can not be parallelised because all arrays are considered - # shared + # By default this can not be parallelised because 'ztmp' is shared with pytest.raises(TransformationError) as err: trans.apply(loop) assert "ztmp(jj)\' causes a write-write race condition." in str(err.value) # Now enable array privatisation - trans.apply(loop, {"array_privatisation": True}) + trans.apply(loop, {"privatise_arrays": True}) assert ("!$omp parallel do default(shared), private(ji,jj,ztmp)" in fortran_writer(psyir)) + + # If the 'ztmp' is accessed outside the loop the privatisation will fail + psyir = fortran_reader.psyir_from_source(''' + subroutine my_sub() + integer ji, jj + real :: var1(10,10) + real :: ztmp(10) + var1 = 1.0 + ztmp = 3.0 + + do ji = 1, 10 + do jj = 1, 10 + ztmp(jj) = var1(ji, jj) + 1 + end do + do jj = 1, 10 + var1(ji, jj) = ztmp(jj) * 2 + end do + end do + end subroutine my_sub''') + + loop = psyir.walk(Loop)[0] + with pytest.raises(TransformationError) as err: + trans.apply(loop, {"privatise_arrays": True}) + # with and updated error message + assert "ztmp(jj)\' causes a write-write race " not in str(err.value) + assert ("The write-write dependency in 'ztmp' cannot be solved by array " + "privatisation because the variable is used outside the loop" + in str(err.value))