From 60992e12c52aad9275e0e4b6d869ff0fd843a7cc Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Sat, 24 Aug 2024 12:48:14 +0100 Subject: [PATCH] #2671 Allow threadprivate arrays to be firstprivate when necessary --- src/psyclone/psyir/nodes/node.py | 121 +++++++++++------- src/psyclone/psyir/nodes/omp_directives.py | 15 ++- .../transformations/parallel_loop_trans.py | 23 ++-- src/psyclone/tests/psyir/nodes/node_test.py | 100 ++++++++++++++- .../parallel_loop_trans_test.py | 27 +++- 5 files changed, 224 insertions(+), 62 deletions(-) diff --git a/src/psyclone/psyir/nodes/node.py b/src/psyclone/psyir/nodes/node.py index f3d08970da..36a17a65a6 100644 --- a/src/psyclone/psyir/nodes/node.py +++ b/src/psyclone/psyir/nodes/node.py @@ -1272,23 +1272,49 @@ def kernels(self): from psyclone.psyGen import Kern return self.walk(Kern) - def following(self, routine=True): - '''Return all :py:class:`psyclone.psyir.nodes.Node` nodes after this - node. Ordering is depth first. If the `routine` argument is - set to `True` then nodes are only returned if they are - descendents of this node's closest ancestor routine if one - exists. + def following_node(self, routine_scope=True): + ''' + :param bool routine_scope: an optional (default `True`) argument + that enables returing only nodes from the same ancestor routine. + + :returns: the next node (the next sibiling, or if it doesn't have one + the first next sibiling of its ancestors). + :rtype: Optional[:py:class:`psyclone.psyir.nodes.Node`] + + ''' + if not self.parent: + return None + + if len(self.parent.children) > self.position + 1: + return self.parent.children[self.position + 1] + + # Import here to avoid circular dependencies + # pylint: disable=import-outside-toplevel + from psyclone.psyir.nodes import Routine + if routine_scope and isinstance(self.parent, Routine): + return None + + return self.parent.following_node(routine_scope) - :param bool routine: an optional (default `True`) argument \ - that only returns nodes that are within this node's \ - closest ancestor Routine node if one exists. + def following(self, routine_scope=True, include_children=True): + ''' Return all nodes after itself. Ordering is depth first. If the + `routine_scope` argument is set to `True` (default) then only nodes + inside the same ancestor routine are returned. If `include_children` + is set to `True` (default) children of itself are also returned, + otherwise it starts at the next sibiling. + + :param bool routine_scope: an optional (default `True`) argument + that enables returing only nodes from the same ancestor routine. + :param bool include_children: an optional (default `True`) argument + that enables including own children, instead of starting from + the next sibiling. :returns: a list of nodes. - :rtype: :func:`list` of :py:class:`psyclone.psyir.nodes.Node` + :rtype: List[:py:class:`psyclone.psyir.nodes.Node`] ''' root = self.root - if routine: + if routine_scope: # Import here to avoid circular dependencies # pylint: disable=import-outside-toplevel from psyclone.psyir.nodes import Routine @@ -1297,37 +1323,44 @@ def following(self, routine=True): routine_node = self.ancestor(Routine) if routine_node: root = routine_node + + if include_children: + starting_node = self + else: + starting_node = self.following_node(routine_scope) + if starting_node is None: + return [] + all_nodes = root.walk(Node) position = None for index, node in enumerate(all_nodes): - if node is self: - position = index + if node is starting_node: + if include_children: + position = index + 1 # Skip self + else: + position = index break - return all_nodes[position+1:] + return all_nodes[position:] - def preceding(self, reverse=False, routine=True): - '''Return all :py:class:`psyclone.psyir.nodes.Node` nodes before this - node. Ordering is depth first. If the `reverse` argument is - set to `True` then the node ordering is reversed - i.e. returning the nodes closest to this node first. if the - `routine` argument is set to `True` then nodes are only - returned if they are descendents of this node's closest - ancestor routine if one exists. + def preceding(self, reverse=False, routine_scope=True): + ''' Return all nodes before itself. Ordering is depth first. If the + `routine_scope` argument is set to `True` (default) then only nodes + inside the same ancestor routine are returned. If the `reverse` argument + is set to `True` then the node ordering is reversedi.e. returning the + nodes closest to this node first. if the - :param bool reverse: an optional (default `False`) argument \ - that reverses the order of any returned nodes (i.e. makes \ - them 'closest first' if set to true. - :param bool routine: an optional (default `True`) argument \ - that only returns nodes that are within this node's \ - closest ancestor Routine node if one exists. + :param bool reverse: an optional (default `False`) argument that + reverses the order of any returned nodes (i.e. makes them 'closest + first'). + :param bool routine_scope: an optional (default `True`) argument + that enables returing only nodes from the same ancestor routine. :returns: a list of nodes. - :rtype: :func:`list` of :py:class:`psyclone.psyir.nodes.Node` - + :rtype: List[:py:class:`psyclone.psyir.nodes.Node`] ''' root = self.root - if routine: + if routine_scope: # Import here to avoid circular dependencies # pylint: disable=import-outside-toplevel from psyclone.psyir.nodes import Routine @@ -1347,28 +1380,30 @@ def preceding(self, reverse=False, routine=True): nodes.reverse() return nodes - def immediately_precedes(self, node_2): + def immediately_precedes(self, node): ''' - :returns: True if this node immediately precedes `node_2`, False - otherwise + :param node: the node to compare it to. + + :returns: whether this node immediately precedes the given `node`. :rtype: bool ''' return ( - self.sameParent(node_2) - and self in node_2.preceding() - and self.position + 1 == node_2.position + self.sameParent(node) + and self in node.preceding() + and self.position + 1 == node.position ) - def immediately_follows(self, node_1): + def immediately_follows(self, node): ''' - :returns: True if this node immediately follows `node_1`, False - otherwise + :param node: the node to compare it to. + + :returns: whether this node immediately follows the given `node`. :rtype: bool ''' return ( - self.sameParent(node_1) - and self in node_1.following() - and self.position == node_1.position + 1 + self.sameParent(node) + and self in node.following() + and self.position == node.position + 1 ) def is_inside_of(self, node): diff --git a/src/psyclone/psyir/nodes/omp_directives.py b/src/psyclone/psyir/nodes/omp_directives.py index b7ecdb7301..6a2b88dd1f 100644 --- a/src/psyclone/psyir/nodes/omp_directives.py +++ b/src/psyclone/psyir/nodes/omp_directives.py @@ -1536,7 +1536,9 @@ def infer_sharing_attributes(self): This method analyses the directive body and automatically classifies each symbol using the following rules: - - All arrays are shared. + - All arrays that are not marked as threadprivate, are shared. + - Threadprivate arrays are private, or firstprivate if they are used + before the directive. - Scalars that are accessed only once are shared. - Scalars that are read-only or written outside a loop are shared. - Scalars written in multiple iterations of a loop are private, unless: @@ -1605,7 +1607,16 @@ def infer_sharing_attributes(self): # If it is manually marked as threadprivate, add it to private if isinstance(symbol, DataSymbol) and symbol.is_thread_private: - private.add(symbol) + if any(ref.symbol is symbol + for ref in self.following(include_children=False) + if isinstance(ref, Reference)): + continue # Is shared + if any(ref.symbol is symbol for ref in self.preceding() + if isinstance(ref, Reference)): + # If it's used before the loop, make it firstprivate + fprivate.add(symbol) + else: + private.add(symbol) continue # All arrays not explicitly marked as threadprivate are shared diff --git a/src/psyclone/psyir/transformations/parallel_loop_trans.py b/src/psyclone/psyir/transformations/parallel_loop_trans.py index 9c5b8b50df..92048cdd33 100644 --- a/src/psyclone/psyir/transformations/parallel_loop_trans.py +++ b/src/psyclone/psyir/transformations/parallel_loop_trans.py @@ -46,6 +46,7 @@ from psyclone.domain.common.psylayer import PSyLoop from psyclone.psyir import nodes from psyclone.psyir.nodes import Loop, Reference, Call +from psyclone.psyir.symbols import AutomaticInterface from psyclone.psyir.tools import DependencyTools, DTCode from psyclone.psyir.transformations.loop_trans import LoopTrans from psyclone.psyir.transformations.transformation_error import \ @@ -191,23 +192,29 @@ def validate(self, node, options=None): message.code == DTCode.ERROR_WRITE_WRITE_RACE): privatisable = True for var_name in message.var_names: - symbol = node.scope.symbol_table.lookup(var_name) + sym = node.scope.symbol_table.lookup(var_name) - # It must ONLY be referenced in this loop - symbol_scope = symbol.find_symbol_table(node).node + # If it's not a local symbol, we cannot safely analyse + # its lifetime + if not isinstance(sym.interface, AutomaticInterface): + privatisable = False + break - if any(reference.symbol is symbol and - not reference.is_inside_of(node) for - reference in symbol_scope.walk(Reference)): + # It must not be referenced after this loop (before the + # loop is fine because we can use OpenMP/OpenACC + # first-private or Fortran do concurrent local_init()) + if any(r.symbol is sym + for r in node.following(include_children=False) + if isinstance(r, Reference)): privatisable = False break - symbol.is_thread_private = True + sym.is_thread_private = True if not privatisable: errors.append( f"The write-write dependency in '{var_name}'" f" cannot be solved by array privatisation because" - f" the variable is used outside the loop") + f" the variable is used outside the loop.") continue errors.append(str(message)) diff --git a/src/psyclone/tests/psyir/nodes/node_test.py b/src/psyclone/tests/psyir/nodes/node_test.py index 843d2433a4..0d992189dc 100644 --- a/src/psyclone/tests/psyir/nodes/node_test.py +++ b/src/psyclone/tests/psyir/nodes/node_test.py @@ -1515,9 +1515,9 @@ def test_following_preceding(): # 2b: Middle node. 'routine' argument is set to False. Additional # container and routine2 nodes are returned. - assert (multiply1.following(routine=False) == + assert (multiply1.following(routine_scope=False) == [c_ref, d_ref, routine2, assign2, e_ref, zero]) - assert (multiply1.preceding(routine=False) == + assert (multiply1.preceding(routine_scope=False) == [container, routine1, assign1, a_ref, multiply2, b_ref]) @@ -1815,3 +1815,99 @@ def test_get_sibling_lists_with_stopping(fortran_reader): # Second kernel assert len(blocks_to_port[1]) == 1 assert blocks_to_port[1][0] is loops[2] + + +def test_following_node(fortran_reader): + '''Tests that the following_node method works as expected.''' + + psyir = fortran_reader.psyir_from_source(''' + module my_mod + contains + + subroutine test + integer :: i, j, val + + do j = 1, 10 + do i = 1, 10 + if (i == 3) then + val = 1 + end if + end do + val = 2 + end do + end subroutine + + subroutine test2 + end subroutine test2 + end module + ''') + + loops = psyir.walk(Loop) + assignments = psyir.walk(Assignment) + routines = psyir.walk(Routine) + + # If it has a following sibiling, this is the following_node + assert loops[1].following_node() is assignments[1] + assert routines[0].following_node() is routines[1] + + # If it doesn't, but one of its ancestor does, that it the following node + assert assignments[0].following_node() is assignments[1] + + # If they don't (at the routine scope), they return None + assert loops[0].following_node() is None + assert assignments[1].following_node() is None + + # With the routine_scope=False, they keep searching outside the routine + assert loops[0].following_node(routine_scope=False) is routines[1] + assert assignments[1].following_node(routine_scope=False) is routines[1] + + +def test_following(fortran_reader): + '''Tests that the following method works as expected.''' + + psyir = fortran_reader.psyir_from_source(''' + module my_mod + contains + + subroutine test + integer :: i, j, val + + do j = 1, 10 + do i = 1, 10 + if (i == 3) then + val = 1 + end if + end do + val = 2 + end do + val = 3 + end subroutine + + subroutine test2 + end subroutine test2 + end module + ''') + loops = psyir.walk(Loop) + assignments = psyir.walk(Assignment) + routines = psyir.walk(Routine) + + # By default following returns children and following children + # inside the same routine scope + assert loops[0] not in loops[1].following() # before + assert assignments[0] in loops[1].following() # inside + assert assignments[1] in loops[1].following() # after + assert assignments[2] in loops[1].following() # after a parent + assert routines[1] not in loops[1].following() # outside routine + + # If we set routine_scope to False, it returns nodes from outside + assert routines[1] in loops[1].following(routine_scope=False) + + # If we set include_children to False, it only return "after" nodes + assert assignments[0] not in loops[1].following(include_children=False) + assert assignments[1] in loops[1].following(include_children=False) + assert assignments[2] in loops[1].following(include_children=False) + + # Both arguments work together + assert routines[1] not in loops[1].following(include_children=False) + assert routines[1] in loops[1].following(routine_scope=False, + include_children=False) diff --git a/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py b/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py index 2b9af2993a..2bd00c835f 100644 --- a/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/parallel_loop_trans_test.py @@ -483,18 +483,22 @@ def test_paralooptranas_with_array_privatisation(fortran_reader, ''' Check that the 'privatise_arrays' transformation option allows to ignore write-write dependencies by setting the associated variable as 'private' - (only if it's never used outside the loop). ''' psyir = fortran_reader.psyir_from_source(''' subroutine my_sub() integer ji, jj real :: var1(10,10) real :: ztmp(10) + real :: ztmp2(10) var1 = 1.0 + ztmp2 = 1.0 do ji = 1, 10 do jj = 1, 10 - ztmp(jj) = var1(ji, jj) + 1 + if (jj == 4) then + ztmp2(jj) = 4 + end if ! the rest get the value from before the loop + ztmp(jj) = var1(ji, jj) + ztmp2(jj) end do do jj = 1, 10 var1(ji, jj) = ztmp(jj) * 2 @@ -509,36 +513,45 @@ def test_paralooptranas_with_array_privatisation(fortran_reader, with pytest.raises(TransformationError) as err: trans.apply(loop) assert "ztmp(jj)\' causes a write-write race condition." in str(err.value) + assert "ztmp2(jj)\' causes a write-write race condition." in str(err.value) # Now enable array privatisation trans.apply(loop, {"privatise_arrays": True}) - assert ("!$omp parallel do default(shared), private(ji,jj,ztmp)" in - fortran_writer(psyir)) + assert ("!$omp parallel do default(shared), private(ji,jj,ztmp), " + "firstprivate(ztmp2)" in fortran_writer(psyir)) - # If the 'ztmp' is accessed outside the loop the privatisation will fail + # If the array is accessed after the loop or is a not an automatic + # interface the privatisation will fail psyir = fortran_reader.psyir_from_source(''' subroutine my_sub() integer ji, jj real :: var1(10,10) - real :: ztmp(10) + real, save :: ztmp1(10) + real :: ztmp2(10) var1 = 1.0 ztmp = 3.0 do ji = 1, 10 do jj = 1, 10 + ztmp2(jj) = 3 ztmp(jj) = var1(ji, jj) + 1 end do do jj = 1, 10 var1(ji, jj) = ztmp(jj) * 2 end do end do + call something(ztmp2) end subroutine my_sub''') loop = psyir.walk(Loop)[0] with pytest.raises(TransformationError) as err: trans.apply(loop, {"privatise_arrays": True}) - # with and updated error message + # with and updated error messages assert "ztmp(jj)\' causes a write-write race " not in str(err.value) + assert "ztmp2(jj)\' causes a write-write race " not in str(err.value) assert ("The write-write dependency in 'ztmp' cannot be solved by array " "privatisation because the variable is used outside the loop" in str(err.value)) + assert ("The write-write dependency in 'ztmp2' cannot be solved by array " + "privatisation because the variable is used outside the loop" + in str(err.value))