#2671 Improve the parallel array privatisation implementation

sergisiso committed Aug 23, 2024
1 parent 52888e8 commit 555314b
Showing 7 changed files with 112 additions and 36 deletions.
2 changes: 2 additions & 0 deletions examples/nemo/scripts/omp_cpu_trans.py

@@ -96,4 +96,6 @@ def trans(psyir):
        loop_directive_trans=omp_loop_trans,
        # Collapse may be useful in some architecture/compiler
        collapse=False,
        privatise_arrays=True
        # privatise_arrays=True if psyir.name == "zdftke.f90" else False
    )
6 changes: 5 additions & 1 deletion examples/nemo/scripts/utils.py

@@ -291,6 +291,7 @@ def insert_explicit_loop_parallelism(
        region_directive_trans=None,
        loop_directive_trans=None,
        collapse: bool = True,
        privatise_arrays: bool = True,
):
    ''' For each loop in the schedule that doesn't already have a Directive
    as an ancestor, attempt to insert the given region and loop directives.

@@ -307,14 +308,17 @@
        :py:class:`psyclone.transformation.Transformation`
    :param collapse: whether to attempt to insert the collapse clause to as
        many nested loops as possible.
    :param privatise_arrays: whether to attempt to privatise arrays that cause
        write-write race conditions.
    '''
    # Add the parallel directives in each loop
    for loop in schedule.walk(Loop):
        if loop.ancestor(Directive):
            continue  # Skip if an outer loop is already parallelised

        opts = {"collapse": collapse, "verbose": True}
        opts = {"collapse": collapse, "privatise_arrays": privatise_arrays,
                "verbose": True}

        routine_name = loop.ancestor(Routine).name
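For context, the option added above is simply forwarded through the `opts` dictionary to the loop transformation. A minimal sketch of a driver using it; the `OMPLoopTrans` choice, the `parallelise` function name and the try/except fallback are assumptions modelled on the NEMO scripts, not part of this diff:

from psyclone.psyir.nodes import Directive, Loop
from psyclone.psyir.transformations import TransformationError
from psyclone.transformations import OMPLoopTrans

def parallelise(schedule):
    omp_loop_trans = OMPLoopTrans()
    for loop in schedule.walk(Loop):
        if loop.ancestor(Directive):
            continue  # an outer loop is already parallelised
        # Forward the new option so that write-write races on local work
        # arrays may be resolved by privatising them.
        opts = {"collapse": True, "privatise_arrays": True, "verbose": True}
        try:
            omp_loop_trans.apply(loop, options=opts)
        except TransformationError:
            pass  # leave this loop sequential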
23 changes: 23 additions & 0 deletions src/psyclone/psyir/nodes/node.py

@@ -1371,6 +1371,29 @@ def immediately_follows(self, node_1):
            and self.position == node_1.position + 1
        )

    def is_inside_of(self, node):
        '''
        :param node: the node to compare it to.
        :returns: whether this node is a descendant of the given node.
        :rtype: bool
        '''
        if self.root != node.root:
            return False

        if self.abs_position < node.abs_position:
            # It is before node, not inside
            return False

        if node.parent and len(node.parent.children) > node.position + 1:
            next_node = node.parent.children[node.position + 1]
            if self.abs_position >= next_node.abs_position:
                # It is after node, not inside
                return False

        # If it is not before or after, and the roots are the same, it must
        # be inside
        return True

    def coded_kernels(self):
        '''
        Returns a list of all of the user-supplied kernels (as opposed to
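A quick sketch of how the new helper behaves on a concrete tree; the Fortran snippet is illustrative, not taken from the PSyclone test suite:

from psyclone.psyir.frontend.fortran import FortranReader
from psyclone.psyir.nodes import Assignment, Loop

psyir = FortranReader().psyir_from_source('''
subroutine sub()
  integer :: i
  real :: a(10)
  do i = 1, 10
    a(i) = 0.0
  end do
  a(1) = 1.0
end subroutine sub''')

loop = psyir.walk(Loop)[0]
in_loop, after_loop = psyir.walk(Assignment)
assert in_loop.is_inside_of(loop)         # a(i) = 0.0 sits in the loop body
assert not after_loop.is_inside_of(loop)  # a(1) = 1.0 follows the loop

Note that the check relies purely on pre-order `abs_position` comparisons against the given node and its next sibling, rather than walking `self`'s ancestor chain.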
10 changes: 7 additions & 3 deletions src/psyclone/psyir/nodes/omp_directives.py

@@ -74,7 +74,7 @@
from psyclone.psyir.nodes.schedule import Schedule
from psyclone.psyir.nodes.structure_reference import StructureReference
from psyclone.psyir.nodes.while_loop import WhileLoop
from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType
from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType, DataSymbol

# OMP_OPERATOR_MAPPING is used to determine the operator to use in the
# reduction clause of an OpenMP directive.
@@ -1598,9 +1598,13 @@ def infer_sharing_attributes(self):
                # only apply to a sub-component, this won't be captured
                # appropriately.
                name = signature.var_name
                symbol = accesses[0].node.scope.symbol_table.lookup(name)
                try:
                    symbol = accesses[0].node.scope.symbol_table.lookup(name)
                except KeyError:
                    symbol = None

                # If it is manually marked as threadprivate, add it to private
                if symbol.is_thread_private:
                if isinstance(symbol, DataSymbol) and symbol.is_thread_private:
                    private.add(symbol)
                    continue
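The guarded lookup matters because a signature's name may not resolve to a symbol in the current scope, and a generic `Symbol` (unlike a `DataSymbol`) carries no `is_thread_private` attribute. A minimal sketch of the pattern, with illustrative symbol names:

from psyclone.psyir.symbols import DataSymbol, Symbol, REAL_TYPE

unresolved = None                        # the lookup raised KeyError
generic = Symbol("zgen")                 # no is_thread_private attribute
private_sym = DataSymbol("zwrk", REAL_TYPE)
private_sym.is_thread_private = True

for sym in (unresolved, generic, private_sym):
    # Mirrors the guard in infer_sharing_attributes(): only a DataSymbol
    # can be marked thread-private, so check the type before the flag.
    if isinstance(sym, DataSymbol) and sym.is_thread_private:
        print(f"'{sym.name}' goes in the private clause")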
2 changes: 1 addition & 1 deletion src/psyclone/psyir/symbols/datasymbol.py

@@ -131,7 +131,7 @@ def _process_arguments(self, **kwargs):

if "is_thread_private" in kwargs:
new_is_thread_private = kwargs.pop("is_thread_private")
if not hasattr(self, '_is_constant'):
if not hasattr(self, '_is_thread_private'):
# At least initialise it if we reach this point and it doesn't
# exist (which may happen if this symbol has been specialised from
# a Symbol).
Expand Down
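This one-word fix matters when a plain `Symbol` is specialised into a `DataSymbol` in place: `__init__` is not re-run, so the private attribute must be created on demand. A small sketch of that path (the default value of `False` is my inference from the initialisation comment above):

from psyclone.psyir.symbols import DataSymbol, Symbol, REAL_TYPE

sym = Symbol("zfld")
# In-place specialisation: DataSymbol.__init__ is not called, so
# _is_thread_private does not exist yet and _process_arguments must
# initialise it before use.
sym.specialise(DataSymbol, datatype=REAL_TYPE)
print(sym.is_thread_private)  # assumed to default to False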
43 changes: 31 additions & 12 deletions src/psyclone/psyir/transformations/parallel_loop_trans.py

@@ -124,8 +124,7 @@ def validate(self, node, options=None):
force = options.get("force", False)
ignore_dependencies_for = options.get("ignore_dependencies_for", [])
sequential = options.get("sequential", False)
array_privatisation = options.get("array_privatisation",
False)
privatise_arrays = options.get("privatise_arrays", False)

# Check we are not a sequential loop
if (not sequential and isinstance(node, PSyLoop) and
Expand All @@ -142,10 +141,10 @@ def validate(self, node, options=None):
f"The 'collapse' argument must be an integer or a bool but"
f" got an object of type {type(collapse)}")

if not isinstance(array_privatisation, bool):
if not isinstance(privatise_arrays, bool):
raise TypeError(
f"The 'array_privatisation' option must be a bool"
f"but got an object of type {type(array_privatisation)}")
f"The 'privatise_arrays' option must be a bool"
f"but got an object of type {type(privatise_arrays)}")

# If it's sequential or we 'force' the transformation, the validations
# below this point are skipped
Expand Down Expand Up @@ -184,23 +183,43 @@ def validate(self, node, options=None):
                    signatures_to_ignore=signatures):
                # The DependencyTools also returns False for things that are
                # not an issue, so we ignore specific messages.
                errors = []
                for message in dep_tools.get_all_messages():
                    if message.code == DTCode.WARN_SCALAR_WRITTEN_ONCE:
                        continue
                    if (array_privatisation and
                    if (privatise_arrays and
                            message.code == DTCode.ERROR_WRITE_WRITE_RACE):
                        privatisable = True
                        for var_name in message.var_names:
                            symbol = node.scope.symbol_table.lookup(var_name)

                            # It must ONLY be referenced in this loop
                            symbol_scope = symbol.find_symbol_table(node).node

                            if any(reference.symbol is symbol and
                                   not reference.is_inside_of(node) for
                                   reference in symbol_scope.walk(Reference)):
                                privatisable = False
                                break

                            symbol.is_thread_private = True
                        if not privatisable:
                            errors.append(
                                f"The write-write dependency in '{var_name}'"
                                f" cannot be solved by array privatisation because"
                                f" the variable is used outside the loop")
                        continue
                    all_msg_str = "\n".join([str(m) for m in
                                             dep_tools.get_all_messages()])
                    messages = (f"Loop cannot be parallelised because the "
                                f"dependency analysis reported:\n{all_msg_str}\n"
                    errors.append(str(message))

                if errors:
                    error_lines = "\n".join(errors)
                    messages = (f"Loop cannot be parallelised because:\n"
                                f"{error_lines}\n"
                                f"Consider using the \"ignore_dependencies_for\""
                                f" transformation option if this is a false "
                                f"dependency or the \"array_privatisation\" (if "
                                f"this is an applicable write-write dependency).")
                                f"dependency\nConsider using the \"array_"
                                f"privatisation\" transformation option if "
                                f"this is a write-write dependency")
                    if verbose:
                        # This message can get quite long, we will skip it if an
                        # ancestor loop already has the exact same message
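The core of the new check is: a write-write race on an array is only fixable by privatisation if every reference to that array lives inside the loop being parallelised. A condensed, standalone illustration of the same test the diff performs; the Fortran source is made up for the example:

from psyclone.psyir.frontend.fortran import FortranReader
from psyclone.psyir.nodes import Loop, Reference

psyir = FortranReader().psyir_from_source('''
subroutine sub()
  integer :: ji, jj
  real :: ztmp(10), var1(10,10)
  do ji = 1, 10
    do jj = 1, 10
      ztmp(jj) = 1.0
    end do
    do jj = 1, 10
      var1(ji, jj) = ztmp(jj)
    end do
  end do
end subroutine sub''')

outer = psyir.walk(Loop)[0]
symbol = outer.scope.symbol_table.lookup("ztmp")
scope_node = symbol.find_symbol_table(outer).node
# Privatisable: no reference to 'ztmp' escapes the outer loop.
privatisable = not any(
    ref.symbol is symbol and not ref.is_inside_of(outer)
    for ref in scope_node.walk(Reference))
print(privatisable)  # True

The remaining hunks below are from the accompanying test module.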
@@ -82,7 +82,7 @@ def test_paralooptrans_validate_force(fortran_reader):
    trans = ParaTrans()
    with pytest.raises(TransformationError) as err:
        trans.validate(loop)
    assert "because the dependency analysis reported" in str(err.value)
    assert "Loop cannot be parallelised because:" in str(err.value)
    # Set the 'force' option to True - no exception should be raised.
    trans.validate(loop, {"force": True})
@@ -132,12 +132,12 @@ def test_paralooptrans_validate_ignore_dependencies_for(fortran_reader,
    trans = ParaTrans()
    with pytest.raises(TransformationError) as err:
        trans.validate(loop, options={"verbose": True})
    assert ("Loop cannot be parallelised because the dependency analysis "
            "reported:\nWarning: Variable 'sum' is read first, which indicates"
            " a reduction. Variable: 'sum'.") in str(err.value)
    assert ("Loop cannot be parallelised because:\nWarning: Variable 'sum' is "
            "read first, which indicates a reduction. Variable: 'sum'."
            in str(err.value))
    # With the verbose option, the dependency will be reported in a comment
    assert ("PSyclone: Loop cannot be parallelised because the dependency"
            " analysis reported:" in loop.preceding_comment)
    assert ("PSyclone: Loop cannot be parallelised because:"
            in loop.preceding_comment)

    # Test that the inner loop does not log the same error message again
    with pytest.raises(TransformationError) as err:
@@ -149,7 +149,7 @@ def test_paralooptrans_validate_ignore_dependencies_for(fortran_reader,
    loop.preceding_comment = ""
    with pytest.raises(TransformationError) as err:
        trans.validate(loop.loop_body[0], options={"verbose": True})
    assert ("PSyclone: Loop cannot be parallelised because the dependency"
    assert ("PSyclone: Loop cannot be parallelised because:"
            in loop.loop_body[0].preceding_comment)

    # Now use the 'ignore_dependencies_for' option
@@ -321,7 +321,7 @@ def test_paralooptrans_validate_sequential(fortran_reader):
    trans = ParaTrans()
    with pytest.raises(TransformationError) as err:
        trans.validate(loop)
    assert "because the dependency analysis reported" in str(err.value)
    assert "Loop cannot be parallelised because" in str(err.value)
    # Set the 'sequential' option to True - no exception should be raised.
    trans.validate(loop, {"sequential": True})
@@ -443,9 +443,6 @@ def test_paralooptrans_validate_all_vars(fortran_reader):
    with pytest.raises(TransformationError) as err:
        trans.validate(loop)
    err_text = str(err.value)
    # Should have two messages (the first being the one that is ignored by
    # validate).
    assert "'ipivot' is only written once" in err_text
    assert ("Variable 'zval1' is read first, which indicates a reduction"
            in err_text)
@@ -481,12 +478,12 @@ def test_paralooptrans_apply(fortran_reader):
    assert isinstance(loop.parent.parent, OMPParallelDoDirective)


def test_paralooptranas_apply_with_array_privatisation(fortran_reader,
                                                       fortran_writer):
def test_paralooptranas_with_array_privatisation(fortran_reader,
                                                 fortran_writer):
    '''
    Check that the apply() method works as expected, including passing
    `options` down to validate().
    Check that the 'privatise_arrays' transformation option allows
    write-write dependencies to be ignored by setting the associated
    variable as 'private' (only if it is never used outside the loop).
    '''
    psyir = fortran_reader.psyir_from_source('''
    subroutine my_sub()

@@ -508,13 +505,40 @@
    loop = psyir.walk(Loop)[0]
    trans = ParaTrans()

    # By default this can not be parallelised because all arrays are considered
    # shared
    # By default this can not be parallelised because 'ztmp' is shared
    with pytest.raises(TransformationError) as err:
        trans.apply(loop)
    assert "ztmp(jj)\' causes a write-write race condition." in str(err.value)

    # Now enable array privatisation
    trans.apply(loop, {"array_privatisation": True})
    trans.apply(loop, {"privatise_arrays": True})
    assert ("!$omp parallel do default(shared), private(ji,jj,ztmp)" in
            fortran_writer(psyir))

    # If 'ztmp' is accessed outside the loop the privatisation will fail
    psyir = fortran_reader.psyir_from_source('''
    subroutine my_sub()
        integer ji, jj
        real :: var1(10,10)
        real :: ztmp(10)
        var1 = 1.0
        ztmp = 3.0
        do ji = 1, 10
            do jj = 1, 10
                ztmp(jj) = var1(ji, jj) + 1
            end do
            do jj = 1, 10
                var1(ji, jj) = ztmp(jj) * 2
            end do
        end do
    end subroutine my_sub''')

    loop = psyir.walk(Loop)[0]
    with pytest.raises(TransformationError) as err:
        trans.apply(loop, {"privatise_arrays": True})
    # ... with an updated error message
    assert "ztmp(jj)\' causes a write-write race " not in str(err.value)
    assert ("The write-write dependency in 'ztmp' cannot be solved by array "
            "privatisation because the variable is used outside the loop"
            in str(err.value))
