Skip to content

Commit

Permalink
Add various Keccak-x4 scalar/Neon hybrids
Browse files Browse the repository at this point in the history
  • Loading branch information
hanno-becker committed Oct 1, 2024
1 parent 6ffcd4b commit 85de739
Show file tree
Hide file tree
Showing 6 changed files with 2,776 additions and 1,281 deletions.
131 changes: 62 additions & 69 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class Example():

def __init__(self, infile, name=None, funcname=None, suffix="opt",
rename=False, outfile="", arch=Arch_Armv81M, target=Target_CortexM55r1,
timeout=None, **kwargs):
timeout=None, outfile_full=False, **kwargs):
if name is None:
name = infile

Expand All @@ -67,17 +67,23 @@ def __init__(self, infile, name=None, funcname=None, suffix="opt",
self.funcname = funcname
self.infile = infile
self.suffix = suffix
if outfile == "":
self.outfile = f"{infile}_{self.suffix}_{target_label_dict[self.target]}"
if outfile_full is True:
self.outfile = outfile
else:
self.outfile = f"{outfile}_{self.suffix}_{target_label_dict[self.target]}"
if outfile == "":
self.outfile = f"{infile}_{self.suffix}_{target_label_dict[self.target]}"
else:
self.outfile = f"{outfile}_{self.suffix}_{target_label_dict[self.target]}"
if funcname is None:
self.funcname = self.infile
subfolder = ""
if self.arch == AArch64_Neon:
subfolder = "aarch64/"
self.infile_full = f"examples/naive/{subfolder}{self.infile}.s"
self.outfile_full = f"examples/opt/{subfolder}{self.outfile}.s"
if outfile_full is False:
self.outfile_full = f"examples/opt/{subfolder}{self.outfile}.s"
else:
self.outfile_full = self.outfile
self.name = name
self.rename = rename
self.timeout = timeout
Expand Down Expand Up @@ -1358,46 +1364,6 @@ def core(self, slothy):
slothy.config.sw_pipelining.optimize_postamble = False
slothy.optimize_loop("flt_radix4_fft_loop_start")

class neon_keccak_x4(Example):
    """Example definition for the ``keccak_f1600_x4_hybrid_slothy`` source.

    The input file name is the base name, optionally extended by ``_{var}``.
    The example name (also passed as ``outfile``) additionally carries the
    label looked up for ``target`` in ``target_label_dict``.
    """

    def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
        """Derive the file/example names and forward them to ``Example``.

        Args:
            var: Optional variant suffix; appended as ``_{var}`` to both the
                input file name and the example name when not ``""``.
            arch: Architecture passed through to ``Example.__init__``.
            target: Target passed through to ``Example.__init__``; also used
                as the key into ``target_label_dict`` for the name suffix.
        """
        base = "keccak_f1600_x4_hybrid_slothy"
        variant = f"_{var}" if var != "" else ""
        infile = base + variant
        name = f"{infile}_{target_label_dict[target]}"

        super().__init__(infile, name, outfile=name, rename=True, arch=arch,
                         target=target, timeout=600)

    def core(self, slothy):
        """Configure ``slothy`` and optimize the four labelled regions twice.

        The first pass runs over all regions with ``split_heuristic_repeat``
        set to 0 and naive-interleaving preprocessing enabled. The second
        pass re-runs them with a split factor of 3 and step size 0.2, using
        2 repeats for the ``initial*`` regions and 5 for the ``loop_*``
        regions.
        """
        cfg = slothy.config
        initial_regions = (("initial", "end_initial"),
                           ("initial2", "end_initial2"))
        loop_regions = (("loop_0", "end_loop_0"),
                        ("loop_1", "end_loop_1"))

        cfg.inputs_are_outputs = True
        # TODO: check whether all of these are needed for all code parts
        cfg.reserved_regs = ["sp"]
        cfg.outputs = ["x27"]
        cfg.reserved_regs += self.target_reserved
        cfg.constraints.stalls_first_attempt = 8
        cfg.variable_size = True

        # First pass: split heuristic without repeats, after a naive
        # interleaving preprocessing step.
        cfg.split_heuristic = True
        cfg.split_heuristic_repeat = 0
        cfg.split_heuristic_preprocess_naive_interleaving = True
        for first_label, last_label in initial_regions + loop_regions:
            slothy.optimize(start=first_label, end=last_label)

        # Second pass: same regions, now with repeated split-heuristic runs.
        cfg.split_heuristic = True
        cfg.split_heuristic_factor = 3
        cfg.split_heuristic_stepsize = 0.2
        cfg.split_heuristic_repeat = 2
        for first_label, last_label in initial_regions:
            slothy.optimize(start=first_label, end=last_label)

        cfg.split_heuristic_repeat = 5
        for first_label, last_label in loop_regions:
            slothy.optimize(start=first_label, end=last_label)

class neon_keccak_x1_no_symbolic(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
name = "keccak_f1600_x1_scalar_slothy_no_symbolic"
Expand All @@ -1418,15 +1384,41 @@ def core(self, slothy):

slothy.config.outputs = ["flags"]
slothy.config.constraints.stalls_first_attempt = 64
# slothy.config.ignore_objective = True
slothy.config.constraints.minimize_spills = True
# slothy.config.constraints.functional_only = True
slothy.config.constraints.allow_reordering = True
# slothy.config.constraints.allow_reordering = False
slothy.config.constraints.allow_spills = True
slothy.config.constraints.minimize_spills = True
slothy.config.visualize_expected_performance = True
# slothy.config.visualize_show_old_code = True
slothy.optimize(start="loop", end="end_loop")

slothy.config.outputs = ["hint_STACK_OFFSET_COUNT"]
slothy.optimize(start="initial_round_start", end="initial_round_end")

class neon_keccak_x1_scalar_opt(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
name = "keccak_f1600_x1_scalar_opt"
infile = "keccak_f1600_x1_scalar_pre_opt"
outfile = "keccak_f1600_x1_scalar"

super().__init__(infile, name, outfile=outfile, rename=True, arch=arch, target=target)

def core(self, slothy):
slothy.config.reserved_regs = ["x18", "sp"]

slothy.config.inputs_are_outputs = True
slothy.config.variable_size = True
slothy.config.timeout = 10800

slothy.config.selfcheck_failure_logfile = "selfcheck_fail.log"

slothy.config.outputs = ["flags"]
slothy.config.constraints.stalls_first_attempt = 32
slothy.config.visualize_expected_performance = True
slothy.config.split_heuristic = True
slothy.config.split_heuristic_factor = 1.5
slothy.config.split_heuristic_stepsize = 0.3
slothy.config.split_heuristic_repeat = 1
slothy.config.split_heuristic_optimize_seam = 5

slothy.optimize(start="loop", end="end_loop")

Expand Down Expand Up @@ -1454,49 +1446,50 @@ def core(self, slothy):
slothy.config.outputs = ["flags"]
slothy.config.constraints.stalls_first_attempt = 64
slothy.config.ignore_objective = True
# slothy.config.constraints.minimize_spills = True
slothy.config.constraints.functional_only = True
# slothy.config.constraints.allow_reordering = True
slothy.config.constraints.allow_reordering = False
slothy.config.constraints.allow_spills = True
# slothy.config.constraints.minimize_spills = True
slothy.config.visualize_expected_performance = True
# slothy.config.visualize_show_old_code = True

slothy.optimize(start="loop", end="loop_end")
slothy.config.outputs = ["hint_STACK_OFFSET_COUNT"]
slothy.optimize(start="initial", end="loop")

class neon_keccak_x1_scalar_opt(Example):
class neon_keccak_x4_interleave(Example):
def __init__(self, var="", arch=AArch64_Neon, target=Target_CortexA55):
name = "keccak_f1600_x1_scalar_opt"
infile = "keccak_f1600_x1_scalar_pre_opt"
outfile = "keccak_f1600_x1_scalar"
name = "keccak_f1600_x4_hybrid_slothy_interleave"
infile = "keccak_f1600_x4_hybrid_slothy_clean"
outfile = "examples/naive/aarch64/keccak_f1600_x4_hybrid_slothy_interleaved.s"

super().__init__(infile, name, outfile=outfile, rename=True, arch=arch, target=target)
super().__init__(infile, name, outfile=outfile, rename="keccak_f1600_x4_hybrid_slothy_interleaved",
arch=arch, target=target, outfile_full=True)

def core(self, slothy):
slothy.config.reserved_regs = ["x18", "sp"]

slothy.config.inputs_are_outputs = True
slothy.config.variable_size = True
slothy.config.visualize_expected_performance = False
slothy.config.timeout = 10800

slothy.config.selfcheck_failure_logfile = "selfcheck_fail.log"

slothy.config.outputs = ["flags"]
slothy.config.constraints.stalls_first_attempt = 32
slothy.config.outputs = ["flags", "hint_STACK_OFFSET_COUNT"]
slothy.config.constraints.stalls_first_attempt = 64
slothy.config.ignore_objective = True
slothy.config.constraints.functional_only = True
slothy.config.constraints.allow_reordering = False
slothy.config.constraints.allow_spills = True
slothy.config.visualize_expected_performance = True
slothy.config.split_heuristic = True
slothy.config.split_heuristic_factor = 1.5
slothy.config.split_heuristic_stepsize = 0.3
slothy.config.split_heuristic_repeat = 1
slothy.config.split_heuristic_optimize_seam = 5

slothy.optimize(start="loop", end="end_loop")
slothy.config.split_heuristic = True
slothy.config.split_heuristic_repeat = 0
slothy.config.split_heuristic_preprocess_naive_interleaving = True
slothy.config.split_heuristic_preprocess_naive_interleaving_strategy = "alternate"
slothy.config.split_heuristic_estimate_performance = False
slothy.config.absorb_spills = False

slothy.config.outputs = ["hint_STACK_OFFSET_COUNT"]
slothy.optimize(start="initial_round_start", end="initial_round_end")
slothy.optimize(start="loop", end="loop_end")

#############################################################################################

Expand Down Expand Up @@ -1641,10 +1634,10 @@ def main():
# Fixed point
fft_fixedpoint_radix4(),
# Keccak
neon_keccak_x4(),
neon_keccak_x1_no_symbolic(),
neon_keccak_x1_scalar_opt(),
neon_keccak_x4_no_symbolic(),
neon_keccak_x4_interleave(),
]

all_example_names = [e.name for e in examples]
Expand Down
Loading

0 comments on commit 85de739

Please sign in to comment.