diff --git a/setup.py b/setup.py index 6b0d8943d7..9c3314673f 100644 --- a/setup.py +++ b/setup.py @@ -453,7 +453,7 @@ def get_extensions(): cc_flag = ["-DBUILD_PYTHON_PACKAGE"] use_rtn_bf16_convert = os.getenv("ENABLE_HIP_FMHA_RTN_BF16_CONVERT", "0") if use_rtn_bf16_convert == "1": - cc_flag += ["-DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0"] + cc_flag += ["-DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=3"] arch_list = os.getenv("HIP_ARCHITECTURES", "native").split() @@ -471,6 +471,12 @@ def get_extensions(): "-Woverloaded-virtual", "-mllvm", "-enable-post-misched=0", + "-mllvm", + "-amdgpu-early-inline-all=true", + "-mllvm", + "-amdgpu-function-calls=false", + "-mllvm", + "-greedy-reverse-local-assignment=1", ] + generator_flag + cc_flag, diff --git a/third_party/composable_kernel_tiled b/third_party/composable_kernel_tiled index c8b6b64240..73b67f290f 160000 --- a/third_party/composable_kernel_tiled +++ b/third_party/composable_kernel_tiled @@ -1 +1 @@ -Subproject commit c8b6b64240e840a7decf76dfaa13c37da5294c4a +Subproject commit 73b67f290f6602fe0461d48a2c103de460f14084