Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ocl: rely on default parameters more consistently #827

Merged
1 commit merged on Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 36 additions & 52 deletions src/acc/opencl/smm/opencl_libsmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,7 @@ int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int stack_size, v
const int inplace = ((m == n) && (NULL == env_inplace ? 0 : ('0' != *env_inplace)));
# endif
const int blockm = ((NULL == env_bm || '\0' == *env_bm) ? 0 : atoi(env_bm));
const int bm = (0 >= blockm ? (NULL == config ? /*default*/ m : /*LIBXSMM_CLMP(config->bm, 1, m)*/ m)
: LIBXSMM_MIN(blockm, m));
const int bm = (0 >= blockm ? m : LIBXSMM_MIN(blockm, m));
opencl_libsmm_trans_t new_config;
memset(&new_config, 0, sizeof(new_config));
switch (datatype) {
Expand Down Expand Up @@ -1060,13 +1059,6 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
const char *const env_ab = getenv("OPENCL_LIBSMM_SMM_AB"), *const env_ac = getenv("OPENCL_LIBSMM_SMM_AC");
const char *const env_xf = getenv("OPENCL_LIBSMM_SMM_XF"), *const env_cl = getenv("OPENCL_LIBSMM_SMM_BUILDOPTS");
const char* const intel_xf = "-cl-intel-256-GRF-per-thread";
const int default_lu = (0 != c_dbcsr_acc_opencl_config.device.intel ? -1 : 0);
const int unroll = LIBXSMM_MAX(-2, (NULL == env_lu || '\0' == *env_lu)
? (0 == kernel_idx ? (NULL == config ? default_lu : config->lu) : default_lu)
: atoi(env_lu)); /* populate only lower bound */
const int blockm = ((NULL == env_bm || '\0' == *env_bm || 1 < unroll) /* 1<LU ignores BM */
? (1 >= unroll ? 0 : LIBXSMM_UP(m_max / unroll, OPENCL_LIBSMM_VMIN))
: atoi(env_bm));
const int blockn = ((NULL == env_bn || '\0' == *env_bn) ? 0 : atoi(env_bn));
const int blockk = ((NULL == env_bk || '\0' == *env_bk) ? 0 : atoi(env_bk));
const int wgmin = ((NULL == env_ws || '\0' == *env_ws) ? 0 : atoi(env_ws));
Expand All @@ -1077,7 +1069,8 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
: LIBXSMM_MIN(OPENCL_LIBSMM_VMIN, m_max))
: 1);
const int default_wg = (((0x0bd0 > devuid || 0x0bdb < devuid)) ? (0 == kernel_idx ? 0 : -2) : -1);
int nbm, nbn;
const int default_lu = (0 != c_dbcsr_acc_opencl_config.device.intel ? -1 : 0);
int defaults, blockm, nbm, nbn;
opencl_libsmm_smm_t new_config;
if (NULL == config) {
memset(&new_config, 0, sizeof(new_config));
Expand All @@ -1086,65 +1079,56 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
memcpy(&new_config, config, sizeof(opencl_libsmm_smm_t));
}
if (NULL == env_xf || '\0' == *env_xf) {
if (0 == c_dbcsr_acc_opencl_config.device.intel || CL_DEVICE_TYPE_GPU != c_dbcsr_acc_opencl_config.device.type ||
NULL == env_cl || NULL == strstr(env_cl, intel_xf))
if (0 != c_dbcsr_acc_opencl_config.device.intel && CL_DEVICE_TYPE_GPU == c_dbcsr_acc_opencl_config.device.type &&
NULL != env_cl && NULL != strstr(env_cl, intel_xf))
{
new_config.flags = (NULL == config ? /*default*/ 0 : config->flags);
new_config.flags = 1;
}
else new_config.flags = 1;
}
else new_config.flags = atoi(env_xf);
new_config.lu = unroll;
defaults = ((NULL == config || 0 != kernel_idx || (NULL != config && new_config.flags != config->flags)) ? 1 : 0);
new_config.lu = LIBXSMM_MAX(-2, (NULL == env_lu || '\0' == *env_lu) ? (0 != defaults ? default_lu : config->lu)
: atoi(env_lu)); /* populate only lower bound */
blockm = ((NULL == env_bm || '\0' == *env_bm || 1 < new_config.lu) /* 1<LU ignores BM */
? (1 >= new_config.lu ? 0 : LIBXSMM_UP(m_max / new_config.lu, OPENCL_LIBSMM_VMIN))
: atoi(env_bm));
/* two defaults for new_config parameters: 1st - regular, 2nd - BS=1 kernel */
new_config.bm = (0 >= blockm ? (0 == kernel_idx ? (NULL == config ? LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BM, m_max)
: LIBXSMM_CLMP(config->bm, 1, m_max))
new_config.bm = (0 >= blockm ? (0 == kernel_idx ? (0 != defaults ? LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BM, m_max)
: LIBXSMM_CLMP(config->bm, 1, m_max))
: LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BM, m_max))
: LIBXSMM_MIN(blockm, m_max));
new_config.bn = (0 >= blockn ? (0 == kernel_idx ? (NULL == config ? LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BN, n_max)
: LIBXSMM_CLMP(config->bn, 1, n_max))
new_config.bn = (0 >= blockn ? (0 == kernel_idx ? (0 != defaults ? LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BN, n_max)
: LIBXSMM_CLMP(config->bn, 1, n_max))
: LIBXSMM_MIN(OPENCL_LIBSMM_DEFAULT_BN, n_max))
: LIBXSMM_MIN(blockn, n_max));
new_config.bk = (0 >= blockk ? (NULL == config ? default_bk : LIBXSMM_CLMP(config->bk, 1, m_max))
new_config.bk = (0 >= blockk ? (0 != defaults ? default_bk : LIBXSMM_CLMP(config->bk, 1, m_max))
: LIBXSMM_MIN(blockk, m_max));
new_config.ws = (0 >= wgmin ? (0 == kernel_idx ? (NULL == config ? /*default*/ LIBXSMM_MAX(m_max, n_max)
: LIBXSMM_CLMP(config->ws, 1, n_max * m_max))
: /*default*/ LIBXSMM_MAX(m_max, n_max))
new_config.ws = (0 >= wgmin ? (0 == kernel_idx ? (0 != defaults ? LIBXSMM_MAX(m_max, n_max)
: LIBXSMM_CLMP(config->ws, 1, n_max * m_max))
: LIBXSMM_MAX(m_max, n_max))
: LIBXSMM_MIN(wgmin, n_max * m_max));
new_config.wg = LIBXSMM_CLMP(
(NULL == env_wg || '\0' == *env_wg) ? (NULL == config ? default_wg : config->wg) : atoi(env_wg), -2, 2);
new_config.nz = LIBXSMM_CLMP((NULL == env_nz || '\0' == *env_nz)
? (0 == kernel_idx ? (NULL == config ? /*default*/ 0 : config->nz) : /*default*/ 0)
: atoi(env_nz),
0, 1);
(NULL == env_wg || '\0' == *env_wg) ? (0 != defaults ? default_wg : config->wg) : atoi(env_wg), -2, 2);
new_config.nz = LIBXSMM_CLMP(
(NULL == env_nz || '\0' == *env_nz) ? (0 != defaults ? /*default*/ 0 : config->nz) : atoi(env_nz), 0, 1);
new_config.al = LIBXSMM_CLMP(/* bug: AL=1 */
(NULL == env_al || '\0' == *env_al)
? (0 == (32 & c_dbcsr_acc_opencl_config.wa) ? (0 == kernel_idx ? (NULL == config ? 0 : config->al) : 0) : 0)
? (0 == (32 & c_dbcsr_acc_opencl_config.wa) ? (0 != defaults ? 0 : config->al) : 0)
: atoi(env_al),
0, 1);
new_config.tb = LIBXSMM_CLMP((NULL == env_tb || '\0' == *env_tb)
? (0 == kernel_idx ? (NULL == config ? /*default*/ 0 : config->tb) : /*default*/ 0)
: atoi(env_tb),
0, 1);
new_config.tc = LIBXSMM_CLMP((NULL == env_tc || '\0' == *env_tc)
? (0 == kernel_idx ? (NULL == config ? /*default*/ 1 : config->tc) : /*default*/ 1)
: atoi(env_tc),
0, 1);
new_config.ap = LIBXSMM_CLMP((NULL == env_ap || '\0' == *env_ap)
? (0 == kernel_idx ? (NULL == config ? /*default*/ 0 : config->ap) : /*default*/ 0)
: atoi(env_ap),
0, 1);
new_config.tb = LIBXSMM_CLMP(
(NULL == env_tb || '\0' == *env_tb) ? (0 != defaults ? /*default*/ 0 : config->tb) : atoi(env_tb), 0, 1);
new_config.tc = LIBXSMM_CLMP(
(NULL == env_tc || '\0' == *env_tc) ? (0 != defaults ? /*default*/ 1 : config->tc) : atoi(env_tc), 0, 1);
new_config.ap = LIBXSMM_CLMP(
(NULL == env_ap || '\0' == *env_ap) ? (0 != defaults ? /*default*/ 0 : config->ap) : atoi(env_ap), 0, 1);
new_config.aa = LIBXSMM_CLMP(/* bug: AA=2 XF=1 */
(NULL == env_aa || '\0' == *env_aa) ? (0 == kernel_idx ? (NULL == config ? default_aa : config->aa) : default_aa)
: atoi(env_aa),
0, (0 == (64 & c_dbcsr_acc_opencl_config.wa) || 0 == new_config.flags) ? 2 : 1);
new_config.ab = LIBXSMM_CLMP((NULL == env_ab || '\0' == *env_ab)
? (0 == kernel_idx ? (NULL == config ? default_ab : config->ab) : default_ab)
: atoi(env_ab),
0, 2);
new_config.ac = LIBXSMM_CLMP((NULL == env_ac || '\0' == *env_ac)
? (0 == kernel_idx ? (NULL == config ? default_ac : config->ac) : default_ac)
: atoi(env_ac),
0, 1);
(NULL == env_aa || '\0' == *env_aa) ? (0 != defaults ? default_aa : config->aa) : atoi(env_aa), 0,
(0 == (64 & c_dbcsr_acc_opencl_config.wa) || 0 == new_config.flags) ? 2 : 1);
new_config.ab = LIBXSMM_CLMP(
(NULL == env_ab || '\0' == *env_ab) ? (0 != defaults ? default_ab : config->ab) : atoi(env_ab), 0, 2);
new_config.ac = LIBXSMM_CLMP(
(NULL == env_ac || '\0' == *env_ac) ? (0 != defaults ? default_ac : config->ac) : atoi(env_ac), 0, 1);
if (0 >= new_config.s) new_config.s = stack_size;
if (0 == kernel_idx || 1 >= new_config.bs) new_config.bs = bs;
nbm = (m_max + new_config.bm - 1) / new_config.bm;
Expand Down
24 changes: 13 additions & 11 deletions src/acc/opencl/smm/tune_multiply.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,10 @@ def ilog2(n):


class SmmTuner(MeasurementInterface):
def __init__(self, args, argd):
def __init__(self, args):
"""Setup common state and define search space"""
super(SmmTuner, self).__init__(args)
manipulator = ConfigurationManipulator()
if self.args.jsondir == argd.jsondir and os.path.isdir(self.args.mnk):
self.args.jsondir = self.args.mnk
self.args.mnk = default_mnk
elif not self.args.mnk: # parse and sanitize kernel shape
self.args.mnk = default_mnk
mnk = tuple(max(int(i), 1) for i in self.args.mnk.split("x"))
self.mnk = (mnk + (mnk[0], mnk[0]))[:3]
self.wsx = self.mnk[0] * self.mnk[1]
Expand Down Expand Up @@ -181,7 +176,7 @@ def __init__(self, args, argd):
and (self.typeid and 0 < self.ndevices)
and (self.size and 0 < self.size)
): # setup database (DB)
if args.database is None: # adjust DB-location
if self.args.database is None: # adjust DB-location
envrank = os.getenv("PMI_RANK", os.getenv("OMPI_COMM_WORLD_LOCAL_RANK"))
tmpdir = os.path.join(tempfile.gettempdir(), "opentuner")
if envrank:
Expand Down Expand Up @@ -288,7 +283,7 @@ def seed_configurations(self):
]

def objective(self):
if 0 == args.tlevel:
if 0 == self.args.tlevel:
return opentuner.search.objective.MaximizeAccuracyMinimizeSize()
else:
return opentuner.search.objective.MaximizeAccuracy()
Expand Down Expand Up @@ -900,11 +895,18 @@ def handle_sigint(self, signum, frame):
os.environ["OPENCL_LIBSMM_SMM_LU"] = "{}".format(args.lu)
if 0 == args.mb:
args.mb = 64
instance = SmmTuner(args, argd)
# more flexible handling of positional/first argument
if args.jsondir == argd.jsondir and os.path.isdir(args.mnk):
args.jsondir = args.mnk
args.mnk = default_mnk
elif not args.mnk: # parse and sanitize kernel shape
args.mnk = default_mnk
# construct tuner instance
instance = SmmTuner(args)
if not default_dbg:
for retry in range(default_retry):
try:
TuningRunMain(instance, args, argd).main()
TuningRunMain(instance, args).main()
exit(0)
except Exception as e:
ign = (
Expand All @@ -916,4 +918,4 @@ def handle_sigint(self, signum, frame):
pass
instance.save_final_config(None, True)
else:
TuningRunMain(instance, args, argd).main()
TuningRunMain(instance, args).main()
Loading