diff --git a/lib/BUILD.bazel b/lib/BUILD.bazel index 4977d9bb2..f0239af29 100644 --- a/lib/BUILD.bazel +++ b/lib/BUILD.bazel @@ -30,6 +30,17 @@ cc_library( copts = ccv_default_copts() ) +cc_library( + name = "metal_cpp_hdrs", + srcs = [ + "nnc/mfa/3rdparty/metal-cpp/Dispatch.cpp", + ], + hdrs = [ + "nnc/mfa/3rdparty/metal-cpp/Dispatch.hpp", + "nnc/mfa/3rdparty/metal-cpp/Metal.hpp", + ], +) + cc_library( name = "siphash", srcs = [ @@ -359,6 +370,17 @@ cuda_library( ] ) +cc_library( + name = "nnc_mfa_compat", + srcs = glob(["nnc/mfa/**/*.cpp"]), + hdrs = glob(["nnc/mfa/**/*.hpp"]), + copts = ccv_default_copts(), + deps = [ + ":metal_cpp_hdrs", + ":nnc_headers" + ] +) + objc_library( name = "nnc_mps_compat", non_arc_srcs = [ @@ -370,8 +392,9 @@ objc_library( copts = ccv_default_copts(), sdk_frameworks = ["Metal", "MetalPerformanceShaders", "MetalPerformanceShadersGraph"], deps = [ + ":nnc_mfa_compat", ":nnc_headers", - ":SFMT_hdrs", + ":SFMT_hdrs" ] ) @@ -417,6 +440,7 @@ objc_library( copts = ccv_default_copts(), deps = [ ":nnc_headers", + ":nnc_mfa_compat", ":nnc_mps_compat", ] ) @@ -516,6 +540,7 @@ cc_library( "//conditions:default": [] }) + select({ "//config:have_mps": [ + ":nnc_mfa_compat", ":nnc_mps_compat", ":cmd_mps" ], diff --git a/lib/config.mk.in b/lib/config.mk.in index 4387fe2c4..a8865c184 100644 --- a/lib/config.mk.in +++ b/lib/config.mk.in @@ -4,6 +4,7 @@ NVCC := @NVCC@ CUDA_SRCS := @CUDA_SRCS@ CUDA_COMPAT_LIB := @CUDA_COMPAT_LIB@ CUDA_CMD_LIB := @CUDA_CMD_LIB@ +MFA_COMPAT_LIB := @MFA_COMPAT_LIB@ MPS_COMPAT_LIB := @MPS_COMPAT_LIB@ MPS_CMD_LIB := @MPS_CMD_LIB@ DEFINE_MACROS := @DEFINE_MACROS@ diff --git a/lib/configure b/lib/configure index a180ffca1..0e64d04e1 100755 --- a/lib/configure +++ b/lib/configure @@ -1,9 +1,10 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for libccv 0.7. +# Generated by GNU Autoconf 2.71 for libccv 0.7. 
# # -# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, +# Inc. # # # This configure script is free software; the Free Software Foundation @@ -14,14 +15,16 @@ # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -31,46 +34,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. 
-if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi +if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -79,13 +82,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -94,8 +90,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -107,30 +107,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. 
@@ -152,20 +132,22 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 -as_fn_exit 255 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST -else +else \$as_nop case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( @@ -185,41 +167,52 @@ as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : -else +else \$as_nop exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1" - if (eval "$as_required") 2>/dev/null; then : + if (eval "$as_required") 2>/dev/null +then : as_have_required=yes -else +else $as_nop as_have_required=no fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : -else +else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
- as_shell=$as_dir/$as_base + as_shell=$as_dir$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : CONFIG_SHELL=$as_shell as_have_required=yes - if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : break 2 fi fi @@ -227,14 +220,21 @@ fi esac as_found=false done -$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi; } IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi +fi - if test "x$CONFIG_SHELL" != x; then : + if test "x$CONFIG_SHELL" != x +then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also @@ -252,18 +252,19 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi - if test x$as_have_required = xno; then : - $as_echo "$0: This script requires a shell more modern than all" - $as_echo "$0: the shells that I found on your system." - if test x${ZSH_VERSION+set} = xset ; then - $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" - $as_echo "$0: be upgraded to zsh 4.3.4 or later." 
+ if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." else - $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." @@ -290,6 +291,7 @@ as_fn_unset () } as_unset=as_fn_unset + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -307,6 +309,14 @@ as_fn_exit () as_fn_set_status $1 exit $1 } # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_mkdir_p # ------------- @@ -321,7 +331,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -330,7 +340,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -369,12 +379,13 @@ as_fn_executable_p () # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. 
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -386,18 +397,27 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- @@ -409,9 +429,9 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error @@ -438,7 +458,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -482,7 +502,7 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall @@ -496,6 +516,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits exit } + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -509,6 +533,13 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -596,6 +627,7 @@ NVCC CC MPS_CMD_LIB MPS_COMPAT_LIB +MFA_COMPAT_LIB CUDA_CMD_LIB CUDA_COMPAT_LIB CUDA_SRCS @@ -729,8 +761,6 @@ do *) ac_optarg=yes ;; esac - # Accept the important Cygnus configure options, so we can diagnose typos. - case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; @@ -771,9 +801,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? 
"invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -797,9 +827,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -1010,9 +1040,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1026,9 +1056,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1072,9 +1102,9 @@ Try \`$0 --help' for more information" *) # FIXME: should be removed in autoconf 3.0. 
- $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; @@ -1090,7 +1120,7 @@ if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; - *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi @@ -1154,7 +1184,7 @@ $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_myself" | +printf "%s\n" X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -1322,9 +1352,9 @@ if test "$ac_init_help" = "recursive"; then case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -1352,7 +1382,8 @@ esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. 
if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive @@ -1360,7 +1391,7 @@ ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix echo && $SHELL "$ac_srcdir/configure" --help=recursive else - $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done @@ -1370,9 +1401,9 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF libccv configure 0.7 -generated by GNU Autoconf 2.69 +generated by GNU Autoconf 2.71 -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF @@ -1389,14 +1420,14 @@ fi ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext + rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1404,14 +1435,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then : + } && test -s conftest.$ac_objext +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1427,14 +1459,14 @@ fi ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest$ac_exeext + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1442,17 +1474,18 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1479,7 +1512,7 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1487,14 +1520,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1503,14 +1537,34 @@ fi as_fn_set_status $ac_retval } # ac_fn_c_try_cpp +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by libccv $as_me 0.7, which was -generated by GNU Autoconf 2.69. Invocation command line was +generated by GNU Autoconf 2.71. Invocation command line was - $ $0 $@ + $ $0$ac_configure_args_raw _ACEOF exec 5>>config.log @@ -1543,8 +1597,12 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- $as_echo "PATH: $as_dir" + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" done IFS=$as_save_IFS @@ -1579,7 +1637,7 @@ do | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) - ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; @@ -1614,11 +1672,13 @@ done # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" # Save into config.log some information that might help in debugging. { echo - $as_echo "## ---------------- ## + printf "%s\n" "## ---------------- ## ## Cache variables. ## ## ---------------- ##" echo @@ -1629,8 +1689,8 @@ trap 'exit_status=$? case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -1654,7 +1714,7 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; ) echo - $as_echo "## ----------------- ## + printf "%s\n" "## ----------------- ## ## Output variables. 
## ## ----------------- ##" echo @@ -1662,14 +1722,14 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then - $as_echo "## ------------------- ## + printf "%s\n" "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo @@ -1677,15 +1737,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then - $as_echo "## ----------- ## + printf "%s\n" "## ----------- ## ## confdefs.h. ## ## ----------- ##" echo @@ -1693,8 +1753,8 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; echo fi test "$ac_signal" != 0 && - $as_echo "$as_me: caught signal $ac_signal" - $as_echo "$as_me: exit $exit_status" + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && @@ -1708,63 +1768,48 @@ ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h -$as_echo "/* confdefs.h */" > confdefs.h +printf "%s\n" "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. 
-cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_URL "$PACKAGE_URL" -_ACEOF +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. -ac_site_file1=NONE -ac_site_file2=NONE if test -n "$CONFIG_SITE"; then - # We do not want a PATH search for config.site. 
- case $CONFIG_SITE in #(( - -*) ac_site_file1=./$CONFIG_SITE;; - */*) ac_site_file1=$CONFIG_SITE;; - *) ac_site_file1=./$CONFIG_SITE;; - esac + ac_site_files="$CONFIG_SITE" elif test "x$prefix" != xNONE; then - ac_site_file1=$prefix/share/config.site - ac_site_file2=$prefix/etc/config.site + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" else - ac_site_file1=$ac_default_prefix/share/config.site - ac_site_file2=$ac_default_prefix/etc/config.site + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi -for ac_site_file in "$ac_site_file1" "$ac_site_file2" + +for ac_site_file in $ac_site_files do - test "x$ac_site_file" = xNONE && continue - if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -$as_echo "$as_me: loading site script $ac_site_file" >&6;} + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ - || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi @@ -1774,19 +1819,327 @@ if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. 
if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -$as_echo "$as_me: loading cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -$as_echo "$as_me: creating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} >$cache_file fi +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? + Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif + +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). +ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +// Does the compiler advertise C99 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +#include +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...) dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. 
+ struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +// Does the compiler advertise C11 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. +char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! +char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. 
+struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). +ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). +ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + # Check that the precious variables saved in the cache have kept the same # value. 
ac_cache_corrupted=false @@ -1797,12 +2150,12 @@ for ac_var in $ac_precious_vars; do eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) @@ -1811,24 +2164,24 @@ $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 -$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else - { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: 
ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi - { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in - *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in @@ -1838,11 +2191,12 @@ $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi done if $ac_cache_corrupted; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' + and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. 
## @@ -1868,6 +2222,8 @@ CUDA_COMPAT_LIB="gpu/libnnc-compat-cuda.o" CUDA_CMD_LIB="libnnc-cmd-cuda.o" +MFA_COMPAT_LIB="mfa/libnnc-compat-mfa.o" + MPS_COMPAT_LIB="mps/libnnc-compat-mps.o" MPS_CMD_LIB="libnnc-cmd-mps.o" @@ -1876,11 +2232,12 @@ MPS_CMD_LIB="libnnc-cmd-mps.o" # check if clang exists # Extract the first word of "clang", so it can be a program name with args. set dummy clang; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -1888,11 +2245,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="clang" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -1903,11 +2264,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -1915,11 +2276,12 @@ fi # check if nvcc exists # Extract the first word of "nvcc", so it can be a program name with args. set dummy nvcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_NVCC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_NVCC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$NVCC"; then ac_cv_prog_NVCC="$NVCC" # Let the user override the test. else @@ -1927,11 +2289,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_NVCC="nvcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -1942,11 +2308,11 @@ fi fi NVCC=$ac_cv_prog_NVCC if test -n "$NVCC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 -$as_echo "$NVCC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -1954,21 +2320,31 @@ fi # check for ARM NEON support -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking neon" >&5 -$as_echo_n "checking neon... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking neon" >&5 +printf %s "checking neon... " >&6; } # Check whether --enable-neon was given. 
-if test "${enable_neon+set}" = set; then : +if test ${enable_neon+y} +then : enableval=$enable_neon; neon_support=$enableval -else +else $as_nop neon_support="no" fi if test "$neon_support" = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } DEFINE_MACROS="$DEFINE_MACROS-D HAVE_NEON " - ac_ext=c + + + + + + + + + +ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' @@ -1976,11 +2352,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -1988,11 +2365,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2003,11 +2384,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2016,11 +2397,12 @@ if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else @@ -2028,11 +2410,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2043,11 +2429,11 @@ fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi if test "x$ac_ct_CC" = x; then @@ -2055,8 +2441,8 @@ fi else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC @@ -2069,11 +2455,12 @@ if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -2081,11 +2468,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2096,11 +2487,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2109,11 +2500,12 @@ fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else @@ -2122,15 +2514,19 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2146,18 +2542,18 @@ if test $ac_prog_rejected = yes; then # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2168,11 +2564,12 @@ if test -z "$CC"; then do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. 
else @@ -2180,11 +2577,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2195,11 +2596,11 @@ fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2212,11 +2613,12 @@ if test -z "$CC"; then do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else @@ -2224,11 +2626,15 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2239,11 +2645,11 @@ fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi @@ -2255,34 +2661,138 @@ done else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi +else + CC="$ac_cv_prog_CC" fi fi -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. 
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 -for ac_option in --version -v -V -qversion; do +for ac_option in --version -v -V -qversion -version; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -2292,7 +2802,7 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done @@ -2300,7 +2810,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; @@ -2312,9 +2822,9 @@ ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... 
" >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" @@ -2335,11 +2845,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, @@ -2356,7 +2867,7 @@ do # certainly right. break;; *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi @@ -2372,44 +2883,46 @@ do done test "$ac_cv_exeext" = no && ac_cv_exeext= -else +else $as_nop ac_file='' fi -if test -z "$ac_file"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -$as_echo "$as_me: failed program was:" >&5 +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; 
} -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -$as_echo_n "checking for suffix of executables... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with @@ -2423,15 +2936,15 @@ for ac_file in conftest.exe conftest conftest.*; do * ) break;; esac done -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -$as_echo "$ac_cv_exeext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext @@ -2440,7 +2953,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int -main () +main (void) { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; @@ -2452,8 +2965,8 @@ _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in @@ -2461,10 +2974,10 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in @@ -2472,39 +2985,40 @@ $as_echo "$ac_try_echo"; } >&5 *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run C compiled programs. + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -$as_echo_n "checking for suffix of object files... " >&6; } -if ${ac_cv_objext+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; @@ -2518,11 +3032,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in @@ -2531,31 +3046,32 @@ $as_echo "$ac_try_echo"; } >&5 break;; esac done -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -$as_echo "$ac_cv_objext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... 
" >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { #ifndef __GNUC__ choke me @@ -2565,29 +3081,33 @@ main () return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_compiler_gnu=yes -else +else $as_nop ac_compiler_gnu=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi -ac_test_CFLAGS=${CFLAGS+set} +ac_test_CFLAGS=${CFLAGS+y} ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... " >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no @@ -2596,57 +3116,60 @@ else /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_g=yes -else +else $as_nop CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : -else +else $as_nop ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then @@ -2661,94 +3184,144 @@ else CFLAGS= fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c11=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else 
$as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_c_conftest_c89_program _ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : + if ac_fn_c_try_compile "$LINENO" +then : ac_cv_prog_cc_c89=$ac_arg fi -rm -f core conftest.err conftest.$ac_objext +rm -f core conftest.err conftest.$ac_objext conftest.beam test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC - fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none 
needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 +fi fi ac_ext=c @@ -2758,11 +3331,12 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mfpu=neon" >&5 -$as_echo_n "checking whether C compiler accepts -mfpu=neon... " >&6; } -if ${ax_cv_check_cflags__Werror__mfpu_neon+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mfpu=neon" >&5 +printf %s "checking whether C compiler accepts -mfpu=neon... " >&6; } +if test ${ax_cv_check_cflags__Werror__mfpu_neon+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -Werror -mfpu=neon" @@ -2770,64 +3344,68 @@ else /* end confdefs.h. 
*/ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_c_try_compile "$LINENO" +then : ax_cv_check_cflags__Werror__mfpu_neon=yes -else +else $as_nop ax_cv_check_cflags__Werror__mfpu_neon=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mfpu_neon" >&5 -$as_echo "$ax_cv_check_cflags__Werror__mfpu_neon" >&6; } - if test "x$ax_cv_check_cflags__Werror__mfpu_neon" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mfpu_neon" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mfpu_neon" >&6; } + if test "x$ax_cv_check_cflags__Werror__mfpu_neon" = xyes +then : MKCFLAGS="$MKCFLAGS-mfpu=neon -mfloat-abi=hard " -else +else $as_nop : fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking arch" >&5 -$as_echo_n "checking arch... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking arch" >&5 +printf %s "checking arch... " >&6; } # Check whether --with-arch was given. 
-if test "${with_arch+set}" = set; then : +if test ${with_arch+y} +then : withval=$with_arch; arch_native=$withval -else +else $as_nop arch_native="native" fi if test "$arch_native" = native; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: native" >&5 -$as_echo "native" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: native" >&5 +printf "%s\n" "native" >&6; } else MKCFLAGS="$MKCFLAGS-march=$arch_native -mtune=$arch_native " - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $arch_native" >&5 -$as_echo "$arch_native" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $arch_native" >&5 +printf "%s\n" "$arch_native" >&6; } fi # check for blas blas_ok=no # check for ATLAS library if test "$blas_ok" = no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ATL_xerbla in -latlas" >&5 -$as_echo_n "checking for ATL_xerbla in -latlas... " >&6; } -if ${ac_cv_lib_atlas_ATL_xerbla+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ATL_xerbla in -latlas" >&5 +printf %s "checking for ATL_xerbla in -latlas... " >&6; } +if test ${ac_cv_lib_atlas_ATL_xerbla+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-latlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -2836,35 +3414,35 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char ATL_xerbla (); int -main () +main (void) { return ATL_xerbla (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_atlas_ATL_xerbla=yes -else +else $as_nop ac_cv_lib_atlas_ATL_xerbla=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_atlas_ATL_xerbla" >&5 -$as_echo "$ac_cv_lib_atlas_ATL_xerbla" >&6; } -if test "x$ac_cv_lib_atlas_ATL_xerbla" = xyes; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cblas_dgemm in -lcblas" >&5 -$as_echo_n "checking for cblas_dgemm in -lcblas... " >&6; } -if ${ac_cv_lib_cblas_cblas_dgemm+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_atlas_ATL_xerbla" >&5 +printf "%s\n" "$ac_cv_lib_atlas_ATL_xerbla" >&6; } +if test "x$ac_cv_lib_atlas_ATL_xerbla" = xyes +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_dgemm in -lcblas" >&5 +printf %s "checking for cblas_dgemm in -lcblas... " >&6; } +if test ${ac_cv_lib_cblas_cblas_dgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lcblas -latlas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -2873,30 +3451,29 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char cblas_dgemm (); int -main () +main (void) { return cblas_dgemm (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_cblas_cblas_dgemm=yes -else +else $as_nop ac_cv_lib_cblas_cblas_dgemm=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cblas_cblas_dgemm" >&5 -$as_echo "$ac_cv_lib_cblas_cblas_dgemm" >&6; } -if test "x$ac_cv_lib_cblas_cblas_dgemm" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cblas_cblas_dgemm" >&5 +printf "%s\n" "$ac_cv_lib_cblas_cblas_dgemm" >&6; } +if test "x$ac_cv_lib_cblas_cblas_dgemm" = xyes +then : blas_ok=yes BLAS_LIBS="-lcblas -latlas" fi @@ -2906,11 +3483,12 @@ fi fi # check for Generic BLAS library if test "$blas_ok" = no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sgemm in -lblas" >&5 -$as_echo_n "checking for sgemm in -lblas... " >&6; } -if ${ac_cv_lib_blas_sgemm+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sgemm in -lblas" >&5 +printf %s "checking for sgemm in -lblas... " >&6; } +if test ${ac_cv_lib_blas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -2919,41 +3497,41 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char sgemm (); int -main () +main (void) { return sgemm (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_blas_sgemm=yes -else +else $as_nop ac_cv_lib_blas_sgemm=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_sgemm" >&5 -$as_echo "$ac_cv_lib_blas_sgemm" >&6; } -if test "x$ac_cv_lib_blas_sgemm" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_blas_sgemm" >&6; } +if test "x$ac_cv_lib_blas_sgemm" = xyes +then : blas_ok=yes; BLAS_LIBS="-lblas" fi fi # check for OpenBLAS library if test "$blas_ok" = no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lopenblas" >&5 -$as_echo_n "checking for cblas_sgemm in -lopenblas... " >&6; } -if ${ac_cv_lib_openblas_cblas_sgemm+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_sgemm in -lopenblas" >&5 +printf %s "checking for cblas_sgemm in -lopenblas... " >&6; } +if test ${ac_cv_lib_openblas_cblas_sgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lopenblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -2962,30 +3540,29 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char cblas_sgemm (); int -main () +main (void) { return cblas_sgemm (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_openblas_cblas_sgemm=yes -else +else $as_nop ac_cv_lib_openblas_cblas_sgemm=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_cblas_sgemm" >&5 -$as_echo "$ac_cv_lib_openblas_cblas_sgemm" >&6; } -if test "x$ac_cv_lib_openblas_cblas_sgemm" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_openblas_cblas_sgemm" >&5 +printf "%s\n" "$ac_cv_lib_openblas_cblas_sgemm" >&6; } +if test "x$ac_cv_lib_openblas_cblas_sgemm" = xyes +then : blas_ok=yes; BLAS_LIBS="-lopenblas" fi @@ -2997,11 +3574,12 @@ if test "$blas_ok" = yes; then fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 -$as_echo_n "checking for clock_gettime in -lrt... " >&6; } -if ${ac_cv_lib_rt_clock_gettime+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 +printf %s "checking for clock_gettime in -lrt... " >&6; } +if test ${ac_cv_lib_rt_clock_gettime+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lrt $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -3010,30 +3588,29 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char clock_gettime (); int -main () +main (void) { return clock_gettime (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_rt_clock_gettime=yes -else +else $as_nop ac_cv_lib_rt_clock_gettime=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_clock_gettime" >&5 -$as_echo "$ac_cv_lib_rt_clock_gettime" >&6; } -if test "x$ac_cv_lib_rt_clock_gettime" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_clock_gettime" >&5 +printf "%s\n" "$ac_cv_lib_rt_clock_gettime" >&6; } +if test "x$ac_cv_lib_rt_clock_gettime" = xyes +then : MKLDFLAGS="$MKLDFLAGS-lrt " fi @@ -3045,40 +3622,36 @@ ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 -$as_echo_n "checking how to run the C preprocessor... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +printf %s "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. 
if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then - if ${ac_cv_prog_CPP+:} false; then : - $as_echo_n "(cached) " >&6 -else - # Double quotes because CPP needs to be expanded - for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + if test ${ac_cv_prog_CPP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # Double quotes because $CC needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" cpp /lib/cpp do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif +#include Syntax error _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : -else +else $as_nop # Broken: fails on valid input. continue fi @@ -3090,10 +3663,11 @@ rm -f conftest.err conftest.i conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : # Broken: success on invalid input. continue -else +else $as_nop # Passes both tests. ac_preproc_ok=: break @@ -3103,7 +3677,8 @@ rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : +if $ac_preproc_ok +then : break fi @@ -3115,29 +3690,24 @@ fi else ac_cv_prog_CPP=$CPP fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 -$as_echo "$CPP" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +printf "%s\n" "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. - # Prefer to if __STDC__ is defined, since - # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#ifdef __STDC__ -# include -#else -# include -#endif +#include Syntax error _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : -else +else $as_nop # Broken: fails on valid input. continue fi @@ -3149,10 +3719,11 @@ rm -f conftest.err conftest.i conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : # Broken: success on invalid input. continue -else +else $as_nop # Passes both tests. ac_preproc_ok=: break @@ -3162,11 +3733,12 @@ rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext -if $ac_preproc_ok; then : +if $ac_preproc_ok +then : -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? 
"C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi @@ -3178,11 +3750,12 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $ ac_compiler_gnu=$ac_cv_c_compiler_gnu - { $as_echo "$as_me:${as_lineno-$LINENO}: checking png.h presence" >&5 -$as_echo_n "checking png.h presence... " >&6; } -if ${ax_cv_check_cflags_png_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking png.h presence" >&5 +printf %s "checking png.h presence... " >&6; } +if test ${ax_cv_check_cflags_png_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS png.h" @@ -3190,36 +3763,39 @@ else /* end confdefs.h. */ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_png_h=yes -else +else $as_nop ax_cv_check_cflags_png_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_png_h" >&5 -$as_echo "$ax_cv_check_cflags_png_h" >&6; } - if test "x$ax_cv_check_cflags_png_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_png_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_png_h" >&6; } + if test "x$ax_cv_check_cflags_png_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_LIBPNG " MKLDFLAGS="$MKLDFLAGS-lpng " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking jpeglib.h presence" >&5 -$as_echo_n "checking jpeglib.h presence... " >&6; } -if ${ax_cv_check_cflags_jpeglib_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking jpeglib.h presence" >&5 +printf %s "checking jpeglib.h presence... 
" >&6; } +if test ${ax_cv_check_cflags_jpeglib_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS jpeglib.h" @@ -3227,47 +3803,51 @@ else /* end confdefs.h. */ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_jpeglib_h=yes -else +else $as_nop ax_cv_check_cflags_jpeglib_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_jpeglib_h" >&5 -$as_echo "$ax_cv_check_cflags_jpeglib_h" >&6; } - if test "x$ax_cv_check_cflags_jpeglib_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_jpeglib_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_jpeglib_h" >&6; } + if test "x$ax_cv_check_cflags_jpeglib_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_LIBJPEG " MKLDFLAGS="$MKLDFLAGS-ljpeg " -else +else $as_nop : fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking fftw3" >&5 -$as_echo_n "checking fftw3... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking fftw3" >&5 +printf %s "checking fftw3... " >&6; } # Check whether --enable-fftw3 was given. -if test "${enable_fftw3+set}" = set; then : +if test ${enable_fftw3+y} +then : enableval=$enable_fftw3; fftw3_enable=$enableval -else +else $as_nop fftw3_enable="yes" fi if test "$fftw3_enable" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking fftw3.h presence" >&5 -$as_echo_n "checking fftw3.h presence... " >&6; } -if ${ax_cv_check_cflags_fftw3_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking fftw3.h presence" >&5 +printf %s "checking fftw3.h presence... " >&6; } +if test ${ax_cv_check_cflags_fftw3_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS fftw3.h" @@ -3275,41 +3855,44 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_fftw3_h=yes -else +else $as_nop ax_cv_check_cflags_fftw3_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_fftw3_h" >&5 -$as_echo "$ax_cv_check_cflags_fftw3_h" >&6; } - if test "x$ax_cv_check_cflags_fftw3_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_fftw3_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_fftw3_h" >&6; } + if test "x$ax_cv_check_cflags_fftw3_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_FFTW3 " MKLDFLAGS="$MKLDFLAGS-lfftw3 -lfftw3f -lpthread " -else +else $as_nop : fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: disabled" >&5 -$as_echo "disabled" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: disabled" >&5 +printf "%s\n" "disabled" >&6; } fi # Check pthread - { $as_echo "$as_me:${as_lineno-$LINENO}: checking pthread.h presence" >&5 -$as_echo_n "checking pthread.h presence... " >&6; } -if ${ax_cv_check_cflags_pthread_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pthread.h presence" >&5 +printf %s "checking pthread.h presence... " >&6; } +if test ${ax_cv_check_cflags_pthread_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS pthread.h" @@ -3317,36 +3900,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_pthread_h=yes -else +else $as_nop ax_cv_check_cflags_pthread_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_pthread_h" >&5 -$as_echo "$ax_cv_check_cflags_pthread_h" >&6; } - if test "x$ax_cv_check_cflags_pthread_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_pthread_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_pthread_h" >&6; } + if test "x$ax_cv_check_cflags_pthread_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_PTHREAD " MKLDFLAGS="$MKLDFLAGS-lpthread " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking linear.h presence" >&5 -$as_echo_n "checking linear.h presence... " >&6; } -if ${ax_cv_check_cflags_linear_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking linear.h presence" >&5 +printf %s "checking linear.h presence... " >&6; } +if test ${ax_cv_check_cflags_linear_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS linear.h" @@ -3354,36 +3940,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_linear_h=yes -else +else $as_nop ax_cv_check_cflags_linear_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_linear_h" >&5 -$as_echo "$ax_cv_check_cflags_linear_h" >&6; } - if test "x$ax_cv_check_cflags_linear_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_linear_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_linear_h" >&6; } + if test "x$ax_cv_check_cflags_linear_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_LIBLINEAR " MKLDFLAGS="$MKLDFLAGS-llinear " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking tesseract/capi.h presence" >&5 -$as_echo_n "checking tesseract/capi.h presence... " >&6; } -if ${ax_cv_check_cflags_tesseract_capi_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking tesseract/capi.h presence" >&5 +printf %s "checking tesseract/capi.h presence... " >&6; } +if test ${ax_cv_check_cflags_tesseract_capi_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS tesseract/capi.h" @@ -3391,36 +3980,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_tesseract_capi_h=yes -else +else $as_nop ax_cv_check_cflags_tesseract_capi_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_tesseract_capi_h" >&5 -$as_echo "$ax_cv_check_cflags_tesseract_capi_h" >&6; } - if test "x$ax_cv_check_cflags_tesseract_capi_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_tesseract_capi_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_tesseract_capi_h" >&6; } + if test "x$ax_cv_check_cflags_tesseract_capi_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_TESSERACT " MKLDFLAGS="$MKLDFLAGS-ltesseract " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking Accelerate/Accelerate.h presence" >&5 -$as_echo_n "checking Accelerate/Accelerate.h presence... " >&6; } -if ${ax_cv_check_cflags_Accelerate_Accelerate_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Accelerate/Accelerate.h presence" >&5 +printf %s "checking Accelerate/Accelerate.h presence... " >&6; } +if test ${ax_cv_check_cflags_Accelerate_Accelerate_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS Accelerate/Accelerate.h" @@ -3428,36 +4020,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_Accelerate_Accelerate_h=yes -else +else $as_nop ax_cv_check_cflags_Accelerate_Accelerate_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_Accelerate_Accelerate_h" >&5 -$as_echo "$ax_cv_check_cflags_Accelerate_Accelerate_h" >&6; } - if test "x$ax_cv_check_cflags_Accelerate_Accelerate_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_Accelerate_Accelerate_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_Accelerate_Accelerate_h" >&6; } + if test "x$ax_cv_check_cflags_Accelerate_Accelerate_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_ACCELERATE_FRAMEWORK " MKLDFLAGS="$MKLDFLAGS-framework Accelerate " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking libavcodec/avcodec.h presence" >&5 -$as_echo_n "checking libavcodec/avcodec.h presence... " >&6; } -if ${ax_cv_check_cflags_libavcodec_avcodec_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking libavcodec/avcodec.h presence" >&5 +printf %s "checking libavcodec/avcodec.h presence... " >&6; } +if test ${ax_cv_check_cflags_libavcodec_avcodec_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS libavcodec/avcodec.h" @@ -3465,36 +4060,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_libavcodec_avcodec_h=yes -else +else $as_nop ax_cv_check_cflags_libavcodec_avcodec_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libavcodec_avcodec_h" >&5 -$as_echo "$ax_cv_check_cflags_libavcodec_avcodec_h" >&6; } - if test "x$ax_cv_check_cflags_libavcodec_avcodec_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libavcodec_avcodec_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_libavcodec_avcodec_h" >&6; } + if test "x$ax_cv_check_cflags_libavcodec_avcodec_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_AVCODEC " MKLDFLAGS="$MKLDFLAGS-lavcodec " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking libavformat/avformat.h presence" >&5 -$as_echo_n "checking libavformat/avformat.h presence... " >&6; } -if ${ax_cv_check_cflags_libavformat_avformat_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking libavformat/avformat.h presence" >&5 +printf %s "checking libavformat/avformat.h presence... " >&6; } +if test ${ax_cv_check_cflags_libavformat_avformat_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS libavformat/avformat.h" @@ -3502,36 +4100,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_libavformat_avformat_h=yes -else +else $as_nop ax_cv_check_cflags_libavformat_avformat_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libavformat_avformat_h" >&5 -$as_echo "$ax_cv_check_cflags_libavformat_avformat_h" >&6; } - if test "x$ax_cv_check_cflags_libavformat_avformat_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libavformat_avformat_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_libavformat_avformat_h" >&6; } + if test "x$ax_cv_check_cflags_libavformat_avformat_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_AVFORMAT " MKLDFLAGS="$MKLDFLAGS-lavformat " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking libavutil/avutil.h presence" >&5 -$as_echo_n "checking libavutil/avutil.h presence... " >&6; } -if ${ax_cv_check_cflags_libavutil_avutil_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking libavutil/avutil.h presence" >&5 +printf %s "checking libavutil/avutil.h presence... " >&6; } +if test ${ax_cv_check_cflags_libavutil_avutil_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS libavutil/avutil.h" @@ -3539,36 +4140,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_libavutil_avutil_h=yes -else +else $as_nop ax_cv_check_cflags_libavutil_avutil_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libavutil_avutil_h" >&5 -$as_echo "$ax_cv_check_cflags_libavutil_avutil_h" >&6; } - if test "x$ax_cv_check_cflags_libavutil_avutil_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libavutil_avutil_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_libavutil_avutil_h" >&6; } + if test "x$ax_cv_check_cflags_libavutil_avutil_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_AVUTIL " MKLDFLAGS="$MKLDFLAGS-lavutil " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking libswscale/swscale.h presence" >&5 -$as_echo_n "checking libswscale/swscale.h presence... " >&6; } -if ${ax_cv_check_cflags_libswscale_swscale_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking libswscale/swscale.h presence" >&5 +printf %s "checking libswscale/swscale.h presence... " >&6; } +if test ${ax_cv_check_cflags_libswscale_swscale_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS libswscale/swscale.h" @@ -3576,95 +4180,108 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_libswscale_swscale_h=yes -else +else $as_nop ax_cv_check_cflags_libswscale_swscale_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libswscale_swscale_h" >&5 -$as_echo "$ax_cv_check_cflags_libswscale_swscale_h" >&6; } - if test "x$ax_cv_check_cflags_libswscale_swscale_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_libswscale_swscale_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_libswscale_swscale_h" >&6; } + if test "x$ax_cv_check_cflags_libswscale_swscale_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_SWSCALE " MKLDFLAGS="$MKLDFLAGS-lswscale " -else +else $as_nop : fi # try to find CPU parallel libraries, OpenMP or libdispatch, we will prefer OpenMP when possible. - - OPENMP_CFLAGS= - # Check whether --enable-openmp was given. -if test "${enable_openmp+set}" = set; then : +if test -e penmp || test -e mp; then + as_fn_error $? "AC_OPENMP clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5 +fi +# Check whether --enable-openmp was given. +if test ${enable_openmp+y} +then : enableval=$enable_openmp; fi + OPENMP_CFLAGS= if test "$enable_openmp" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 -$as_echo_n "checking for $CC option to support OpenMP... " >&6; } -if ${ac_cv_prog_c_openmp+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 +printf %s "checking for $CC option to support OpenMP... 
" >&6; } +if test ${ac_cv_prog_c_openmp+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_c_openmp='not found' + for ac_option in '' -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ + -Popenmp --openmp; do + + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP - choke me +#error "OpenMP not supported" #endif #include -int main () { return omp_get_num_threads (); } +int main (void) { return omp_get_num_threads (); } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_prog_c_openmp='none needed' -else - ac_cv_prog_c_openmp='unsupported' - for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ - -Popenmp --openmp; do - ac_save_CFLAGS=$CFLAGS - CFLAGS="$CFLAGS $ac_option" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP - choke me +#error "OpenMP not supported" #endif #include -int main () { return omp_get_num_threads (); } +int main (void) { return omp_get_num_threads (); } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_prog_c_openmp=$ac_option +else $as_nop + ac_cv_prog_c_openmp='unsupported' fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext - CFLAGS=$ac_save_CFLAGS - if test "$ac_cv_prog_c_openmp" != unsupported; then - break - fi - done fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + + if test "$ac_cv_prog_c_openmp" != 'not found'; then + break + fi + done + if test "$ac_cv_prog_c_openmp" = 'not found'; then + ac_cv_prog_c_openmp='unsupported' + elif test "$ac_cv_prog_c_openmp" = ''; then + ac_cv_prog_c_openmp='none needed' + fi + rm -f penmp mp 
fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 -$as_echo "$ac_cv_prog_c_openmp" >&6; } - case $ac_cv_prog_c_openmp in #( - "none needed" | unsupported) - ;; #( - *) - OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;; - esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 +printf "%s\n" "$ac_cv_prog_c_openmp" >&6; } + if test "$ac_cv_prog_c_openmp" != 'unsupported' && \ + test "$ac_cv_prog_c_openmp" != 'none needed'; then + OPENMP_CFLAGS="$ac_cv_prog_c_openmp" + fi fi @@ -3678,11 +4295,12 @@ if test -n "${OPENMP_CFLAGS}"; then fi # only check dispatch if compiled with clang if test $CC = clang; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking dispatch/dispatch.h presence" >&5 -$as_echo_n "checking dispatch/dispatch.h presence... " >&6; } -if ${ax_cv_check_cflags_dispatch_dispatch_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dispatch/dispatch.h presence" >&5 +printf %s "checking dispatch/dispatch.h presence... " >&6; } +if test ${ax_cv_check_cflags_dispatch_dispatch_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS dispatch/dispatch.h" @@ -3690,36 +4308,39 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_dispatch_dispatch_h=yes -else +else $as_nop ax_cv_check_cflags_dispatch_dispatch_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_dispatch_dispatch_h" >&5 -$as_echo "$ax_cv_check_cflags_dispatch_dispatch_h" >&6; } - if test "x$ax_cv_check_cflags_dispatch_dispatch_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_dispatch_dispatch_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_dispatch_dispatch_h" >&6; } + if test "x$ax_cv_check_cflags_dispatch_dispatch_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D USE_DISPATCH " MKCFLAGS="$MKCFLAGS-fblocks " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dispatch_apply in -ldispatch" >&5 -$as_echo_n "checking for dispatch_apply in -ldispatch... " >&6; } -if ${ac_cv_lib_dispatch_dispatch_apply+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dispatch_apply in -ldispatch" >&5 +printf %s "checking for dispatch_apply in -ldispatch... " >&6; } +if test ${ac_cv_lib_dispatch_dispatch_apply+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-ldispatch $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -3728,30 +4349,29 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char dispatch_apply (); int -main () +main (void) { return dispatch_apply (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_dispatch_dispatch_apply=yes -else +else $as_nop ac_cv_lib_dispatch_dispatch_apply=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dispatch_dispatch_apply" >&5 -$as_echo "$ac_cv_lib_dispatch_dispatch_apply" >&6; } -if test "x$ac_cv_lib_dispatch_dispatch_apply" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dispatch_dispatch_apply" >&5 +printf "%s\n" "$ac_cv_lib_dispatch_dispatch_apply" >&6; } +if test "x$ac_cv_lib_dispatch_dispatch_apply" = xyes +then : MKLDFLAGS="$MKLDFLAGS-ldispatch -lBlocksRuntime " fi @@ -3759,11 +4379,12 @@ fi fi # check for SSE2 support only we don't enable NEON explicitly if test "$neon_support" != yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking xmmintrin.h presence" >&5 -$as_echo_n "checking xmmintrin.h presence... " >&6; } -if ${ax_cv_check_cflags_xmmintrin_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xmmintrin.h presence" >&5 +printf %s "checking xmmintrin.h presence... " >&6; } +if test ${ax_cv_check_cflags_xmmintrin_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS xmmintrin.h" @@ -3771,49 +4392,53 @@ else /* end confdefs.h. 
*/ #include int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_xmmintrin_h=yes -else +else $as_nop ax_cv_check_cflags_xmmintrin_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_xmmintrin_h" >&5 -$as_echo "$ax_cv_check_cflags_xmmintrin_h" >&6; } - if test "x$ax_cv_check_cflags_xmmintrin_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_xmmintrin_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_xmmintrin_h" >&6; } + if test "x$ax_cv_check_cflags_xmmintrin_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_SSE2 " MKCFLAGS="$MKCFLAGS-msse2 " -else +else $as_nop : fi fi # check for gsl, and I need to first check these two before I can check gsl -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking gsl" >&5 -$as_echo_n "checking gsl... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking gsl" >&5 +printf %s "checking gsl... " >&6; } # Check whether --enable-gsl was given. -if test "${enable_gsl+set}" = set; then : +if test ${enable_gsl+y} +then : enableval=$enable_gsl; gsl_enable=$enableval -else +else $as_nop gsl_enable="yes" fi if test "$gsl_enable" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5 -$as_echo_n "checking for cos in -lm... " >&6; } -if ${ac_cv_lib_m_cos+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5 +printf %s "checking for cos in -lm... " >&6; } +if test ${ac_cv_lib_m_cos+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -3822,43 +4447,41 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. 
Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif char cos (); int -main () +main (void) { return cos (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_m_cos=yes -else +else $as_nop ac_cv_lib_m_cos=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5 -$as_echo "$ac_cv_lib_m_cos" >&6; } -if test "x$ac_cv_lib_m_cos" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBM 1 -_ACEOF +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5 +printf "%s\n" "$ac_cv_lib_m_cos" >&6; } +if test "x$ac_cv_lib_m_cos" = xyes +then : + printf "%s\n" "#define HAVE_LIBM 1" >>confdefs.h LIBS="-lm $LIBS" fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cblas_dgemm in -lgslcblas" >&5 -$as_echo_n "checking for cblas_dgemm in -lgslcblas... " >&6; } -if ${ac_cv_lib_gslcblas_cblas_dgemm+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cblas_dgemm in -lgslcblas" >&5 +printf %s "checking for cblas_dgemm in -lgslcblas... " >&6; } +if test ${ac_cv_lib_gslcblas_cblas_dgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lgslcblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -3867,43 +4490,41 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char cblas_dgemm (); int -main () +main (void) { return cblas_dgemm (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_gslcblas_cblas_dgemm=yes -else +else $as_nop ac_cv_lib_gslcblas_cblas_dgemm=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gslcblas_cblas_dgemm" >&5 -$as_echo "$ac_cv_lib_gslcblas_cblas_dgemm" >&6; } -if test "x$ac_cv_lib_gslcblas_cblas_dgemm" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBGSLCBLAS 1 -_ACEOF +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gslcblas_cblas_dgemm" >&5 +printf "%s\n" "$ac_cv_lib_gslcblas_cblas_dgemm" >&6; } +if test "x$ac_cv_lib_gslcblas_cblas_dgemm" = xyes +then : + printf "%s\n" "#define HAVE_LIBGSLCBLAS 1" >>confdefs.h LIBS="-lgslcblas $LIBS" fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gsl_blas_dgemm in -lgsl" >&5 -$as_echo_n "checking for gsl_blas_dgemm in -lgsl... " >&6; } -if ${ac_cv_lib_gsl_gsl_blas_dgemm+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gsl_blas_dgemm in -lgsl" >&5 +printf %s "checking for gsl_blas_dgemm in -lgsl... " >&6; } +if test ${ac_cv_lib_gsl_gsl_blas_dgemm+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lgsl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -3912,48 +4533,48 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif char gsl_blas_dgemm (); int -main () +main (void) { return gsl_blas_dgemm (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_c_try_link "$LINENO" +then : ac_cv_lib_gsl_gsl_blas_dgemm=yes -else +else $as_nop ac_cv_lib_gsl_gsl_blas_dgemm=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gsl_gsl_blas_dgemm" >&5 -$as_echo "$ac_cv_lib_gsl_gsl_blas_dgemm" >&6; } -if test "x$ac_cv_lib_gsl_gsl_blas_dgemm" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gsl_gsl_blas_dgemm" >&5 +printf "%s\n" "$ac_cv_lib_gsl_gsl_blas_dgemm" >&6; } +if test "x$ac_cv_lib_gsl_gsl_blas_dgemm" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_GSL " MKLDFLAGS="$MKLDFLAGS-lgsl -lgslcblas " fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: disabled" >&5 -$as_echo "disabled" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: disabled" >&5 +printf "%s\n" "disabled" >&6; } fi # prepare for cuda -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking cuda" >&5 -$as_echo_n "checking cuda... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda" >&5 +printf %s "checking cuda... " >&6; } # Check whether --with-cuda was given. 
-if test "${with_cuda+set}" = set; then : +if test ${with_cuda+y} +then : withval=$with_cuda; cuda_prefix=$withval -else +else $as_nop cuda_prefix="/usr/local/cuda" fi @@ -3968,19 +4589,20 @@ if [ -d "$cuda_prefix" ]; then if [ -d "$cuda_prefix/lib64" ]; then MKLDFLAGS="$MKLDFLAGS-L$cuda_prefix/lib64 " - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes, x86_64" >&5 -$as_echo "yes, x86_64" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes, x86_64" >&5 +printf "%s\n" "yes, x86_64" >&6; } else MKLDFLAGS="$MKLDFLAGS-L$cuda_prefix/lib " - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes, i386" >&5 -$as_echo "yes, i386" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes, i386" >&5 +printf "%s\n" "yes, i386" >&6; } fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking cudnn.h presence" >&5 -$as_echo_n "checking cudnn.h presence... " >&6; } -if ${ax_cv_check_cflags_cudnn_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cudnn.h presence" >&5 +printf %s "checking cudnn.h presence... " >&6; } +if test ${ax_cv_check_cflags_cudnn_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS cudnn.h" @@ -3988,36 +4610,39 @@ else /* end confdefs.h. 
*/ #include <cudnn.h> int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_cudnn_h=yes -else +else $as_nop ax_cv_check_cflags_cudnn_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_cudnn_h" >&5 -$as_echo "$ax_cv_check_cflags_cudnn_h" >&6; } - if test "x$ax_cv_check_cflags_cudnn_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_cudnn_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_cudnn_h" >&6; } + if test "x$ax_cv_check_cflags_cudnn_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_CUDNN " MKLDFLAGS="$MKLDFLAGS-lcudnn " -else +else $as_nop : fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking nccl.h presence" >&5 -$as_echo_n "checking nccl.h presence... " >&6; } -if ${ax_cv_check_cflags_nccl_h+:} false; then : - $as_echo_n "(cached) " >&6 -else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking nccl.h presence" >&5 +printf %s "checking nccl.h presence... " >&6; } +if test ${ax_cv_check_cflags_nccl_h+y} +then : + printf %s "(cached) " >&6 +else $as_nop ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS nccl.h" @@ -4025,37 +4650,40 @@ else /* end confdefs.h. 
*/ #include <nccl.h> int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_cpp "$LINENO"; then : +if ac_fn_c_try_cpp "$LINENO" +then : ax_cv_check_cflags_nccl_h=yes -else +else $as_nop ax_cv_check_cflags_nccl_h=no fi rm -f conftest.err conftest.i conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_nccl_h" >&5 -$as_echo "$ax_cv_check_cflags_nccl_h" >&6; } - if test "x$ax_cv_check_cflags_nccl_h" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags_nccl_h" >&5 +printf "%s\n" "$ax_cv_check_cflags_nccl_h" >&6; } + if test "x$ax_cv_check_cflags_nccl_h" = xyes +then : DEFINE_MACROS="$DEFINE_MACROS-D HAVE_NCCL " MKLDFLAGS="$MKLDFLAGS-lnccl " -else +else $as_nop : fi - as_ac_File=`$as_echo "ac_cv_file_$cuda_prefix/include/cub/cub.cuh" | $as_tr_sh` -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $cuda_prefix/include/cub/cub.cuh" >&5 -$as_echo_n "checking for $cuda_prefix/include/cub/cub.cuh... " >&6; } -if eval \${$as_ac_File+:} false; then : - $as_echo_n "(cached) " >&6 -else + as_ac_File=`printf "%s\n" "ac_cv_file_$cuda_prefix/include/cub/cub.cuh" | $as_tr_sh` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $cuda_prefix/include/cub/cub.cuh" >&5 +printf %s "checking for $cuda_prefix/include/cub/cub.cuh... " >&6; } +if eval test \${$as_ac_File+y} +then : + printf %s "(cached) " >&6 +else $as_nop test "$cross_compiling" = yes && as_fn_error $? 
"cannot check for file existence when cross compiling" "$LINENO" 5 if test -r "$cuda_prefix/include/cub/cub.cuh"; then @@ -4065,9 +4693,10 @@ else fi fi eval ac_res=\$$as_ac_File - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -if eval test \"x\$"$as_ac_File"\" = x"yes"; then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_ac_File"\" = x"yes" +then : DEFINE_MACROS="$DEFINE_MACROS-D USE_SYSTEM_CUB " fi @@ -4079,26 +4708,27 @@ else CUDA_CMD_LIB="" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi # check for MPS support -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking Metal Performance Shaders" >&5 -$as_echo_n "checking Metal Performance Shaders... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Metal Performance Shaders" >&5 +printf %s "checking Metal Performance Shaders... " >&6; } # Check whether --enable-mps was given. 
-if test "${enable_mps+set}" = set; then : +if test ${enable_mps+y} +then : enableval=$enable_mps; mps_support=$enableval -else +else $as_nop mps_support="no" fi if test "$mps_support" = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } DEFINE_MACROS="$DEFINE_MACROS-D HAVE_MPS " - MKLDFLAGS="$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal " + MKLDFLAGS="$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal -lc++ " CUDA_SRCS="" @@ -4107,8 +4737,10 @@ $as_echo "yes" >&6; } CUDA_CMD_LIB="" else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + MFA_COMPAT_LIB="" + MPS_COMPAT_LIB="" MPS_CMD_LIB="" @@ -4173,8 +4805,8 @@ _ACEOF case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -4204,15 +4836,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; /^ac_cv_env_/b end t clear :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: 
updating cache $cache_file" >&5 -$as_echo "$as_me: updating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else @@ -4226,8 +4858,8 @@ $as_echo "$as_me: updating cache $cache_file" >&6;} fi fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache @@ -4280,7 +4912,7 @@ U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" @@ -4296,8 +4928,8 @@ LTLIBOBJS=$ac_ltlibobjs ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! 
$SHELL @@ -4320,14 +4952,16 @@ cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -4337,46 +4971,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. 
-if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi +if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. 
-if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -4385,13 +5019,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -4400,8 +5027,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -4413,30 +5044,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. 
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] @@ -4449,13 +5060,14 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -4482,18 +5094,20 @@ as_fn_unset () { eval $1=; unset $1;} } as_unset=as_fn_unset + # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -4505,12 +5119,13 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` @@ -4541,7 +5156,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -4563,6 +5178,10 @@ as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -4576,6 +5195,12 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -4617,7 +5242,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -4626,7 +5251,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -4689,7 +5314,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # values after options handling. ac_log=" This file was extended by libccv $as_me 0.7, which was -generated by GNU Autoconf 2.69. Invocation command line was +generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -4738,14 +5363,16 @@ $config_files Report bugs to the package provider." 
_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ libccv config.status 0.7 -configured by $0, generated by GNU Autoconf 2.69, +configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." @@ -4782,21 +5409,21 @@ do -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - $as_echo "$ac_cs_version"; exit ;; + printf "%s\n" "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) - $as_echo "$ac_cs_config"; exit ;; + printf "%s\n" "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? 
"missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) - $as_echo "$ac_cs_usage"; exit ;; + printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; @@ -4824,7 +5451,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift - \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" @@ -4838,7 +5465,7 @@ exec 5>>config.log sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX - $as_echo "$ac_log" + printf "%s\n" "$ac_log" } >&5 _ACEOF @@ -4863,7 +5490,7 @@ done # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree @@ -5091,7 +5718,7 @@ do esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac - case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done @@ -5099,17 +5726,17 @@ do # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` - $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. 
$configure_input" - { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -$as_echo "$as_me: creating $ac_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) - ac_sed_conf_input=`$as_echo "$configure_input" | + ac_sed_conf_input=`printf "%s\n" "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac @@ -5126,7 +5753,7 @@ $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$ac_file" | +printf "%s\n" X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -5150,9 +5777,9 @@ $as_echo X"$ac_file" | case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. 
ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -5205,8 +5832,8 @@ ac_sed_dataroot=' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' @@ -5248,9 +5875,9 @@ test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 -$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. 
Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" @@ -5297,7 +5924,8 @@ if test "$no_create" != yes; then $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi + diff --git a/lib/configure.ac b/lib/configure.ac index 6b16fb798..00450891f 100644 --- a/lib/configure.ac +++ b/lib/configure.ac @@ -6,6 +6,7 @@ AC_SUBST(MKLDFLAGS, ["-lm "]) AC_SUBST(CUDA_SRCS, ["cuda/cwc_convnet.cu cuda/cwc_convnet_ext.c cuda/convnet/cwc_convnet_convolutional.cu cuda/convnet/cwc_convnet_rnorm.cu cuda/convnet/cwc_convnet_pool.cu cuda/convnet/cwc_convnet_full_connect.cu"]) AC_SUBST(CUDA_COMPAT_LIB, ["gpu/libnnc-compat-cuda.o"]) AC_SUBST(CUDA_CMD_LIB, ["libnnc-cmd-cuda.o"]) +AC_SUBST(MFA_COMPAT_LIB, ["mfa/libnnc-compat-mfa.o"]) AC_SUBST(MPS_COMPAT_LIB, ["mps/libnnc-compat-mps.o"]) AC_SUBST(MPS_CMD_LIB, ["libnnc-cmd-mps.o"]) @@ -190,12 +191,13 @@ AC_ARG_ENABLE(mps, [AS_HELP_STRING([--enable-mps], [Enable Metal support])], [mp if test "$mps_support" = yes; then AC_MSG_RESULT(yes) AC_SUBST(DEFINE_MACROS, ["$DEFINE_MACROS-D HAVE_MPS "]) - AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal "]) + AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal -lc++ "]) AC_SUBST(CUDA_SRCS, [""]) AC_SUBST(CUDA_COMPAT_LIB, [""]) AC_SUBST(CUDA_CMD_LIB, [""]) else AC_MSG_RESULT(no) + AC_SUBST(MFA_COMPAT_LIB, [""]) AC_SUBST(MPS_COMPAT_LIB, [""]) AC_SUBST(MPS_CMD_LIB, [""]) fi diff --git 
a/lib/nnc/cmd/blas/mps/ccv_nnc_add_mps.m b/lib/nnc/cmd/blas/mps/ccv_nnc_add_mps.m index 38ae0a87b..63d16a1a6 100644 --- a/lib/nnc/cmd/blas/mps/ccv_nnc_add_mps.m +++ b/lib/nnc/cmd/blas/mps/ccv_nnc_add_mps.m @@ -23,7 +23,7 @@ static int _ccv_nnc_add_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, if (inputs[1] == 0) { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (p == 1) { MPSGraph* graph = [MPSGraph new]; @@ -52,13 +52,13 @@ static int _ccv_nnc_add_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &c, (int*[]){ c->info.dim }, (int*[]){ c->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } const ccv_nnc_tensor_view_t* const b = (const ccv_nnc_tensor_view_t*)inputs[1]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[2]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -89,7 +89,7 @@ static int _ccv_nnc_add_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, MPSGraphTensorData* data_b = ccv_nnc_mps_graph_tensor_data(b, b->info.dim, b->stride); MPSGraphTensorData* data[] = {data_a, data_b}; 
ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &c, (int*[]){ c->info.dim }, (int*[]){ c->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/blas/mps/ccv_nnc_gemm_mps.m b/lib/nnc/cmd/blas/mps/ccv_nnc_gemm_mps.m index ef7465c3a..bb2caa4d7 100644 --- a/lib/nnc/cmd/blas/mps/ccv_nnc_gemm_mps.m +++ b/lib/nnc/cmd/blas/mps/ccv_nnc_gemm_mps.m @@ -114,77 +114,229 @@ static int _ccv_nnc_gemm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint if (w_batch_size == 1 && b_batch_size > 1) w_batch_inc = 0; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); - // If all these conditions are met, let's use MPS directly. - if ((!CCV_IS_TENSOR_VIEW(a) || ccv_nnc_tensor_view_is_contiguous(adim, astride)) && + const int is_contiguous = + (!CCV_IS_TENSOR_VIEW(a) || ccv_nnc_tensor_view_is_contiguous(adim, astride)) && (!CCV_IS_TENSOR_VIEW(w) || ccv_nnc_tensor_view_is_contiguous(w->info.dim, w->stride)) && - (!CCV_IS_TENSOR_VIEW(b) || ccv_nnc_tensor_view_is_contiguous(b->info.dim, b->stride)) && - a->info.datatype == w->info.datatype && a->info.datatype == b->info.datatype && - a_batch_size == w_batch_size && a_batch_size == b_batch_size && !(ccv_nnc_flags() & CCV_NNC_DISABLE_MIXED_MPS_GEMM) && - !bias) - { - id a_buffer = mpgetbuffer((ccv_nnc_tensor_t*)a); - MPSMatrix* leftMatrix = [[MPSMatrix alloc] initWithBuffer:a_buffer offset:a->dataof descriptor:[MPSMatrixDescriptor matrixDescriptorWithRows:(is_transpose_a ? a_cols : a_rows) columns:(is_transpose_a ? a_rows : a_cols) matrices:b_batch_size rowBytes:CCV_GET_DATA_TYPE_SIZE(a->info.datatype) * (is_transpose_a ? 
a_cols_inc : a_rows_inc) matrixBytes:CCV_GET_DATA_TYPE_SIZE(a->info.datatype) * a_batch_inc dataType:ccv_nnc_mps_datatype(a->info.datatype)]]; - id w_buffer = mpgetbuffer((ccv_nnc_tensor_t*)w); - MPSMatrix* rightMatrix = [[MPSMatrix alloc] initWithBuffer:w_buffer offset:w->dataof descriptor:[MPSMatrixDescriptor matrixDescriptorWithRows:(is_transpose_w ? w_cols : w_rows) columns:(is_transpose_w ? w_rows : w_cols) matrices:b_batch_size rowBytes:CCV_GET_DATA_TYPE_SIZE(w->info.datatype) * (is_transpose_w ? w_cols_inc : w_rows_inc) matrixBytes:CCV_GET_DATA_TYPE_SIZE(w->info.datatype) * w_batch_inc dataType:ccv_nnc_mps_datatype(w->info.datatype)]]; - id b_buffer = mpgetbuffer((ccv_nnc_tensor_t*)b); - MPSMatrix* resultMatrix = [[MPSMatrix alloc] initWithBuffer:b_buffer offset:b->dataof descriptor:[MPSMatrixDescriptor matrixDescriptorWithRows:b_rows columns:b_cols matrices:b_batch_size rowBytes:CCV_GET_DATA_TYPE_SIZE(b->info.datatype) * b_rows_inc matrixBytes:CCV_GET_DATA_TYPE_SIZE(b->info.datatype) * b_batch_inc dataType:ccv_nnc_mps_datatype(b->info.datatype)]]; - MPSMatrixMultiplication* matrixMultiplication = [[MPSMatrixMultiplication alloc] initWithDevice:ccv_nnc_default_device() transposeLeft:(is_transpose_a ? YES : NO) transposeRight:(is_transpose_w ? YES : NO) resultRows:b_rows resultColumns:b_cols interiorColumns:a_cols alpha:1 beta:0]; - [leftMatrix synchronizeOnCommandBuffer:command_buffer]; - [rightMatrix synchronizeOnCommandBuffer:command_buffer]; - [matrixMultiplication encodeToCommandBuffer:command_buffer leftMatrix:leftMatrix rightMatrix:rightMatrix resultMatrix:resultMatrix]; - [resultMatrix synchronizeOnCommandBuffer:command_buffer]; - [matrixMultiplication release]; - [leftMatrix release]; - [rightMatrix release]; - [resultMatrix release]; - // TODO: Try to use MPSMatrixFullyConnected for with bias case. 
+ (!CCV_IS_TENSOR_VIEW(b) || ccv_nnc_tensor_view_is_contiguous(b->info.dim, b->stride)); + + const int is_same_dtype = + (a->info.datatype == w->info.datatype) && + (a->info.datatype == b->info.datatype); + + int is_supported_dtype = 0; + uint32_t mtl_data_type = UINT32_MAX; + switch (a->info.datatype) { + case CCV_16F: { + is_supported_dtype = 1; + mtl_data_type = 16; + break; + } + case CCV_32F: { + is_supported_dtype = 1; + mtl_data_type = 3; + break; + } + default: { + break; + } + } + + const int is_same_batch = + (a_batch_size == w_batch_size) && + (a_batch_size == b_batch_size); + + // NNC uses the convention B = A * W. + // MFA uses the convention C = A * B. + int is_batched = 0; + int is_mfa_compatible_batch = 0; + int A_batch_size = a_batch_size; + int B_batch_size = w_batch_size; + int C_batch_size = b_batch_size; + if (A_batch_size == 1 && B_batch_size == 1 && C_batch_size == 1) { + // Not batched. + } else if (A_batch_size <= 0 || B_batch_size <= 0 || C_batch_size <= 0) { + // Invalid batch size. } else { - // Otherwise, use MPSGraph - ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); - // Key will be consumed by the next method, therefore, no need to free. 
- int indices[3]; - MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { - MPSGraphTensor* mps_input_a; - MPSGraphTensor* mps_a = ccv_nnc_mps_graph_tensor_input(graph, a, adim_r, astride_r, &mps_input_a); - MPSGraphTensor* mps_input_w; - MPSGraphTensor* mps_w = ccv_nnc_mps_graph_tensor_input(graph, w, w->info.dim, w->stride, &mps_input_w); - MPSGraphShapedType* mps_a_shape = ccv_nnc_mps_graph_tensor_input_shape(a, adim_r, astride_r); - MPSGraphShapedType* mps_w_shape = ccv_nnc_mps_graph_tensor_input_shape(w, w->info.dim, w->stride); - if (is_transpose_a) - mps_a = [graph transposeTensor:mps_a dimension:-2 withDimension:-1 name:nil]; - if (is_transpose_w) - mps_w = [graph transposeTensor:mps_w dimension:-2 withDimension:-1 name:nil]; - MPSGraphTensor* mps_b = [graph matrixMultiplicationWithPrimaryTensor:mps_a secondaryTensor:mps_w name:nil]; - [inputTensors addObject:mps_input_a]; - [inputShapedTypes addObject:mps_a_shape]; - [inputTensors addObject:mps_input_w]; - [inputShapedTypes addObject:mps_w_shape]; - if (bias) + is_batched = 1; + if (A_batch_size == C_batch_size) { + if (A_batch_size == B_batch_size) { + is_mfa_compatible_batch = 1; + } else if (B_batch_size == 1) { + is_mfa_compatible_batch = 1; + } + } + } + + ccv_nnc_mfa_context_t* context = ccv_nnc_default_mfa_context(); + const int is_mfa_supported = + ccv_nnc_mfa_context_supported(context) && is_contiguous && is_same_dtype && is_supported_dtype && (is_mfa_compatible_batch || !is_batched) && !bias; + + if (METAL_LOG_LEVEL(context) >= 3) + { + if (is_mfa_supported) + { + ccv_nnc_mfa_log_message("Compatible GEMM found."); + } else { + ccv_nnc_mfa_log_message("Incompatible GEMM found. 
Incompatible because:"); + if (!is_contiguous) + { + ccv_nnc_mfa_log_message(" Strided."); + } + if (!is_same_dtype) + { + ccv_nnc_mfa_log_message(" Mixed precision."); + } + if (!is_supported_dtype) { - MPSGraphTensor* mps_input_bias; - MPSGraphTensor* mps_bias = ccv_nnc_mps_graph_tensor_input(graph, bias, biasdim_r, biasstride_r, &mps_input_bias); - MPSGraphShapedType* mps_bias_shape = ccv_nnc_mps_graph_tensor_input_shape(bias, biasdim_r, biasstride_r); - // Add support broadcast directly. - mps_b = [graph additionWithPrimaryTensor:mps_b secondaryTensor:mps_bias name:nil]; - [inputTensors addObject:mps_input_bias]; - [inputShapedTypes addObject:mps_bias_shape]; + ccv_nnc_mfa_log_message(" Unsupported data type."); } - [resultTensors addObject:mps_b]; - }); - MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, adim, astride); - MPSGraphTensorData* data_w = ccv_nnc_mps_graph_tensor_data(w, w->info.dim, w->stride); - if (bias) + if (!(is_mfa_compatible_batch || !is_batched)) + { + ccv_nnc_mfa_log_message(" Unsupported batch."); + } + if (!(!bias)) + { + ccv_nnc_mfa_log_message(" Requires fused activations."); + } + } + } + + if (is_mfa_supported) + { + // On supported devices, use Metal directly. + ccv_nnc_mfa_gemm_params_t params = { + .data_type = mtl_data_type, + .M = (uint32_t)b_rows, // C_rows + .N = (uint32_t)b_cols, // C_cols + .K = (uint32_t)w_rows, // B_rows + .A_trans = (is_transpose_a ? 1 : 0), + .B_trans = (is_transpose_w ? 1 : 0), + .alpha = (float)1.0, + .beta = (float)0.0, + .batched = is_batched, + .fused_activation = 0, + + .batch_dims_a = { 0 }, + .batch_dims_b = { 0 }, + }; + if (is_batched) { + // Create a null-terminated list of batch dimensions.
+ int A_batch_dim = a_nd - 2; + for (int i = 0; i < A_batch_dim; ++i) { + params.batch_dims_a[i] = adim[i]; + } + if (A_batch_dim < CCV_NNC_MAX_DIM_ALLOC) { + params.batch_dims_a[A_batch_dim] = 0; + } + + int B_batch_dim = w_nd - 2; + for (int i = 0; i < B_batch_dim; ++i) { + params.batch_dims_b[i] = w->info.dim[i]; + } + if (B_batch_dim < CCV_NNC_MAX_DIM_ALLOC) { + params.batch_dims_b[B_batch_dim] = 0; + } + } + ccv_nnc_mfa_sync_prepare_gemm(context, params); + + // Creating a new command buffer has a >10 µs penalty CPU-side. Still + // faster than the >50 µs penalty for MPSGraph (probably why + // MPSMatrixMultiplication is faster for GEMM). + mtl_command_batch_t* command_batch = ccv_nnc_stream_context_start_command_batch(stream_context); + mtl_buffer_t* tensors[4] = { + mpgetbuffer((ccv_nnc_tensor_t*)a), // A + mpgetbuffer((ccv_nnc_tensor_t*)w), // B + mpgetbuffer((ccv_nnc_tensor_t*)b), // C + NULL + }; + size_t tensor_offsets[3] = { + a->dataof, // A offset + w->dataof, // B offset + b->dataof, // C offset + }; + ccv_nnc_mfa_encode_gemm(context, params, command_batch, tensors, tensor_offsets); + + // TODO: Add this diagnostic once we consistently capture >>1 commands/batch. +// if (METAL_LOG_LEVEL(context) >= 3) { +// if (command_batch->batched_command_count == 0) { +// ccv_nnc_mfa_log_message("Encoded 0 commands in the batch."); +// } else if (command_batch->batched_command_count == 1) { +// ccv_nnc_mfa_log_message("Encoded 1 command in the batch."); +// } else { +// ccv_nnc_mfa_log_message("Encoded >1 commands in the batch."); +// } +// } + ccv_nnc_stream_context_finish_command_batch(stream_context, command_batch); + // TODO: Try to use `fused_activation` for the with-bias case. + } else { + // Otherwise, incur the ~10-50 microsecond latency of MPS. + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); + + // If all conditions are met, use MPSMatrixMultiplication.
+ if (is_contiguous && is_same_dtype && is_same_batch && !(ccv_nnc_flags() & CCV_NNC_DISABLE_MIXED_MPS_GEMM) && !bias) { - MPSGraphTensorData* data_bias = ccv_nnc_mps_graph_tensor_data(bias, biasdim, biasstride); - MPSGraphTensorData* data[] = {data_a, data_w, data_bias}; - ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]], data[indices[2]]], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); + id a_buffer = mpgetbuffer((ccv_nnc_tensor_t*)a); + MPSMatrix* leftMatrix = [[MPSMatrix alloc] initWithBuffer:a_buffer offset:a->dataof descriptor:[MPSMatrixDescriptor matrixDescriptorWithRows:(is_transpose_a ? a_cols : a_rows) columns:(is_transpose_a ? a_rows : a_cols) matrices:b_batch_size rowBytes:CCV_GET_DATA_TYPE_SIZE(a->info.datatype) * (is_transpose_a ? a_cols_inc : a_rows_inc) matrixBytes:CCV_GET_DATA_TYPE_SIZE(a->info.datatype) * a_batch_inc dataType:ccv_nnc_mps_datatype(a->info.datatype)]]; + id w_buffer = mpgetbuffer((ccv_nnc_tensor_t*)w); + MPSMatrix* rightMatrix = [[MPSMatrix alloc] initWithBuffer:w_buffer offset:w->dataof descriptor:[MPSMatrixDescriptor matrixDescriptorWithRows:(is_transpose_w ? w_cols : w_rows) columns:(is_transpose_w ? w_rows : w_cols) matrices:b_batch_size rowBytes:CCV_GET_DATA_TYPE_SIZE(w->info.datatype) * (is_transpose_w ? 
w_cols_inc : w_rows_inc) matrixBytes:CCV_GET_DATA_TYPE_SIZE(w->info.datatype) * w_batch_inc dataType:ccv_nnc_mps_datatype(w->info.datatype)]]; + id b_buffer = mpgetbuffer((ccv_nnc_tensor_t*)b); + MPSMatrix* resultMatrix = [[MPSMatrix alloc] initWithBuffer:b_buffer offset:b->dataof descriptor:[MPSMatrixDescriptor matrixDescriptorWithRows:b_rows columns:b_cols matrices:b_batch_size rowBytes:CCV_GET_DATA_TYPE_SIZE(b->info.datatype) * b_rows_inc matrixBytes:CCV_GET_DATA_TYPE_SIZE(b->info.datatype) * b_batch_inc dataType:ccv_nnc_mps_datatype(b->info.datatype)]]; + MPSMatrixMultiplication* matrixMultiplication = [[MPSMatrixMultiplication alloc] initWithDevice:ccv_nnc_default_device() transposeLeft:(is_transpose_a ? YES : NO) transposeRight:(is_transpose_w ? YES : NO) resultRows:b_rows resultColumns:b_cols interiorColumns:a_cols alpha:1 beta:0]; + [leftMatrix synchronizeOnCommandBuffer:command_buffer]; + [rightMatrix synchronizeOnCommandBuffer:command_buffer]; + [matrixMultiplication encodeToCommandBuffer:command_buffer leftMatrix:leftMatrix rightMatrix:rightMatrix resultMatrix:resultMatrix]; + [resultMatrix synchronizeOnCommandBuffer:command_buffer]; + [matrixMultiplication release]; + [leftMatrix release]; + [rightMatrix release]; + [resultMatrix release]; } else { - MPSGraphTensorData* data[] = {data_a, data_w}; - ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); + // Otherwise, use MPSGraph. + ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); + // Key will be consumed by the next method, therefore, no need to free. 
+ int indices[3]; + MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { + MPSGraphTensor* mps_input_a; + MPSGraphTensor* mps_a = ccv_nnc_mps_graph_tensor_input(graph, a, adim_r, astride_r, &mps_input_a); + MPSGraphTensor* mps_input_w; + MPSGraphTensor* mps_w = ccv_nnc_mps_graph_tensor_input(graph, w, w->info.dim, w->stride, &mps_input_w); + MPSGraphShapedType* mps_a_shape = ccv_nnc_mps_graph_tensor_input_shape(a, adim_r, astride_r); + MPSGraphShapedType* mps_w_shape = ccv_nnc_mps_graph_tensor_input_shape(w, w->info.dim, w->stride); + if (is_transpose_a) + mps_a = [graph transposeTensor:mps_a dimension:-2 withDimension:-1 name:nil]; + if (is_transpose_w) + mps_w = [graph transposeTensor:mps_w dimension:-2 withDimension:-1 name:nil]; + MPSGraphTensor* mps_b = [graph matrixMultiplicationWithPrimaryTensor:mps_a secondaryTensor:mps_w name:nil]; + [inputTensors addObject:mps_input_a]; + [inputShapedTypes addObject:mps_a_shape]; + [inputTensors addObject:mps_input_w]; + [inputShapedTypes addObject:mps_w_shape]; + if (bias) + { + MPSGraphTensor* mps_input_bias; + MPSGraphTensor* mps_bias = ccv_nnc_mps_graph_tensor_input(graph, bias, biasdim_r, biasstride_r, &mps_input_bias); + MPSGraphShapedType* mps_bias_shape = ccv_nnc_mps_graph_tensor_input_shape(bias, biasdim_r, biasstride_r); + // Add support broadcast directly. 
+ mps_b = [graph additionWithPrimaryTensor:mps_b secondaryTensor:mps_bias name:nil]; + [inputTensors addObject:mps_input_bias]; + [inputShapedTypes addObject:mps_bias_shape]; + } + [resultTensors addObject:mps_b]; + }); + MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, adim, astride); + MPSGraphTensorData* data_w = ccv_nnc_mps_graph_tensor_data(w, w->info.dim, w->stride); + if (bias) + { + MPSGraphTensorData* data_bias = ccv_nnc_mps_graph_tensor_data(bias, biasdim, biasstride); + MPSGraphTensorData* data[] = {data_a, data_w, data_bias}; + ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]], data[indices[2]]], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); + } else { + MPSGraphTensorData* data[] = {data_a, data_w}; + ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); + } } + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/blas/mps/ccv_nnc_mul_mps.m b/lib/nnc/cmd/blas/mps/ccv_nnc_mul_mps.m index 681bc2612..ed068ef97 100644 --- a/lib/nnc/cmd/blas/mps/ccv_nnc_mul_mps.m +++ b/lib/nnc/cmd/blas/mps/ccv_nnc_mul_mps.m @@ -22,7 +22,7 @@ static int _ccv_nnc_mul_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, if (inputs[1] == 0) { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (p == 1) { MPSGraph* graph = [MPSGraph new]; @@ -50,13 +50,13 @@ static int _ccv_nnc_mul_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); 
ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &c, (int*[]){ c->info.dim }, (int*[]){ c->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } const ccv_nnc_tensor_view_t* const b = (const ccv_nnc_tensor_view_t*)inputs[1]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[2]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -82,7 +82,7 @@ static int _ccv_nnc_mul_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, MPSGraphTensorData* data_b = ccv_nnc_mps_graph_tensor_data(b, b->info.dim, b->stride); MPSGraphTensorData* data[] = {data_a, data_b}; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &c, (int*[]){ c->info.dim }, (int*[]){ c->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -102,7 +102,7 @@ static int _ccv_nnc_scalar_mul_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = 
ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (p == 1) { MPSGraph* graph = [MPSGraph new]; @@ -131,7 +131,7 @@ static int _ccv_nnc_scalar_mul_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &c, (int*[]){ c->info.dim }, (int*[]){ c->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/convolution/mps/ccv_nnc_conv_mps.m b/lib/nnc/cmd/convolution/mps/ccv_nnc_conv_mps.m index 6730b8191..0c1b56b4a 100644 --- a/lib/nnc/cmd/convolution/mps/ccv_nnc_conv_mps.m +++ b/lib/nnc/cmd/convolution/mps/ccv_nnc_conv_mps.m @@ -51,7 +51,7 @@ static int _ccv_nnc_conv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint } } @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int* adim_r = adim; int* astride_r = astride; @@ -96,7 +96,7 @@ static int _ccv_nnc_conv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint MPSGraphTensorData* data[] = {data_a, data_w}; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &b, (int*[]){ bdim }, (int*[]){ bstride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/ew/mps/ccv_nnc_ew_mps.m b/lib/nnc/cmd/ew/mps/ccv_nnc_ew_mps.m index d5d5d886a..d2aded1d1 100644 --- 
a/lib/nnc/cmd/ew/mps/ccv_nnc_ew_mps.m +++ b/lib/nnc/cmd/ew/mps/ccv_nnc_ew_mps.m @@ -19,7 +19,7 @@ static int _ccv_nnc_ewsum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin } ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int* indices = (int*)ccv_nnc_stream_context_get_workspace(stream_context, (sizeof(int) + sizeof(MPSGraphTensorData*)) * input_size, CCV_TENSOR_CPU_MEMORY); MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -58,7 +58,7 @@ static int _ccv_nnc_ewsum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin [feeds addObject:data[indices[z]]]; ccv_nnc_mps_graph_executable_result(executable, command_buffer, feeds, &c, (int*[]){ c->info.dim }, (int*[]){ c->stride }, 1); [feeds release]; - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -88,7 +88,7 @@ static int _ccv_nnc_ewdiv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++) { assert(a->info.dim[i] == b->info.dim[i]); } @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[2]; MPSGraphExecutable* 
executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -114,7 +114,7 @@ static int _ccv_nnc_ewdiv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin } } else { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -154,7 +154,7 @@ static int _ccv_nnc_ewexp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++) { assert(a->info.dim[i] == c->info.dim[i]); } @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -193,7 +193,7 @@ static int _ccv_nnc_ewlog_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++) { assert(a->info.dim[i] == c->info.dim[i]); } @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = 
ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -232,7 +232,7 @@ static int _ccv_nnc_ewsqrt_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hi for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++) { assert(a->info.dim[i] == c->info.dim[i]); } @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -276,7 +276,7 @@ static int _ccv_nnc_clamp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin if (isnan(minv)) { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -296,7 +296,7 @@ static int _ccv_nnc_clamp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin } } else if (isnan(maxv)) { @autoreleasepool { - 
MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -316,7 +316,7 @@ static int _ccv_nnc_clamp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin } } else { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { diff --git a/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m b/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m index 955c933f7..73a22376b 100644 --- a/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m +++ b/lib/nnc/cmd/gelu/mps/ccv_nnc_gelu_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_gelu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); 
int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -48,7 +48,7 @@ static int _ccv_nnc_gelu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/index/mps/ccv_nnc_index_select_mps.m b/lib/nnc/cmd/index/mps/ccv_nnc_index_select_mps.m index d0e9ce7e8..6793ab983 100644 --- a/lib/nnc/cmd/index/mps/ccv_nnc_index_select_mps.m +++ b/lib/nnc/cmd/index/mps/ccv_nnc_index_select_mps.m @@ -23,7 +23,7 @@ static int _ccv_nnc_index_select_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hin ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; assert(ccv_nnc_tensor_nd(b->info.dim) <= 2); @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int idx[2]; int indices_dim[CCV_NNC_MAX_DIM_ALLOC] = {0}; @@ -102,7 +102,7 @@ static int _ccv_nnc_index_select_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hin MPSGraphTensorData* data_indices = ccv_nnc_mps_graph_tensor_data(indices, indices_dim, indices_stride); MPSGraphTensorData* data[] = {data_a, data_indices}; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[idx[0]], data[idx[1]]], &b, (int*[]){ b->info.dim }, (int*[]){ 
b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m b/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m index cf00f4bdf..4bc621ef2 100644 --- a/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m +++ b/lib/nnc/cmd/leaky_relu/mps/ccv_nnc_leaky_relu_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_leaky_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; const double alpha = (double)cmd.info.leaky_relu.negative_slope; @@ -27,7 +27,7 @@ static int _ccv_nnc_leaky_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/norm/mps/ccv_nnc_group_norm_mps.m b/lib/nnc/cmd/norm/mps/ccv_nnc_group_norm_mps.m index c27daa44d..0ad2b5f72 100644 --- a/lib/nnc/cmd/norm/mps/ccv_nnc_group_norm_mps.m +++ b/lib/nnc/cmd/norm/mps/ccv_nnc_group_norm_mps.m @@ -54,7 +54,7 @@ static int _ccv_nnc_group_norm_forw(const ccv_nnc_cmd_t cmd, const 
ccv_nnc_hint_ &bt }, 4); @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[3]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -140,7 +140,7 @@ static int _ccv_nnc_group_norm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]], data[indices[2]]], (ccv_nnc_tensor_view_t* []){ (ccv_nnc_tensor_view_t*)outputs[0], &saved_meant, &saved_inv_stdt }, (int*[]){ outputs[0]->info.dim, saved_meant.info.dim, saved_inv_stdt.info.dim }, (int*[]){ ((ccv_nnc_tensor_view_t*)outputs[0])->stride, saved_meant.stride, saved_inv_stdt.stride }, 3); else ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]], data[indices[2]]], (ccv_nnc_tensor_view_t* []){ &bt, &saved_meant, &saved_inv_stdt }, (int*[]){ bt.info.dim, saved_meant.info.dim, saved_inv_stdt.info.dim }, (int*[]){ bt.stride, saved_meant.stride, saved_inv_stdt.stride }, 3); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/norm/mps/ccv_nnc_layer_norm_mps.m b/lib/nnc/cmd/norm/mps/ccv_nnc_layer_norm_mps.m index 489784b71..4967b49c8 100644 --- a/lib/nnc/cmd/norm/mps/ccv_nnc_layer_norm_mps.m +++ b/lib/nnc/cmd/norm/mps/ccv_nnc_layer_norm_mps.m @@ -22,7 +22,7 @@ static int _ccv_nnc_layer_norm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ &bt }, 4); @autoreleasepool { - 
MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[3]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -79,7 +79,7 @@ static int _ccv_nnc_layer_norm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ MPSGraphTensorData* data_bias = ccv_nnc_mps_graph_tensor_data(&biast, biast.info.dim, biast.stride); MPSGraphTensorData* data[] = {data_a, data_scale, data_bias}; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]], data[indices[2]]], (ccv_nnc_tensor_view_t* []){ &bt, &saved_meant, &saved_inv_stdt }, (int*[]){ bt.info.dim, saved_meant.info.dim, saved_inv_stdt.info.dim }, (int*[]){ bt.stride, saved_meant.stride, saved_inv_stdt.stride }, 3); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/pool/mps/ccv_nnc_avg_pool_mps.m b/lib/nnc/cmd/pool/mps/ccv_nnc_avg_pool_mps.m index 4e8d69703..e3e655569 100644 --- a/lib/nnc/cmd/pool/mps/ccv_nnc_avg_pool_mps.m +++ b/lib/nnc/cmd/pool/mps/ccv_nnc_avg_pool_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_avg_pool_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = 
ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -27,7 +27,7 @@ static int _ccv_nnc_avg_pool_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/pool/mps/ccv_nnc_max_pool_mps.m b/lib/nnc/cmd/pool/mps/ccv_nnc_max_pool_mps.m index 8f2f6beea..39eaf50f3 100644 --- a/lib/nnc/cmd/pool/mps/ccv_nnc_max_pool_mps.m +++ b/lib/nnc/cmd/pool/mps/ccv_nnc_max_pool_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_max_pool_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -27,7 +27,7 @@ static int 
_ccv_nnc_max_pool_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/rand/mps/ccv_nnc_rand_normal_mps.m b/lib/nnc/cmd/rand/mps/ccv_nnc_rand_normal_mps.m index cc6a6e46f..0e91a5e0d 100644 --- a/lib/nnc/cmd/rand/mps/ccv_nnc_rand_normal_mps.m +++ b/lib/nnc/cmd/rand/mps/ccv_nnc_rand_normal_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_random_normal(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const float std = cmd.info.blas.a[0]; const float mean = cmd.info.blas.a[1]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[i]; @@ -46,7 +46,7 @@ static int _ccv_nnc_random_normal(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t ccv_nnc_mps_graph_result(graph, command_buffer, @{}, mps_r[0], a, a->info.dim, a->stride); [graph release]; } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/rand/mps/ccv_nnc_rand_uniform_mps.m b/lib/nnc/cmd/rand/mps/ccv_nnc_rand_uniform_mps.m index cc57d6f0f..a56eb744c 100644 --- a/lib/nnc/cmd/rand/mps/ccv_nnc_rand_uniform_mps.m +++ b/lib/nnc/cmd/rand/mps/ccv_nnc_rand_uniform_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_random_uniform(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const 
float l = cmd.info.blas.a[0]; const float u = cmd.info.blas.a[1]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[i]; @@ -30,7 +30,7 @@ static int _ccv_nnc_random_uniform(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t ccv_nnc_mps_graph_result(graph, command_buffer, @{}, mps_a, a, a->info.dim, a->stride); [graph release]; } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/reduce/mps/ccv_nnc_argmax_mps.m b/lib/nnc/cmd/reduce/mps/ccv_nnc_argmax_mps.m index e9a38c36b..fcda2de1c 100644 --- a/lib/nnc/cmd/reduce/mps/ccv_nnc_argmax_mps.m +++ b/lib/nnc/cmd/reduce/mps/ccv_nnc_argmax_mps.m @@ -23,7 +23,7 @@ static int _ccv_nnc_argmax_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hi noop = btv.info.dim[i] != atv.info.dim[i]; const int axis = cmd.info.reduce.axis[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (noop) { MPSGraph* graph = [MPSGraph new]; @@ -51,7 +51,7 @@ static int _ccv_nnc_argmax_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hi MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(&atv, atv.info.dim, atv.stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &tvs[1], (int*[]){ btv.info.dim }, (int*[]){ btv.stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return 
CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/reduce/mps/ccv_nnc_argmin_mps.m b/lib/nnc/cmd/reduce/mps/ccv_nnc_argmin_mps.m index 3188476ea..2737ec138 100644 --- a/lib/nnc/cmd/reduce/mps/ccv_nnc_argmin_mps.m +++ b/lib/nnc/cmd/reduce/mps/ccv_nnc_argmin_mps.m @@ -23,7 +23,7 @@ static int _ccv_nnc_argmin_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hi noop = btv.info.dim[i] != atv.info.dim[i]; const int axis = cmd.info.reduce.axis[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (noop) { MPSGraph* graph = [MPSGraph new]; @@ -51,7 +51,7 @@ static int _ccv_nnc_argmin_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hi MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(&atv, atv.info.dim, atv.stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &tvs[1], (int*[]){ btv.info.dim }, (int*[]){ btv.stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_max_mps.m b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_max_mps.m index a5266340d..7b317805e 100644 --- a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_max_mps.m +++ b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_max_mps.m @@ -21,7 +21,7 @@ static int _ccv_nnc_reduce_max_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ for (i = 0; noop && i < a_nd; i++) noop = btv.info.dim[i] != atv.info.dim[i]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (noop) { MPSGraph* graph = [MPSGraph new]; @@ -55,7 +55,7 @@ static int _ccv_nnc_reduce_max_forw(const 
ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(&atv, atv.info.dim, atv.stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &tvs[1], (int*[]){ btv.info.dim }, (int*[]){ btv.stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_mean_mps.m b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_mean_mps.m index 7ede989ba..6049e1739 100644 --- a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_mean_mps.m +++ b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_mean_mps.m @@ -21,7 +21,7 @@ static int _ccv_nnc_reduce_mean_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint for (i = 0; noop && i < a_nd; i++) noop = btv.info.dim[i] != atv.info.dim[i]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (noop) { MPSGraph* graph = [MPSGraph new]; @@ -55,7 +55,7 @@ static int _ccv_nnc_reduce_mean_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(&atv, atv.info.dim, atv.stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &tvs[1], (int*[]){ btv.info.dim }, (int*[]){ btv.stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_min_mps.m b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_min_mps.m index 38d6fac6c..d36ec6a24 100644 --- a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_min_mps.m +++ b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_min_mps.m @@ -21,7 +21,7 @@ static int 
_ccv_nnc_reduce_min_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ for (i = 0; noop && i < a_nd; i++) noop = btv.info.dim[i] != atv.info.dim[i]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (noop) { MPSGraph* graph = [MPSGraph new]; @@ -55,7 +55,7 @@ static int _ccv_nnc_reduce_min_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(&atv, atv.info.dim, atv.stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &tvs[1], (int*[]){ btv.info.dim }, (int*[]){ btv.stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_sum_mps.m b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_sum_mps.m index 077d7114e..ab85cb984 100644 --- a/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_sum_mps.m +++ b/lib/nnc/cmd/reduce/mps/ccv_nnc_reduce_sum_mps.m @@ -21,7 +21,7 @@ static int _ccv_nnc_reduce_sum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ for (i = 0; noop && i < a_nd; i++) noop = btv.info.dim[i] != atv.info.dim[i]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); if (noop) { MPSGraph* graph = [MPSGraph new]; @@ -55,7 +55,7 @@ static int _ccv_nnc_reduce_sum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_ MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(&atv, atv.info.dim, atv.stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &tvs[1], (int*[]){ btv.info.dim }, (int*[]){ btv.stride }, 1); } - 
ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/relu/mps/ccv_nnc_relu_mps.m b/lib/nnc/cmd/relu/mps/ccv_nnc_relu_mps.m index 56009ebe2..54ba2d557 100644 --- a/lib/nnc/cmd/relu/mps/ccv_nnc_relu_mps.m +++ b/lib/nnc/cmd/relu/mps/ccv_nnc_relu_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -26,7 +26,7 @@ static int _ccv_nnc_relu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/sigmoid/mps/ccv_nnc_sigmoid_mps.m b/lib/nnc/cmd/sigmoid/mps/ccv_nnc_sigmoid_mps.m index 58c689677..1bd6487ad 100644 --- a/lib/nnc/cmd/sigmoid/mps/ccv_nnc_sigmoid_mps.m +++ b/lib/nnc/cmd/sigmoid/mps/ccv_nnc_sigmoid_mps.m @@ -12,7 +12,7 @@ 
static int _ccv_nnc_sigmoid_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t h const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -26,7 +26,7 @@ static int _ccv_nnc_sigmoid_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t h }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -39,7 +39,7 @@ static int _ccv_nnc_sigmoid_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t h const ccv_nnc_tensor_view_t* const b = (const ccv_nnc_tensor_view_t*)inputs[2]; ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* 
graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -65,7 +65,7 @@ static int _ccv_nnc_sigmoid_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t h MPSGraphTensorData* data_b = ccv_nnc_mps_graph_tensor_data(b, b->info.dim, b->stride); MPSGraphTensorData* data[] = {data_g, data_b}; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data[indices[0]], data[indices[1]]], &h, (int*[]){ h->info.dim }, (int*[]){ h->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/softmax/mps/ccv_nnc_softmax_mps.m b/lib/nnc/cmd/softmax/mps/ccv_nnc_softmax_mps.m index 3249aec98..44595cb84 100644 --- a/lib/nnc/cmd/softmax/mps/ccv_nnc_softmax_mps.m +++ b/lib/nnc/cmd/softmax/mps/ccv_nnc_softmax_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_softmax_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t h const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); const int a_nd = ccv_nnc_tensor_nd(a->info.dim); const int b_nd = ccv_nnc_tensor_nd(b->info.dim); if (a_nd <= 2 && b_nd <= 2 && !(ccv_nnc_flags() & CCV_NNC_DISABLE_MIXED_MPS_SOFTMAX)) // Simple case, we use MPS directly. 
@@ -73,7 +73,7 @@ static int _ccv_nnc_softmax_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t h MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/swish/mps/ccv_nnc_swish_mps.m b/lib/nnc/cmd/swish/mps/ccv_nnc_swish_mps.m index d9c5146a3..8834368fb 100644 --- a/lib/nnc/cmd/swish/mps/ccv_nnc_swish_mps.m +++ b/lib/nnc/cmd/swish/mps/ccv_nnc_swish_mps.m @@ -12,7 +12,7 @@ static int _ccv_nnc_swish_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -30,7 +30,7 @@ static int _ccv_nnc_swish_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hin }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + 
ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/cmd/upsample/mps/ccv_nnc_upsample_mps.m b/lib/nnc/cmd/upsample/mps/ccv_nnc_upsample_mps.m index b9d5de984..b6f3c6985 100644 --- a/lib/nnc/cmd/upsample/mps/ccv_nnc_upsample_mps.m +++ b/lib/nnc/cmd/upsample/mps/ccv_nnc_upsample_mps.m @@ -39,7 +39,7 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc if (a->info.format == CCV_TENSOR_FORMAT_NCHW) { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -53,12 +53,12 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, adim, astride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ bdim }, (int*[]){ bstride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } } else { assert(a->info.format == CCV_TENSOR_FORMAT_NHWC); @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = 
ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -72,7 +72,7 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, adim, astride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ bdim }, (int*[]){ bstride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } } return CCV_NNC_EXEC_SUCCESS; @@ -104,7 +104,7 @@ static int _ccv_nnc_upsample_bilinear_forw(const ccv_nnc_cmd_t cmd, const ccv_nn if (a->info.format == CCV_TENSOR_FORMAT_NCHW) { @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -118,12 +118,12 @@ static int _ccv_nnc_upsample_bilinear_forw(const ccv_nnc_cmd_t cmd, const ccv_nn }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, adim, astride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ bdim }, (int*[]){ bstride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } } else { assert(a->info.format == CCV_TENSOR_FORMAT_NHWC); @autoreleasepool { - MPSCommandBuffer* command_buffer = 
ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); ccv_nnc_mps_graph_key_t key = ccv_nnc_mps_graph_key_new(cmd, hint, flags, inputs, input_size, outputs, output_size); int indices[1]; MPSGraphExecutable* executable = ccv_nnc_mps_graph_executable_cache(key, indices, ^void (MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors) { @@ -137,7 +137,7 @@ static int _ccv_nnc_upsample_bilinear_forw(const ccv_nnc_cmd_t cmd, const ccv_nn }); MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, adim, astride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ bdim }, (int*[]){ bstride }, 1); - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } } return CCV_NNC_EXEC_SUCCESS; diff --git a/lib/nnc/cmd/util/mps/ccv_nnc_util_mps.m b/lib/nnc/cmd/util/mps/ccv_nnc_util_mps.m index 0bde6ed1c..dc89a0210 100644 --- a/lib/nnc/cmd/util/mps/ccv_nnc_util_mps.m +++ b/lib/nnc/cmd/util/mps/ccv_nnc_util_mps.m @@ -29,11 +29,11 @@ static int _ccv_nnc_data_transfer(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const off_t offset_b = mpgetoffset(b); @autoreleasepool { id buffer_a = [ccv_nnc_default_device() newBufferWithBytesNoCopy:aligned_ptr length:aligned_size options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared deallocator:nil]; - id command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + id command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); id encoder = [command_buffer blitCommandEncoder]; [encoder copyFromBuffer:buffer_a sourceOffset:offset_a toBuffer:buffer_b destinationOffset:offset_b size:size]; [encoder endEncoding]; - ccv_nnc_stream_context_commit_command_buffer(stream_context, 
command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); [buffer_a release]; } } else if (CCV_TENSOR_GET_MEMORY(a->info.type) == CCV_TENSOR_GPU_MEMORY && CCV_TENSOR_GET_MEMORY(b->info.type) == CCV_TENSOR_CPU_MEMORY) { @@ -44,11 +44,11 @@ static int _ccv_nnc_data_transfer(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const size_t aligned_size = ((size + offset_b + vm_page_size - 1) & -vm_page_size); @autoreleasepool { id buffer_b = [ccv_nnc_default_device() newBufferWithBytesNoCopy:aligned_ptr length:aligned_size options:MTLResourceCPUCacheModeDefaultCache | MTLResourceStorageModeShared deallocator:nil]; - id command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + id command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); id encoder = [command_buffer blitCommandEncoder]; [encoder copyFromBuffer:buffer_a sourceOffset:offset_a toBuffer:buffer_b destinationOffset:offset_b size:size]; [encoder endEncoding]; - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); [buffer_b release]; } } else if (CCV_TENSOR_GET_MEMORY(a->info.type) == CCV_TENSOR_CPU_MEMORY && CCV_TENSOR_GET_MEMORY(b->info.type) == CCV_TENSOR_CPU_MEMORY) @@ -62,11 +62,11 @@ static int _ccv_nnc_data_transfer(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t const off_t offset_a = mpgetoffset(a); const off_t offset_b = mpgetoffset(b); @autoreleasepool { - id command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + id command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); id encoder = [command_buffer blitCommandEncoder]; [encoder copyFromBuffer:buffer_a sourceOffset:offset_a toBuffer:buffer_b destinationOffset:offset_b size:size]; [encoder endEncoding]; - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + 
ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } } } @@ -96,7 +96,7 @@ static int _ccv_nnc_transpose(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint assert(output_size <= input_size); int i; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[i]; @@ -115,7 +115,7 @@ static int _ccv_nnc_transpose(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint MPSGraphTensorData* data_a = ccv_nnc_mps_graph_tensor_data(a, a->info.dim, a->stride); ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[data_a], &b, (int*[]){ b->info.dim }, (int*[]){ b->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -142,7 +142,7 @@ static int _ccv_nnc_set_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, { int i, j; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[i]; @@ -158,7 +158,7 @@ static int _ccv_nnc_set_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, [shape release]; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[], &a, (int*[]){ a->info.dim }, (int*[]){ a->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -167,7 +167,7 @@ static int 
_ccv_nnc_set_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, { int i, j; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[i]; @@ -183,7 +183,7 @@ static int _ccv_nnc_set_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, [shape release]; ccv_nnc_mps_graph_executable_result(executable, command_buffer, @[], &a, (int*[]){ a->info.dim }, (int*[]){ a->stride }, 1); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -211,7 +211,7 @@ static int _ccv_nnc_format_transform(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint assert(output_size <= input_size); int i; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[i]; @@ -311,7 +311,7 @@ static int _ccv_nnc_format_transform(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint ccv_nnc_mps_export_data(data_a, command_buffer, &bt, bdim, bstride); [graph release]; } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } @@ -339,7 +339,7 @@ static int _ccv_nnc_datatype_conversion(const ccv_nnc_cmd_t cmd, const ccv_nnc_h assert(output_size <= input_size); int i; @autoreleasepool { - MPSCommandBuffer* command_buffer = ccv_nnc_stream_context_get_command_buffer(stream_context); + MPSCommandBuffer* 
command_buffer = ccv_nnc_stream_context_start_mps_command_buffer(stream_context); for (i = 0; i < output_size; i++) { const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[i]; @@ -362,7 +362,7 @@ static int _ccv_nnc_datatype_conversion(const ccv_nnc_cmd_t cmd, const ccv_nnc_h } else ccv_nnc_mps_export_data(data_a, command_buffer, b, b->info.dim, b->stride); } - ccv_nnc_stream_context_commit_command_buffer(stream_context, command_buffer); + ccv_nnc_stream_context_finish_mps_command_buffer(stream_context, command_buffer); } return CCV_NNC_EXEC_SUCCESS; } diff --git a/lib/nnc/makefile b/lib/nnc/makefile index d886c32f3..b7a7a94ce 100644 --- a/lib/nnc/makefile +++ b/lib/nnc/makefile @@ -7,7 +7,7 @@ SRCS := ccv_nnc_cmd.c ccv_nnc_tensor.c ccv_nnc_tensor_io.c ccv_nnc_stream.c ccv_ SRC_OBJS := $(patsubst %.c,%.o,$(SRCS)) -.PHONY: release all lib clean cmd libnnc.o cmd/libnnc-cmd.o gpu/libnnc-compat-cuda.o mps/libnnc-compat-mps.o +.PHONY: release all lib clean cmd libnnc.o cmd/libnnc-cmd.o gpu/libnnc-compat-cuda.o mfa/libnnc-compat-mfa.o mps/libnnc-compat-mps.o release: all @@ -18,9 +18,9 @@ all: lib lib: libnnc.o clean: - rm -f *.o gpu/*.o && ${MAKE} -C ./gpu clean && ${MAKE} -C ./mps clean && ${MAKE} -C ./cmd clean + rm -f *.o gpu/*.o && ${MAKE} -C ./gpu clean && ${MAKE} -C ./mfa clean && ${MAKE} -C ./mps clean && ${MAKE} -C ./cmd clean -libnnc.o: $(SRC_OBJS) cmd/libnnc-cmd.o $(CUDA_COMPAT_LIB) $(MPS_COMPAT_LIB) +libnnc.o: $(SRC_OBJS) cmd/libnnc-cmd.o $(CUDA_COMPAT_LIB) $(MFA_COMPAT_LIB) $(MPS_COMPAT_LIB) ld -r $^ -o $@ %.o: %.c @@ -38,6 +38,9 @@ cmd: gpu/libnnc-compat-cuda.o: ${MAKE} -C ./gpu +mfa/libnnc-compat-mfa.o: + ${MAKE} -C ./mfa + mps/libnnc-compat-mps.o: ${MAKE} -C ./mps diff --git a/lib/nnc/mfa/.ycm_extra_conf.py b/lib/nnc/mfa/.ycm_extra_conf.py new file mode 100644 index 000000000..4829ad51e --- /dev/null +++ b/lib/nnc/mfa/.ycm_extra_conf.py @@ -0,0 +1,54 @@ +import os +import ycm_core +from clang_helpers import PrepareClangFlags + +flags = [ + 
'-ffast-math', + '-Wall', + '-msse2', + '-D HAVE_SSE2', + '-D HAVE_CBLAS', + '-D HAVE_MPS', + '-I', + '../..' +] + +def DirectoryOfThisScript(): + return os.path.dirname(os.path.abspath(__file__)) + +def MakeRelativePathsInFlagsAbsolute(flags, working_directory): + if not working_directory: + return flags + new_flags = [] + make_next_absolute = False + path_flags = ['-isystem', '-I', '-iquote', '--sysroot='] + for flag in flags: + new_flag = flag + + if make_next_absolute: + make_next_absolute = False + if not flag.startswith('/'): + new_flag = os.path.join(working_directory, flag) + + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break + + if flag.startswith(path_flag): + path = flag[len(path_flag):] + new_flag = path_flag + os.path.join(working_directory, path) + break + + if new_flag: + new_flags.append(new_flag) + return new_flags + +def FlagsForFile(filename): + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to) + return { + 'flags' : final_flags, + 'do_cache' : True + } + diff --git a/lib/nnc/mfa/3rdparty/libmfaios16-0.2.metallib b/lib/nnc/mfa/3rdparty/libmfaios16-0.2.metallib new file mode 100644 index 000000000..948c2df7a Binary files /dev/null and b/lib/nnc/mfa/3rdparty/libmfaios16-0.2.metallib differ diff --git a/lib/nnc/mfa/3rdparty/libmfamacos13-0.2.metallib b/lib/nnc/mfa/3rdparty/libmfamacos13-0.2.metallib new file mode 100644 index 000000000..5bcba51cc Binary files /dev/null and b/lib/nnc/mfa/3rdparty/libmfamacos13-0.2.metallib differ diff --git a/lib/3rdparty/metal-cpp/Dispatch.cpp b/lib/nnc/mfa/3rdparty/metal-cpp/Dispatch.cpp similarity index 100% rename from lib/3rdparty/metal-cpp/Dispatch.cpp rename to lib/nnc/mfa/3rdparty/metal-cpp/Dispatch.cpp diff --git a/lib/3rdparty/metal-cpp/Dispatch.hpp b/lib/nnc/mfa/3rdparty/metal-cpp/Dispatch.hpp similarity index 100% rename from lib/3rdparty/metal-cpp/Dispatch.hpp rename to 
lib/nnc/mfa/3rdparty/metal-cpp/Dispatch.hpp diff --git a/lib/3rdparty/metal-cpp/Metal.hpp b/lib/nnc/mfa/3rdparty/metal-cpp/Metal.hpp similarity index 100% rename from lib/3rdparty/metal-cpp/Metal.hpp rename to lib/nnc/mfa/3rdparty/metal-cpp/Metal.hpp diff --git a/lib/nnc/mfa/Metal.cpp b/lib/nnc/mfa/Metal.cpp new file mode 100644 index 000000000..11942a3e3 --- /dev/null +++ b/lib/nnc/mfa/Metal.cpp @@ -0,0 +1,5 @@ +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION + +#include "nnc/mfa/3rdparty/metal-cpp/Metal.hpp" diff --git a/lib/nnc/mfa/ccv_nnc_mfa.cpp b/lib/nnc/mfa/ccv_nnc_mfa.cpp new file mode 100644 index 000000000..38de74ce7 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa.cpp @@ -0,0 +1,170 @@ +#include "ccv_nnc_mfa.hpp" +using namespace ccv::nnc; + +#include + +// MARK: - C + +mfa::context* ccv_nnc_init_mfa_context(MTL::Device* device, const char* metallib_path) { + return new mfa::context(device, metallib_path); +} + +void ccv_nnc_deinit_mfa_context(mfa::context* context) { + delete context; +} + +uint8_t ccv_nnc_mfa_context_supported(mfa::context* context) { + return context->supported ? 1 : 0; +} + +uint16_t ccv_nnc_mfa_context_log_level(mfa::context* context) { + return context->log_level; +} + +void ccv_nnc_mfa_log_message(const char* message) { + std::cerr << METAL_LOG_HEADER << message << std::endl; +} + +MTL::CommandBatch* ccv_nnc_start_command_batch(MTL::CommandQueue* command_queue) { + return new MTL::CommandBatch(command_queue); +} + +void ccv_nnc_finish_command_batch(MTL::CommandBatch* command_batch) { + delete command_batch; +} + +// MARK: - C++ + +template +mfa::cache::cache() +{ + map = {}; +} + +template +mfa::cache::~cache() +{ + for (auto it = map.begin(); it != map.end(); ++it) { + delete it->second; + } +} + +// This is a workaround. If we use a template member function directly, the +// symbols won't link. 
+template +inline void _mfa_cache_prepare(std::unordered_map* map, mfa::context* context, T hash, bool async) +{ + if (map->find(hash) == map->end()) { + if (METAL_LOG_LEVEL(context) >= 2) { + std::cout << METAL_LOG_HEADER << "PSO cache miss." << std::endl; + std::cout << METAL_LOG_HEADER << " Creating new PSO asynchronously: " << async << std::endl; + std::cout << METAL_LOG_HEADER << " Contents of map (before):" << std::endl; + for (auto it = map->begin(); it != map->end(); ++it) { + std::cout << METAL_LOG_HEADER << " " << it->first << ": " << it->second << std::endl; + } + } + + auto* pipeline = new mfa::gemm::pipeline(context, hash, async); + (*map)[hash] = pipeline; + + if (METAL_LOG_LEVEL(context) >= 2) { + std::cout << METAL_LOG_HEADER << " Contents of map (after):" << std::endl; + for (auto it = map->begin(); it != map->end(); ++it) { + std::cout << METAL_LOG_HEADER << " " << it->first << ": " << it->second << std::endl; + } + } + } +} + +template <> +void mfa::cache::prepare(mfa::context* context, mfa::gemm::hash hash, bool async) +{ + _mfa_cache_prepare(&map, context, hash, async); +} + +mfa::context::context(MTL::Device* device, const char* metallib_path) +{ + auto* pool = NS::AutoreleasePool::alloc()->init(); + + this->log_level = 0; +#if CCV_METAL_LOGGING_ENABLE + const char* log_level_repr = getenv("CCV_METAL_LOG_LEVEL"); + if (log_level_repr) { + int log_level_raw = atoi(log_level_repr); + std::cerr << METAL_LOG_HEADER << "Using log level: " << log_level_raw << std::endl; + CCV_NNC_MFA_PRECONDITION(log_level_raw >= 0 && log_level_raw <= 3) + + this->log_level = uint16_t(log_level_raw); + } +#endif + + // Example: /usr/local/MetalFlashAttention/lib/libMetalFlashAttention.metallib + // We need to have two different variants based on the operating system. macOS + // will not accept a metallib compiled for iOS/tvOS/visionOS and vice versa. 
+ if (!metallib_path) { + this->supported = false; + pool->drain(); return; // drain the autorelease pool on this early exit too + } + if (METAL_LOG_LEVEL(this) >= 1) { + std::cerr << METAL_LOG_HEADER << "Started loading 'libMetalFlashAttention.metallib'." << std::endl; + } + + // Check whether the device architecture is supported. + this->supported = device->supportsFamily(MTL::GPUFamilyApple7); + if (!supported) { + if (METAL_LOG_LEVEL(this) >= 1) { + std::cerr << METAL_LOG_HEADER << "Device architecture not supported by Metal FlashAttention." << std::endl; + } + pool->drain(); return; // drain the autorelease pool on this early exit too + } + + this->device = NS::RetainPtr(device); +#if TARGET_OS_OSX + // This method is only available on macOS 13.3+. To make the code compatible + // with macOS 12, we need to call ObjC runtime functions that check whether + // the selector actually exists. + device->setShouldMaximizeConcurrentCompilation(true); +#endif + + // Create a URL out of the path string. + auto c_path = metallib_path; + auto swift_path = NS::String::string(c_path, NS::UTF8StringEncoding); + auto url = NS::URL::fileURLWithPath(swift_path); + + // Attempt to load the library, otherwise crash with a detailed log message. + NS::Error* error = nullptr; // must start null: it is checked below even when loading succeeds + this->library = NS::TransferPtr(device->newLibrary(url, &error)); + CCV_NNC_MFA_CHECK_ERROR(error) + + // Notify that this finished successfully, and is not just stalling on one of + // the previous lines of code. + if (METAL_LOG_LEVEL(this) >= 1) { + std::cerr << METAL_LOG_HEADER << "Finished loading 'libMetalFlashAttention.metallib'." 
<< std::endl; + } + + pool->drain(); +} + +MTL::CommandBatch::CommandBatch(MTL::CommandQueue* command_queue) { + command_buffer = command_queue->commandBuffer(); + command_encoder = command_buffer->computeCommandEncoder(); +} + +MTL::ComputeCommandEncoder* MTL::CommandBatch::start_command(MTL::ComputePipelineState* pso) { + CCV_NNC_MFA_PRECONDITION(command_active == 0) + command_active = 1; + command_encoder->setComputePipelineState(pso); + return command_encoder; +} + +void MTL::CommandBatch::finish_command(MTL::ComputeCommandEncoder* command_encoder) { + CCV_NNC_MFA_PRECONDITION(command_active == 1) + command_active = 0; + batched_command_count += 1; +} + +MTL::CommandBatch::~CommandBatch() { + CCV_NNC_MFA_PRECONDITION(command_active == 0) + command_encoder->endEncoding(); + command_buffer->commit(); +} diff --git a/lib/nnc/mfa/ccv_nnc_mfa.hpp b/lib/nnc/mfa/ccv_nnc_mfa.hpp new file mode 100644 index 000000000..423101b52 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa.hpp @@ -0,0 +1,73 @@ +#ifndef GUARD_ccv_nnc_mfa_hpp +#define GUARD_ccv_nnc_mfa_hpp + +#include "nnc/ccv_nnc.h" +#include "ccv_nnc_mfa_defines.hpp" +#include "ccv_nnc_mfa_gemm.hpp" + +#ifdef __cplusplus +#include "nnc/mfa/3rdparty/metal-cpp/Dispatch.hpp" +#include "nnc/mfa/3rdparty/metal-cpp/Metal.hpp" +#include "ccv_nnc_mfa_error.hpp" +#include + +namespace ccv { +namespace nnc { +namespace mfa { + +class context; + +template +class cache { +public: + std::unordered_map map; + + cache(); + ~cache(); + + void prepare(context* context, T hash, bool async); +}; + +class context { +public: + bool supported; + uint16_t log_level; + + NS::SharedPtr device; + NS::SharedPtr library; + + context(MTL::Device* device, const char* metallib_path); + + // MFA keeps internal caches of pipeline state objects. If you're eagerly + // executing a command, call `sync_prepare_*` just before encoding it. This + // incurs non-negligible latency, which can be removed by compiling during + // graph compilation. 
Use `async_prepare_*` during graph compilation, which + // will transform the subsequent `sync_prepare_*` into a NOP. The async + // version has more latency but utilizes multicore CPU parallelism. + // + // After preparing the pipeline, call `encode_*`. Pass each tensor's backing + // `MTL::Buffer*` through a null-terminated list. + cache gemm_cache; +}; + +} // namespace mfa +} // namespace nnc +} // namespace ccv + +extern "C" { +#endif // __cplusplus + +ccv_nnc_mfa_context_t* ccv_nnc_init_mfa_context(mtl_device_t* context, const char* metallib_path); +void ccv_nnc_deinit_mfa_context(ccv_nnc_mfa_context_t* context); +uint8_t ccv_nnc_mfa_context_supported(ccv_nnc_mfa_context_t* context); +uint16_t ccv_nnc_mfa_context_log_level(ccv_nnc_mfa_context_t* context); +void ccv_nnc_mfa_log_message(const char* message); + +mtl_command_batch_t* ccv_nnc_start_command_batch(mtl_command_queue_t* command_queue); +void ccv_nnc_finish_command_batch(mtl_command_batch_t* command_batch); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif diff --git a/lib/nnc/mfa/ccv_nnc_mfa_defines.hpp b/lib/nnc/mfa/ccv_nnc_mfa_defines.hpp new file mode 100644 index 000000000..09c54a784 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa_defines.hpp @@ -0,0 +1,91 @@ +#ifndef GUARD_ccv_nnc_mfa_defines_hpp +#define GUARD_ccv_nnc_mfa_defines_hpp + +// MARK: - Types + +#ifdef __cplusplus +#include "nnc/mfa/3rdparty/metal-cpp/Metal.hpp" +namespace ccv { +namespace nnc { +namespace mfa { +class context; +} // namespace mfa +} // namespace nnc +} // namespace ccv + +typedef ccv::nnc::mfa::context ccv_nnc_mfa_context_t; +typedef MTL::Buffer mtl_buffer_t; +typedef MTL::CommandBuffer mtl_command_buffer_t; +typedef MTL::ComputeCommandEncoder mtl_compute_command_encoder_t; +typedef MTL::CommandQueue mtl_command_queue_t; +typedef MTL::Device mtl_device_t; +#else +typedef void ccv_nnc_mfa_context_t; +typedef void mtl_buffer_t; +typedef void mtl_command_buffer_t; +typedef void 
mtl_compute_command_encoder_t; +typedef void mtl_command_queue_t; +typedef void mtl_device_t; +#endif // __cplusplus + +#ifdef __cplusplus +namespace MTL { +class CommandBatch { +public: + MTL::CommandBuffer* command_buffer; + + // Although labeled `MTL::ComputeCommandEncoder`, this should be used for + // memcpy and memset as well. Here is a performant reference implementation + // using custom shaders to bypass the CPU-side latency of switching encoders: + // https://github.com/philipturner/metal-usm/tree/main/BlitEncoderAlternative + MTL::ComputeCommandEncoder* command_encoder; + + uint16_t batched_command_count = 0; + uint8_t command_active = 0; + + CommandBatch(MTL::CommandQueue* command_queue); + ~CommandBatch(); + + MTL::ComputeCommandEncoder* start_command(MTL::ComputePipelineState* pso); + void finish_command(MTL::ComputeCommandEncoder* command_encoder); +}; +} // namespace MTL + +typedef MTL::CommandBatch mtl_command_batch_t; +#else // __cplusplus +typedef struct { + mtl_command_buffer_t* command_buffer; + mtl_compute_command_encoder_t* command_encoder; + uint16_t batched_command_count; + uint8_t command_active; +} MTLCommandBatch; + +typedef MTLCommandBatch mtl_command_batch_t; +#endif // __cplusplus + +// MARK: - Diagnostics + +#ifndef CCV_METAL_LOGGING_ENABLE +#define CCV_METAL_LOGGING_ENABLE 1 +#endif + +// 0 - crash reports +// 1 - metallib initialization +// 2 - PSO creation +// 3 - command encoding + +#if CCV_METAL_LOGGING_ENABLE + +#ifdef __cplusplus +#define METAL_LOG_LEVEL(CONTEXT) ((CONTEXT)->log_level) +#else +#define METAL_LOG_LEVEL(CONTEXT) ccv_nnc_mfa_context_log_level(CONTEXT) +#endif // __cplusplus + +#else // CCV_METAL_LOGGING_ENABLE + +#define METAL_LOG_LEVEL(CONTEXT) 0 + +#endif // CCV_METAL_LOGGING_ENABLE + +#endif diff --git a/lib/nnc/mfa/ccv_nnc_mfa_error.cpp b/lib/nnc/mfa/ccv_nnc_mfa_error.cpp new file mode 100644 index 000000000..00071191f --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa_error.cpp @@ -0,0 +1,46 @@ +#include 
"ccv_nnc_mfa.hpp" +using namespace ccv::nnc; + +#include + +inline void log_source_location(int line, const char *file_name, const char *function_name) { + std::cerr << METAL_LOG_HEADER << "Encountered unexpected error in: " << function_name << std::endl; + std::cerr << "\e[0;1m" << file_name << ":" << line << ":\e[0m "; + std::cerr << "\e[0;31m" << "error:" << "\e[0m "; +} + +void mfa::fatal_error(NS::Error* error, int line, const char *file_name, const char *function_name) { + auto description = error->localizedDescription(); + auto recovery_suggestion = error->localizedRecoverySuggestion(); + auto failure_reason = error->localizedFailureReason(); + + log_source_location(line, file_name, function_name); + std::cerr << "\e[0;1m"; + if (description) { + std::cerr << description->cString(NS::UTF8StringEncoding); + } else { + std::cerr << "[description not available]"; + } + std::cerr << "\e[0m" << std::endl; + if (recovery_suggestion) { + std::cerr << METAL_LOG_HEADER << "Recovery suggestion: " << recovery_suggestion->cString(NS::UTF8StringEncoding) << std::endl; + } + if (failure_reason) { + std::cerr << METAL_LOG_HEADER << "Failure reason: " << failure_reason->cString(NS::UTF8StringEncoding) << std::endl; + } + std::cerr << METAL_LOG_HEADER << "Quitting now." << std::endl; + exit(-1); +} + +void mfa::precondition_failure(const char *message, int line, const char *file_name, const char *function_name) { + log_source_location(line, file_name, function_name); + std::cerr << "\e[0;1m"; + if (message) { + std::cerr << message; + } else { + std::cerr << "[precondition failure]"; + } + std::cerr << "\e[0m" << std::endl; + std::cerr << METAL_LOG_HEADER << "Quitting now." 
<< std::endl; + exit(-1); +} diff --git a/lib/nnc/mfa/ccv_nnc_mfa_error.hpp b/lib/nnc/mfa/ccv_nnc_mfa_error.hpp new file mode 100644 index 000000000..3fc0238e9 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa_error.hpp @@ -0,0 +1,30 @@ +#ifndef GUARD_ccv_nnc_mfa_error_hpp +#define GUARD_ccv_nnc_mfa_error_hpp + +#include "nnc/mfa/3rdparty/metal-cpp/Metal.hpp" + +// `std::cout` and `CACurrentMediaTime()` for profiling. +#include +#include + +namespace ccv { +namespace nnc { +namespace mfa { + +#define METAL_LOG_HEADER "\e[0;36m[Metal]\e[0m " + +#define CCV_NNC_MFA_CHECK_ERROR(error) \ +if (error) { ccv::nnc::mfa::fatal_error(error, __LINE__, __FILE__, __FUNCTION__); } \ + +void fatal_error(NS::Error* error, int line, const char *file_name, const char *function_name); + +#define CCV_NNC_MFA_PRECONDITION(expr) \ +if (!(expr)) { ccv::nnc::mfa::precondition_failure(nullptr, __LINE__, __FILE__, __FUNCTION__); } \ + +void precondition_failure(const char *message, int line, const char *file_name, const char *function_name); + +} // namespace mfa +} // namespace nnc +} // namespace ccv + +#endif diff --git a/lib/nnc/mfa/ccv_nnc_mfa_gemm.cpp b/lib/nnc/mfa/ccv_nnc_mfa_gemm.cpp new file mode 100644 index 000000000..5467bcf18 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa_gemm.cpp @@ -0,0 +1,384 @@ +#include "ccv_nnc_mfa.hpp" +#include "ccv_nnc_mfa_hash.hpp" +#include +using namespace ccv::nnc; + +#include + +// MARK: - C + +void ccv_nnc_mfa_async_prepare_gemm(mfa::context* context, ccv_nnc_mfa_gemm_params_t params) +{ + context->gemm_cache.prepare(context, mfa::gemm::hash(params), true); +} + +void ccv_nnc_mfa_sync_prepare_gemm(mfa::context* context, ccv_nnc_mfa_gemm_params_t params) +{ + context->gemm_cache.prepare(context, mfa::gemm::hash(params), false); +} + +void ccv_nnc_mfa_encode_gemm(mfa::context* context, ccv_nnc_mfa_gemm_params_t params, MTL::CommandBatch* command_batch, MTL::Buffer** tensors, size_t* tensor_offsets) +{ + mfa::gemm::hash hash(params); + auto iterator = 
context->gemm_cache.map.find(hash); + if (iterator == context->gemm_cache.map.end()) { + mfa::precondition_failure("GEMM hash not cached.", __LINE__, __FILE__, __FUNCTION__); + } + + auto* pipeline = iterator->second; + pipeline->wait(); + + auto* encoder = command_batch->start_command(pipeline->get_pso()); + encoder->setThreadgroupMemoryLength(pipeline->get_threadgroup_memory_length(), 0); + + int num_tensors = 0; + while (tensors[num_tensors] != nullptr) { + num_tensors += 1; + } + CCV_NNC_MFA_PRECONDITION(num_tensors == 3) + for (int i = 0; i < num_tensors; ++i) { + if (i < 2) { + encoder->useResource(tensors[i], MTL::ResourceUsageRead); + } else if (i < 3) { + encoder->useResource(tensors[i], MTL::ResourceUsageWrite); + } else { + // This should never happen. + CCV_NNC_MFA_PRECONDITION(false); + } + encoder->setBuffer(tensors[i], tensor_offsets[i], i); + } + + uint32_t batch_size; + if (pipeline->get_batched()) { + uint16_t num_batch_dims_a = 0; + uint64_t batch_size_a = 1; + for (int i = 0; i < CCV_NNC_MAX_DIM_ALLOC; ++i) { + if (params.batch_dims_a[i] == 0) { + break; + } + num_batch_dims_a += 1; + batch_size_a *= params.batch_dims_a[i]; + } + + uint16_t num_batch_dims_b = 0; + uint64_t batch_size_b = 1; + for (int i = 0; i < CCV_NNC_MAX_DIM_ALLOC; ++i) { + if (params.batch_dims_b[i] == 0) { + break; + } + num_batch_dims_b += 1; + batch_size_b *= params.batch_dims_b[i]; + } + + bool same_batch_dims = true; + if (num_batch_dims_a != num_batch_dims_b) { + same_batch_dims = false; + } else if (batch_size_a != batch_size_b) { + same_batch_dims = false; + } else { + for (int i = 0; i < CCV_NNC_MAX_DIM_ALLOC; ++i) { + if (params.batch_dims_a[i] != params.batch_dims_b[i]) { + same_batch_dims = false; + } + } + } + + if (!same_batch_dims) { + CCV_NNC_MFA_PRECONDITION(batch_size_b == 1); + } + batch_size = batch_size_a; + + uint16_t element_size = 0; + switch (params.data_type) { + case MTL::DataTypeHalf: { + element_size = 2; + break; + } + case MTL::DataTypeFloat: { 
+ element_size = 4; + break; + } + default: + CCV_NNC_MFA_PRECONDITION(false); + break; + } + uint64_t byte_stride_a = hash.M * hash.K * element_size; + uint64_t byte_stride_b = hash.K * hash.N * element_size; + uint64_t byte_stride_c = hash.M * hash.N * element_size; + if (batch_size_b == 1) { + byte_stride_b = 0; + } + + simd::ulong4 matrix_offsets[batch_size]; + for (int i = 0; i < batch_size; ++i) { + matrix_offsets[i] = simd::ulong4 { + i * byte_stride_a, + i * byte_stride_b, + i * byte_stride_c, + 0 + }; + } + encoder->setBytes(matrix_offsets, batch_size * 32, 10); + } else { + batch_size = 1; + } + + auto grid_size = pipeline->get_grid_size(); + grid_size.depth = batch_size; + encoder->dispatchThreadgroups(grid_size, pipeline->get_group_size()); + command_batch->finish_command(encoder); +} + +// MARK: - C++ + +mfa::gemm::hash::hash(ccv_nnc_mfa_gemm_params_t params) { + data_type = params.data_type; + M = params.M; + N = params.N; + K = params.K; + A_trans = params.A_trans; + B_trans = params.B_trans; + alpha = params.alpha; + beta = params.beta; + batched = params.batched; + fused_activation = params.fused_activation; +} + +bool mfa::gemm::hash::operator==(const mfa::gemm::hash& hash) const { + return + (data_type == hash.data_type) && + (M == hash.M) && + (N == hash.N) && + (K == hash.K) && + (A_trans == hash.A_trans) && + (B_trans == hash.B_trans) && + (alpha == hash.alpha) && + (beta == hash.beta) && + (batched == hash.batched) && + (fused_activation == hash.fused_activation); +} + +mfa::gemm::pipeline::pipeline(mfa::context* context, mfa::gemm::hash hash, bool async) { + CCV_NNC_MFA_PRECONDITION((hash.data_type == MTL::DataTypeFloat) || (hash.data_type == MTL::DataTypeHalf)) + CCV_NNC_MFA_PRECONDITION(hash.alpha == 1.0) + CCV_NNC_MFA_PRECONDITION(hash.beta == 0.0) + CCV_NNC_MFA_PRECONDITION(hash.fused_activation == false) + + auto* pool = NS::AutoreleasePool::alloc()->init(); + + if (async) { + finished = false; + semaphore = new Dispatch::Semaphore(0); 
+ } else { + finished = true; + semaphore = nullptr; + } + this->batched = hash.batched; + + auto constants = NS::TransferPtr(MTL::FunctionConstantValues::alloc()->init()); + constants->setConstantValue(&hash.M, MTL::DataTypeUInt, NS::UInteger(0)); + constants->setConstantValue(&hash.N, MTL::DataTypeUInt, 1); + constants->setConstantValue(&hash.K, MTL::DataTypeUInt, 2); + constants->setConstantValue(&hash.A_trans, MTL::DataTypeBool, 10); + constants->setConstantValue(&hash.B_trans, MTL::DataTypeBool, 11); + constants->setConstantValue(&hash.alpha, MTL::DataTypeFloat, 20); + constants->setConstantValue(&hash.beta, MTL::DataTypeFloat, 21); + constants->setConstantValue(&hash.batched, MTL::DataTypeBool, 100); + constants->setConstantValue(&hash.fused_activation, MTL::DataTypeBool, 101); + + // Eventually, this will incorporate the batch size. + // BxMxN > 1,000,000 -> 48x48, only if M >= 88 and N >= 88 + // BxMxN > 4,000,000 -> 64x64, only if M >= 120 and N >= 120 + uint64_t C_elements = uint64_t(hash.M) * uint64_t(hash.N); + if (batched) { + C_elements *= 2; + } + int is_half = (hash.data_type == MTL::DataTypeHalf); // SD v1 attention + int is_float = (hash.data_type == MTL::DataTypeFloat); // SD v2 attention + + uint16_t M_group = 32; + uint16_t N_group = 32; + uint16_t K_group = 32; + if (C_elements > 1000 * 1000) { + M_group = 48; + N_group = 48; + } + + // If K_simd is perfectly equal to matrix K, the compiler can elide a large + // amount of logic in the kernel. 
+ if (hash.K >= 33 && hash.K <= 40) { + K_group = 40; // 1 * 40 + } else if (is_half && hash.K >= 73 && hash.K <= 80) { + K_group = 40; // 2 * 40 + } else if (C_elements > 1000 * 1000) { + if (hash.K <= 16) { + K_group = 16; // 1 * 16 + } else if (hash.K <= 24) { + K_group = 24; // 1 * 24 + } else if (hash.K <= 32) { + K_group = 32; // 1 * 32 + } else if (hash.K <= 48) { + K_group = 24; + } else if (hash.K <= 64) { + K_group = 32; + } else if (is_float) { + K_group = 24; + } + } + + uint16_t M_splits = 2; + uint16_t N_splits = 2; + uint16_t K_splits = 1; + uint16_t M_simd = M_group / M_splits; + uint16_t N_simd = N_group / N_splits; + uint16_t K_simd = K_group / K_splits; + + constants->setConstantValue(&M_simd, MTL::DataTypeUShort, 200); + constants->setConstantValue(&N_simd, MTL::DataTypeUShort, 201); + constants->setConstantValue(&K_simd, MTL::DataTypeUShort, 202); + constants->setConstantValue(&M_splits, MTL::DataTypeUShort, 210); + constants->setConstantValue(&N_splits, MTL::DataTypeUShort, 211); + constants->setConstantValue(&K_splits, MTL::DataTypeUShort, 212); + + std::string cpp_name; + uint16_t data_type_size = UINT16_MAX; + switch (hash.data_type) { + case MTL::DataTypeHalf: { + cpp_name = "hgemm"; + data_type_size = 2; + break; + } + case MTL::DataTypeFloat: { + cpp_name = "sgemm"; + data_type_size = 4; + break; + } + default: { + CCV_NNC_MFA_PRECONDITION(false) + break; + } + } + auto* swift_name = NS::String::string(cpp_name.c_str(), NS::UTF8StringEncoding); + + uint16_t A_block_bytes = M_group * K_group * data_type_size; + uint16_t B_block_bytes = K_group * N_group * data_type_size; + uint16_t C_block_bytes = M_group * N_group * data_type_size; + threadgroup_memory_length = A_block_bytes + B_block_bytes; + + if ((hash.M % 8 > 0) && (hash.N % 8 > 0)) { + if (C_block_bytes > threadgroup_memory_length) { + threadgroup_memory_length = C_block_bytes; + } + } + + std::function ceil_divide = [](size_t original, uint16_t granularity) { + return (original + 
size_t(granularity) - 1) / size_t(granularity); + }; + grid_size = MTL::Size(ceil_divide(hash.N, N_group), ceil_divide(hash.M, M_group), 1); + group_size = MTL::Size(128 * K_splits, 1, 1); + + NS::Error* error = nullptr; // must start null: it is checked below even when no error was written + auto function = NS::TransferPtr(context->library->newFunction(swift_name, constants.get(), &error)); + CCV_NNC_MFA_CHECK_ERROR(error) + + if (async) { + context->device->newComputePipelineState(function.get(), [=](MTL::ComputePipelineState* pipeline, NS::Error* error) { + CCV_NNC_MFA_CHECK_ERROR(error) + + pipeline->retain(); + pso = pipeline; + semaphore->signal(); + }); + } else { + pso = context->device->newComputePipelineState(function.get(), &error); + CCV_NNC_MFA_CHECK_ERROR(error) + } + + pool->drain(); +} + +mfa::gemm::pipeline::~pipeline() { + if (semaphore) { + delete semaphore; + } + pso->release(); +} + +void mfa::gemm::pipeline::wait() { + if (!finished) { + semaphore->wait(); + finished = true; + } +} + +MTL::ComputePipelineState* mfa::gemm::pipeline::get_pso() const { + if (finished) { + return pso; + } else { + return nullptr; + } +} + +bool mfa::gemm::pipeline::get_batched() const { + if (finished) { + return batched; + } else { + return false; + } +} + +uint16_t mfa::gemm::pipeline::get_threadgroup_memory_length() const { + if (finished) { + return threadgroup_memory_length; + } else { + return UINT16_MAX; + } +} + +MTL::Size mfa::gemm::pipeline::get_grid_size() const { + if (finished) { + return grid_size; + } else { + return MTL::Size(0, UINT64_MAX, UINT64_MAX); + } +} + +MTL::Size mfa::gemm::pipeline::get_group_size() const { + if (finished) { + return group_size; + } else { + return MTL::Size(0, UINT64_MAX, UINT64_MAX); + } +} + +std::ostream& operator<<(std::ostream& os, const mfa::gemm::hash& hash) +{ + os << "mfa::gemm::hash {"; + os << " .data_type = " << hash.data_type << ','; + os << " .M = " << hash.M << ','; + os << " .N = " << hash.N << ','; + os << " .K = " << hash.K << ','; + os << " .A_trans = " << bool(hash.A_trans) << 
','; + os << " .B_trans = " << bool(hash.B_trans) << ','; + os << " .alpha = " << double(hash.alpha) << ','; + os << " .beta = " << double(hash.beta) << ','; + os << " .batched = " << bool(hash.batched) << ','; + os << " .fused_activation = " << bool(hash.fused_activation); + os << "}"; + return os; +} + +std::size_t std::hash::operator()(const mfa::gemm::hash& hash) const noexcept { + std::size_t seed = 0; + mfa::hash::combine_64(seed, hash.data_type); + mfa::hash::combine_32(seed, hash.M); + mfa::hash::combine_32(seed, hash.N); + mfa::hash::combine_32(seed, hash.K); + mfa::hash::combine_32(seed, uint32_t(hash.A_trans)); + mfa::hash::combine_32(seed, uint32_t(hash.B_trans)); + mfa::hash::combine_32(seed, *reinterpret_cast(&hash.alpha)); + mfa::hash::combine_32(seed, *reinterpret_cast(&hash.beta)); + mfa::hash::combine_32(seed, uint32_t(hash.batched)); + mfa::hash::combine_32(seed, uint32_t(hash.fused_activation)); + return seed; +} diff --git a/lib/nnc/mfa/ccv_nnc_mfa_gemm.hpp b/lib/nnc/mfa/ccv_nnc_mfa_gemm.hpp new file mode 100644 index 000000000..8bf66fe22 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa_gemm.hpp @@ -0,0 +1,100 @@ +#ifndef GUARD_ccv_nnc_mfa_gemm_hpp +#define GUARD_ccv_nnc_mfa_gemm_hpp + +typedef struct { + uint64_t data_type; + uint32_t M; + uint32_t N; + uint32_t K; + uint8_t A_trans; + uint8_t B_trans; + float alpha; + float beta; + uint8_t batched; + uint8_t fused_activation; + + // Fill these in the same order as the original shape, but null-terminated. + // Both arrays must have the same length. 
+ uint32_t batch_dims_a[CCV_NNC_MAX_DIM_ALLOC]; + uint32_t batch_dims_b[CCV_NNC_MAX_DIM_ALLOC]; +} ccv_nnc_mfa_gemm_params_t; + +#ifdef __cplusplus +#include "nnc/mfa/3rdparty/metal-cpp/Dispatch.hpp" +#include "nnc/mfa/3rdparty/metal-cpp/Metal.hpp" + +namespace ccv { +namespace nnc { +namespace mfa { +namespace gemm { + +class hash { +public: + uint64_t data_type; + uint32_t M; + uint32_t N; + uint32_t K; + uint8_t A_trans; + uint8_t B_trans; + float alpha; + float beta; + uint8_t batched; + uint8_t fused_activation; + + hash(ccv_nnc_mfa_gemm_params_t); + + bool operator==(const hash& rhs) const; +}; + +class pipeline { + bool finished; + Dispatch::Semaphore* semaphore; + + MTL::ComputePipelineState* pso; + + bool batched; + uint16_t threadgroup_memory_length; + MTL::Size grid_size; + MTL::Size group_size; + +public: + pipeline(context* context, hash hash, bool async); + ~pipeline(); + + // This is a potentially blocking function. Call it before accessing any of + // the property getters. 
+ void wait(); + + MTL::ComputePipelineState* get_pso() const; + + bool get_batched() const; + uint16_t get_threadgroup_memory_length() const; + MTL::Size get_grid_size() const; + MTL::Size get_group_size() const; +}; + +} // namespace gemm +} // namespace mfa +} // namespace nnc +} // namespace ccv + +std::ostream& operator<<(std::ostream& os, const ccv::nnc::mfa::gemm::hash& hash); + +template<> +struct std::hash +{ + std::size_t operator()(const ccv::nnc::mfa::gemm::hash& hash) const noexcept; +}; + +extern "C" { +#endif // __cplusplus + +void ccv_nnc_mfa_async_prepare_gemm(ccv_nnc_mfa_context_t* context, ccv_nnc_mfa_gemm_params_t params); +void ccv_nnc_mfa_sync_prepare_gemm(ccv_nnc_mfa_context_t* context, ccv_nnc_mfa_gemm_params_t params); +void ccv_nnc_mfa_encode_gemm(ccv_nnc_mfa_context_t* context, ccv_nnc_mfa_gemm_params_t params, mtl_command_batch_t* command_batch, mtl_buffer_t** tensors, size_t* tensor_offsets); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif diff --git a/lib/nnc/mfa/ccv_nnc_mfa_hash.hpp b/lib/nnc/mfa/ccv_nnc_mfa_hash.hpp new file mode 100644 index 000000000..60d58a056 --- /dev/null +++ b/lib/nnc/mfa/ccv_nnc_mfa_hash.hpp @@ -0,0 +1,58 @@ +#ifndef GUARD_ccv_nnc_mfa_hash_hpp +#define GUARD_ccv_nnc_mfa_hash_hpp + +// Source: +// https://stackoverflow.com/a/50978188 + +namespace { +template +T xorshift(const T& n,int i){ + return n^(n>>i); +} + +// a hash function with another name as to not confuse with std::hash +uint32_t distribute_32(const uint32_t& n){ + uint32_t p = 0x55555555ul; // pattern of alternating 0 and 1 + uint32_t c = 3423571495ul; // random uneven integer constant; + return c*xorshift(p*xorshift(n,16),16); +} + +// a hash function with another name as to not confuse with std::hash +uint64_t distribute_64(const uint64_t& n){ + uint64_t p = 0x5555555555555555ull; // pattern of alternating 0 and 1 + uint64_t c = 17316035218449499591ull;// random uneven integer constant; + return 
c*xorshift(p*xorshift(n,32),32); +} + +// if c++20 rotl is not available: +template <typename T,typename S> +typename std::enable_if<std::is_unsigned<T>::value,T>::type +constexpr rotl(const T n, const S i){ + const T m = (std::numeric_limits<T>::digits-1); + const T c = i&m; + return (n<<c)|(n>>((T(0)-c)&m)); // this is usually recognized by the compiler to mean rotation, also c++20 now gives us rotl directly +} +} + +namespace ccv { +namespace nnc { +namespace mfa { +namespace hash { + +// call this function with the old seed and the new key to be hashed; the combined value is stored back into seed (callers ignore the return value) and is also returned as the final hash +inline size_t combine_32(std::size_t& seed, const uint32_t& v) +{ + return seed = rotl(seed,std::numeric_limits<size_t>::digits/3) ^ distribute_32(v); +} + +inline size_t combine_64(std::size_t& seed, const uint64_t& v) +{ + return seed = rotl(seed,std::numeric_limits<size_t>::digits/3) ^ distribute_64(v); +} + +} // namespace hash +} // namespace mfa +} // namespace nnc +} // namespace ccv + +#endif diff --git a/lib/nnc/mfa/makefile b/lib/nnc/mfa/makefile new file mode 100644 index 000000000..864d18d31 --- /dev/null +++ b/lib/nnc/mfa/makefile @@ -0,0 +1,39 @@ +include ../../config.mk + +CFLAGS := -std=c++17 -O3 -Wall -I"../../" $(CFLAGS) + +SRCS := Metal.cpp ccv_nnc_mfa.cpp ccv_nnc_mfa_error.cpp ccv_nnc_mfa_gemm.cpp 3rdparty/metal-cpp/Dispatch.cpp + +SRC_OBJS := $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(SRCS))) + +.PHONY: release all lib clean + +release: all + +include ../../scheme.mk + +all: lib + +lib: libnnc-compat-mfa.o + +clean: + rm -f *.o + +libnnc-compat-mfa.o: $(SRC_OBJS) + ld -r $^ -o $@ + +%.o: %.c + $(CC) $< -o $@ -c $(CFLAGS) + +%.o: %.cpp + $(CC) $< -o $@ -c $(CFLAGS) + +dep: .dep.mk +.dep.mk: $(SRCS) + echo '' > .dep.mk + for SRC in $(patsubst %.c,,$^) ; do \ + $(CC) $(CFLAGS) -M $$SRC | sed -e 's/^.*\://g' | (echo "$${SRC%%.*}.o: \\" && cat) >> .dep.mk ; \ + done + +-include .dep.mk + diff --git a/lib/nnc/mps/ccv_nnc_mps.h b/lib/nnc/mps/ccv_nnc_mps.h index 191d252e0..4ada91697 100644 ---
a/lib/nnc/mps/ccv_nnc_mps.h +++ b/lib/nnc/mps/ccv_nnc_mps.h @@ -3,6 +3,7 @@ #include "nnc/ccv_nnc.h" #include "nnc/_ccv_nnc_stream.h" +#include "nnc/mfa/ccv_nnc_mfa.hpp" void* mpheapalloc(int device, size_t size); void mpheapfree(int device, void* ptr); @@ -61,8 +62,11 @@ typedef struct { off_t mpgetoffset(const ccv_nnc_tensor_t* const tensor); id mpgetbuffer(const ccv_nnc_tensor_t* const tensor); id ccv_nnc_default_device(void); -CCV_WARN_UNUSED(MPSCommandBuffer*) ccv_nnc_stream_context_get_command_buffer(ccv_nnc_stream_context_t* const stream_context); -void ccv_nnc_stream_context_commit_command_buffer(ccv_nnc_stream_context_t* const stream_context, MPSCommandBuffer* command_buffer); +ccv_nnc_mfa_context_t* ccv_nnc_default_mfa_context(void); +CCV_WARN_UNUSED(MTLCommandBatch*) ccv_nnc_stream_context_start_command_batch(ccv_nnc_stream_context_t* const stream_context); +CCV_WARN_UNUSED(MPSCommandBuffer*) ccv_nnc_stream_context_start_mps_command_buffer(ccv_nnc_stream_context_t* const stream_context); +void ccv_nnc_stream_context_finish_command_batch(ccv_nnc_stream_context_t* const stream_context, MTLCommandBatch* command_batch); +void ccv_nnc_stream_context_finish_mps_command_buffer(ccv_nnc_stream_context_t* const stream_context, MPSCommandBuffer* command_buffer); CCV_WARN_UNUSED(MPSGraphExecutable*) ccv_nnc_mps_graph_executable_cache(const ccv_nnc_mps_graph_key_t key, int* indices, void(NS_NOESCAPE ^block)(MPSGraph* graph, NSMutableArray* inputTensors, NSMutableArray* inputShapedTypes, NSMutableArray* resultTensors)); CCV_WARN_UNUSED(ccv_nnc_mps_graph_key_t) ccv_nnc_mps_graph_key_new(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size); CCV_WARN_UNUSED(MPSDataType) ccv_nnc_mps_datatype(const int datatype); // Get the datatype corresponding to MPS datatype. 
diff --git a/lib/nnc/mps/ccv_nnc_mps.m b/lib/nnc/mps/ccv_nnc_mps.m index e5c29c311..10df9db84 100644 --- a/lib/nnc/mps/ccv_nnc_mps.m +++ b/lib/nnc/mps/ccv_nnc_mps.m @@ -6,6 +6,7 @@ #include #import #import +#import #import #import #import @@ -24,6 +25,32 @@ return device; } +@interface MTLFileBackedBuffer: NSObject +@property (nonatomic, copy) NSString* path; +@property (nonatomic, assign) NSUInteger size; +@end + +ccv_nnc_mfa_context_t* ccv_nnc_default_mfa_context(void) +{ + static dispatch_once_t once; + static ccv_nnc_mfa_context_t* context; + dispatch_once(&once, ^{ + const char* metallib_path = getenv("CCV_NNC_MFA_METALLIB_PATH"); + if (metallib_path) + context = ccv_nnc_init_mfa_context((__bridge mtl_device_t*)ccv_nnc_default_device(), metallib_path); + else { + NSBundle* bundle = [NSBundle bundleForClass:[MTLFileBackedBuffer class]]; +#if TARGET_OS_IPHONE || TARGET_OS_MACCATALYST + NSString* path = [bundle pathForResource:@"libmfaios16-0.2" ofType:@"metallib"]; +#else + NSString* path = [bundle pathForResource:@"libmfamacos13-0.2" ofType:@"metallib"]; +#endif + context = ccv_nnc_init_mfa_context((__bridge mtl_device_t*)ccv_nnc_default_device(), path.UTF8String); + } + }); + return context; +} + MPSGraphDevice* _ccv_nnc_default_mps_device(void) { static dispatch_once_t once; @@ -196,10 +223,6 @@ void mpobjfree(int device, void* ptr) return buffer; } -@interface MTLFileBackedBuffer: NSObject -@property (nonatomic, copy) NSString* path; -@property (nonatomic, assign) NSUInteger size; -@end @implementation MTLFileBackedBuffer @end @@ -517,6 +540,8 @@ ccv_nnc_mps_graph_key_t ccv_nnc_mps_graph_key_new(const ccv_nnc_cmd_t cmd, const // Stream context ccv_nnc_stream_context_t* ccv_nnc_init_stream_context(ccv_nnc_stream_context_t* const stream_context) { + // Initialize the MFA context. 
+ ccv_nnc_default_mfa_context(); return stream_context; } @@ -657,7 +682,12 @@ int ccv_nnc_gpu_device_count(void) return 1; } -MPSCommandBuffer* ccv_nnc_stream_context_get_command_buffer(ccv_nnc_stream_context_t* const stream_context) +MTLCommandBatch* ccv_nnc_stream_context_start_command_batch(ccv_nnc_stream_context_t* const stream_context) +{ + return ccv_nnc_start_command_batch((__bridge mtl_command_queue_t*)_ccv_nnc_default_queue()); +} + +MPSCommandBuffer* ccv_nnc_stream_context_start_mps_command_buffer(ccv_nnc_stream_context_t* const stream_context) { return [MPSCommandBuffer commandBufferFromCommandQueue:_ccv_nnc_default_queue()]; } @@ -667,14 +697,25 @@ void ccv_nnc_mps_unbounded_command_buffers(int state) enable_unbounded_command_buffers = state; } -void ccv_nnc_stream_context_commit_command_buffer(ccv_nnc_stream_context_t* const stream_context, MPSCommandBuffer* command_buffer) +void ccv_nnc_stream_context_finish_command_buffer(ccv_nnc_stream_context_t* const stream_context, MPSCommandBuffer* mps_command_buffer, MTLCommandBatch* command_batch) { + id mtl_command_buffer; + if (mps_command_buffer != nil) { + mtl_command_buffer = mps_command_buffer.commandBuffer; + } else { + mtl_command_buffer = command_batch->command_buffer; + } + int i; const int buffer_size = enable_unbounded_command_buffers ? 
OLD_MAX_COMMAND_BUFFER_SIZE : OLD_LIMITED_COMMAND_BUFFER_SIZE; if (!stream_context) { - id committed_command_buffer = [command_buffer.commandBuffer retain]; - [command_buffer commit]; + id committed_command_buffer = [mtl_command_buffer retain]; + if (mps_command_buffer != nil) { + [mps_command_buffer commit]; + } else { + ccv_nnc_finish_command_batch(command_batch); + } id last_buffer; id old_buffers[buffer_size]; os_unfair_lock_lock(&queue_lock); @@ -703,10 +744,53 @@ void ccv_nnc_stream_context_commit_command_buffer(ccv_nnc_stream_context_t* cons old_last_command_buffers[i] = old_last_command_buffers[i + 1]; old_last_command_buffers[buffer_size - 1] = last_command_buffer; } else - old_last_command_buffer = [command_buffer.commandBuffer retain]; - last_command_buffer = [command_buffer.commandBuffer retain]; + old_last_command_buffer = [mtl_command_buffer retain]; + last_command_buffer = [mtl_command_buffer retain]; + + // There is an opportunity to automatically batch MFA commands or custom + // shaders into a command batch. Instead of explicitly starting and + // finishing, have a background thread automatically commit it. To prevent + // committing from happening in the middle of encoding, protect the command + // batch using `queue_lock` (unknown latency) or a pthread mutex lock (~200 ns + // latency). + // + // Every 50 microseconds, the background thread checks whether a command batch + // is active. If so, it commits all the commands. The main thread can also + // commit when `batched_command_count` exceeds a certain threshold (while it's + // still holding the lock). The best threshold is unknown, but 8 would be a + // reasonable first guess. + // + // When encountering an MPS command, you will have to abort the command batch + // (i.e. eagerly commit it with `batched_command_count=1`) because + // MPSCommandBuffer can't guarantee the command buffer stays the same. 
Even if + // it did, creating a separate `MTL::ComputeCommandEncoder` for each command + // is no better than creating a new `MTL::CommandBuffer`. Closing the + // `MTL::ComputeCommandEncoder` and creating a new `MTL::BlitCommandEncoder` + // for memory copies is equally slow. + // + // Until ~50% of all operations transition from MPS -> custom shaders, this + // optimization is not worthwhile. It should only be employed on platforms + // where custom shaders are consistently faster than MPS (e.g. Apple 7+ with + // MFA GEMM). A good start would be creating custom shaders for all the + // elementwise operations in MPSGraph, and 4-byte aligned memcpy/memset. There + // are many places in ML models where elementwise activations follow GEMM. 90% + // of the time, they would be automatically batched within the 50-µs window, + // providing a 10x speedup for those layers. + // + // As a final optimization, delay the encoding of the GEMM. Fuse the GEMM + + // activation into one command through the `fused_activation` MFA function + // constant. This will require two separate GEMM variants to be ready, one + // with and one without `fused_activation` enabled. Same for the elementwise - + // one `MTLComputePipelineState` and another `MTLVisibleFunctionTable`. It + // might be wise to delay the creation of the fused variant, until you detect + // a specific GEMM shape received 2+ opportunities for fusion. 
+ // + // Alternative implementation path: compile-time graph transformations that + // - Check whether MFA is supported + // - Fuse groups of consecutive MFA-compatible commands os_unfair_lock_unlock(&queue_lock); - [command_buffer.commandBuffer addCompletedHandler:^(id buffer) { + + [mtl_command_buffer addCompletedHandler:^(id buffer) { id found_buffer = nil; os_unfair_lock_lock(&queue_lock); if (buffer == last_command_buffer) @@ -726,12 +810,26 @@ void ccv_nnc_stream_context_commit_command_buffer(ccv_nnc_stream_context_t* cons os_unfair_lock_unlock(&queue_lock); [found_buffer release]; }]; - [command_buffer commit]; + if (mps_command_buffer != nil) { + [mps_command_buffer commit]; + } else { + ccv_nnc_finish_command_batch(command_batch); + } // Wait if we need to bound how many in-flight command buffers there are. This helps memory usage. [old_last_command_buffer waitUntilCompleted]; [old_last_command_buffer release]; } +void ccv_nnc_stream_context_finish_command_batch(ccv_nnc_stream_context_t* const stream_context, MTLCommandBatch* command_batch) +{ + ccv_nnc_stream_context_finish_command_buffer(stream_context, nil, command_batch); +} + +void ccv_nnc_stream_context_finish_mps_command_buffer(ccv_nnc_stream_context_t* const stream_context, MPSCommandBuffer* command_buffer) +{ + ccv_nnc_stream_context_finish_command_buffer(stream_context, command_buffer, NULL); +} + MPSDataType ccv_nnc_mps_datatype(const int datatype) { switch (datatype) diff --git a/lib/nnc/mps/makefile b/lib/nnc/mps/makefile index e90ad192f..cbba0a9c9 100644 --- a/lib/nnc/mps/makefile +++ b/lib/nnc/mps/makefile @@ -32,7 +32,7 @@ dep: .dep.mk .dep.mk: $(SRCS) echo '' > .dep.mk for SRC in $(patsubst %.c,,$^) ; do \ - $(NVCC) $(NVFLAGS) -M $$SRC | sed -e 's/^.*\://g' | (echo "$${SRC%%.*}.o: \\" && cat) >> .dep.mk ; \ + $(CC) $(CFLAGS) -M $$SRC | sed -e 's/^.*\://g' | (echo "$${SRC%%.*}.o: \\" && cat) >> .dep.mk ; \ done -include .dep.mk diff --git a/test/int/nnc/mpsdnn.tests.c 
b/test/int/nnc/mpsdnn.tests.c index 67e5ddf76..2dde7e26b 100644 --- a/test/int/nnc/mpsdnn.tests.c +++ b/test/int/nnc/mpsdnn.tests.c @@ -377,7 +377,6 @@ TEST_CASE("compare sigmoid with mps in half precision") TEST_CASE("compare sigmoid gradient with mps") { - ccv_cli_set_output_levels(ccv_cli_output_level_and_above(CCV_CLI_VERBOSE)); GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BACKEND_MPS) && ccv_nnc_cmd_ok(CCV_NNC_SIGMOID_BACKWARD, CCV_NNC_BACKEND_MPS)); ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();