From 73913a378011b73709d01285f9cc2c429d9cbd1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Pierret=20=28fepitre=29?= Date: Thu, 9 Mar 2023 22:27:33 +0100 Subject: [PATCH 01/64] Rework Archlinux packaging --- apply-patches | 26 -------- archlinux/PKGBUILD | 136 ------------------------------------------ archlinux/PKGBUILD.in | 128 +++++++++++++++++++++++++++++++++++++++ pkgs/.gitignore | 1 - series-vm.conf | 7 --- 5 files changed, 128 insertions(+), 170 deletions(-) delete mode 100755 apply-patches delete mode 100644 archlinux/PKGBUILD create mode 100644 archlinux/PKGBUILD.in delete mode 100644 pkgs/.gitignore delete mode 100644 series-vm.conf diff --git a/apply-patches b/apply-patches deleted file mode 100755 index b1c84686..00000000 --- a/apply-patches +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -# -# Given a series.conf file and a directory with patches, applies them to the -# current directory. -# Used by kernel-source.spec.in and kernel-binary.spec.in - -USAGE="$0 [--vanilla] [symbol ...]" - -set -e -set -o pipefail -if test $# -lt 2; then - echo "$USAGE" >&2 - exit 1 -fi -DIR="${0%/*}" -SERIES_CONF=$1 -PATCH_DIR=$2 -shift 2 - -( - echo "trap 'echo \"*** patch \$_ failed ***\"' ERR" - echo "set -ex" - egrep -v '^\s*#|^\s*$' <"$SERIES_CONF" | \ - sed "s|^|patch -s -F0 -E -p1 --no-backup-if-mismatch -i $PATCH_DIR/|" -) | sh - diff --git a/archlinux/PKGBUILD b/archlinux/PKGBUILD deleted file mode 100644 index add27936..00000000 --- a/archlinux/PKGBUILD +++ /dev/null @@ -1,136 +0,0 @@ -# This is an example PKGBUILD file. Use this as a start to creating your own, -# and remove these comments. For more information, see 'man PKGBUILD'. -# NOTE: Please fill out the license field for your package! If it is unknown, -# then please put 'unknown'. - -# Maintainer: Olivier Medoc -pkgname=qubes-vm-xen -_upstream_pkgver=`cat version` -pkgver=${_upstream_pkgver/-/\~} -pkgrel=`cat rel` -epoch= -pkgdesc="Xen is a virtual machine monitor" -arch=("x86_64") -url="http://qubes-os.org/" -license=('GPL') -groups=() -depends=(python bridge-utils python-lxml libutil-linux lzo libsystemd yajl) -makedepends=(wget make gcc patch git bin86 dev86 iasl yajl pkg-config openssl pixman) -checkdepends=() -optdepends=() -provides=('xen-qubes-vm-essentials') -conflicts=() -replaces=() -backup=() -options=() -install= -changelog= - -source=(xen-$_upstream_pkgver.tar.gz series-vm.conf apply-patches) - -noextract=() -md5sums=() #generate with 'makepkg -g' - - -build() { - - export PYTHON=/usr/bin/python - - cd xen-$_upstream_pkgver - - for p in $srcdir/../*.patch; do ln -s $p; done - - $srcdir/apply-patches $srcdir/series-vm.conf . 
- - export XEN_VENDORVERSION="-$pkgrel" - export OCAML_TOOLS=n - unset LDFLAGS - - autoreconf --install - ./configure --prefix=/usr \ - --sbindir=/usr/bin \ - --disable-ocamltools \ - --disable-pvshim \ - --disable-blktap2 - -# make prefix=/usr dist-xen - make prefix=/usr dist-tools -# make prefix=/usr dist-docs - - -} - -package() { - - cd xen-$_upstream_pkgver - - export OCAML_TOOLS=n - # Note: Archlinux removed use of directory such as /sbin /bin /usr/sbin (https://mailman.archlinux.org/pipermail/arch-dev-public/2012-March/022625.html) - make DESTDIR=$pkgdir LIBDIR=/usr/lib/ SBINDIR=/usr/bin prefix=/usr install-tools - - # Remove unwated stuff - - # stubdom: newlib - rm -rf $pkgdir/usr/*-xen-elf - - # hypervisor symlinks - rm -rf $pkgdir/boot/ - - # silly doc dir fun - rm -rf $pkgdir/usr/share/doc/xen - rm -rf $pkgdir/usr/share/doc/qemu - - # Pointless helper - rm -f $pkgdir/usr/bin/xen-python-path - - # qemu stuff (unused or available from upstream) - rm -rf $pkgdir/usr/share/xen/man - - # README's not intended for end users - rm -rf $pkgdir/etc/xen/README* - - # standard gnu info files (removed by packaging post actions anyway) - rm -rf $pkgdir/usr/info - - # adhere to Static Library Packaging Guidelines - rm -rf $pkgdir/usr/lib/*.a - - # not used in Qubes VM - rm -f $pkgdir/usr/bin/xenstored - rm -f $pkgdir/usr/share/xen/create.dtd - rm -rf $pkgdir/etc/sysconfig - rm -rf $pkgdir/etc/rc.d/init.d - - rm -r "$pkgdir/var/run" - - ############ fixup files in /etc ############ - - # udev - #rm -rf %{buildroot}/etc/udev/rules.d/xen*.rules - #mv %{buildroot}/etc/udev/xen*.rules %{buildroot}/etc/udev/rules.d - rm -f $pkgdir/etc/udev/rules.d/xend.rules - - # config file only used for hotplug, Fedora uses udev instead - rm -f $pkgdir/etc/sysconfig/xend - - ############ assemble license files ############ - - mkdir licensedir - # avoid licensedir to avoid recursion, also stubdom/ioemu and dist - # which are copies of files elsewhere - find . 
-path licensedir -prune -o -path stubdom/ioemu -prune -o \ - -path dist -prune -o -name COPYING -o -name LICENSE | while read file; do - mkdir -p licensedir/`dirname $file` - install -m 644 $file licensedir/$file - done - -############ all done now ############ - - - # Remove /var/lock (it is tmpfs in archlinux anyway) - rm -rf $pkgdir/var/lock - -} - -# vim:set ts=2 sw=2 et: - diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in new file mode 100644 index 00000000..71c3768c --- /dev/null +++ b/archlinux/PKGBUILD.in @@ -0,0 +1,128 @@ +# Maintainer: Frédéric Pierret (fepitre) + +_upstream_pkgver=@VERSION@ +pkgname=qubes-vm-xen +pkgbase="xen-$_upstream_pkgver" +pkgver=${_upstream_pkgver/-/\~} +pkgrel=@REL@ +pkgdesc="Xen is a virtual machine monitor" +arch=("x86_64") +url="http://qubes-os.org/" +license=('GPL') +depends=(bridge-utils python-lxml libutil-linux lzo libsystemd yajl) +makedepends=(wget make gcc patch git bin86 dev86 iasl yajl pkg-config openssl pixman) +provides=('xen-qubes-vm-essentials') + +_patches=( + 1000-Do-not-access-network-during-the-build.patch + 1001-hotplug-store-block-params-for-cleanup.patch + 1020-xen-tools-qubes-vm.patch + 1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch + 1101-docs-rename-DATE-to-PANDOC_REL_DATE-and-allow-to-spe.patch + 1102-docs-xen-headers-use-alphabetical-sorting-for-incont.patch + 1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch +) +source=(xen-$_upstream_pkgver.tar.gz "${_patches[@]}") +md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) + +prepare() { + cd "${pkgbase}" + + for p in "${_patches[@]}"; do + patch -s -F0 -E -p1 --no-backup-if-mismatch -i "${srcdir}/$p" + done + + # Fix Install Paths. + sed 's,/var/run,/run,g' -i tools/hotplug/Linux/locking.sh + sed 's,/var/run,/run,g' -i tools/misc/xenpvnetboot + sed 's,/var/run,/run,g' -i tools/xenmon/xenbaked.c + sed 's,/var/run,/run,g' -i tools/xenmon/xenmon.py + sed 's,/var/run,/run,g' -i tools/pygrub/src/pygrub +} + +build() { + cd "${pkgbase}" + autoreconf --install + ./configure --prefix=/usr \ + --sbindir=/usr/bin \ + --libdir=/usr/lib \ + --disable-ocaml \ + --disable-pvshim \ + --disable-blktap2 + + make XEN_VENDORVERSION="-$pkgrel" prefix=/usr dist-tools +} + +package() { + set -x + cd "${pkgbase}" + + # Note: Archlinux removed use of directory such as /sbin /bin /usr/sbin (https://mailman.archlinux.org/pipermail/arch-dev-public/2012-March/022625.html) + make XEN_VENDORVERSION="-$pkgrel" DESTDIR="$pkgdir" LIBDIR=/usr/lib/ SBINDIR=/usr/bin prefix=/usr install-tools + + # + # Remove unwanted stuff + # + + rm -rf "$pkgdir"/usr/lib/debug + rm -rf "$pkgdir"/usr/src/debug + + # stubdom: newlib + rm -rf "$pkgdir"/usr/*-xen-elf + + # hypervisor symlinks + rm -rf "$pkgdir"/boot/ + + # remove documentation + rm -rf "$pkgdir"/usr/share/doc + rm -rf "$pkgdir"/usr/share/man + + # Pointless helper + rm -f "$pkgdir"/usr/bin/xen-python-path + + # qemu stuff (unused or available from upstream) + rm -rf "$pkgdir"/usr/share/xen/man + + # README's not intended for end users + rm -rf "$pkgdir"/etc/xen/README* + + # standard gnu info files (removed by packaging post actions anyway) + rm -rf "$pkgdir"/usr/info + + # adhere to Static Library Packaging Guidelines + rm -rf "$pkgdir"/usr/lib/*.a + + # not used in Qubes VM + rm -f "$pkgdir"/usr/bin/xenstored + rm -f "$pkgdir"/usr/share/xen/create.dtd + rm -rf "$pkgdir"/etc/sysconfig + rm -rf "$pkgdir"/etc/rc.d/init.d + + rm -r ""$pkgdir"/var/run" + + # udev + rm -f "$pkgdir"/etc/udev/rules.d/xend.rules + + # config file only used 
for hotplug, Fedora uses udev instead + rm -f "$pkgdir"/etc/sysconfig/xend + + # Remove /var/lock (it is tmpfs in archlinux anyway) + rm -rf "$pkgdir"/var/lock + + rm -rf "$pkgdir"/var/lib/xen + rm -rf "$pkgdir"/var/log + + + # + # Assemble license files + # + + mkdir licensedir + # avoid licensedir to avoid recursion, also stubdom/ioemu and dist + # which are copies of files elsewhere + find . -path licensedir -prune -o -path stubdom/ioemu -prune -o \ + -path dist -prune -o -name COPYING -o -name LICENSE | while read file; do + mkdir -p licensedir/$(dirname $file) + install -m 644 $file licensedir/$file + done +} diff --git a/pkgs/.gitignore b/pkgs/.gitignore deleted file mode 100644 index 72e8ffc0..00000000 --- a/pkgs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* diff --git a/series-vm.conf b/series-vm.conf deleted file mode 100644 index 5cd24dd4..00000000 --- a/series-vm.conf +++ /dev/null @@ -1,7 +0,0 @@ -1000-Do-not-access-network-during-the-build.patch -1001-hotplug-store-block-params-for-cleanup.patch -1020-xen-tools-qubes-vm.patch -1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch -1101-docs-rename-DATE-to-PANDOC_REL_DATE-and-allow-to-spe.patch -1102-docs-xen-headers-use-alphabetical-sorting-for-incont.patch -1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch From df4f8481b7d3327f9d28d8e3169d400b8c013215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Pierret=20=28fepitre=29?= Date: Tue, 25 Apr 2023 15:22:26 +0200 Subject: [PATCH 02/64] gitlab-ci: remove 4.1 --- .gitlab-ci.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4cff2c95..0afeb3b5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,10 +1,4 @@ include: -- file: /r4.1/gitlab-base.yml - project: QubesOS/qubes-continuous-integration -- file: /r4.1/gitlab-dom0.yml - project: QubesOS/qubes-continuous-integration -- file: /r4.1/gitlab-vm-archlinux.yml - project: QubesOS/qubes-continuous-integration - file: /r4.2/gitlab-base.yml project: QubesOS/qubes-continuous-integration - file: /r4.2/gitlab-host.yml @@ -13,5 +7,6 @@ include: project: QubesOS/qubes-continuous-integration - file: /r4.2/gitlab-vm-archlinux.yml project: QubesOS/qubes-continuous-integration + variables: TRAVIS_INSTALL_EXCLUDE: xen-qubes-vm xen-qubes-debug xen-qubes-debuginfo xen-hypervisor-common xen-system-amd64 From d3f10e4cbe391c0f32339290e94b74df5f5fd467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 25 Apr 2023 16:45:29 +0200 Subject: [PATCH 03/64] version 4.17.0-9 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 45a4fb75..ec635144 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -8 +9 From 3e05028c1ec6c4f902794169f384e43c928e8c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 23 May 2023 03:51:31 +0200 Subject: [PATCH 04/64] version 4.17.1-1 Drop patches included upstream already, update context of some existing patches. Include also XSA-431 patch that was published after 4.17.1 release. 
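---
Note (annotation below the fold; git-am drops everything after this
marker): the XSA-431 fix added here as 0501-xsa431.patch closes an
unsigned-counter underflow. A minimal sketch of the bug class, with
hypothetical names rather than the actual Xen code:

    static unsigned int ssbd_count;     /* threads wanting SSBD on */

    void set_ssbd(bool enable)
    {
        if (enable)
            ssbd_count++;
        else
            ssbd_count--;   /* a redundant "disable" wraps to UINT_MAX,
                               so the core believes SSBD is already
                               active and stops forwarding real
                               requests to the hardware */
    }

The actual patch (included further down) avoids this by remembering each
thread's current setting and only propagating genuine changes.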
--- ...exit-to-inhibit-calling-ExitBootServ.patch | 10 +- ...the-ESRT-when-booting-via-multiboot2.patch | 50 ----- 0306-x86-Replace-PAT_-with-X86_MT_.patch | 6 +- ...nt-overflow-with-high-frequency-TSCs.patch | 33 --- ...nfrastructure-for-leaves-7-1-ecx-edx.patch | 131 ----------- ...e-CET-SS-on-parts-succeptable-to-fra.patch | 205 ------------------ ...tigate-Cross-Thread-Return-Address-P.patch | 119 ---------- 0501-xsa431.patch | 96 ++++++++ ...n-change-s-size-type-for-Python-3.10.patch | 68 ------ rel | 2 +- version | 2 +- xen.spec.in | 7 +- 12 files changed, 107 insertions(+), 622 deletions(-) delete mode 100644 0300-Relocate-the-ESRT-when-booting-via-multiboot2.patch delete mode 100644 0310-x86-time-prevent-overflow-with-high-frequency-TSCs.patch delete mode 100644 0311-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch delete mode 100644 0312-x86-shskt-Disable-CET-SS-on-parts-succeptable-to-fra.patch delete mode 100644 0313-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch create mode 100644 0501-xsa431.patch delete mode 100644 0629-tools-python-change-s-size-type-for-Python-3.10.patch diff --git a/0201-EFI-early-Add-noexit-to-inhibit-calling-ExitBootServ.patch b/0201-EFI-early-Add-noexit-to-inhibit-calling-ExitBootServ.patch index 9b766e4b..251b549c 100644 --- a/0201-EFI-early-Add-noexit-to-inhibit-calling-ExitBootServ.patch +++ b/0201-EFI-early-Add-noexit-to-inhibit-calling-ExitBootServ.patch @@ -22,8 +22,8 @@ index e82ac9daa7ad..2221cdec681a 100644 --- a/xen/arch/x86/efi/efi-boot.h +++ b/xen/arch/x86/efi/efi-boot.h @@ -816,7 +816,7 @@ void __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable - if ( gop ) - efi_set_gop_mode(gop, gop_mode); + + efi_relocate_esrt(SystemTable); - efi_exit_boot(ImageHandle, SystemTable); + efi_exit_boot(ImageHandle, SystemTable, true); @@ -39,13 +39,13 @@ index db0340c8e262..d78a315e56e0 100644 static void efi_variables(void); static void efi_set_gop_mode(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop, UINTN gop_mode); -static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable); -+static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable, bool efit_boot_services); ++static void efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable, bool exit_boot_services); static const EFI_BOOT_SERVICES *__initdata efi_bs; static UINT32 __initdata efi_bs_revision; @@ -1175,7 +1175,7 @@ static void __init efi_relocate_esrt(EFI_SYSTEM_TABLE *SystemTable) - efi_bs->FreePool(memory_map); - } + #define INVALID_VIRTUAL_ADDRESS (0xBAAADUL << \ + (EFI_PAGE_SHIFT + BITS_PER_LONG - 32)) -static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) +static void __init efi_exit_boot(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable, bool exit_boot_services) diff --git a/0300-Relocate-the-ESRT-when-booting-via-multiboot2.patch b/0300-Relocate-the-ESRT-when-booting-via-multiboot2.patch deleted file mode 100644 index b83c16a0..00000000 --- a/0300-Relocate-the-ESRT-when-booting-via-multiboot2.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 903eb78c5a3ee51e8692bddf354d6e840dd4947f Mon Sep 17 00:00:00 2001 -Message-Id: <903eb78c5a3ee51e8692bddf354d6e840dd4947f.1671229737.git.demi@invisiblethingslab.com> -In-Reply-To: -References: -From: Demi Marie Obenour -Date: Thu, 8 Dec 2022 19:25:43 -0500 -Subject: [PATCH 2/2] Relocate the ESRT when booting via multiboot2 -Cc: Marek Marczykowski-Górecki - -This was missed in the initial patchset. 
The patch sent upstream is -larger and involves significant code movement. I added a forward -declaration instead. - -Signed-off-by: Demi Marie Obenour ---- - xen/arch/x86/efi/efi-boot.h | 2 ++ - xen/common/efi/boot.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h -index abfc7ab0f31511e2c1ee402a09ac533d260444b2..a9a2991d6462dec9cea695c8b912b72df26bd511 100644 ---- a/xen/arch/x86/efi/efi-boot.h -+++ b/xen/arch/x86/efi/efi-boot.h -@@ -825,6 +825,8 @@ void __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable - if ( gop ) - efi_set_gop_mode(gop, gop_mode); - -+ efi_relocate_esrt(SystemTable); -+ - efi_exit_boot(ImageHandle, SystemTable, true); - } - -diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c -index 32ae6b43bb53448421c908819cda552757157c1f..dac3247590fc5485b64d538a564675fe70f7a3f6 100644 ---- a/xen/common/efi/boot.c -+++ b/xen/common/efi/boot.c -@@ -588,6 +588,8 @@ static int __init efi_check_dt_boot(const EFI_LOADED_IMAGE *loaded_image) - } - #endif - -+static void __init efi_relocate_esrt(EFI_SYSTEM_TABLE *SystemTable); -+ - static UINTN __initdata esrt = EFI_INVALID_TABLE_ADDR; - - static size_t __init get_esrt_size(const EFI_MEMORY_DESCRIPTOR *desc) --- -Sincerely, -Demi Marie Obenour (she/her/hers) -Invisible Things Lab - diff --git a/0306-x86-Replace-PAT_-with-X86_MT_.patch b/0306-x86-Replace-PAT_-with-X86_MT_.patch index 52c4ce09..aad8119a 100644 --- a/0306-x86-Replace-PAT_-with-X86_MT_.patch +++ b/0306-x86-Replace-PAT_-with-X86_MT_.patch @@ -162,7 +162,7 @@ index 4d2aa6def86de45aeeaade7a1a7815c5ef2b3d7a..242623f3c239ee18a44f882ecb3910a0 /* fall through */ default: @@ -638,12 +638,12 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start, - rcu_read_unlock(&pinned_cacheattr_rcu_lock); + domain_unlock(d); return -ENOENT; - case PAT_TYPE_UC_MINUS: @@ -181,14 +181,14 @@ index 4d2aa6def86de45aeeaade7a1a7815c5ef2b3d7a..242623f3c239ee18a44f882ecb3910a0 default: @@ -681,7 +681,7 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start, + xfree(newr); - list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges); p2m_memory_type_changed(d); - if ( type != PAT_TYPE_WRBACK ) + if ( type != X86_MT_WB ) flush_all(FLUSH_CACHE); - return 0; + return rc; diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 7c81b80710f99e08fe8291d3e413c449322b777d..b543c3983d77ae807e8bd97330691a79d8d39bae 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c diff --git a/0310-x86-time-prevent-overflow-with-high-frequency-TSCs.patch b/0310-x86-time-prevent-overflow-with-high-frequency-TSCs.patch deleted file mode 100644 index 9719f0f6..00000000 --- a/0310-x86-time-prevent-overflow-with-high-frequency-TSCs.patch +++ /dev/null @@ -1,33 +0,0 @@ -From ad15a0a8ca2515d8ac58edfc0bc1d3719219cb77 Mon Sep 17 00:00:00 2001 -Message-Id: -From: Neowutran -Date: Mon, 19 Dec 2022 11:34:16 +0100 -Subject: [PATCH] x86/time: prevent overflow with high frequency TSCs - -Make sure tsc_khz is promoted to a 64-bit type before multiplying by -1000 to avoid an 'overflow before widen' bug. Otherwise just above -4.294GHz the value will overflow. Processors with clocks this high are -now in production and require this to work correctly. 
- -Signed-off-by: Neowutran -Reviewed-by: Jan Beulich ---- - xen/arch/x86/time.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c -index b01acd390d..d882b43cf0 100644 ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -2585,7 +2585,7 @@ int tsc_set_info(struct domain *d, - case TSC_MODE_ALWAYS_EMULATE: - d->arch.vtsc_offset = get_s_time() - elapsed_nsec; - d->arch.tsc_khz = gtsc_khz ?: cpu_khz; -- set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000); -+ set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000UL); - - /* - * In default mode use native TSC if the host has safe TSC and --- -2.38.1 - diff --git a/0311-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch b/0311-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch deleted file mode 100644 index 3e59ebc5..00000000 --- a/0311-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch +++ /dev/null @@ -1,131 +0,0 @@ -From: Andrew Cooper -Subject: x86/cpuid: Infrastructure for leaves 7:1{ecx,edx} -Date: Wed, 04 Jan 2023 11:11:45 +0000 - -We don't actually need ecx yet, but adding it in now will reduce the amount to -which leaf 7 is out of order in a featureset. - -cpufeatureset.h remains in leaf architectrual order for the sanity of anyone -trying to locate where to insert new rows. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- -CC: Jan Beulich -CC: Roger Pau Monné -CC: Wei Liu - -v2: - * Fix decodes[] short string ---- - tools/misc/xen-cpuid.c | 10 ++++++++++ - xen/arch/x86/cpu/common.c | 3 ++- - xen/include/public/arch-x86/cpufeatureset.h | 3 +++ - xen/include/xen/lib/x86/cpuid.h | 15 ++++++++++++++- - 4 files changed, 29 insertions(+), 2 deletions(-) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index d5833e9ce879..addb3a39a11a 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -202,6 +202,14 @@ static const char *const str_7b1[32] = - [ 0] = "ppin", - }; - -+static const char *const str_7c1[32] = -+{ -+}; -+ -+static const char *const str_7d1[32] = -+{ -+}; -+ - static const char *const str_7d2[32] = - { - [ 0] = "intel-psfd", -@@ -229,6 +237,8 @@ static const struct { - { "0x80000021.eax", "e21a", str_e21a }, - { "0x00000007:1.ebx", "7b1", str_7b1 }, - { "0x00000007:2.edx", "7d2", str_7d2 }, -+ { "0x00000007:1.ecx", "7c1", str_7c1 }, -+ { "0x00000007:1.edx", "7d1", str_7d1 }, - }; - - #define COL_ALIGN "18" -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 0412dbc915e5..b3fcf4680f3a 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -450,7 +450,8 @@ static void generic_identify(struct cpuinfo_x86 *c) - cpuid_count(7, 1, - &c->x86_capability[FEATURESET_7a1], - &c->x86_capability[FEATURESET_7b1], -- &tmp, &tmp); -+ &c->x86_capability[FEATURESET_7c1], -+ &c->x86_capability[FEATURESET_7d1]); - if (max_subleaf >= 2) - cpuid_count(7, 2, - &tmp, &tmp, &tmp, -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 7915f5826f57..7a896f0e2d92 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -288,6 +288,9 @@ XEN_CPUFEATURE(NSCB, 11*32+ 6) /*A Null Selector Clears Base (and - /* Intel-defined CPU features, CPUID level 0x00000007:1.ebx, word 12 */ - XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory Number */ - -+/* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ -+/* Intel-defined CPU features, CPUID 
level 0x00000007:1.edx, word 15 */ -+ - /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ - XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ - XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /* MSR_SPEC_CTRL.IPRED_DIS_* */ -diff --git a/xen/include/xen/lib/x86/cpuid.h b/xen/include/xen/lib/x86/cpuid.h -index 73a5c330365e..fa98b371eef4 100644 ---- a/xen/include/xen/lib/x86/cpuid.h -+++ b/xen/include/xen/lib/x86/cpuid.h -@@ -18,6 +18,8 @@ - #define FEATURESET_e21a 11 /* 0x80000021.eax */ - #define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ - #define FEATURESET_7d2 13 /* 0x00000007:2.edx */ -+#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ -+#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ - - struct cpuid_leaf - { -@@ -194,7 +196,14 @@ struct cpuid_policy - uint32_t _7b1; - struct { DECL_BITFIELD(7b1); }; - }; -- uint32_t /* c */:32, /* d */:32; -+ union { -+ uint32_t _7c1; -+ struct { DECL_BITFIELD(7c1); }; -+ }; -+ union { -+ uint32_t _7d1; -+ struct { DECL_BITFIELD(7d1); }; -+ }; - - /* Subleaf 2. */ - uint32_t /* a */:32, /* b */:32, /* c */:32; -@@ -343,6 +352,8 @@ static inline void cpuid_policy_to_featureset( - fs[FEATURESET_e21a] = p->extd.e21a; - fs[FEATURESET_7b1] = p->feat._7b1; - fs[FEATURESET_7d2] = p->feat._7d2; -+ fs[FEATURESET_7c1] = p->feat._7c1; -+ fs[FEATURESET_7d1] = p->feat._7d1; - } - - /* Fill in a CPUID policy from a featureset bitmap. */ -@@ -363,6 +374,8 @@ static inline void cpuid_featureset_to_policy( - p->extd.e21a = fs[FEATURESET_e21a]; - p->feat._7b1 = fs[FEATURESET_7b1]; - p->feat._7d2 = fs[FEATURESET_7d2]; -+ p->feat._7c1 = fs[FEATURESET_7c1]; -+ p->feat._7d1 = fs[FEATURESET_7d1]; - } - - static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) --- -2.11.0 diff --git a/0312-x86-shskt-Disable-CET-SS-on-parts-succeptable-to-fra.patch b/0312-x86-shskt-Disable-CET-SS-on-parts-succeptable-to-fra.patch deleted file mode 100644 index 841fd730..00000000 --- a/0312-x86-shskt-Disable-CET-SS-on-parts-succeptable-to-fra.patch +++ /dev/null @@ -1,205 +0,0 @@ -From: Andrew Cooper -Subject: x86/shskt: Disable CET-SS on parts susceptible to fractured updates -Date: Wed, 04 Jan 2023 11:11:46 +0000 - -Refer to Intel SDM Rev 70 (Dec 2022), Vol3 17.2.3 "Supervisor Shadow Stack -Token". - -Architecturally, an event delivery which starts in CPL<3 and switches shadow -stack will first validate the Supervisor Shadow Stack Token (setting the busy -bit), then pushes CS/LIP/SSP. One example of this is an NMI interrupting Xen. - -Some CPUs suffer from an issue called fracturing, whereby a fault/vmexit/etc -between setting the busy bit and completing the event injection renders the -action non-restartable, because when it comes time to restart, the busy bit is -found to be already set. - -This is far more easily encountered under virt, yet it is not the fault of the -hypervisor, nor the fault of the guest kernel. The fault lies somewhere -between the architectural specification, and the uarch behaviour. - -Intel have allocated CPUID.7[1].ecx[18] CET_SSS to enumerate that supervisor -shadow stacks are safe to use. Because of how Xen lays out its shadow stacks, -fracturing is not expected to be a problem on native. - -Detect this case on boot and default to not using shstk if virtualised. -Specifying `cet=shstk` on the command line will override this heuristic and -enable shadow stacks irrespective. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- -CC: Jan Beulich -CC: Roger Pau Monné -CC: Wei Liu - -v2: - * Confirmation from AMD that their parts are not impacted. - * Fix pv-shim build. The "define false" trick doesn't work with tristates. - * Tweak wording in several places. - * Fix tabs vs spaces. - -This ideally wants backporting to Xen 4.14. I have no idea how likely it is -to need to backport the prerequisite patch for new feature words, but we've -already had to do that once for security patches. OTOH, I have no idea how -easy it is to trigger in non-synthetic cases. ---- - docs/misc/xen-command-line.pandoc | 7 ++++- - tools/libs/light/libxl_cpuid.c | 2 ++ - tools/misc/xen-cpuid.c | 1 + - xen/arch/x86/cpu/common.c | 11 +++++-- - xen/arch/x86/setup.c | 46 ++++++++++++++++++++++++----- - xen/include/public/arch-x86/cpufeatureset.h | 1 + - 6 files changed, 57 insertions(+), 11 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 923910f553c5..19d4d815bdee 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -287,10 +287,15 @@ can be maintained with the pv-shim mechanism. - protection. - - The option is available when `CONFIG_XEN_SHSTK` is compiled in, and -- defaults to `true` on hardware supporting CET-SS. Specifying -+ generally defaults to `true` on hardware supporting CET-SS. Specifying - `cet=no-shstk` will cause Xen not to use Shadow Stacks even when support - is available in hardware. - -+ Some hardware suffers from an issue known as Supervisor Shadow Stack -+ Fracturing. On such hardware, Xen will default to not using Shadow Stacks -+ when virtualised. Specifying `cet=shstk` will override this heuristic and -+ enable Shadow Stacks unilaterally. -+ - * The `ibt=` boolean controls whether Xen uses Indirect Branch Tracking for - its own protection. 
- -diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c -index 2aa23225f42c..d97a2f3338bc 100644 ---- a/tools/libs/light/libxl_cpuid.c -+++ b/tools/libs/light/libxl_cpuid.c -@@ -235,6 +235,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) - {"fsrs", 0x00000007, 1, CPUID_REG_EAX, 11, 1}, - {"fsrcs", 0x00000007, 1, CPUID_REG_EAX, 12, 1}, - -+ {"cet-sss", 0x00000007, 1, CPUID_REG_EDX, 18, 1}, -+ - {"intel-psfd", 0x00000007, 2, CPUID_REG_EDX, 0, 1}, - {"mcdt-no", 0x00000007, 2, CPUID_REG_EDX, 5, 1}, - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index addb3a39a11a..0248eaef44c1 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -208,6 +208,7 @@ static const char *const str_7c1[32] = - - static const char *const str_7d1[32] = - { -+ [18] = "cet-sss", - }; - - static const char *const str_7d2[32] = -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index b3fcf4680f3a..27f73d3bbe31 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -346,11 +346,18 @@ void __init early_cpu_init(void) - x86_cpuid_vendor_to_str(c->x86_vendor), c->x86, c->x86, - c->x86_model, c->x86_model, c->x86_mask, eax); - -- if (c->cpuid_level >= 7) -- cpuid_count(7, 0, &eax, &ebx, -+ if (c->cpuid_level >= 7) { -+ uint32_t max_subleaf; -+ -+ cpuid_count(7, 0, &max_subleaf, &ebx, - &c->x86_capability[FEATURESET_7c0], - &c->x86_capability[FEATURESET_7d0]); - -+ if (max_subleaf >= 1) -+ cpuid_count(7, 1, &eax, &ebx, &ecx, -+ &c->x86_capability[FEATURESET_7d1]); -+ } -+ - eax = cpuid_eax(0x80000000); - if ((eax >> 16) == 0x8000 && eax >= 0x80000008) { - ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0; -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 566422600d94..1b8b74599f4a 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -95,11 +95,7 @@ unsigned long __initdata highmem_start; - size_param("highmem-start", highmem_start); - #endif - --#ifdef CONFIG_XEN_SHSTK --static bool __initdata opt_xen_shstk = true; --#else --#define opt_xen_shstk false --#endif -+static int8_t __initdata opt_xen_shstk = -IS_ENABLED(CONFIG_XEN_SHSTK); - - #ifdef CONFIG_XEN_IBT - static bool __initdata opt_xen_ibt = true; -@@ -1099,11 +1095,45 @@ void __init noreturn __start_xen(unsigned long mbi_p) - early_cpu_init(); - - /* Choose shadow stack early, to set infrastructure up appropriately. */ -- if ( opt_xen_shstk && boot_cpu_has(X86_FEATURE_CET_SS) ) -+ if ( !boot_cpu_has(X86_FEATURE_CET_SS) ) -+ opt_xen_shstk = 0; -+ -+ if ( opt_xen_shstk ) - { -- printk("Enabling Supervisor Shadow Stacks\n"); -+ /* -+ * Some CPUs suffer from Shadow Stack Fracturing, an issue whereby a -+ * fault/VMExit/etc between setting a Supervisor Busy bit and the -+ * event delivery completing renders the operation non-restartable. -+ * On restart, event delivery will find the Busy bit already set. -+ * -+ * This is a problem on bare metal, but outside of synthetic cases or -+ * a very badly timed #MC, it's not believed to problem. It is a much -+ * bigger problem under virt, because we can VMExit for a number of -+ * legitimate reasons and tickle this bug. -+ * -+ * CPUs with this addressed enumerate CET-SSS to indicate that -+ * supervisor shadow stacks are now safe to use. 
-+ */ -+ bool cpu_has_bug_shstk_fracture = -+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && -+ !boot_cpu_has(X86_FEATURE_CET_SSS); - -- setup_force_cpu_cap(X86_FEATURE_XEN_SHSTK); -+ /* -+ * On bare metal, assume that Xen won't be impacted by shstk -+ * fracturing problems. Under virt, be more conservative and disable -+ * shstk by default. -+ */ -+ if ( opt_xen_shstk == -1 ) -+ opt_xen_shstk = -+ cpu_has_hypervisor ? !cpu_has_bug_shstk_fracture -+ : true; -+ -+ if ( opt_xen_shstk ) -+ { -+ printk("Enabling Supervisor Shadow Stacks\n"); -+ -+ setup_force_cpu_cap(X86_FEATURE_XEN_SHSTK); -+ } - } - - if ( opt_xen_ibt && boot_cpu_has(X86_FEATURE_CET_IBT) ) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 7a896f0e2d92..f6a46f62a549 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -290,6 +290,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory - - /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ - /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */ -+XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */ - - /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ - XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ --- -2.11.0 diff --git a/0313-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch b/0313-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch deleted file mode 100644 index d45ff99d..00000000 --- a/0313-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 63305e5392ec2d17b85e7996a97462744425db80 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Sep 2022 21:27:58 +0100 -Subject: [PATCH] x86/spec-ctrl: Mitigate Cross-Thread Return Address - Predictions - -This is XSA-426 / CVE-2022-27672 - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - docs/misc/xen-command-line.pandoc | 2 +- - xen/arch/x86/include/asm/cpufeatures.h | 3 ++- - xen/arch/x86/include/asm/spec_ctrl.h | 15 +++++++++++++ - xen/arch/x86/spec_ctrl.c | 31 +++++++++++++++++++++++--- - 4 files changed, 46 insertions(+), 5 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 19d4d815bdee..63dfda4a7a9e 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2360,7 +2360,7 @@ guests to use. - on entry and exit. These blocks are necessary to virtualise support for - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. - * `rsb=` offers control over whether to overwrite the Return Stack Buffer / -- Return Address Stack on entry to Xen. -+ Return Address Stack on entry to Xen and on idle. - * `md-clear=` offers control over whether to use VERW to flush - microarchitectural buffers on idle and exit from Xen. 
*Note: For - compatibility with development versions of this fix, `mds=` is also accepted -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index 865f1109866d..da0593de8542 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -35,7 +35,8 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ - XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ --/* Bits 23,24 unused. */ -+/* Bits 23 unused. */ -+XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h -index e928596450f7..3cf8a7d304d4 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl.h -+++ b/xen/arch/x86/include/asm/spec_ctrl.h -@@ -159,6 +159,21 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) - */ - alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE, - [sel] "m" (info->verw_sel)); -+ -+ /* -+ * Cross-Thread Return Address Predictions: -+ * -+ * On vulnerable systems, the return predictions (RSB/RAS) are statically -+ * partitioned between active threads. When entering idle, our entries -+ * are re-partitioned to allow the other threads to use them. -+ * -+ * In some cases, we might still have guest entries in the RAS, so flush -+ * them before injecting them sideways to our sibling thread. -+ * -+ * (ab)use alternative_input() to specify clobbers. -+ */ -+ alternative_input("", "DO_OVERWRITE_RSB", X86_FEATURE_SC_RSB_IDLE, -+ : "rax", "rcx"); - } - - /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a320b81947c8..e80e2a5ed1a9 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1327,13 +1327,38 @@ void __init init_speculation_mitigations(void) - * 3) Some CPUs have RSBs which are not full width, which allow the - * attacker's entries to alias Xen addresses. - * -+ * 4) Some CPUs have RSBs which are re-partitioned based on thread -+ * idleness, which allows an attacker to inject entries into the other -+ * thread. We still active the optimisation in this case, and mitigate -+ * in the idle path which has lower overhead. -+ * - * It is safe to turn off RSB stuffing when Xen is using SMEP itself, and - * 32bit PV guests are disabled, and when the RSB is full width. - */ - BUILD_BUG_ON(RO_MPT_VIRT_START != PML4_ADDR(256)); -- if ( opt_rsb_pv == -1 && boot_cpu_has(X86_FEATURE_XEN_SMEP) && -- !opt_pv32 && rsb_is_full_width() ) -- opt_rsb_pv = 0; -+ if ( opt_rsb_pv == -1 ) -+ { -+ opt_rsb_pv = (opt_pv32 || !boot_cpu_has(X86_FEATURE_XEN_SMEP) || -+ !rsb_is_full_width()); -+ -+ /* -+ * Cross-Thread Return Address Predictions. -+ * -+ * Vulnerable systems are Zen1/Zen2 uarch, which is AMD Fam17 / Hygon -+ * Fam18, when SMT is active. -+ * -+ * To mitigate, we must flush the RSB/RAS/RAP once between entering -+ * Xen and going idle. -+ * -+ * Most cases flush on entry to Xen anyway. 
The one case where we -+ * don't is when using the SMEP optimisation for PV guests. Flushing -+ * before going idle is less overhead than flushing on PV entry. -+ */ -+ if ( !opt_rsb_pv && hw_smt_enabled && -+ (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD|X86_VENDOR_HYGON)) && -+ (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) ) -+ setup_force_cpu_cap(X86_FEATURE_SC_RSB_IDLE); -+ } - - if ( opt_rsb_pv ) - { --- -2.37.3 - diff --git a/0501-xsa431.patch b/0501-xsa431.patch new file mode 100644 index 00000000..07c39716 --- /dev/null +++ b/0501-xsa431.patch @@ -0,0 +1,96 @@ +From 66c930ceac3989b6dc6031bfc30e1e894fc6aebe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 16 May 2023 17:22:35 +0200 +Subject: [PATCH] x86/amd: fix legacy setting of SSBD on AMD Family 17h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current logic to set SSBD on AMD Family 17h and Hygon Family 18h +processors requires that the setting of SSBD is coordinated at a core +level, as the setting is shared between threads. Logic was introduced +to keep track of how many threads require SSBD active in order to +coordinate it, such logic relies on using a per-core counter of +threads that have SSBD active. + +Given the current logic, it's possible for a guest to under or +overflow the thread counter, because each write to VIRT_SPEC_CTRL.SSBD +by the guest gets propagated to the helper that does the per-core +active accounting. Overflowing the counter is not so much of an +issue, as this would just make SSBD sticky. + +Underflowing however is more problematic: on non-debug Xen builds a +guest can perform empty writes to VIRT_SPEC_CTRL that would cause the +counter to underflow and thus the value gets saturated to the max +value of unsigned int. At which points attempts from any thread to +set VIRT_SPEC_CTRL.SSBD won't get propagated to the hardware anymore, +because the logic will see that the counter is greater than 1 and +assume that SSBD is already active, effectively loosing the setting +of SSBD and the protection it provides. + +Fix this by introducing a per-CPU variable that keeps track of whether +the current thread has legacy SSBD active or not, and thus only +attempt to propagate the value to the hardware once the thread +selected value changes. + +This is XSA-431 / CVE-2022-42336 + +Fixes: b2030e6730a2 ('amd/virt_ssbd: set SSBD at vCPU context switch') +Reported-by: Andrew Cooper +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: eda98ea870803ea204a1928519b3f21ec6a679b6 +master date: 2023-05-16 17:17:24 +0200 +--- + xen/arch/x86/cpu/amd.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 1ddb55cbe5..b6a20d375a 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -783,12 +783,23 @@ bool __init amd_setup_legacy_ssbd(void) + return true; + } + ++/* ++ * legacy_ssbd is always initialized to false because when SSBD is set ++ * from the command line guest attempts to change it are a no-op (see ++ * amd_set_legacy_ssbd()), whereas when SSBD is inactive hardware will ++ * be forced into that mode (see amd_init_ssbd()). ++ */ ++static DEFINE_PER_CPU(bool, legacy_ssbd); ++ ++/* Must be called only when the SSBD setting needs toggling. 
*/ + static void core_set_legacy_ssbd(bool enable) + { + const struct cpuinfo_x86 *c = ¤t_cpu_data; + struct ssbd_ls_cfg *status; + unsigned long flags; + ++ BUG_ON(this_cpu(legacy_ssbd) == enable); ++ + if ((c->x86 != 0x17 && c->x86 != 0x18) || c->x86_num_siblings <= 1) { + BUG_ON(!set_legacy_ssbd(c, enable)); + return; +@@ -816,12 +827,17 @@ void amd_set_legacy_ssbd(bool enable) + */ + return; + ++ if (this_cpu(legacy_ssbd) == enable) ++ return; ++ + if (cpu_has_virt_ssbd) + wrmsr(MSR_VIRT_SPEC_CTRL, enable ? SPEC_CTRL_SSBD : 0, 0); + else if (amd_legacy_ssbd) + core_set_legacy_ssbd(enable); + else + ASSERT_UNREACHABLE(); ++ ++ this_cpu(legacy_ssbd) = enable; + } + + /* +-- +2.40.1 + diff --git a/0629-tools-python-change-s-size-type-for-Python-3.10.patch b/0629-tools-python-change-s-size-type-for-Python-3.10.patch deleted file mode 100644 index 55e2c14a..00000000 --- a/0629-tools-python-change-s-size-type-for-Python-3.10.patch +++ /dev/null @@ -1,68 +0,0 @@ -From df461d49eaad6247246129735895bd2782f49f9d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - -Date: Thu, 26 Jan 2023 03:10:51 +0100 -Subject: [PATCH] tools/python: change 's#' size type for Python >= 3.10 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Python < 3.10 by default uses 'int' type for data+size string types -(s#), unless PY_SSIZE_T_CLEAN is defined - in which case it uses -Py_ssize_t. The former behavior was removed in Python 3.10 and now it's -required to define PY_SSIZE_T_CLEAN before including Python.h, and using -Py_ssize_t for the length argument. - -Adjust bindings accordingly. - -Signed-off-by: Marek Marczykowski-Górecki ---- - tools/python/xen/lowlevel/xc/xc.c | 3 ++- - tools/python/xen/lowlevel/xs/xs.c | 3 ++- - 2 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c -index fd008610329b..cfb2734a992b 100644 ---- a/tools/python/xen/lowlevel/xc/xc.c -+++ b/tools/python/xen/lowlevel/xc/xc.c -@@ -4,6 +4,7 @@ - * Copyright (c) 2003-2004, K A Fraser (University of Cambridge) - */ - -+#define PY_SSIZE_T_CLEAN - #include - #define XC_WANT_COMPAT_MAP_FOREIGN_API - #include -@@ -1774,7 +1775,7 @@ static PyObject *pyflask_load(PyObject *self, PyObject *args, PyObject *kwds) - { - xc_interface *xc_handle; - char *policy; -- uint32_t len; -+ Py_ssize_t len; - int ret; - - static char *kwd_list[] = { "policy", NULL }; -diff --git a/tools/python/xen/lowlevel/xs/xs.c b/tools/python/xen/lowlevel/xs/xs.c -index 0dad7fa5f2fc..3ba5a8b893d9 100644 ---- a/tools/python/xen/lowlevel/xs/xs.c -+++ b/tools/python/xen/lowlevel/xs/xs.c -@@ -18,6 +18,7 @@ - * Copyright (C) 2005 XenSource Ltd. 
- */ - -+#define PY_SSIZE_T_CLEAN - #include - - #include -@@ -141,7 +142,7 @@ static PyObject *xspy_write(XsHandle *self, PyObject *args) - char *thstr; - char *path; - char *data; -- int data_n; -+ Py_ssize_t data_n; - bool result; - - if (!xh) --- -2.37.3 - diff --git a/rel b/rel index ec635144..d00491fd 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -9 +1 diff --git a/version b/version index 43b58271..1b0a87fd 100644 --- a/version +++ b/version @@ -1 +1 @@ -4.17.0 +4.17.1 diff --git a/xen.spec.in b/xen.spec.in index 920b1d5d..5c23e79c 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -94,7 +94,6 @@ Patch0203: 0203-Add-xen.cfg-options-for-mapbs-and-noexitboot.patch Patch0204: 0204-xen.efi.build.patch # Backports -Patch0300: 0300-Relocate-the-ESRT-when-booting-via-multiboot2.patch Patch0301: 0301-x86-mm-shadow-avoid-assuming-a-specific-Xen-PAT.patch Patch0302: 0302-x86-mm-shadow-do-not-open-code-PAGE_CACHE_ATTRS.patch Patch0303: 0303-p2m-pt-Avoid-hard-coding-Xen-s-PAT.patch @@ -104,13 +103,10 @@ Patch0306: 0306-x86-Replace-PAT_-with-X86_MT_.patch Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch -Patch0310: 0310-x86-time-prevent-overflow-with-high-frequency-TSCs.patch -Patch0311: 0311-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch -Patch0312: 0312-x86-shskt-Disable-CET-SS-on-parts-succeptable-to-fra.patch -Patch0313: 0313-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch +Patch0501: 0501-xsa431.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch @@ -138,7 +134,6 @@ Patch0625: 0625-libxl-Allow-stubdomain-to-control-interupts-of-PCI-d.patch Patch0626: 0626-Validate-EFI-memory-descriptors.patch Patch0627: 0627-x86-mm-Avoid-hard-coding-PAT-in-get_page_from_l1e.patch Patch0628: 0628-x86-mm-make-code-robust-to-future-PAT-changes.patch -Patch0629: 0629-tools-python-change-s-size-type-for-Python-3.10.patch Patch0630: 0630-Drop-ELF-notes-from-non-EFI-binary-too.patch # Intel HWP support From 88d40a1e98b31ae981d38f3a035db77f45939d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 23 May 2023 11:18:58 +0200 Subject: [PATCH 05/64] Backport few patches Backport serial console fix, and a build fix with GCC 13 (relevant for Archlinux). 
--- ...emory-decoding-on-MMIO-based-PCI-con.patch | 51 ++++++++++++++++ ...uest-assist-gcc13-s-realloc-analyzer.patch | 59 +++++++++++++++++++ archlinux/PKGBUILD.in | 3 +- xen.spec.in | 2 + 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch create mode 100644 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch diff --git a/0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch b/0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch new file mode 100644 index 00000000..104d7cf6 --- /dev/null +++ b/0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch @@ -0,0 +1,51 @@ +From a16fb78515d54be95f81c0d1c0a3a7b954a54d0a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + +Date: Mon, 8 May 2023 14:15:38 +0200 +Subject: [PATCH] ns16550: enable memory decoding on MMIO-based PCI console + card +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +pci_serial_early_init() enables PCI_COMMAND_IO for IO-based UART +devices, add setting PCI_COMMAND_MEMORY for MMIO-based UART devices too. +Note the MMIO-based devices in practice need a "pci" sub-option, +otherwise a few parameters are not initialized (including bar_idx, +reg_shift, reg_width etc). The "pci" is not supposed to be used with +explicit BDF, so do not key setting PCI_COMMAND_MEMORY on explicit BDF +being set. Contrary to the IO-based UART, pci_serial_early_init() will +not attempt to set BAR0 address, even if user provided io_base manually +- in most cases, those are with an offest and the current cmdline syntax +doesn't allow expressing it. Due to this, enable PCI_COMMAND_MEMORY only +if uart->bar is already populated. In similar spirit, this patch does +not support setting BAR0 of the bridge. + +Signed-off-by: Marek Marczykowski-Górecki +Acked-by: Jan Beulich +--- + xen/drivers/char/ns16550.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c +index 1b21eb93c45f..212a9c49ae8e 100644 +--- a/xen/drivers/char/ns16550.c ++++ b/xen/drivers/char/ns16550.c +@@ -272,6 +272,14 @@ static int cf_check ns16550_getc(struct serial_port *port, char *pc) + static void pci_serial_early_init(struct ns16550 *uart) + { + #ifdef NS16550_PCI ++ if ( uart->bar && uart->io_base >= 0x10000 ) ++ { ++ pci_conf_write16(PCI_SBDF(0, uart->ps_bdf[0], uart->ps_bdf[1], ++ uart->ps_bdf[2]), ++ PCI_COMMAND, PCI_COMMAND_MEMORY); ++ return; ++ } ++ + if ( !uart->ps_bdf_enable || uart->io_base >= 0x10000 ) + return; + +-- +2.39.2 + diff --git a/0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch b/0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch new file mode 100644 index 00000000..f8205c6c --- /dev/null +++ b/0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch @@ -0,0 +1,59 @@ +From 99a9c3d7141063ae3f357892c6181cfa3be8a280 Mon Sep 17 00:00:00 2001 +From: Olaf Hering +Date: Wed, 3 May 2023 15:06:41 +0200 +Subject: [PATCH] tools/libs/guest: assist gcc13's realloc analyzer + +gcc13 fails to track the allocated memory in backup_ptes: + +xg_offline_page.c: In function 'backup_ptes': +xg_offline_page.c:191:13: error: pointer 'orig' may be used after 'realloc' [-Werror=use-after-free] + 191 | free(orig); + +Assist the analyzer by slightly rearranging the code: +In case realloc succeeds, the previous allocation is either extended +or released internally. 
In case realloc fails, the previous allocation +is left unchanged. Return an error in this case, the caller will +release the currently allocated memory in its error path. + +http://bugzilla.suse.com/show_bug.cgi?id=1210570 + +Signed-off-by: Olaf Hering +Reviewed-by: Juergen Gross +Compile-tested-by: Jason Andryuk +Acked-by: Jan Beulich +--- + tools/libs/guest/xg_offline_page.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +diff --git a/tools/libs/guest/xg_offline_page.c b/tools/libs/guest/xg_offline_page.c +index ccd0299f0fc7..8f0a252417a5 100644 +--- a/tools/libs/guest/xg_offline_page.c ++++ b/tools/libs/guest/xg_offline_page.c +@@ -181,18 +181,14 @@ static int backup_ptes(xen_pfn_t table_mfn, int offset, + + if (backup->max == backup->cur) + { +- void *orig = backup->entries; ++ void *entries = realloc(backup->entries, backup->max * 2 * ++ sizeof(struct pte_backup_entry)); + +- backup->entries = realloc( +- orig, backup->max * 2 * sizeof(struct pte_backup_entry)); +- +- if (backup->entries == NULL) +- { +- free(orig); ++ if (entries == NULL) + return -1; +- } +- else +- backup->max *= 2; ++ ++ backup->entries = entries; ++ backup->max *= 2; + } + + backup->entries[backup->cur].table_mfn = table_mfn; +-- +2.39.2 + diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index 71c3768c..593d4cc3 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -14,6 +14,7 @@ makedepends=(wget make gcc patch git bin86 dev86 iasl yajl pkg-config openssl pi provides=('xen-qubes-vm-essentials') _patches=( + 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch 1000-Do-not-access-network-during-the-build.patch 1001-hotplug-store-block-params-for-cleanup.patch 1020-xen-tools-qubes-vm.patch @@ -23,7 +24,7 @@ _patches=( 1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch ) source=(xen-$_upstream_pkgver.tar.gz "${_patches[@]}") -md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) +md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) prepare() { cd "${pkgbase}" diff --git a/xen.spec.in b/xen.spec.in index 5c23e79c..b7107d4b 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -103,6 +103,8 @@ Patch0306: 0306-x86-Replace-PAT_-with-X86_MT_.patch Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch +Patch0310: 0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch +Patch0311: 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch From c18c73e4e3f9f7e8aa9a726c8f91f7c49ee3eb58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 24 May 2023 01:19:19 +0200 Subject: [PATCH 06/64] ci: update INSTALL_EXCLUDE variable name --- .gitlab-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0afeb3b5..e0286fa3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -9,4 +9,5 @@ include: project: QubesOS/qubes-continuous-integration variables: - TRAVIS_INSTALL_EXCLUDE: xen-qubes-vm xen-qubes-debug xen-qubes-debuginfo xen-hypervisor-common xen-system-amd64 + CI_INSTALL_EXCLUDE: xen-qubes-vm xen-qubes-debug xen-qubes-debuginfo + From 6548eacfb48f4620b0c5844e4fe5fa4c4009c864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Fri, 26 May 2023 22:17:23 +0200 Subject: [PATCH 07/64] rpm: remove 
xen-qubes-vm subpackage It isn't used anymore, VMs use distribution-native Xen packages fully. --- xen.spec.in | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/xen.spec.in b/xen.spec.in index b7107d4b..753778e3 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -394,17 +394,6 @@ This package contains libraries for developing ocaml tools to manage Xen virtual machines. %endif -# BEGIN QUBES SPECIFIC PART -%package qubes-vm -Summary: Xen files required in Qubes VM -Requires: xen-libs = %{epoch}:%{version}-%{release} -Conflicts: xen -Provides: xen-qubes-vm-essentials = %{epoch}:%{version}-%{release} - -%description qubes-vm -Just a few xenstore-* tools and Xen hotplug scripts needed by Qubes VMs -# END QUBES SPECIFIC PART - %prep %autosetup -p1 -n %{name}-%{upstream_version} @@ -728,13 +717,6 @@ fi %ldconfig_scriptlets libs # BEGIN QUBES SPECIFIC PART -%post qubes-vm -# Unconditionally enable this service in Qubes VM -systemctl enable xendriverdomain.service >/dev/null 2>&1 || : - -%preun qubes-vm -%systemd_preun xendriverdomain.service - %post libs -p /sbin/ldconfig %postun libs -p /sbin/ldconfig # END QUBES SPECIFIC PART @@ -1107,31 +1089,5 @@ fi %{_libdir}/ocaml/xen*/*.cmx %endif -# BEGIN QUBES SPECIFIC PART -%files qubes-vm -%{_bindir}/xenstore -%{_bindir}/xenstore-* -%{_sbindir}/xl -%{_unitdir}/xendriverdomain.service -%config(noreplace) %{_sysconfdir}/xen/xl.conf - -%dir %attr(0700,root,root) %{_sysconfdir}/xen -%dir %attr(0700,root,root) %{_sysconfdir}/xen/scripts/ -%config %attr(0700,root,root) %{_sysconfdir}/xen/scripts/* - -# General Xen state -%dir %{_localstatedir}/lib/xen -%dir %{_localstatedir}/lib/xen/dump - -# Xen logfiles -%dir %attr(0700,root,root) %{_localstatedir}/log/xen - -# Python modules -%dir %{python3_sitearch}/xen -%{python3_sitearch}/xen/__init__.* -%{python3_sitearch}/xen/lowlevel -%{python3_sitearch}/xen-*.egg-info -# END QUBES SPECIFIC PART - %changelog @CHANGELOG@ From 7d58d34b9a30ac70c71fb39f7e9bd5e944b1088c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Fri, 23 Jun 2023 11:37:11 +0200 Subject: [PATCH 08/64] Use upstream approach for publishing ARCH_CAPS to guests Replace custom patch with proper upstream implementation. 
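---
Note (annotation; git-am ignores text below this marker): these
backports let guests read speculation hints from MSR_ARCH_CAPS through
the regular CPU policy machinery instead of a Qubes-specific patch. An
illustrative consumer-side fragment only: the MSR index and bit
positions follow the Intel SDM, while rdmsr() and enable_kpti() are
hypothetical stand-ins for whatever accessors a guest kernel uses:

    #define MSR_ARCH_CAPABILITIES   0x0000010a
    #define ARCH_CAPS_RDCL_NO       (1ULL << 0)   /* not affected by Meltdown */
    #define ARCH_CAPS_RSBA          (1ULL << 2)   /* RET may use other predictors */

    /* Read only if CPUID leaf 7:0 EDX[29] enumerates ARCH_CAPS. */
    uint64_t caps = rdmsr(MSR_ARCH_CAPABILITIES);
    if (!(caps & ARCH_CAPS_RDCL_NO))
        enable_kpti();    /* hypothetical mitigation hook */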
--- ...Rework-the-handling-of-dynamic-featu.patch | 118 ++ ...fit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch | 127 ++ ...t-cpu_policy-to-struct-old_cpuid_pol.patch | 157 ++ ...l-sysctl-.cpu_policy.-cpuid-msr-_pol.patch | 173 ++ ...ct-cpuid_policy-to-struct-cpu_policy.patch | 1073 ++++++++++ ...ct-msr_policy-into-struct-cpu_policy.patch | 307 +++ ...-the-system-cpuid-msr-policy-objects.patch | 612 ++++++ ...-a-domain-s-cpuid-msr-policy-objects.patch | 423 ++++ ...c_cpu_policy-s-cpuid-and-msr-objects.patch | 367 ++++ 0330-x86-Drop-struct-old_cpu_policy.patch | 292 +++ ...ine-the-policy-featureset-convertors.patch | 293 +++ ...R-policy-initialisation-logic-into-c.patch | 261 +++ ...PUID-policy-initialisation-logic-int.patch | 1780 +++++++++++++++++ ...witch-x86_emulate_ctxt-to-cpu_policy.patch | 142 ++ ...-tools-fuzz-Rework-afl-policy-fuzzer.patch | 133 ++ ...86-Update-library-API-for-cpu_policy.patch | 448 +++++ ...-temporary-cpuid-msr-_policy-defines.patch | 334 ++++ ...ate-FEATURESET_NR_ENTRIES-more-helpf.patch | 110 + ...ot-Rework-dom0-feature-configuration.patch | 136 ++ ...MSR_ARCH_CAPS-handling-for-the-Host-.patch | 39 + ...icy-Infrastructure-for-MSR_ARCH_CAPS.patch | 216 ++ ...u-policy-MSR_ARCH_CAPS-feature-names.patch | 102 + ...MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch | 96 + ...MSR_ARCH_CAPS-data-in-guest-max-poli.patch | 138 ++ ...Remove-opencoded-MSR_ARCH_CAPS-check.patch | 58 + ...Remove-opencoded-MSR_ARCH_CAPS-check.patch | 95 + ...Remove-opencoded-MSR_ARCH_CAPS-check.patch | 247 +++ ...-x86-spec-ctrl-Update-hardware-hints.patch | 51 + ...earrange-guest_common_default_featur.patch | 76 + ...c-ctrl-Fix-the-rendering-of-FB_CLEAR.patch | 56 + ...e-a-taint-for-CET-without-MSR_SPEC_C.patch | 48 + ...name-retpoline_safe-to-retpoline_cal.patch | 140 ++ ...x-up-the-RSBA-RRSBA-bits-as-appropri.patch | 172 ++ ...Derive-RSBA-RRSBA-for-guest-policies.patch | 158 ++ 0355-def-arch-caps.patch | 50 + 1017-Disable-TSX-by-default.patch | 2 +- ...what-speculative-workarounds-are-not.patch | 36 - xen.spec.in | 36 +- 38 files changed, 9064 insertions(+), 38 deletions(-) create mode 100644 0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch create mode 100644 0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch create mode 100644 0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch create mode 100644 0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch create mode 100644 0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch create mode 100644 0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch create mode 100644 0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch create mode 100644 0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch create mode 100644 0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch create mode 100644 0330-x86-Drop-struct-old_cpu_policy.patch create mode 100644 0331-x86-Out-of-inline-the-policy-featureset-convertors.patch create mode 100644 0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch create mode 100644 0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch create mode 100644 0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch create mode 100644 0335-tools-fuzz-Rework-afl-policy-fuzzer.patch create mode 100644 0336-libx86-Update-library-API-for-cpu_policy.patch create mode 100644 0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch create mode 100644 0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch create mode 100644 
0339-x86-boot-Rework-dom0-feature-configuration.patch create mode 100644 0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch create mode 100644 0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch create mode 100644 0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch create mode 100644 0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch create mode 100644 0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch create mode 100644 0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch create mode 100644 0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch create mode 100644 0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch create mode 100644 0348-x86-spec-ctrl-Update-hardware-hints.patch create mode 100644 0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch create mode 100644 0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch create mode 100644 0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch create mode 100644 0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch create mode 100644 0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch create mode 100644 0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch create mode 100644 0355-def-arch-caps.patch delete mode 100644 1020-xen-tell-guests-what-speculative-workarounds-are-not.patch diff --git a/0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch b/0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch new file mode 100644 index 00000000..bd06bb94 --- /dev/null +++ b/0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch @@ -0,0 +1,118 @@ +From e509e270347f569f2112340155c78eb3ecb54c98 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 10 Mar 2023 19:04:22 +0000 +Subject: [PATCH 01/35] tools/xen-cpuid: Rework the handling of dynamic + featuresets + +struct fsinfo is the vestigial remnant of an older internal design which +didn't survive very long. + +Simplify things by inlining get_featureset() and having a single memory +allocation that gets reused. This in turn changes featuresets[] to be a +simple list of names, so rename it to fs_names[]. + +No functional change. 
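+
+As an editorial sketch (not part of the change itself), the resulting
+shape is a single allocation reused across every dynamic featureset:
+
+    uint32_t *fs = malloc(sizeof(*fs) * nr_features);  /* allocated once */
+
+    if ( !fs )
+        err(1, "malloc(featureset)");
+
+    for ( i = 0; i < ARRAY_SIZE(fs_names); ++i )
+    {
+        uint32_t len = nr_features;
+
+        memset(fs, 0, sizeof(*fs) * nr_features);      /* reset and reuse */
+        if ( !xc_get_cpu_featureset(xch, i, &len, fs) )
+            decode_featureset(fs, len, fs_names[i], detail);
+    }
+
+    free(fs);
+
+(The EOPNOTSUPP handling is elided here; see the hunk below.)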
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit ec3474e1dd42e6f410601f50b6e74fb7c442cfb9) +--- + tools/misc/xen-cpuid.c | 53 ++++++++++++++++++------------------------ + 1 file changed, 22 insertions(+), 31 deletions(-) + +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index 45e443f5d9ad..e774a9950ebb 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -244,16 +244,11 @@ static const struct { + + #define COL_ALIGN "18" + +-static struct fsinfo { +- const char *name; +- uint32_t len; +- uint32_t *fs; +-} featuresets[] = +-{ +- [XEN_SYSCTL_cpu_featureset_host] = { "Host", 0, NULL }, +- [XEN_SYSCTL_cpu_featureset_raw] = { "Raw", 0, NULL }, +- [XEN_SYSCTL_cpu_featureset_pv] = { "PV", 0, NULL }, +- [XEN_SYSCTL_cpu_featureset_hvm] = { "HVM", 0, NULL }, ++static const char *const fs_names[] = { ++ [XEN_SYSCTL_cpu_featureset_host] = "Host", ++ [XEN_SYSCTL_cpu_featureset_raw] = "Raw", ++ [XEN_SYSCTL_cpu_featureset_pv] = "PV", ++ [XEN_SYSCTL_cpu_featureset_hvm] = "HVM", + }; + + static void dump_leaf(uint32_t leaf, const char *const *strs) +@@ -300,22 +295,10 @@ static void decode_featureset(const uint32_t *features, + } + } + +-static int get_featureset(xc_interface *xch, unsigned int idx) +-{ +- struct fsinfo *f = &featuresets[idx]; +- +- f->len = nr_features; +- f->fs = calloc(nr_features, sizeof(*f->fs)); +- +- if ( !f->fs ) +- err(1, "calloc(, featureset)"); +- +- return xc_get_cpu_featureset(xch, idx, &f->len, f->fs); +-} +- + static void dump_info(xc_interface *xch, bool detail) + { + unsigned int i; ++ uint32_t *fs; + + printf("nr_features: %u\n", nr_features); + +@@ -346,26 +329,34 @@ static void dump_info(xc_interface *xch, bool detail) + nr_features, "HVM Hap Default", detail); + + printf("\nDynamic sets:\n"); +- for ( i = 0; i < ARRAY_SIZE(featuresets); ++i ) ++ ++ fs = malloc(sizeof(*fs) * nr_features); ++ if ( !fs ) ++ err(1, "malloc(featureset)"); ++ ++ for ( i = 0; i < ARRAY_SIZE(fs_names); ++i ) + { +- if ( get_featureset(xch, i) ) ++ uint32_t len = nr_features; ++ int ret; ++ ++ memset(fs, 0, sizeof(*fs) * nr_features); ++ ++ ret = xc_get_cpu_featureset(xch, i, &len, fs); ++ if ( ret ) + { + if ( errno == EOPNOTSUPP ) + { +- printf("%s featureset not supported by Xen\n", +- featuresets[i].name); ++ printf("%s featureset not supported by Xen\n", fs_names[i]); + continue; + } + + err(1, "xc_get_featureset()"); + } + +- decode_featureset(featuresets[i].fs, featuresets[i].len, +- featuresets[i].name, detail); ++ decode_featureset(fs, len, fs_names[i], detail); + } + +- for ( i = 0; i < ARRAY_SIZE(featuresets); ++i ) +- free(featuresets[i].fs); ++ free(fs); + } + + static void print_policy(const char *name, +-- +2.39.2 + diff --git a/0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch b/0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch new file mode 100644 index 00000000..f75d9206 --- /dev/null +++ b/0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch @@ -0,0 +1,127 @@ +From 81e4ade2e5977fa3e72443ff2894765abb4de5dd Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 10 Mar 2023 19:37:56 +0000 +Subject: [PATCH 02/35] x86/sysctl: Retrofit + XEN_SYSCTL_cpu_featureset_{pv,hvm}_max + +Featuresets are supposed to be disappearing when the CPU policy infrastructure +is complete, but that has taken longer than expected, and isn't going to be +complete imminently either. 
+ +In the meantime, Xen does have proper default/max featuresets, and xen-cpuid +can even get them via the XEN_SYSCTL_cpu_policy_* interface, but only knows +now to render them nicely via the featureset interface. + +Differences between default and max are a frequent source of errors, +frequently too in secret leading up to an embargo, so extend the featureset +sysctl to allow xen-cpuid to render them all nicely. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +Acked-by: Christian Lindig +(cherry picked from commit 433d012c6c2737ad5a9aaa994355a4140d601852) +--- + tools/misc/xen-cpuid.c | 10 ++++++---- + tools/ocaml/libs/xc/xenctrl.ml | 8 +++++++- + tools/ocaml/libs/xc/xenctrl.mli | 8 +++++++- + xen/arch/x86/sysctl.c | 4 +++- + xen/include/public/sysctl.h | 2 ++ + 5 files changed, 25 insertions(+), 7 deletions(-) + +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index e774a9950ebb..859345ae8ab2 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -245,10 +245,12 @@ static const struct { + #define COL_ALIGN "18" + + static const char *const fs_names[] = { +- [XEN_SYSCTL_cpu_featureset_host] = "Host", +- [XEN_SYSCTL_cpu_featureset_raw] = "Raw", +- [XEN_SYSCTL_cpu_featureset_pv] = "PV", +- [XEN_SYSCTL_cpu_featureset_hvm] = "HVM", ++ [XEN_SYSCTL_cpu_featureset_raw] = "Raw", ++ [XEN_SYSCTL_cpu_featureset_host] = "Host", ++ [XEN_SYSCTL_cpu_featureset_pv] = "PV Default", ++ [XEN_SYSCTL_cpu_featureset_hvm] = "HVM Default", ++ [XEN_SYSCTL_cpu_featureset_pv_max] = "PV Max", ++ [XEN_SYSCTL_cpu_featureset_hvm_max] = "HVM Max", + }; + + static void dump_leaf(uint32_t leaf, const char *const *strs) +diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml +index aa650533f718..49aa9102f503 100644 +--- a/tools/ocaml/libs/xc/xenctrl.ml ++++ b/tools/ocaml/libs/xc/xenctrl.ml +@@ -309,7 +309,13 @@ external version_changeset: handle -> string = "stub_xc_version_changeset" + external version_capabilities: handle -> string = + "stub_xc_version_capabilities" + +-type featureset_index = Featureset_raw | Featureset_host | Featureset_pv | Featureset_hvm ++type featureset_index = ++ | Featureset_raw ++ | Featureset_host ++ | Featureset_pv ++ | Featureset_hvm ++ | Featureset_pv_max ++ | Featureset_hvm_max + external get_cpu_featureset : handle -> featureset_index -> int64 array = "stub_xc_get_cpu_featureset" + + external watchdog : handle -> int -> int32 -> int +diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli +index 5bf5f5dfea36..b9b92741296d 100644 +--- a/tools/ocaml/libs/xc/xenctrl.mli ++++ b/tools/ocaml/libs/xc/xenctrl.mli +@@ -235,7 +235,13 @@ external version_changeset : handle -> string = "stub_xc_version_changeset" + external version_capabilities : handle -> string + = "stub_xc_version_capabilities" + +-type featureset_index = Featureset_raw | Featureset_host | Featureset_pv | Featureset_hvm ++type featureset_index = ++ | Featureset_raw ++ | Featureset_host ++ | Featureset_pv ++ | Featureset_hvm ++ | Featureset_pv_max ++ | Featureset_hvm_max + external get_cpu_featureset : handle -> featureset_index -> int64 array = "stub_xc_get_cpu_featureset" + + external pages_to_kib : int64 -> int64 = "stub_pages_to_kib" +diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c +index f8f8d797557e..d6612a17078d 100644 +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c +@@ -327,14 +327,16 @@ long arch_do_sysctl( + + case XEN_SYSCTL_get_cpu_featureset: + { +- static const struct cpuid_policy *const policy_table[4] = { ++ static 
const struct cpuid_policy *const policy_table[6] = { + [XEN_SYSCTL_cpu_featureset_raw] = &raw_cpuid_policy, + [XEN_SYSCTL_cpu_featureset_host] = &host_cpuid_policy, + #ifdef CONFIG_PV + [XEN_SYSCTL_cpu_featureset_pv] = &pv_def_cpuid_policy, ++ [XEN_SYSCTL_cpu_featureset_pv_max] = &pv_max_cpuid_policy, + #endif + #ifdef CONFIG_HVM + [XEN_SYSCTL_cpu_featureset_hvm] = &hvm_def_cpuid_policy, ++ [XEN_SYSCTL_cpu_featureset_hvm_max] = &hvm_max_cpuid_policy, + #endif + }; + const struct cpuid_policy *p = NULL; +diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h +index 001a4de27375..e8dded9fb94a 100644 +--- a/xen/include/public/sysctl.h ++++ b/xen/include/public/sysctl.h +@@ -796,6 +796,8 @@ struct xen_sysctl_cpu_featureset { + #define XEN_SYSCTL_cpu_featureset_host 1 + #define XEN_SYSCTL_cpu_featureset_pv 2 + #define XEN_SYSCTL_cpu_featureset_hvm 3 ++#define XEN_SYSCTL_cpu_featureset_pv_max 4 ++#define XEN_SYSCTL_cpu_featureset_hvm_max 5 + uint32_t index; /* IN: Which featureset to query? */ + uint32_t nr_features; /* IN/OUT: Number of entries in/written to + * 'features', or the maximum number of features if +-- +2.39.2 + diff --git a/0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch b/0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch new file mode 100644 index 00000000..e304cbd8 --- /dev/null +++ b/0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch @@ -0,0 +1,157 @@ +From 7dac2c4267256ffd0e2bcab53d27281e7d26510e Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 28 Mar 2023 20:31:33 +0100 +Subject: [PATCH 03/35] x86: Rename struct cpu_policy to struct + old_cpuid_policy + +We want to merge struct cpuid_policy and struct msr_policy together, and the +result wants to be called struct cpu_policy. + +The current struct cpu_policy, being a pair of pointers, isn't terribly +useful. Rename the type to struct old_cpu_policy, but it will disappear +entirely once the merge is complete. + +No functional change. 
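+
+For illustration (an editorial sketch), the type being renamed is only a
+pair of pointers:
+
+    struct old_cpu_policy
+    {
+        struct cpuid_policy *cpuid;
+        struct msr_policy *msr;
+    };
+
+Renaming it frees the cpu_policy name for the merged object which will
+carry the CPUID leaf and MSR data directly.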
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit c2ec94c370f211d73f336ccfbdb32499f1b05f82) +--- + tools/libs/guest/xg_cpuid_x86.c | 4 ++-- + tools/tests/cpu-policy/test-cpu-policy.c | 4 ++-- + xen/arch/x86/domctl.c | 4 ++-- + xen/arch/x86/include/asm/cpuid.h | 2 +- + xen/arch/x86/sysctl.c | 4 ++-- + xen/include/xen/lib/x86/cpu-policy.h | 6 +++--- + xen/lib/x86/policy.c | 4 ++-- + 7 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c +index 57f81eb0a082..a22c7bf7923f 100644 +--- a/tools/libs/guest/xg_cpuid_x86.c ++++ b/tools/libs/guest/xg_cpuid_x86.c +@@ -868,8 +868,8 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host, + xc_cpu_policy_t *guest) + { + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; +- struct cpu_policy h = { &host->cpuid, &host->msr }; +- struct cpu_policy g = { &guest->cpuid, &guest->msr }; ++ struct old_cpu_policy h = { &host->cpuid, &host->msr }; ++ struct old_cpu_policy g = { &guest->cpuid, &guest->msr }; + int rc = x86_cpu_policies_are_compatible(&h, &g, &err); + + if ( !rc ) +diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c +index ed450a099709..8a223fddb3da 100644 +--- a/tools/tests/cpu-policy/test-cpu-policy.c ++++ b/tools/tests/cpu-policy/test-cpu-policy.c +@@ -602,7 +602,7 @@ static void test_is_compatible_success(void) + for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) + { + struct test *t = &tests[i]; +- struct cpu_policy sys = { ++ struct old_cpu_policy sys = { + &t->host_cpuid, + &t->host_msr, + }, new = { +@@ -654,7 +654,7 @@ static void test_is_compatible_failure(void) + for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) + { + struct test *t = &tests[i]; +- struct cpu_policy sys = { ++ struct old_cpu_policy sys = { + &t->host_cpuid, + &t->host_msr, + }, new = { +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index e9bfbc57a794..971d4937a1e6 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -41,8 +41,8 @@ + static int update_domain_cpu_policy(struct domain *d, + xen_domctl_cpu_policy_t *xdpc) + { +- struct cpu_policy new = {}; +- const struct cpu_policy *sys = is_pv_domain(d) ++ struct old_cpu_policy new = {}; ++ const struct old_cpu_policy *sys = is_pv_domain(d) + ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max] + : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max]; + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; +diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h +index 9c3637549a10..49b3128f06f9 100644 +--- a/xen/arch/x86/include/asm/cpuid.h ++++ b/xen/arch/x86/include/asm/cpuid.h +@@ -51,7 +51,7 @@ extern struct cpuid_policy raw_cpuid_policy, host_cpuid_policy, + pv_max_cpuid_policy, pv_def_cpuid_policy, + hvm_max_cpuid_policy, hvm_def_cpuid_policy; + +-extern const struct cpu_policy system_policies[]; ++extern const struct old_cpu_policy system_policies[]; + + /* Check that all previously present features are still available. 
*/ + bool recheck_cpu_features(unsigned int cpu); +diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c +index d6612a17078d..4afe73ff4f4f 100644 +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c +@@ -33,7 +33,7 @@ + #include + #include + +-const struct cpu_policy system_policies[6] = { ++const struct old_cpu_policy system_policies[6] = { + [ XEN_SYSCTL_cpu_policy_raw ] = { + &raw_cpuid_policy, + &raw_msr_policy, +@@ -392,7 +392,7 @@ long arch_do_sysctl( + + case XEN_SYSCTL_get_cpu_policy: + { +- const struct cpu_policy *policy; ++ const struct old_cpu_policy *policy; + + /* Reserved field set, or bad policy index? */ + if ( sysctl->u.cpu_policy._rsvd || +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index 5a2c4c7b2d90..3a5300d1078c 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -5,7 +5,7 @@ + #include + #include + +-struct cpu_policy ++struct old_cpu_policy + { + struct cpuid_policy *cpuid; + struct msr_policy *msr; +@@ -33,8 +33,8 @@ struct cpu_policy_errors + * incompatibility is detected, the optional err pointer may identify the + * problematic leaf/subleaf and/or MSR. + */ +-int x86_cpu_policies_are_compatible(const struct cpu_policy *host, +- const struct cpu_policy *guest, ++int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, ++ const struct old_cpu_policy *guest, + struct cpu_policy_errors *err); + + #endif /* !XEN_LIB_X86_POLICIES_H */ +diff --git a/xen/lib/x86/policy.c b/xen/lib/x86/policy.c +index f6cea4e2f9bd..2975711d7c6c 100644 +--- a/xen/lib/x86/policy.c ++++ b/xen/lib/x86/policy.c +@@ -2,8 +2,8 @@ + + #include + +-int x86_cpu_policies_are_compatible(const struct cpu_policy *host, +- const struct cpu_policy *guest, ++int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, ++ const struct old_cpu_policy *guest, + struct cpu_policy_errors *err) + { + struct cpu_policy_errors e = INIT_CPU_POLICY_ERRORS; +-- +2.39.2 + diff --git a/0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch b/0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch new file mode 100644 index 00000000..e4b964a7 --- /dev/null +++ b/0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch @@ -0,0 +1,173 @@ +From ea2ccf5cdbea3bbe8684ee2f1eb440644c8e385d Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 28 Mar 2023 20:48:29 +0100 +Subject: [PATCH 04/35] x86: Rename + {domctl,sysctl}.cpu_policy.{cpuid,msr}_policy fields + +These weren't great names to begin with, and using {leaves,msrs} matches up +better with the existing nr_{leaves,msr} parameters anyway. + +Furthermore, by renaming these fields we can get away with using some #define +trickery to avoid the struct {cpuid,msr}_policy merge needing to happen in a +single changeset. + +No functional change. 
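+
+As an editorial sketch of the #define trickery in question (the real
+temporary defines are added to cpu-policy.h later in the series):
+
+    struct cpu_policy { /* ... merged CPUID leaves and MSRs ... */ };
+
+    /* Temporary: keep the old type names compiling during the rename. */
+    #define cpuid_policy cpu_policy
+    #define msr_policy   cpu_policy
+
+Existing mentions of struct cpuid_policy or struct msr_policy then
+resolve to the merged type, so callers can be converted piecemeal.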
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 21e3ef57e0406b6b9a783f721f29df8f91a00f99) +--- + tools/libs/guest/xg_cpuid_x86.c | 12 ++++++------ + xen/arch/x86/domctl.c | 12 ++++++------ + xen/arch/x86/sysctl.c | 8 ++++---- + xen/include/public/domctl.h | 4 ++-- + xen/include/public/sysctl.h | 4 ++-- + 5 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c +index a22c7bf7923f..76d9522c3a7c 100644 +--- a/tools/libs/guest/xg_cpuid_x86.c ++++ b/tools/libs/guest/xg_cpuid_x86.c +@@ -145,9 +145,9 @@ static int get_system_cpu_policy(xc_interface *xch, uint32_t index, + sysctl.cmd = XEN_SYSCTL_get_cpu_policy; + sysctl.u.cpu_policy.index = index; + sysctl.u.cpu_policy.nr_leaves = *nr_leaves; +- set_xen_guest_handle(sysctl.u.cpu_policy.cpuid_policy, leaves); ++ set_xen_guest_handle(sysctl.u.cpu_policy.leaves, leaves); + sysctl.u.cpu_policy.nr_msrs = *nr_msrs; +- set_xen_guest_handle(sysctl.u.cpu_policy.msr_policy, msrs); ++ set_xen_guest_handle(sysctl.u.cpu_policy.msrs, msrs); + + ret = do_sysctl(xch, &sysctl); + +@@ -183,9 +183,9 @@ static int get_domain_cpu_policy(xc_interface *xch, uint32_t domid, + domctl.cmd = XEN_DOMCTL_get_cpu_policy; + domctl.domain = domid; + domctl.u.cpu_policy.nr_leaves = *nr_leaves; +- set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves); ++ set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); + domctl.u.cpu_policy.nr_msrs = *nr_msrs; +- set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs); ++ set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); + + ret = do_domctl(xch, &domctl); + +@@ -232,9 +232,9 @@ int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid, + domctl.cmd = XEN_DOMCTL_set_cpu_policy; + domctl.domain = domid; + domctl.u.cpu_policy.nr_leaves = nr_leaves; +- set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves); ++ set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); + domctl.u.cpu_policy.nr_msrs = nr_msrs; +- set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs); ++ set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); + domctl.u.cpu_policy.err_leaf = -1; + domctl.u.cpu_policy.err_subleaf = -1; + domctl.u.cpu_policy.err_msr = -1; +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 971d4937a1e6..175d473e412a 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -55,10 +55,10 @@ static int update_domain_cpu_policy(struct domain *d, + + /* Merge the toolstack provided data. */ + if ( (ret = x86_cpuid_copy_from_buffer( +- new.cpuid, xdpc->cpuid_policy, xdpc->nr_leaves, ++ new.cpuid, xdpc->leaves, xdpc->nr_leaves, + &err.leaf, &err.subleaf)) || + (ret = x86_msr_copy_from_buffer( +- new.msr, xdpc->msr_policy, xdpc->nr_msrs, &err.msr)) ) ++ new.msr, xdpc->msrs, xdpc->nr_msrs, &err.msr)) ) + goto out; + + /* Trim any newly-stale out-of-range leaves. */ +@@ -1318,20 +1318,20 @@ long arch_do_domctl( + + case XEN_DOMCTL_get_cpu_policy: + /* Process the CPUID leaves. */ +- if ( guest_handle_is_null(domctl->u.cpu_policy.cpuid_policy) ) ++ if ( guest_handle_is_null(domctl->u.cpu_policy.leaves) ) + domctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; + else if ( (ret = x86_cpuid_copy_to_buffer( + d->arch.cpuid, +- domctl->u.cpu_policy.cpuid_policy, ++ domctl->u.cpu_policy.leaves, + &domctl->u.cpu_policy.nr_leaves)) ) + break; + + /* Process the MSR entries. 
*/ +- if ( guest_handle_is_null(domctl->u.cpu_policy.msr_policy) ) ++ if ( guest_handle_is_null(domctl->u.cpu_policy.msrs) ) + domctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; + else if ( (ret = x86_msr_copy_to_buffer( + d->arch.msr, +- domctl->u.cpu_policy.msr_policy, ++ domctl->u.cpu_policy.msrs, + &domctl->u.cpu_policy.nr_msrs)) ) + break; + +diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c +index 4afe73ff4f4f..838a9947bfe3 100644 +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c +@@ -412,11 +412,11 @@ long arch_do_sysctl( + } + + /* Process the CPUID leaves. */ +- if ( guest_handle_is_null(sysctl->u.cpu_policy.cpuid_policy) ) ++ if ( guest_handle_is_null(sysctl->u.cpu_policy.leaves) ) + sysctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; + else if ( (ret = x86_cpuid_copy_to_buffer( + policy->cpuid, +- sysctl->u.cpu_policy.cpuid_policy, ++ sysctl->u.cpu_policy.leaves, + &sysctl->u.cpu_policy.nr_leaves)) ) + break; + +@@ -428,11 +428,11 @@ long arch_do_sysctl( + } + + /* Process the MSR entries. */ +- if ( guest_handle_is_null(sysctl->u.cpu_policy.msr_policy) ) ++ if ( guest_handle_is_null(sysctl->u.cpu_policy.msrs) ) + sysctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; + else if ( (ret = x86_msr_copy_to_buffer( + policy->msr, +- sysctl->u.cpu_policy.msr_policy, ++ sysctl->u.cpu_policy.msrs, + &sysctl->u.cpu_policy.nr_msrs)) ) + break; + +diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h +index 51be28c3de7c..c377e8365225 100644 +--- a/xen/include/public/domctl.h ++++ b/xen/include/public/domctl.h +@@ -683,8 +683,8 @@ struct xen_domctl_cpu_policy { + * 'cpuid_policy'. */ + uint32_t nr_msrs; /* IN/OUT: Number of MSRs in/written to + * 'msr_policy' */ +- XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* IN/OUT */ +- XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy; /* IN/OUT */ ++ XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) leaves; /* IN/OUT */ ++ XEN_GUEST_HANDLE_64(xen_msr_entry_t) msrs; /* IN/OUT */ + + /* + * OUT, set_policy only. Written in some (but not all) error cases to +diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h +index e8dded9fb94a..2b24d6bfd00e 100644 +--- a/xen/include/public/sysctl.h ++++ b/xen/include/public/sysctl.h +@@ -1050,8 +1050,8 @@ struct xen_sysctl_cpu_policy { + * 'msr_policy', or the maximum number of MSRs if + * the guest handle is NULL. */ + uint32_t _rsvd; /* Must be zero. */ +- XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* OUT */ +- XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy; /* OUT */ ++ XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) leaves; /* OUT */ ++ XEN_GUEST_HANDLE_64(xen_msr_entry_t) msrs; /* OUT */ + }; + typedef struct xen_sysctl_cpu_policy xen_sysctl_cpu_policy_t; + DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_policy_t); +-- +2.39.2 + diff --git a/0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch b/0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch new file mode 100644 index 00000000..ad580570 --- /dev/null +++ b/0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch @@ -0,0 +1,1073 @@ +From 78bef4f330ced9ac39f0a262777cc160d087f252 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 28 Mar 2023 18:55:19 +0100 +Subject: [PATCH 05/35] x86: Rename struct cpuid_policy to struct cpu_policy + +Also merge lib/x86/cpuid.h entirely into lib/x86/cpu-policy.h + +Use a temporary define to make struct cpuid_policy still work. 
+ +There's one forward declaration of struct cpuid_policy in +tools/tests/x86_emulator/x86-emulate.h that isn't covered by the define, and +it's easier to rename that now than to rearrange the includes. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 743e530380a007774017df9dc2d8cb0659040ee3) +--- + tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 2 +- + tools/tests/x86_emulator/Makefile | 2 +- + tools/tests/x86_emulator/x86-emulate.h | 2 +- + xen/arch/x86/include/asm/cpuid.h | 1 - + xen/arch/x86/x86_emulate/x86_emulate.h | 2 +- + xen/include/xen/lib/x86/cpu-policy.h | 463 ++++++++++++++++++++- + xen/include/xen/lib/x86/cpuid.h | 475 ---------------------- + xen/lib/x86/cpuid.c | 2 +- + 8 files changed, 467 insertions(+), 482 deletions(-) + delete mode 100644 xen/include/xen/lib/x86/cpuid.h + +diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +index 79a849a044c7..7ebe8ee7c325 100644 +--- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c ++++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +@@ -9,7 +9,7 @@ + #include + + #include +-#include ++#include + #include + #include + +diff --git a/tools/tests/x86_emulator/Makefile b/tools/tests/x86_emulator/Makefile +index 7b07c31bbde4..bd82598f9766 100644 +--- a/tools/tests/x86_emulator/Makefile ++++ b/tools/tests/x86_emulator/Makefile +@@ -286,7 +286,7 @@ HOSTCFLAGS += $(CFLAGS_xeninclude) -I. $(HOSTCFLAGS-$(XEN_COMPILE_ARCH)) + x86.h := $(addprefix $(XEN_ROOT)/tools/include/xen/asm/,\ + x86-vendors.h x86-defns.h msr-index.h) \ + $(addprefix $(XEN_ROOT)/tools/include/xen/lib/x86/, \ +- cpuid.h cpuid-autogen.h) ++ cpu-policy.h cpuid-autogen.h) + x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h $(x86.h) + + x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o: %.o: %.c $(x86_emulate.h) +diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h +index 18ae40d01712..19bea9c38d58 100644 +--- a/tools/tests/x86_emulator/x86-emulate.h ++++ b/tools/tests/x86_emulator/x86-emulate.h +@@ -70,7 +70,7 @@ + #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63)) + + extern uint32_t mxcsr_mask; +-extern struct cpuid_policy cp; ++extern struct cpu_policy cp; + + #define MMAP_SZ 16384 + bool emul_test_init(void); +diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h +index 49b3128f06f9..d418e8100dde 100644 +--- a/xen/arch/x86/include/asm/cpuid.h ++++ b/xen/arch/x86/include/asm/cpuid.h +@@ -9,7 +9,6 @@ + #include + + #include +-#include + + #include + +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h +index 4732855c40ed..c89c53e83bfe 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.h ++++ b/xen/arch/x86/x86_emulate/x86_emulate.h +@@ -23,7 +23,7 @@ + #ifndef __X86_EMULATE_H__ + #define __X86_EMULATE_H__ + +-#include ++#include + + #define MAX_INST_LEN 15 + +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index 3a5300d1078c..666505964d00 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -2,9 +2,342 @@ + #ifndef XEN_LIB_X86_POLICIES_H + #define XEN_LIB_X86_POLICIES_H + +-#include ++#include + #include + ++#define FEATURESET_1d 0 /* 0x00000001.edx */ ++#define FEATURESET_1c 1 /* 0x00000001.ecx */ ++#define FEATURESET_e1d 2 /* 0x80000001.edx */ ++#define FEATURESET_e1c 3 /* 0x80000001.ecx */ ++#define FEATURESET_Da1 4 /* 
0x0000000d:1.eax */ ++#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ ++#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ ++#define FEATURESET_e7d 7 /* 0x80000007.edx */ ++#define FEATURESET_e8b 8 /* 0x80000008.ebx */ ++#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ ++#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ ++#define FEATURESET_e21a 11 /* 0x80000021.eax */ ++#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ ++#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ ++#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ ++#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ ++ ++struct cpuid_leaf ++{ ++ uint32_t a, b, c, d; ++}; ++ ++/* ++ * Versions of GCC before 5 unconditionally reserve %rBX as the PIC hard ++ * register, and are unable to cope with spilling it. This results in a ++ * rather cryptic error: ++ * error: inconsistent operand constraints in an ‘asm’ ++ * ++ * In affected situations, work around the issue by using a separate register ++ * to hold the the %rBX output, and xchg twice to leave %rBX preserved around ++ * the asm() statement. ++ */ ++#if defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && defined(__i386__) ++# define XCHG_BX "xchg %%ebx, %[bx];" ++# define BX_CON [bx] "=&r" ++#elif defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && \ ++ defined(__x86_64__) && (defined(__code_model_medium__) || \ ++ defined(__code_model_large__)) ++# define XCHG_BX "xchg %%rbx, %q[bx];" ++# define BX_CON [bx] "=&r" ++#else ++# define XCHG_BX "" ++# define BX_CON "=&b" ++#endif ++ ++static inline void cpuid_leaf(uint32_t leaf, struct cpuid_leaf *l) ++{ ++ asm ( XCHG_BX ++ "cpuid;" ++ XCHG_BX ++ : "=a" (l->a), BX_CON (l->b), "=&c" (l->c), "=&d" (l->d) ++ : "a" (leaf) ); ++} ++ ++static inline void cpuid_count_leaf( ++ uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *l) ++{ ++ asm ( XCHG_BX ++ "cpuid;" ++ XCHG_BX ++ : "=a" (l->a), BX_CON (l->b), "=c" (l->c), "=&d" (l->d) ++ : "a" (leaf), "c" (subleaf) ); ++} ++ ++#undef BX_CON ++#undef XCHG ++ ++/** ++ * Given the vendor id from CPUID leaf 0, look up Xen's internal integer ++ * vendor ID. Returns X86_VENDOR_UNKNOWN for any unknown vendor. ++ */ ++unsigned int x86_cpuid_lookup_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx); ++ ++/** ++ * Given Xen's internal vendor ID, return a string suitable for printing. ++ * Returns "Unknown" for any unrecognised ID. ++ */ ++const char *x86_cpuid_vendor_to_str(unsigned int vendor); ++ ++#define CPUID_GUEST_NR_BASIC (0xdu + 1) ++#define CPUID_GUEST_NR_CACHE (5u + 1) ++#define CPUID_GUEST_NR_FEAT (2u + 1) ++#define CPUID_GUEST_NR_TOPO (1u + 1) ++#define CPUID_GUEST_NR_XSTATE (62u + 1) ++#define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1) ++#define CPUID_GUEST_NR_EXTD_AMD (0x21u + 1) ++#define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \ ++ CPUID_GUEST_NR_EXTD_AMD) ++ ++/* ++ * Maximum number of leaves a struct cpu_policy turns into when serialised for ++ * interaction with the toolstack. (Sum of all leaves in each union, less the ++ * entries in basic which sub-unions hang off of.) 
++ */ ++#define CPUID_MAX_SERIALISED_LEAVES \ ++ (CPUID_GUEST_NR_BASIC + \ ++ CPUID_GUEST_NR_FEAT - !!CPUID_GUEST_NR_FEAT + \ ++ CPUID_GUEST_NR_CACHE - !!CPUID_GUEST_NR_CACHE + \ ++ CPUID_GUEST_NR_TOPO - !!CPUID_GUEST_NR_TOPO + \ ++ CPUID_GUEST_NR_XSTATE - !!CPUID_GUEST_NR_XSTATE + \ ++ CPUID_GUEST_NR_EXTD + 2 /* hv_limit and hv2_limit */ ) ++ ++struct cpu_policy ++{ ++#define DECL_BITFIELD(word) _DECL_BITFIELD(FEATURESET_ ## word) ++#define _DECL_BITFIELD(x) __DECL_BITFIELD(x) ++#define __DECL_BITFIELD(x) CPUID_BITFIELD_ ## x ++ ++ /* Basic leaves: 0x000000xx */ ++ union { ++ struct cpuid_leaf raw[CPUID_GUEST_NR_BASIC]; ++ struct { ++ /* Leaf 0x0 - Max and vendor. */ ++ uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; ++ ++ /* Leaf 0x1 - Family/model/stepping and features. */ ++ uint32_t raw_fms; ++ uint8_t :8, /* Brand ID. */ ++ clflush_size, /* Number of 8-byte blocks per cache line. */ ++ lppp, /* Logical processors per package. */ ++ apic_id; /* Initial APIC ID. */ ++ union { ++ uint32_t _1c; ++ struct { DECL_BITFIELD(1c); }; ++ }; ++ union { ++ uint32_t _1d; ++ struct { DECL_BITFIELD(1d); }; ++ }; ++ ++ /* Leaf 0x2 - TLB/Cache/Prefetch. */ ++ uint8_t l2_nr_queries; /* Documented as fixed to 1. */ ++ uint8_t l2_desc[15]; ++ ++ uint64_t :64, :64; /* Leaf 0x3 - PSN. */ ++ uint64_t :64, :64; /* Leaf 0x4 - Structured Cache. */ ++ uint64_t :64, :64; /* Leaf 0x5 - MONITOR. */ ++ uint64_t :64, :64; /* Leaf 0x6 - Therm/Perf. */ ++ uint64_t :64, :64; /* Leaf 0x7 - Structured Features. */ ++ uint64_t :64, :64; /* Leaf 0x8 - rsvd */ ++ uint64_t :64, :64; /* Leaf 0x9 - DCA */ ++ ++ /* Leaf 0xa - Intel PMU. */ ++ uint8_t pmu_version, _pmu[15]; ++ ++ uint64_t :64, :64; /* Leaf 0xb - Topology. */ ++ uint64_t :64, :64; /* Leaf 0xc - rsvd */ ++ uint64_t :64, :64; /* Leaf 0xd - XSTATE. */ ++ }; ++ } basic; ++ ++ /* Structured cache leaf: 0x00000004[xx] */ ++ union { ++ struct cpuid_leaf raw[CPUID_GUEST_NR_CACHE]; ++ struct cpuid_cache_leaf { ++ uint32_t /* a */ type:5, level:3; ++ bool self_init:1, fully_assoc:1; ++ uint32_t :4, threads_per_cache:12, cores_per_package:6; ++ uint32_t /* b */ line_size:12, partitions:10, ways:10; ++ uint32_t /* c */ sets; ++ bool /* d */ wbinvd:1, inclusive:1, complex:1; ++ } subleaf[CPUID_GUEST_NR_CACHE]; ++ } cache; ++ ++ /* Structured feature leaf: 0x00000007[xx] */ ++ union { ++ struct cpuid_leaf raw[CPUID_GUEST_NR_FEAT]; ++ struct { ++ /* Subleaf 0. */ ++ uint32_t max_subleaf; ++ union { ++ uint32_t _7b0; ++ struct { DECL_BITFIELD(7b0); }; ++ }; ++ union { ++ uint32_t _7c0; ++ struct { DECL_BITFIELD(7c0); }; ++ }; ++ union { ++ uint32_t _7d0; ++ struct { DECL_BITFIELD(7d0); }; ++ }; ++ ++ /* Subleaf 1. */ ++ union { ++ uint32_t _7a1; ++ struct { DECL_BITFIELD(7a1); }; ++ }; ++ union { ++ uint32_t _7b1; ++ struct { DECL_BITFIELD(7b1); }; ++ }; ++ union { ++ uint32_t _7c1; ++ struct { DECL_BITFIELD(7c1); }; ++ }; ++ union { ++ uint32_t _7d1; ++ struct { DECL_BITFIELD(7d1); }; ++ }; ++ ++ /* Subleaf 2. 
*/ ++ uint32_t /* a */:32, /* b */:32, /* c */:32; ++ union { ++ uint32_t _7d2; ++ struct { DECL_BITFIELD(7d2); }; ++ }; ++ }; ++ } feat; ++ ++ /* Extended topology enumeration: 0x0000000B[xx] */ ++ union { ++ struct cpuid_leaf raw[CPUID_GUEST_NR_TOPO]; ++ struct cpuid_topo_leaf { ++ uint32_t id_shift:5, :27; ++ uint16_t nr_logical, :16; ++ uint8_t level, type, :8, :8; ++ uint32_t x2apic_id; ++ } subleaf[CPUID_GUEST_NR_TOPO]; ++ } topo; ++ ++ /* Xstate feature leaf: 0x0000000D[xx] */ ++ union { ++ struct cpuid_leaf raw[CPUID_GUEST_NR_XSTATE]; ++ ++ struct { ++ /* Subleaf 0. */ ++ uint32_t xcr0_low, /* b */:32, max_size, xcr0_high; ++ ++ /* Subleaf 1. */ ++ union { ++ uint32_t Da1; ++ struct { DECL_BITFIELD(Da1); }; ++ }; ++ uint32_t /* b */:32, xss_low, xss_high; ++ }; ++ ++ /* Per-component common state. Valid for i >= 2. */ ++ struct { ++ uint32_t size, offset; ++ bool xss:1, align:1; ++ uint32_t _res_d; ++ } comp[CPUID_GUEST_NR_XSTATE]; ++ } xstate; ++ ++ /* Extended leaves: 0x800000xx */ ++ union { ++ struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD]; ++ struct { ++ /* Leaf 0x80000000 - Max and vendor. */ ++ uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; ++ ++ /* Leaf 0x80000001 - Family/model/stepping and features. */ ++ uint32_t raw_fms, /* b */:32; ++ union { ++ uint32_t e1c; ++ struct { DECL_BITFIELD(e1c); }; ++ }; ++ union { ++ uint32_t e1d; ++ struct { DECL_BITFIELD(e1d); }; ++ }; ++ ++ uint64_t :64, :64; /* Brand string. */ ++ uint64_t :64, :64; /* Brand string. */ ++ uint64_t :64, :64; /* Brand string. */ ++ uint64_t :64, :64; /* L1 cache/TLB. */ ++ uint64_t :64, :64; /* L2/3 cache/TLB. */ ++ ++ /* Leaf 0x80000007 - Advanced Power Management. */ ++ uint32_t /* a */:32, /* b */:32, /* c */:32; ++ union { ++ uint32_t e7d; ++ struct { DECL_BITFIELD(e7d); }; ++ }; ++ ++ /* Leaf 0x80000008 - Misc addr/feature info. */ ++ uint8_t maxphysaddr, maxlinaddr, :8, :8; ++ union { ++ uint32_t e8b; ++ struct { DECL_BITFIELD(e8b); }; ++ }; ++ uint32_t nc:8, :4, apic_id_size:4, :16; ++ uint32_t /* d */:32; ++ ++ uint64_t :64, :64; /* Leaf 0x80000009. */ ++ uint64_t :64, :64; /* Leaf 0x8000000a - SVM rev and features. */ ++ uint64_t :64, :64; /* Leaf 0x8000000b. */ ++ uint64_t :64, :64; /* Leaf 0x8000000c. */ ++ uint64_t :64, :64; /* Leaf 0x8000000d. */ ++ uint64_t :64, :64; /* Leaf 0x8000000e. */ ++ uint64_t :64, :64; /* Leaf 0x8000000f. */ ++ uint64_t :64, :64; /* Leaf 0x80000010. */ ++ uint64_t :64, :64; /* Leaf 0x80000011. */ ++ uint64_t :64, :64; /* Leaf 0x80000012. */ ++ uint64_t :64, :64; /* Leaf 0x80000013. */ ++ uint64_t :64, :64; /* Leaf 0x80000014. */ ++ uint64_t :64, :64; /* Leaf 0x80000015. */ ++ uint64_t :64, :64; /* Leaf 0x80000016. */ ++ uint64_t :64, :64; /* Leaf 0x80000017. */ ++ uint64_t :64, :64; /* Leaf 0x80000018. */ ++ uint64_t :64, :64; /* Leaf 0x80000019 - TLB 1GB Identifiers. */ ++ uint64_t :64, :64; /* Leaf 0x8000001a - Performance related info. */ ++ uint64_t :64, :64; /* Leaf 0x8000001b - IBS feature information. */ ++ uint64_t :64, :64; /* Leaf 0x8000001c. */ ++ uint64_t :64, :64; /* Leaf 0x8000001d - Cache properties. */ ++ uint64_t :64, :64; /* Leaf 0x8000001e - Extd APIC/Core/Node IDs. */ ++ uint64_t :64, :64; /* Leaf 0x8000001f - AMD Secure Encryption. */ ++ uint64_t :64, :64; /* Leaf 0x80000020 - Platform QoS. 
*/ ++ ++ /* Leaf 0x80000021 - Extended Feature 2 */ ++ union { ++ uint32_t e21a; ++ struct { DECL_BITFIELD(e21a); }; ++ }; ++ uint32_t /* b */:32, /* c */:32, /* d */:32; ++ }; ++ } extd; ++ ++#undef __DECL_BITFIELD ++#undef _DECL_BITFIELD ++#undef DECL_BITFIELD ++ ++ /* Toolstack selected Hypervisor max_leaf (if non-zero). */ ++ uint8_t hv_limit, hv2_limit; ++ ++ /* Value calculated from raw data above. */ ++ uint8_t x86_vendor; ++}; ++ ++/* Temporary */ ++#define cpuid_policy cpu_policy ++ + struct old_cpu_policy + { + struct cpuid_policy *cpuid; +@@ -19,6 +352,134 @@ struct cpu_policy_errors + + #define INIT_CPU_POLICY_ERRORS { -1, -1, -1 } + ++/* Fill in a featureset bitmap from a CPUID policy. */ ++static inline void cpuid_policy_to_featureset( ++ const struct cpuid_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) ++{ ++ fs[FEATURESET_1d] = p->basic._1d; ++ fs[FEATURESET_1c] = p->basic._1c; ++ fs[FEATURESET_e1d] = p->extd.e1d; ++ fs[FEATURESET_e1c] = p->extd.e1c; ++ fs[FEATURESET_Da1] = p->xstate.Da1; ++ fs[FEATURESET_7b0] = p->feat._7b0; ++ fs[FEATURESET_7c0] = p->feat._7c0; ++ fs[FEATURESET_e7d] = p->extd.e7d; ++ fs[FEATURESET_e8b] = p->extd.e8b; ++ fs[FEATURESET_7d0] = p->feat._7d0; ++ fs[FEATURESET_7a1] = p->feat._7a1; ++ fs[FEATURESET_e21a] = p->extd.e21a; ++ fs[FEATURESET_7b1] = p->feat._7b1; ++ fs[FEATURESET_7d2] = p->feat._7d2; ++ fs[FEATURESET_7c1] = p->feat._7c1; ++ fs[FEATURESET_7d1] = p->feat._7d1; ++} ++ ++/* Fill in a CPUID policy from a featureset bitmap. */ ++static inline void cpuid_featureset_to_policy( ++ const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpuid_policy *p) ++{ ++ p->basic._1d = fs[FEATURESET_1d]; ++ p->basic._1c = fs[FEATURESET_1c]; ++ p->extd.e1d = fs[FEATURESET_e1d]; ++ p->extd.e1c = fs[FEATURESET_e1c]; ++ p->xstate.Da1 = fs[FEATURESET_Da1]; ++ p->feat._7b0 = fs[FEATURESET_7b0]; ++ p->feat._7c0 = fs[FEATURESET_7c0]; ++ p->extd.e7d = fs[FEATURESET_e7d]; ++ p->extd.e8b = fs[FEATURESET_e8b]; ++ p->feat._7d0 = fs[FEATURESET_7d0]; ++ p->feat._7a1 = fs[FEATURESET_7a1]; ++ p->extd.e21a = fs[FEATURESET_e21a]; ++ p->feat._7b1 = fs[FEATURESET_7b1]; ++ p->feat._7d2 = fs[FEATURESET_7d2]; ++ p->feat._7c1 = fs[FEATURESET_7c1]; ++ p->feat._7d1 = fs[FEATURESET_7d1]; ++} ++ ++static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) ++{ ++ return ((uint64_t)p->xstate.xcr0_high << 32) | p->xstate.xcr0_low; ++} ++ ++static inline uint64_t cpuid_policy_xstates(const struct cpuid_policy *p) ++{ ++ uint64_t val = p->xstate.xcr0_high | p->xstate.xss_high; ++ ++ return (val << 32) | p->xstate.xcr0_low | p->xstate.xss_low; ++} ++ ++const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); ++ ++/** ++ * Recalculate the content in a CPUID policy which is derived from raw data. ++ */ ++void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p); ++ ++/** ++ * Fill a CPUID policy using the native CPUID instruction. ++ * ++ * No sanitisation is performed, but synthesised values are calculated. ++ * Values may be influenced by a hypervisor or from masking/faulting ++ * configuration. ++ */ ++void x86_cpuid_policy_fill_native(struct cpuid_policy *p); ++ ++/** ++ * Clear leaf data beyond the policies max leaf/subleaf settings. ++ * ++ * Policy serialisation purposefully omits out-of-range leaves, because there ++ * are a large number of them due to vendor differences. However, when ++ * constructing new policies (e.g. levelling down), it is possible to end up ++ * with out-of-range leaves with stale content in them. This helper clears ++ * them. 
++ */ ++void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); ++ ++#ifdef __XEN__ ++#include ++typedef XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_leaf_buffer_t; ++#else ++#include ++typedef xen_cpuid_leaf_t cpuid_leaf_buffer_t[]; ++#endif ++ ++/** ++ * Serialise a cpuid_policy object into an array of cpuid leaves. ++ * ++ * @param policy The cpuid_policy to serialise. ++ * @param leaves The array of leaves to serialise into. ++ * @param nr_entries The number of entries in 'leaves'. ++ * @returns -errno ++ * ++ * Writes at most CPUID_MAX_SERIALISED_LEAVES. May fail with -ENOBUFS if the ++ * leaves array is too short. On success, nr_entries is updated with the ++ * actual number of leaves written. ++ */ ++int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, ++ cpuid_leaf_buffer_t leaves, uint32_t *nr_entries); ++ ++/** ++ * Unserialise a cpuid_policy object from an array of cpuid leaves. ++ * ++ * @param policy The cpuid_policy to unserialise into. ++ * @param leaves The array of leaves to unserialise from. ++ * @param nr_entries The number of entries in 'leaves'. ++ * @param err_leaf Optional hint for error diagnostics. ++ * @param err_subleaf Optional hint for error diagnostics. ++ * @returns -errno ++ * ++ * Reads at most CPUID_MAX_SERIALISED_LEAVES. May return -ERANGE if an ++ * incoming leaf is out of range of cpuid_policy, in which case the optional ++ * err_* pointers will identify the out-of-range indicies. ++ * ++ * No content validation of in-range leaves is performed. Synthesised data is ++ * recalculated. ++ */ ++int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, ++ const cpuid_leaf_buffer_t leaves, ++ uint32_t nr_entries, uint32_t *err_leaf, ++ uint32_t *err_subleaf); ++ + /* + * Calculate whether two policies are compatible. + * +diff --git a/xen/include/xen/lib/x86/cpuid.h b/xen/include/xen/lib/x86/cpuid.h +deleted file mode 100644 +index fa98b371eef4..000000000000 +--- a/xen/include/xen/lib/x86/cpuid.h ++++ /dev/null +@@ -1,475 +0,0 @@ +-/* Common data structures and functions consumed by hypervisor and toolstack */ +-#ifndef XEN_LIB_X86_CPUID_H +-#define XEN_LIB_X86_CPUID_H +- +-#include +- +-#define FEATURESET_1d 0 /* 0x00000001.edx */ +-#define FEATURESET_1c 1 /* 0x00000001.ecx */ +-#define FEATURESET_e1d 2 /* 0x80000001.edx */ +-#define FEATURESET_e1c 3 /* 0x80000001.ecx */ +-#define FEATURESET_Da1 4 /* 0x0000000d:1.eax */ +-#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ +-#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ +-#define FEATURESET_e7d 7 /* 0x80000007.edx */ +-#define FEATURESET_e8b 8 /* 0x80000008.ebx */ +-#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ +-#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ +-#define FEATURESET_e21a 11 /* 0x80000021.eax */ +-#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ +-#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ +-#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ +-#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ +- +-struct cpuid_leaf +-{ +- uint32_t a, b, c, d; +-}; +- +-/* +- * Versions of GCC before 5 unconditionally reserve %rBX as the PIC hard +- * register, and are unable to cope with spilling it. This results in a +- * rather cryptic error: +- * error: inconsistent operand constraints in an ‘asm’ +- * +- * In affected situations, work around the issue by using a separate register +- * to hold the the %rBX output, and xchg twice to leave %rBX preserved around +- * the asm() statement. 
+- */ +-#if defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && defined(__i386__) +-# define XCHG_BX "xchg %%ebx, %[bx];" +-# define BX_CON [bx] "=&r" +-#elif defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && \ +- defined(__x86_64__) && (defined(__code_model_medium__) || \ +- defined(__code_model_large__)) +-# define XCHG_BX "xchg %%rbx, %q[bx];" +-# define BX_CON [bx] "=&r" +-#else +-# define XCHG_BX "" +-# define BX_CON "=&b" +-#endif +- +-static inline void cpuid_leaf(uint32_t leaf, struct cpuid_leaf *l) +-{ +- asm ( XCHG_BX +- "cpuid;" +- XCHG_BX +- : "=a" (l->a), BX_CON (l->b), "=&c" (l->c), "=&d" (l->d) +- : "a" (leaf) ); +-} +- +-static inline void cpuid_count_leaf( +- uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *l) +-{ +- asm ( XCHG_BX +- "cpuid;" +- XCHG_BX +- : "=a" (l->a), BX_CON (l->b), "=c" (l->c), "=&d" (l->d) +- : "a" (leaf), "c" (subleaf) ); +-} +- +-#undef BX_CON +-#undef XCHG +- +-/** +- * Given the vendor id from CPUID leaf 0, look up Xen's internal integer +- * vendor ID. Returns X86_VENDOR_UNKNOWN for any unknown vendor. +- */ +-unsigned int x86_cpuid_lookup_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx); +- +-/** +- * Given Xen's internal vendor ID, return a string suitable for printing. +- * Returns "Unknown" for any unrecognised ID. +- */ +-const char *x86_cpuid_vendor_to_str(unsigned int vendor); +- +-#define CPUID_GUEST_NR_BASIC (0xdu + 1) +-#define CPUID_GUEST_NR_CACHE (5u + 1) +-#define CPUID_GUEST_NR_FEAT (2u + 1) +-#define CPUID_GUEST_NR_TOPO (1u + 1) +-#define CPUID_GUEST_NR_XSTATE (62u + 1) +-#define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1) +-#define CPUID_GUEST_NR_EXTD_AMD (0x21u + 1) +-#define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \ +- CPUID_GUEST_NR_EXTD_AMD) +- +-/* +- * Maximum number of leaves a struct cpuid_policy turns into when serialised +- * for interaction with the toolstack. (Sum of all leaves in each union, less +- * the entries in basic which sub-unions hang off of.) +- */ +-#define CPUID_MAX_SERIALISED_LEAVES \ +- (CPUID_GUEST_NR_BASIC + \ +- CPUID_GUEST_NR_FEAT - !!CPUID_GUEST_NR_FEAT + \ +- CPUID_GUEST_NR_CACHE - !!CPUID_GUEST_NR_CACHE + \ +- CPUID_GUEST_NR_TOPO - !!CPUID_GUEST_NR_TOPO + \ +- CPUID_GUEST_NR_XSTATE - !!CPUID_GUEST_NR_XSTATE + \ +- CPUID_GUEST_NR_EXTD + 2 /* hv_limit and hv2_limit */ ) +- +-struct cpuid_policy +-{ +-#define DECL_BITFIELD(word) _DECL_BITFIELD(FEATURESET_ ## word) +-#define _DECL_BITFIELD(x) __DECL_BITFIELD(x) +-#define __DECL_BITFIELD(x) CPUID_BITFIELD_ ## x +- +- /* Basic leaves: 0x000000xx */ +- union { +- struct cpuid_leaf raw[CPUID_GUEST_NR_BASIC]; +- struct { +- /* Leaf 0x0 - Max and vendor. */ +- uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; +- +- /* Leaf 0x1 - Family/model/stepping and features. */ +- uint32_t raw_fms; +- uint8_t :8, /* Brand ID. */ +- clflush_size, /* Number of 8-byte blocks per cache line. */ +- lppp, /* Logical processors per package. */ +- apic_id; /* Initial APIC ID. */ +- union { +- uint32_t _1c; +- struct { DECL_BITFIELD(1c); }; +- }; +- union { +- uint32_t _1d; +- struct { DECL_BITFIELD(1d); }; +- }; +- +- /* Leaf 0x2 - TLB/Cache/Prefetch. */ +- uint8_t l2_nr_queries; /* Documented as fixed to 1. */ +- uint8_t l2_desc[15]; +- +- uint64_t :64, :64; /* Leaf 0x3 - PSN. */ +- uint64_t :64, :64; /* Leaf 0x4 - Structured Cache. */ +- uint64_t :64, :64; /* Leaf 0x5 - MONITOR. */ +- uint64_t :64, :64; /* Leaf 0x6 - Therm/Perf. */ +- uint64_t :64, :64; /* Leaf 0x7 - Structured Features. 
*/ +- uint64_t :64, :64; /* Leaf 0x8 - rsvd */ +- uint64_t :64, :64; /* Leaf 0x9 - DCA */ +- +- /* Leaf 0xa - Intel PMU. */ +- uint8_t pmu_version, _pmu[15]; +- +- uint64_t :64, :64; /* Leaf 0xb - Topology. */ +- uint64_t :64, :64; /* Leaf 0xc - rsvd */ +- uint64_t :64, :64; /* Leaf 0xd - XSTATE. */ +- }; +- } basic; +- +- /* Structured cache leaf: 0x00000004[xx] */ +- union { +- struct cpuid_leaf raw[CPUID_GUEST_NR_CACHE]; +- struct cpuid_cache_leaf { +- uint32_t /* a */ type:5, level:3; +- bool self_init:1, fully_assoc:1; +- uint32_t :4, threads_per_cache:12, cores_per_package:6; +- uint32_t /* b */ line_size:12, partitions:10, ways:10; +- uint32_t /* c */ sets; +- bool /* d */ wbinvd:1, inclusive:1, complex:1; +- } subleaf[CPUID_GUEST_NR_CACHE]; +- } cache; +- +- /* Structured feature leaf: 0x00000007[xx] */ +- union { +- struct cpuid_leaf raw[CPUID_GUEST_NR_FEAT]; +- struct { +- /* Subleaf 0. */ +- uint32_t max_subleaf; +- union { +- uint32_t _7b0; +- struct { DECL_BITFIELD(7b0); }; +- }; +- union { +- uint32_t _7c0; +- struct { DECL_BITFIELD(7c0); }; +- }; +- union { +- uint32_t _7d0; +- struct { DECL_BITFIELD(7d0); }; +- }; +- +- /* Subleaf 1. */ +- union { +- uint32_t _7a1; +- struct { DECL_BITFIELD(7a1); }; +- }; +- union { +- uint32_t _7b1; +- struct { DECL_BITFIELD(7b1); }; +- }; +- union { +- uint32_t _7c1; +- struct { DECL_BITFIELD(7c1); }; +- }; +- union { +- uint32_t _7d1; +- struct { DECL_BITFIELD(7d1); }; +- }; +- +- /* Subleaf 2. */ +- uint32_t /* a */:32, /* b */:32, /* c */:32; +- union { +- uint32_t _7d2; +- struct { DECL_BITFIELD(7d2); }; +- }; +- }; +- } feat; +- +- /* Extended topology enumeration: 0x0000000B[xx] */ +- union { +- struct cpuid_leaf raw[CPUID_GUEST_NR_TOPO]; +- struct cpuid_topo_leaf { +- uint32_t id_shift:5, :27; +- uint16_t nr_logical, :16; +- uint8_t level, type, :8, :8; +- uint32_t x2apic_id; +- } subleaf[CPUID_GUEST_NR_TOPO]; +- } topo; +- +- /* Xstate feature leaf: 0x0000000D[xx] */ +- union { +- struct cpuid_leaf raw[CPUID_GUEST_NR_XSTATE]; +- +- struct { +- /* Subleaf 0. */ +- uint32_t xcr0_low, /* b */:32, max_size, xcr0_high; +- +- /* Subleaf 1. */ +- union { +- uint32_t Da1; +- struct { DECL_BITFIELD(Da1); }; +- }; +- uint32_t /* b */:32, xss_low, xss_high; +- }; +- +- /* Per-component common state. Valid for i >= 2. */ +- struct { +- uint32_t size, offset; +- bool xss:1, align:1; +- uint32_t _res_d; +- } comp[CPUID_GUEST_NR_XSTATE]; +- } xstate; +- +- /* Extended leaves: 0x800000xx */ +- union { +- struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD]; +- struct { +- /* Leaf 0x80000000 - Max and vendor. */ +- uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; +- +- /* Leaf 0x80000001 - Family/model/stepping and features. */ +- uint32_t raw_fms, /* b */:32; +- union { +- uint32_t e1c; +- struct { DECL_BITFIELD(e1c); }; +- }; +- union { +- uint32_t e1d; +- struct { DECL_BITFIELD(e1d); }; +- }; +- +- uint64_t :64, :64; /* Brand string. */ +- uint64_t :64, :64; /* Brand string. */ +- uint64_t :64, :64; /* Brand string. */ +- uint64_t :64, :64; /* L1 cache/TLB. */ +- uint64_t :64, :64; /* L2/3 cache/TLB. */ +- +- /* Leaf 0x80000007 - Advanced Power Management. */ +- uint32_t /* a */:32, /* b */:32, /* c */:32; +- union { +- uint32_t e7d; +- struct { DECL_BITFIELD(e7d); }; +- }; +- +- /* Leaf 0x80000008 - Misc addr/feature info. */ +- uint8_t maxphysaddr, maxlinaddr, :8, :8; +- union { +- uint32_t e8b; +- struct { DECL_BITFIELD(e8b); }; +- }; +- uint32_t nc:8, :4, apic_id_size:4, :16; +- uint32_t /* d */:32; +- +- uint64_t :64, :64; /* Leaf 0x80000009. 
*/ +- uint64_t :64, :64; /* Leaf 0x8000000a - SVM rev and features. */ +- uint64_t :64, :64; /* Leaf 0x8000000b. */ +- uint64_t :64, :64; /* Leaf 0x8000000c. */ +- uint64_t :64, :64; /* Leaf 0x8000000d. */ +- uint64_t :64, :64; /* Leaf 0x8000000e. */ +- uint64_t :64, :64; /* Leaf 0x8000000f. */ +- uint64_t :64, :64; /* Leaf 0x80000010. */ +- uint64_t :64, :64; /* Leaf 0x80000011. */ +- uint64_t :64, :64; /* Leaf 0x80000012. */ +- uint64_t :64, :64; /* Leaf 0x80000013. */ +- uint64_t :64, :64; /* Leaf 0x80000014. */ +- uint64_t :64, :64; /* Leaf 0x80000015. */ +- uint64_t :64, :64; /* Leaf 0x80000016. */ +- uint64_t :64, :64; /* Leaf 0x80000017. */ +- uint64_t :64, :64; /* Leaf 0x80000018. */ +- uint64_t :64, :64; /* Leaf 0x80000019 - TLB 1GB Identifiers. */ +- uint64_t :64, :64; /* Leaf 0x8000001a - Performance related info. */ +- uint64_t :64, :64; /* Leaf 0x8000001b - IBS feature information. */ +- uint64_t :64, :64; /* Leaf 0x8000001c. */ +- uint64_t :64, :64; /* Leaf 0x8000001d - Cache properties. */ +- uint64_t :64, :64; /* Leaf 0x8000001e - Extd APIC/Core/Node IDs. */ +- uint64_t :64, :64; /* Leaf 0x8000001f - AMD Secure Encryption. */ +- uint64_t :64, :64; /* Leaf 0x80000020 - Platform QoS. */ +- +- /* Leaf 0x80000021 - Extended Feature 2 */ +- union { +- uint32_t e21a; +- struct { DECL_BITFIELD(e21a); }; +- }; +- uint32_t /* b */:32, /* c */:32, /* d */:32; +- }; +- } extd; +- +-#undef __DECL_BITFIELD +-#undef _DECL_BITFIELD +-#undef DECL_BITFIELD +- +- /* Toolstack selected Hypervisor max_leaf (if non-zero). */ +- uint8_t hv_limit, hv2_limit; +- +- /* Value calculated from raw data above. */ +- uint8_t x86_vendor; +-}; +- +-/* Fill in a featureset bitmap from a CPUID policy. */ +-static inline void cpuid_policy_to_featureset( +- const struct cpuid_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) +-{ +- fs[FEATURESET_1d] = p->basic._1d; +- fs[FEATURESET_1c] = p->basic._1c; +- fs[FEATURESET_e1d] = p->extd.e1d; +- fs[FEATURESET_e1c] = p->extd.e1c; +- fs[FEATURESET_Da1] = p->xstate.Da1; +- fs[FEATURESET_7b0] = p->feat._7b0; +- fs[FEATURESET_7c0] = p->feat._7c0; +- fs[FEATURESET_e7d] = p->extd.e7d; +- fs[FEATURESET_e8b] = p->extd.e8b; +- fs[FEATURESET_7d0] = p->feat._7d0; +- fs[FEATURESET_7a1] = p->feat._7a1; +- fs[FEATURESET_e21a] = p->extd.e21a; +- fs[FEATURESET_7b1] = p->feat._7b1; +- fs[FEATURESET_7d2] = p->feat._7d2; +- fs[FEATURESET_7c1] = p->feat._7c1; +- fs[FEATURESET_7d1] = p->feat._7d1; +-} +- +-/* Fill in a CPUID policy from a featureset bitmap. 
*/ +-static inline void cpuid_featureset_to_policy( +- const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpuid_policy *p) +-{ +- p->basic._1d = fs[FEATURESET_1d]; +- p->basic._1c = fs[FEATURESET_1c]; +- p->extd.e1d = fs[FEATURESET_e1d]; +- p->extd.e1c = fs[FEATURESET_e1c]; +- p->xstate.Da1 = fs[FEATURESET_Da1]; +- p->feat._7b0 = fs[FEATURESET_7b0]; +- p->feat._7c0 = fs[FEATURESET_7c0]; +- p->extd.e7d = fs[FEATURESET_e7d]; +- p->extd.e8b = fs[FEATURESET_e8b]; +- p->feat._7d0 = fs[FEATURESET_7d0]; +- p->feat._7a1 = fs[FEATURESET_7a1]; +- p->extd.e21a = fs[FEATURESET_e21a]; +- p->feat._7b1 = fs[FEATURESET_7b1]; +- p->feat._7d2 = fs[FEATURESET_7d2]; +- p->feat._7c1 = fs[FEATURESET_7c1]; +- p->feat._7d1 = fs[FEATURESET_7d1]; +-} +- +-static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) +-{ +- return ((uint64_t)p->xstate.xcr0_high << 32) | p->xstate.xcr0_low; +-} +- +-static inline uint64_t cpuid_policy_xstates(const struct cpuid_policy *p) +-{ +- uint64_t val = p->xstate.xcr0_high | p->xstate.xss_high; +- +- return (val << 32) | p->xstate.xcr0_low | p->xstate.xss_low; +-} +- +-const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); +- +-/** +- * Recalculate the content in a CPUID policy which is derived from raw data. +- */ +-void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p); +- +-/** +- * Fill a CPUID policy using the native CPUID instruction. +- * +- * No sanitisation is performed, but synthesised values are calculated. +- * Values may be influenced by a hypervisor or from masking/faulting +- * configuration. +- */ +-void x86_cpuid_policy_fill_native(struct cpuid_policy *p); +- +-/** +- * Clear leaf data beyond the policies max leaf/subleaf settings. +- * +- * Policy serialisation purposefully omits out-of-range leaves, because there +- * are a large number of them due to vendor differences. However, when +- * constructing new policies (e.g. levelling down), it is possible to end up +- * with out-of-range leaves with stale content in them. This helper clears +- * them. +- */ +-void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); +- +-#ifdef __XEN__ +-#include +-typedef XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_leaf_buffer_t; +-#else +-#include +-typedef xen_cpuid_leaf_t cpuid_leaf_buffer_t[]; +-#endif +- +-/** +- * Serialise a cpuid_policy object into an array of cpuid leaves. +- * +- * @param policy The cpuid_policy to serialise. +- * @param leaves The array of leaves to serialise into. +- * @param nr_entries The number of entries in 'leaves'. +- * @returns -errno +- * +- * Writes at most CPUID_MAX_SERIALISED_LEAVES. May fail with -ENOBUFS if the +- * leaves array is too short. On success, nr_entries is updated with the +- * actual number of leaves written. +- */ +-int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, +- cpuid_leaf_buffer_t leaves, uint32_t *nr_entries); +- +-/** +- * Unserialise a cpuid_policy object from an array of cpuid leaves. +- * +- * @param policy The cpuid_policy to unserialise into. +- * @param leaves The array of leaves to unserialise from. +- * @param nr_entries The number of entries in 'leaves'. +- * @param err_leaf Optional hint for error diagnostics. +- * @param err_subleaf Optional hint for error diagnostics. +- * @returns -errno +- * +- * Reads at most CPUID_MAX_SERIALISED_LEAVES. May return -ERANGE if an +- * incoming leaf is out of range of cpuid_policy, in which case the optional +- * err_* pointers will identify the out-of-range indicies. 
+- * +- * No content validation of in-range leaves is performed. Synthesised data is +- * recalculated. +- */ +-int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, +- const cpuid_leaf_buffer_t leaves, +- uint32_t nr_entries, uint32_t *err_leaf, +- uint32_t *err_subleaf); +- +-#endif /* !XEN_LIB_X86_CPUID_H */ +- +-/* +- * Local variables: +- * mode: C +- * c-file-style: "BSD" +- * c-basic-offset: 4 +- * tab-width: 4 +- * indent-tabs-mode: nil +- * End: +- */ +diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c +index 8eb88314f53c..e81f76c779c0 100644 +--- a/xen/lib/x86/cpuid.c ++++ b/xen/lib/x86/cpuid.c +@@ -1,6 +1,6 @@ + #include "private.h" + +-#include ++#include + + static void zero_leaves(struct cpuid_leaf *l, + unsigned int first, unsigned int last) +-- +2.39.2 + diff --git a/0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch b/0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch new file mode 100644 index 00000000..6e631c10 --- /dev/null +++ b/0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch @@ -0,0 +1,307 @@ +From cf0d5f6d58ea82c9ab18990f850fc4e01887be16 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 28 Mar 2023 21:24:20 +0100 +Subject: [PATCH 06/35] x86: Merge struct msr_policy into struct cpu_policy + +As with the cpuid side, use a temporary define to make struct msr_policy still +work. + +Note, this means that domains now have two separate struct cpu_policy +allocations with disjoint information, and system policies are in a similar +position, as well as xc_cpu_policy objects in libxenguest. All of these +duplications will be addressed in the following patches. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 03812da3754d550dd8cbee7289469069ea6f0073) +--- + tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 1 - + xen/arch/x86/include/asm/msr.h | 3 +- + xen/include/xen/lib/x86/cpu-policy.h | 81 ++++++++++++++++- + xen/include/xen/lib/x86/msr.h | 104 ---------------------- + xen/lib/x86/msr.c | 2 +- + 5 files changed, 83 insertions(+), 108 deletions(-) + delete mode 100644 xen/include/xen/lib/x86/msr.h + +diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +index 7ebe8ee7c325..316eb0efe034 100644 +--- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c ++++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +@@ -10,7 +10,6 @@ + + #include + #include +-#include + #include + + static bool debug; +diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h +index dd1eee04a637..bb32bf19adc7 100644 +--- a/xen/arch/x86/include/asm/msr.h ++++ b/xen/arch/x86/include/asm/msr.h +@@ -6,8 +6,9 @@ + #include + #include + #include ++#include + +-#include ++#include + + #include + #include +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index 666505964d00..53fffca55211 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -3,7 +3,6 @@ + #define XEN_LIB_X86_POLICIES_H + + #include +-#include + + #define FEATURESET_1d 0 /* 0x00000001.edx */ + #define FEATURESET_1c 1 /* 0x00000001.ecx */ +@@ -107,6 +106,9 @@ const char *x86_cpuid_vendor_to_str(unsigned int vendor); + CPUID_GUEST_NR_XSTATE - !!CPUID_GUEST_NR_XSTATE + \ + CPUID_GUEST_NR_EXTD + 2 /* hv_limit and hv2_limit */ ) + ++/* Maximum number of MSRs written when serialising a cpu_policy. 
*/ ++#define MSR_MAX_SERIALISED_ENTRIES 2 ++ + struct cpu_policy + { + #define DECL_BITFIELD(word) _DECL_BITFIELD(FEATURESET_ ## word) +@@ -324,6 +326,44 @@ struct cpu_policy + }; + } extd; + ++ /* ++ * 0x000000ce - MSR_INTEL_PLATFORM_INFO ++ * ++ * This MSR is non-architectural, but for simplicy we allow it to be read ++ * unconditionally. CPUID Faulting support can be fully emulated for HVM ++ * guests so can be offered unconditionally, while support for PV guests ++ * is dependent on real hardware support. ++ */ ++ union { ++ uint32_t raw; ++ struct { ++ uint32_t :31; ++ bool cpuid_faulting:1; ++ }; ++ } platform_info; ++ ++ /* ++ * 0x0000010a - MSR_ARCH_CAPABILITIES ++ * ++ * This is an Intel-only MSR, which provides miscellaneous enumeration, ++ * including those which indicate that microarchitectrual sidechannels are ++ * fixed in hardware. ++ */ ++ union { ++ uint32_t raw; ++ struct { ++ bool rdcl_no:1; ++ bool ibrs_all:1; ++ bool rsba:1; ++ bool skip_l1dfl:1; ++ bool ssb_no:1; ++ bool mds_no:1; ++ bool if_pschange_mc_no:1; ++ bool tsx_ctrl:1; ++ bool taa_no:1; ++ }; ++ } arch_caps; ++ + #undef __DECL_BITFIELD + #undef _DECL_BITFIELD + #undef DECL_BITFIELD +@@ -337,6 +377,7 @@ struct cpu_policy + + /* Temporary */ + #define cpuid_policy cpu_policy ++#define msr_policy cpu_policy + + struct old_cpu_policy + { +@@ -438,9 +479,11 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); + #ifdef __XEN__ + #include + typedef XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_leaf_buffer_t; ++typedef XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_entry_buffer_t; + #else + #include + typedef xen_cpuid_leaf_t cpuid_leaf_buffer_t[]; ++typedef xen_msr_entry_t msr_entry_buffer_t[]; + #endif + + /** +@@ -480,6 +523,42 @@ int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, + uint32_t nr_entries, uint32_t *err_leaf, + uint32_t *err_subleaf); + ++/** ++ * Serialise an msr_policy object into an array. ++ * ++ * @param policy The msr_policy to serialise. ++ * @param msrs The array of msrs to serialise into. ++ * @param nr_entries The number of entries in 'msrs'. ++ * @returns -errno ++ * ++ * Writes at most MSR_MAX_SERIALISED_ENTRIES. May fail with -ENOBUFS if the ++ * buffer array is too short. On success, nr_entries is updated with the ++ * actual number of msrs written. ++ */ ++int x86_msr_copy_to_buffer(const struct msr_policy *policy, ++ msr_entry_buffer_t msrs, uint32_t *nr_entries); ++ ++/** ++ * Unserialise an msr_policy object from an array of msrs. ++ * ++ * @param policy The msr_policy object to unserialise into. ++ * @param msrs The array of msrs to unserialise from. ++ * @param nr_entries The number of entries in 'msrs'. ++ * @param err_msr Optional hint for error diagnostics. ++ * @returns -errno ++ * ++ * Reads at most MSR_MAX_SERIALISED_ENTRIES. May fail for a number of reasons ++ * based on the content in an individual 'msrs' entry, including the MSR index ++ * not being valid in the policy, the flags field being nonzero, or if the ++ * value provided would truncate when stored in the policy. In such cases, ++ * the optional err_* pointer will identify the problematic MSR. ++ * ++ * No content validation is performed on the data stored in the policy object. ++ */ ++int x86_msr_copy_from_buffer(struct msr_policy *policy, ++ const msr_entry_buffer_t msrs, uint32_t nr_entries, ++ uint32_t *err_msr); ++ + /* + * Calculate whether two policies are compatible. 
+ * +diff --git a/xen/include/xen/lib/x86/msr.h b/xen/include/xen/lib/x86/msr.h +deleted file mode 100644 +index 48ba4a59c036..000000000000 +--- a/xen/include/xen/lib/x86/msr.h ++++ /dev/null +@@ -1,104 +0,0 @@ +-/* Common data structures and functions consumed by hypervisor and toolstack */ +-#ifndef XEN_LIB_X86_MSR_H +-#define XEN_LIB_X86_MSR_H +- +-/* Maximum number of MSRs written when serialising msr_policy. */ +-#define MSR_MAX_SERIALISED_ENTRIES 2 +- +-/* MSR policy object for shared per-domain MSRs */ +-struct msr_policy +-{ +- /* +- * 0x000000ce - MSR_INTEL_PLATFORM_INFO +- * +- * This MSR is non-architectural, but for simplicy we allow it to be read +- * unconditionally. CPUID Faulting support can be fully emulated for HVM +- * guests so can be offered unconditionally, while support for PV guests +- * is dependent on real hardware support. +- */ +- union { +- uint32_t raw; +- struct { +- uint32_t :31; +- bool cpuid_faulting:1; +- }; +- } platform_info; +- +- /* +- * 0x0000010a - MSR_ARCH_CAPABILITIES +- * +- * This is an Intel-only MSR, which provides miscellaneous enumeration, +- * including those which indicate that microarchitectrual sidechannels are +- * fixed in hardware. +- */ +- union { +- uint32_t raw; +- struct { +- bool rdcl_no:1; +- bool ibrs_all:1; +- bool rsba:1; +- bool skip_l1dfl:1; +- bool ssb_no:1; +- bool mds_no:1; +- bool if_pschange_mc_no:1; +- bool tsx_ctrl:1; +- bool taa_no:1; +- }; +- } arch_caps; +-}; +- +-#ifdef __XEN__ +-#include +-typedef XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_entry_buffer_t; +-#else +-#include +-typedef xen_msr_entry_t msr_entry_buffer_t[]; +-#endif +- +-/** +- * Serialise an msr_policy object into an array. +- * +- * @param policy The msr_policy to serialise. +- * @param msrs The array of msrs to serialise into. +- * @param nr_entries The number of entries in 'msrs'. +- * @returns -errno +- * +- * Writes at most MSR_MAX_SERIALISED_ENTRIES. May fail with -ENOBUFS if the +- * buffer array is too short. On success, nr_entries is updated with the +- * actual number of msrs written. +- */ +-int x86_msr_copy_to_buffer(const struct msr_policy *policy, +- msr_entry_buffer_t msrs, uint32_t *nr_entries); +- +-/** +- * Unserialise an msr_policy object from an array of msrs. +- * +- * @param policy The msr_policy object to unserialise into. +- * @param msrs The array of msrs to unserialise from. +- * @param nr_entries The number of entries in 'msrs'. +- * @param err_msr Optional hint for error diagnostics. +- * @returns -errno +- * +- * Reads at most MSR_MAX_SERIALISED_ENTRIES. May fail for a number of reasons +- * based on the content in an individual 'msrs' entry, including the MSR index +- * not being valid in the policy, the flags field being nonzero, or if the +- * value provided would truncate when stored in the policy. In such cases, +- * the optional err_* pointer will identify the problematic MSR. +- * +- * No content validation is performed on the data stored in the policy object. 
+- */ +-int x86_msr_copy_from_buffer(struct msr_policy *policy, +- const msr_entry_buffer_t msrs, uint32_t nr_entries, +- uint32_t *err_msr); +- +-#endif /* !XEN_LIB_X86_MSR_H */ +- +-/* +- * Local variables: +- * mode: C +- * c-file-style: "BSD" +- * c-basic-offset: 4 +- * tab-width: 4 +- * indent-tabs-mode: nil +- * End: +- */ +diff --git a/xen/lib/x86/msr.c b/xen/lib/x86/msr.c +index 7d71e92a380a..c4d885e7b568 100644 +--- a/xen/lib/x86/msr.c ++++ b/xen/lib/x86/msr.c +@@ -1,6 +1,6 @@ + #include "private.h" + +-#include ++#include + + /* + * Copy a single MSR into the provided msr_entry_buffer_t buffer, performing a +-- +2.39.2 + diff --git a/0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch b/0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch new file mode 100644 index 00000000..392942ab --- /dev/null +++ b/0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch @@ -0,0 +1,612 @@ +From a30d43f4b80d3472ca70ee1fbd2c8d1721c61401 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 29 Mar 2023 07:39:44 +0100 +Subject: [PATCH 07/35] x86: Merge the system {cpuid,msr} policy objects + +Right now, they're the same underlying type, containing disjoint information. + +Introduce a new cpu-policy.{h,c} to be the new location for all policy +handling logic. Place the combined objects in __ro_after_init, which is new +since the original logic was written. + +As we're trying to phase out the use of struct old_cpu_policy entirely, rework +update_domain_cpu_policy() to not pointer-chase through system_policies[]. + +This in turn allows system_policies[] in sysctl.c to become static and reduced +in scope to XEN_SYSCTL_get_cpu_policy. + +No practical change. This undoes the transient doubling of storage space from +earlier patches. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 6bc33366795d14a21a3244d0f3b63f7dccea87ef) +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/cpu-policy.c | 18 +++++++ + xen/arch/x86/cpu/common.c | 4 +- + xen/arch/x86/cpuid.c | 66 +++++++++++-------------- + xen/arch/x86/domctl.c | 17 +++++-- + xen/arch/x86/include/asm/cpu-policy.h | 14 ++++++ + xen/arch/x86/include/asm/cpuid.h | 6 --- + xen/arch/x86/include/asm/msr.h | 7 --- + xen/arch/x86/msr.c | 38 ++++++-------- + xen/arch/x86/sysctl.c | 71 ++++++++++----------------- + 10 files changed, 116 insertions(+), 126 deletions(-) + create mode 100644 xen/arch/x86/cpu-policy.c + create mode 100644 xen/arch/x86/include/asm/cpu-policy.h + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index 5accbe4c6746..f213a6b56a4d 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -18,6 +18,7 @@ obj-y += bitops.o + obj-bin-y += bzimage.init.o + obj-bin-y += clear_page.o + obj-bin-y += copy_page.o ++obj-y += cpu-policy.o + obj-y += cpuid.o + obj-$(CONFIG_PV) += compat.o + obj-$(CONFIG_PV32) += x86_64/compat.o +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +new file mode 100644 +index 000000000000..663e9a084c53 +--- /dev/null ++++ b/xen/arch/x86/cpu-policy.c +@@ -0,0 +1,18 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++#include ++#include ++ ++#include ++ ++#include ++ ++struct cpu_policy __ro_after_init raw_cpu_policy; ++struct cpu_policy __ro_after_init host_cpu_policy; ++#ifdef CONFIG_PV ++struct cpu_policy __ro_after_init pv_max_cpu_policy; ++struct cpu_policy __ro_after_init pv_def_cpu_policy; ++#endif ++#ifdef CONFIG_HVM ++struct cpu_policy __ro_after_init hvm_max_cpu_policy; ++struct cpu_policy __ro_after_init hvm_def_cpu_policy; 
++#endif +diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c +index 27f73d3bbe31..665200db382f 100644 +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -3,6 +3,8 @@ + #include + #include + #include ++ ++#include + #include + #include + #include +@@ -138,7 +140,7 @@ bool __init probe_cpuid_faulting(void) + return false; + + if ((rc = rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val)) == 0) +- raw_msr_policy.platform_info.cpuid_faulting = ++ raw_cpu_policy.platform_info.cpuid_faulting = + val & MSR_PLATFORM_INFO_CPUID_FAULTING; + + if (rc || +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index acc2f606cea8..1327dba30dd8 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -142,17 +143,6 @@ static void zero_leaves(struct cpuid_leaf *l, + memset(&l[first], 0, sizeof(*l) * (last - first + 1)); + } + +-struct cpuid_policy __read_mostly raw_cpuid_policy, +- __read_mostly host_cpuid_policy; +-#ifdef CONFIG_PV +-struct cpuid_policy __read_mostly pv_max_cpuid_policy; +-struct cpuid_policy __read_mostly pv_def_cpuid_policy; +-#endif +-#ifdef CONFIG_HVM +-struct cpuid_policy __read_mostly hvm_max_cpuid_policy; +-struct cpuid_policy __read_mostly hvm_def_cpuid_policy; +-#endif +- + static void sanitise_featureset(uint32_t *fs) + { + /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ +@@ -344,7 +334,7 @@ static void recalculate_misc(struct cpuid_policy *p) + + static void __init calculate_raw_policy(void) + { +- struct cpuid_policy *p = &raw_cpuid_policy; ++ struct cpuid_policy *p = &raw_cpu_policy; + + x86_cpuid_policy_fill_native(p); + +@@ -354,10 +344,10 @@ static void __init calculate_raw_policy(void) + + static void __init calculate_host_policy(void) + { +- struct cpuid_policy *p = &host_cpuid_policy; ++ struct cpuid_policy *p = &host_cpu_policy; + unsigned int max_extd_leaf; + +- *p = raw_cpuid_policy; ++ *p = raw_cpu_policy; + + p->basic.max_leaf = + min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); +@@ -449,17 +439,17 @@ static void __init guest_common_feature_adjustments(uint32_t *fs) + * of IBRS by using the AMD feature bit. An administrator may wish for + * performance reasons to offer IBPB without IBRS. 
+ */ +- if ( host_cpuid_policy.feat.ibrsb ) ++ if ( host_cpu_policy.feat.ibrsb ) + __set_bit(X86_FEATURE_IBPB, fs); + } + + static void __init calculate_pv_max_policy(void) + { +- struct cpuid_policy *p = &pv_max_cpuid_policy; ++ struct cpuid_policy *p = &pv_max_cpu_policy; + uint32_t pv_featureset[FSCAPINTS]; + unsigned int i; + +- *p = host_cpuid_policy; ++ *p = host_cpu_policy; + cpuid_policy_to_featureset(p, pv_featureset); + + for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) +@@ -486,11 +476,11 @@ static void __init calculate_pv_max_policy(void) + + static void __init calculate_pv_def_policy(void) + { +- struct cpuid_policy *p = &pv_def_cpuid_policy; ++ struct cpuid_policy *p = &pv_def_cpu_policy; + uint32_t pv_featureset[FSCAPINTS]; + unsigned int i; + +- *p = pv_max_cpuid_policy; ++ *p = pv_max_cpu_policy; + cpuid_policy_to_featureset(p, pv_featureset); + + for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) +@@ -506,12 +496,12 @@ static void __init calculate_pv_def_policy(void) + + static void __init calculate_hvm_max_policy(void) + { +- struct cpuid_policy *p = &hvm_max_cpuid_policy; ++ struct cpuid_policy *p = &hvm_max_cpu_policy; + uint32_t hvm_featureset[FSCAPINTS]; + unsigned int i; + const uint32_t *hvm_featuremask; + +- *p = host_cpuid_policy; ++ *p = host_cpu_policy; + cpuid_policy_to_featureset(p, hvm_featureset); + + hvm_featuremask = hvm_hap_supported() ? +@@ -539,7 +529,7 @@ static void __init calculate_hvm_max_policy(void) + * HVM guests are able if running in protected mode. + */ + if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) && +- raw_cpuid_policy.basic.sep ) ++ raw_cpu_policy.basic.sep ) + __set_bit(X86_FEATURE_SEP, hvm_featureset); + + /* +@@ -588,12 +578,12 @@ static void __init calculate_hvm_max_policy(void) + + static void __init calculate_hvm_def_policy(void) + { +- struct cpuid_policy *p = &hvm_def_cpuid_policy; ++ struct cpuid_policy *p = &hvm_def_cpu_policy; + uint32_t hvm_featureset[FSCAPINTS]; + unsigned int i; + const uint32_t *hvm_featuremask; + +- *p = hvm_max_cpuid_policy; ++ *p = hvm_max_cpu_policy; + cpuid_policy_to_featureset(p, hvm_featureset); + + hvm_featuremask = hvm_hap_supported() ? +@@ -661,8 +651,8 @@ void recalculate_cpuid_policy(struct domain *d) + { + struct cpuid_policy *p = d->arch.cpuid; + const struct cpuid_policy *max = is_pv_domain(d) +- ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpuid_policy : NULL) +- : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpuid_policy : NULL); ++ ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) ++ : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); + uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; + unsigned int i; + +@@ -737,7 +727,7 @@ void recalculate_cpuid_policy(struct domain *d) + /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ + fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | + cpufeat_mask(X86_FEATURE_NO_FPU_SEL)); +- fs[FEATURESET_7b0] |= (host_cpuid_policy.feat._7b0 & ++ fs[FEATURESET_7b0] |= (host_cpu_policy.feat._7b0 & + (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | + cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); + +@@ -788,8 +778,8 @@ void recalculate_cpuid_policy(struct domain *d) + int init_domain_cpuid_policy(struct domain *d) + { + struct cpuid_policy *p = is_pv_domain(d) +- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpuid_policy : NULL) +- : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpuid_policy : NULL); ++ ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) ++ : (IS_ENABLED(CONFIG_HVM) ? 
&hvm_def_cpu_policy : NULL); + + if ( !p ) + { +@@ -1093,7 +1083,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, + if ( is_pv_domain(d) && is_hardware_domain(d) && + guest_kernel_mode(v, regs) && cpu_has_monitor && + regs->entry_vector == TRAP_gp_fault ) +- *res = raw_cpuid_policy.basic.raw[5]; ++ *res = raw_cpu_policy.basic.raw[5]; + break; + + case 0x7: +@@ -1225,14 +1215,14 @@ static void __init __maybe_unused build_assertions(void) + /* Find some more clever allocation scheme if this trips. */ + BUILD_BUG_ON(sizeof(struct cpuid_policy) > PAGE_SIZE); + +- BUILD_BUG_ON(sizeof(raw_cpuid_policy.basic) != +- sizeof(raw_cpuid_policy.basic.raw)); +- BUILD_BUG_ON(sizeof(raw_cpuid_policy.feat) != +- sizeof(raw_cpuid_policy.feat.raw)); +- BUILD_BUG_ON(sizeof(raw_cpuid_policy.xstate) != +- sizeof(raw_cpuid_policy.xstate.raw)); +- BUILD_BUG_ON(sizeof(raw_cpuid_policy.extd) != +- sizeof(raw_cpuid_policy.extd.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.basic) != ++ sizeof(raw_cpu_policy.basic.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.feat) != ++ sizeof(raw_cpu_policy.feat.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) != ++ sizeof(raw_cpu_policy.xstate.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) != ++ sizeof(raw_cpu_policy.extd.raw)); + } + + /* +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 175d473e412a..2689df813b39 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -36,18 +36,25 @@ + #include + #include + #include +-#include ++#include + + static int update_domain_cpu_policy(struct domain *d, + xen_domctl_cpu_policy_t *xdpc) + { + struct old_cpu_policy new = {}; +- const struct old_cpu_policy *sys = is_pv_domain(d) +- ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max] +- : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max]; ++ struct cpu_policy *sys = is_pv_domain(d) ++ ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) ++ : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); ++ struct old_cpu_policy old_sys = { sys, sys }; + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; + int ret = -ENOMEM; + ++ if ( !sys ) ++ { ++ ASSERT_UNREACHABLE(); ++ return -EOPNOTSUPP; ++ } ++ + /* Start by copying the domain's existing policies. */ + if ( !(new.cpuid = xmemdup(d->arch.cpuid)) || + !(new.msr = xmemdup(d->arch.msr)) ) +@@ -65,7 +72,7 @@ static int update_domain_cpu_policy(struct domain *d, + x86_cpuid_policy_clear_out_of_range_leaves(new.cpuid); + + /* Audit the combined dataset. 
*/ +- ret = x86_cpu_policies_are_compatible(sys, &new, &err); ++ ret = x86_cpu_policies_are_compatible(&old_sys, &new, &err); + if ( ret ) + goto out; + +diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h +new file mode 100644 +index 000000000000..eef14bb4267e +--- /dev/null ++++ b/xen/arch/x86/include/asm/cpu-policy.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++#ifndef X86_CPU_POLICY_H ++#define X86_CPU_POLICY_H ++ ++struct cpu_policy; ++ ++extern struct cpu_policy raw_cpu_policy; ++extern struct cpu_policy host_cpu_policy; ++extern struct cpu_policy pv_max_cpu_policy; ++extern struct cpu_policy pv_def_cpu_policy; ++extern struct cpu_policy hvm_max_cpu_policy; ++extern struct cpu_policy hvm_def_cpu_policy; ++ ++#endif /* X86_CPU_POLICY_H */ +diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h +index d418e8100dde..ea0586277331 100644 +--- a/xen/arch/x86/include/asm/cpuid.h ++++ b/xen/arch/x86/include/asm/cpuid.h +@@ -46,12 +46,6 @@ DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks); + /* Default masking MSR values, calculated at boot. */ + extern struct cpuidmasks cpuidmask_defaults; + +-extern struct cpuid_policy raw_cpuid_policy, host_cpuid_policy, +- pv_max_cpuid_policy, pv_def_cpuid_policy, +- hvm_max_cpuid_policy, hvm_def_cpuid_policy; +- +-extern const struct old_cpu_policy system_policies[]; +- + /* Check that all previously present features are still available. */ + bool recheck_cpu_features(unsigned int cpu); + +diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h +index bb32bf19adc7..8a4da50c500a 100644 +--- a/xen/arch/x86/include/asm/msr.h ++++ b/xen/arch/x86/include/asm/msr.h +@@ -280,13 +280,6 @@ static inline void wrmsr_tsc_aux(uint32_t val) + + uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp); + +-extern struct msr_policy raw_msr_policy, +- host_msr_policy, +- pv_max_msr_policy, +- pv_def_msr_policy, +- hvm_max_msr_policy, +- hvm_def_msr_policy; +- + /* Container object for per-vCPU MSRs */ + struct vcpu_msrs + { +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index cf46b18aa64c..01f95603e297 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -25,6 +25,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -37,20 +38,9 @@ + + DEFINE_PER_CPU(uint32_t, tsc_aux); + +-struct msr_policy __read_mostly raw_msr_policy, +- __read_mostly host_msr_policy; +-#ifdef CONFIG_PV +-struct msr_policy __read_mostly pv_max_msr_policy; +-struct msr_policy __read_mostly pv_def_msr_policy; +-#endif +-#ifdef CONFIG_HVM +-struct msr_policy __read_mostly hvm_max_msr_policy; +-struct msr_policy __read_mostly hvm_def_msr_policy; +-#endif +- + static void __init calculate_raw_policy(void) + { +- struct msr_policy *mp = &raw_msr_policy; ++ struct msr_policy *mp = &raw_cpu_policy; + + /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ + /* Was already added by probe_cpuid_faulting() */ +@@ -61,9 +51,9 @@ static void __init calculate_raw_policy(void) + + static void __init calculate_host_policy(void) + { +- struct msr_policy *mp = &host_msr_policy; ++ struct msr_policy *mp = &host_cpu_policy; + +- *mp = raw_msr_policy; ++ *mp = raw_cpu_policy; + + /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ + /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ +@@ -81,25 +71,25 @@ static void __init calculate_host_policy(void) + + static void __init calculate_pv_max_policy(void) + { +- struct msr_policy *mp = &pv_max_msr_policy; ++ struct 
msr_policy *mp = &pv_max_cpu_policy; + +- *mp = host_msr_policy; ++ *mp = host_cpu_policy; + + mp->arch_caps.raw = 0; /* Not supported yet. */ + } + + static void __init calculate_pv_def_policy(void) + { +- struct msr_policy *mp = &pv_def_msr_policy; ++ struct msr_policy *mp = &pv_def_cpu_policy; + +- *mp = pv_max_msr_policy; ++ *mp = pv_max_cpu_policy; + } + + static void __init calculate_hvm_max_policy(void) + { +- struct msr_policy *mp = &hvm_max_msr_policy; ++ struct msr_policy *mp = &hvm_max_cpu_policy; + +- *mp = host_msr_policy; ++ *mp = host_cpu_policy; + + /* It's always possible to emulate CPUID faulting for HVM guests */ + mp->platform_info.cpuid_faulting = true; +@@ -109,9 +99,9 @@ static void __init calculate_hvm_max_policy(void) + + static void __init calculate_hvm_def_policy(void) + { +- struct msr_policy *mp = &hvm_def_msr_policy; ++ struct msr_policy *mp = &hvm_def_cpu_policy; + +- *mp = hvm_max_msr_policy; ++ *mp = hvm_max_cpu_policy; + } + + void __init init_guest_msr_policy(void) +@@ -135,8 +125,8 @@ void __init init_guest_msr_policy(void) + int init_domain_msr_policy(struct domain *d) + { + struct msr_policy *mp = is_pv_domain(d) +- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_msr_policy : NULL) +- : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_msr_policy : NULL); ++ ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) ++ : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpu_policy : NULL); + + if ( !mp ) + { +diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c +index 838a9947bfe3..c68242e5bcaf 100644 +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c +@@ -31,38 +31,7 @@ + #include + #include + #include +-#include +- +-const struct old_cpu_policy system_policies[6] = { +- [ XEN_SYSCTL_cpu_policy_raw ] = { +- &raw_cpuid_policy, +- &raw_msr_policy, +- }, +- [ XEN_SYSCTL_cpu_policy_host ] = { +- &host_cpuid_policy, +- &host_msr_policy, +- }, +-#ifdef CONFIG_PV +- [ XEN_SYSCTL_cpu_policy_pv_max ] = { +- &pv_max_cpuid_policy, +- &pv_max_msr_policy, +- }, +- [ XEN_SYSCTL_cpu_policy_pv_default ] = { +- &pv_def_cpuid_policy, +- &pv_def_msr_policy, +- }, +-#endif +-#ifdef CONFIG_HVM +- [ XEN_SYSCTL_cpu_policy_hvm_max ] = { +- &hvm_max_cpuid_policy, +- &hvm_max_msr_policy, +- }, +- [ XEN_SYSCTL_cpu_policy_hvm_default ] = { +- &hvm_def_cpuid_policy, +- &hvm_def_msr_policy, +- }, +-#endif +-}; ++#include + + struct l3_cache_info { + int ret; +@@ -327,19 +296,19 @@ long arch_do_sysctl( + + case XEN_SYSCTL_get_cpu_featureset: + { +- static const struct cpuid_policy *const policy_table[6] = { +- [XEN_SYSCTL_cpu_featureset_raw] = &raw_cpuid_policy, +- [XEN_SYSCTL_cpu_featureset_host] = &host_cpuid_policy, ++ static const struct cpu_policy *const policy_table[6] = { ++ [XEN_SYSCTL_cpu_featureset_raw] = &raw_cpu_policy, ++ [XEN_SYSCTL_cpu_featureset_host] = &host_cpu_policy, + #ifdef CONFIG_PV +- [XEN_SYSCTL_cpu_featureset_pv] = &pv_def_cpuid_policy, +- [XEN_SYSCTL_cpu_featureset_pv_max] = &pv_max_cpuid_policy, ++ [XEN_SYSCTL_cpu_featureset_pv] = &pv_def_cpu_policy, ++ [XEN_SYSCTL_cpu_featureset_pv_max] = &pv_max_cpu_policy, + #endif + #ifdef CONFIG_HVM +- [XEN_SYSCTL_cpu_featureset_hvm] = &hvm_def_cpuid_policy, +- [XEN_SYSCTL_cpu_featureset_hvm_max] = &hvm_max_cpuid_policy, ++ [XEN_SYSCTL_cpu_featureset_hvm] = &hvm_def_cpu_policy, ++ [XEN_SYSCTL_cpu_featureset_hvm_max] = &hvm_max_cpu_policy, + #endif + }; +- const struct cpuid_policy *p = NULL; ++ const struct cpu_policy *p = NULL; + uint32_t featureset[FSCAPINTS]; + unsigned int nr; + +@@ -392,7 +361,19 @@ long arch_do_sysctl( + + case 
XEN_SYSCTL_get_cpu_policy: + { +- const struct old_cpu_policy *policy; ++ static const struct cpu_policy *const system_policies[6] = { ++ [XEN_SYSCTL_cpu_policy_raw] = &raw_cpu_policy, ++ [XEN_SYSCTL_cpu_policy_host] = &host_cpu_policy, ++#ifdef CONFIG_PV ++ [XEN_SYSCTL_cpu_policy_pv_max] = &pv_max_cpu_policy, ++ [XEN_SYSCTL_cpu_policy_pv_default] = &pv_def_cpu_policy, ++#endif ++#ifdef CONFIG_HVM ++ [XEN_SYSCTL_cpu_policy_hvm_max] = &hvm_max_cpu_policy, ++ [XEN_SYSCTL_cpu_policy_hvm_default] = &hvm_def_cpu_policy, ++#endif ++ }; ++ const struct cpu_policy *policy; + + /* Reserved field set, or bad policy index? */ + if ( sysctl->u.cpu_policy._rsvd || +@@ -401,11 +382,11 @@ long arch_do_sysctl( + ret = -EINVAL; + break; + } +- policy = &system_policies[ ++ policy = system_policies[ + array_index_nospec(sysctl->u.cpu_policy.index, + ARRAY_SIZE(system_policies))]; + +- if ( !policy->cpuid || !policy->msr ) ++ if ( !policy ) + { + ret = -EOPNOTSUPP; + break; +@@ -415,7 +396,7 @@ long arch_do_sysctl( + if ( guest_handle_is_null(sysctl->u.cpu_policy.leaves) ) + sysctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; + else if ( (ret = x86_cpuid_copy_to_buffer( +- policy->cpuid, ++ policy, + sysctl->u.cpu_policy.leaves, + &sysctl->u.cpu_policy.nr_leaves)) ) + break; +@@ -431,7 +412,7 @@ long arch_do_sysctl( + if ( guest_handle_is_null(sysctl->u.cpu_policy.msrs) ) + sysctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; + else if ( (ret = x86_msr_copy_to_buffer( +- policy->msr, ++ policy, + sysctl->u.cpu_policy.msrs, + &sysctl->u.cpu_policy.nr_msrs)) ) + break; +-- +2.39.2 + diff --git a/0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch b/0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch new file mode 100644 index 00000000..3da522ce --- /dev/null +++ b/0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch @@ -0,0 +1,423 @@ +From 1b87ec7dfde8c0d6d52bb7286e3f280061b361b5 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 29 Mar 2023 11:32:25 +0100 +Subject: [PATCH 08/35] x86: Merge a domain's {cpuid,msr} policy objects + +Right now, they're the same underlying type, containing disjoint information. + +Drop the d->arch.msr pointer, and union d->arch.cpuid to give it a second name +of cpu_policy in the interim. + +Merge init_domain_{cpuid,msr}_policy() into a single init_domain_cpu_policy(), +moving the implementation into cpu-policy.c + +No practical change. This undoes the transient doubling of storage space from +earlier patches. 
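+
+As a minimal standalone sketch of the aliasing idiom (not code from this
+patch; the struct contents are hypothetical and only the union shape mirrors
+the real change), an anonymous union of identically-typed pointers lets the
+existing d->arch.cpuid and d->arch.msr call sites keep compiling while every
+name refers to the same allocation:
+
+    #include <stdlib.h>
+
+    struct cpu_policy { unsigned int max_leaf; };
+
+    struct arch_domain {
+        union {
+            struct cpu_policy *cpu_policy; /* canonical name */
+            struct cpu_policy *cpuid;      /* legacy alias   */
+            struct cpu_policy *msr;        /* legacy alias   */
+        };
+    };
+
+    int main(void)
+    {
+        struct arch_domain d = {
+            .cpu_policy = calloc(1, sizeof(struct cpu_policy)),
+        };
+
+        if ( !d.cpu_policy )
+            return 1;
+
+        d.cpuid->max_leaf = 7;   /* an old CPUID-side call site ... */
+        return d.msr->max_leaf;  /* ... observed through the MSR alias */
+    }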
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit bd13dae34809e61e37ba1cd5de893c5c10c46256) +--- + xen/arch/x86/cpu-policy.c | 49 +++++++++++++++++++++++++++ + xen/arch/x86/cpuid.c | 23 ------------- + xen/arch/x86/domain.c | 15 +++----- + xen/arch/x86/domctl.c | 35 ++++++++++--------- + xen/arch/x86/include/asm/cpu-policy.h | 4 +++ + xen/arch/x86/include/asm/cpuid.h | 3 -- + xen/arch/x86/include/asm/domain.h | 13 +++++-- + xen/arch/x86/include/asm/msr.h | 1 - + xen/arch/x86/mm/mem_sharing.c | 3 +- + xen/arch/x86/msr.c | 44 ------------------------ + 10 files changed, 86 insertions(+), 104 deletions(-) + +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index 663e9a084c53..e9ac1269c35a 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -1,10 +1,13 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ + #include + #include ++#include + + #include + + #include ++#include ++#include + + struct cpu_policy __ro_after_init raw_cpu_policy; + struct cpu_policy __ro_after_init host_cpu_policy; +@@ -16,3 +19,49 @@ struct cpu_policy __ro_after_init pv_def_cpu_policy; + struct cpu_policy __ro_after_init hvm_max_cpu_policy; + struct cpu_policy __ro_after_init hvm_def_cpu_policy; + #endif ++ ++int init_domain_cpu_policy(struct domain *d) ++{ ++ struct cpu_policy *p = is_pv_domain(d) ++ ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) ++ : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpu_policy : NULL); ++ ++ if ( !p ) ++ { ++ ASSERT_UNREACHABLE(); ++ return -EOPNOTSUPP; ++ } ++ ++ p = xmemdup(p); ++ if ( !p ) ++ return -ENOMEM; ++ ++ /* See comment in ctxt_switch_levelling() */ ++ if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) ) ++ p->platform_info.cpuid_faulting = false; ++ ++ /* ++ * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, ++ * so dom0 can turn off workarounds as appropriate. Temporary, until the ++ * domain policy logic gains a better understanding of MSRs. ++ */ ++ if ( is_hardware_domain(d) && cpu_has_arch_caps ) ++ { ++ uint64_t val; ++ ++ rdmsrl(MSR_ARCH_CAPABILITIES, val); ++ ++ p->arch_caps.raw = val & ++ (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | ++ ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | ++ ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ++ ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ++ ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); ++ } ++ ++ d->arch.cpu_policy = p; ++ ++ recalculate_cpuid_policy(d); ++ ++ return 0; ++} +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index 1327dba30dd8..e074befb721d 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -775,29 +775,6 @@ void recalculate_cpuid_policy(struct domain *d) + p->extd.raw[0x19] = EMPTY_LEAF; + } + +-int init_domain_cpuid_policy(struct domain *d) +-{ +- struct cpuid_policy *p = is_pv_domain(d) +- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) +- : (IS_ENABLED(CONFIG_HVM) ? 
&hvm_def_cpu_policy : NULL); +- +- if ( !p ) +- { +- ASSERT_UNREACHABLE(); +- return -EOPNOTSUPP; +- } +- +- p = xmemdup(p); +- if ( !p ) +- return -ENOMEM; +- +- d->arch.cpuid = p; +- +- recalculate_cpuid_policy(d); +- +- return 0; +-} +- + void __init init_dom0_cpuid_policy(struct domain *d) + { + struct cpuid_policy *p = d->arch.cpuid; +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index e546c9832225..faea542286c0 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -66,6 +66,7 @@ + #ifdef CONFIG_COMPAT + #include + #endif ++#include + #include + #include + #include +@@ -743,8 +744,7 @@ int arch_domain_create(struct domain *d, + + d->arch.ctxt_switch = &idle_csw; + +- d->arch.cpuid = ZERO_BLOCK_PTR; /* Catch stray misuses. */ +- d->arch.msr = ZERO_BLOCK_PTR; ++ d->arch.cpu_policy = ZERO_BLOCK_PTR; /* Catch stray misuses. */ + + return 0; + } +@@ -799,10 +799,7 @@ int arch_domain_create(struct domain *d, + goto fail; + paging_initialised = true; + +- if ( (rc = init_domain_cpuid_policy(d)) ) +- goto fail; +- +- if ( (rc = init_domain_msr_policy(d)) ) ++ if ( (rc = init_domain_cpu_policy(d)) ) + goto fail; + + d->arch.ioport_caps = +@@ -873,8 +870,7 @@ int arch_domain_create(struct domain *d, + iommu_domain_destroy(d); + cleanup_domain_irq_mapping(d); + free_xenheap_page(d->shared_info); +- xfree(d->arch.cpuid); +- xfree(d->arch.msr); ++ XFREE(d->arch.cpu_policy); + if ( paging_initialised ) + paging_final_teardown(d); + free_perdomain_mappings(d); +@@ -888,8 +884,7 @@ void arch_domain_destroy(struct domain *d) + hvm_domain_destroy(d); + + xfree(d->arch.e820); +- xfree(d->arch.cpuid); +- xfree(d->arch.msr); ++ XFREE(d->arch.cpu_policy); + + free_domain_pirqs(d); + if ( !is_idle_domain(d) ) +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 2689df813b39..857d0abe323e 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -41,11 +41,11 @@ + static int update_domain_cpu_policy(struct domain *d, + xen_domctl_cpu_policy_t *xdpc) + { +- struct old_cpu_policy new = {}; ++ struct cpu_policy *new; + struct cpu_policy *sys = is_pv_domain(d) + ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) + : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); +- struct old_cpu_policy old_sys = { sys, sys }; ++ struct old_cpu_policy old_sys = { sys, sys }, old_new; + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; + int ret = -ENOMEM; + +@@ -55,33 +55,33 @@ static int update_domain_cpu_policy(struct domain *d, + return -EOPNOTSUPP; + } + +- /* Start by copying the domain's existing policies. */ +- if ( !(new.cpuid = xmemdup(d->arch.cpuid)) || +- !(new.msr = xmemdup(d->arch.msr)) ) ++ /* Start by copying the domain's existing policy. */ ++ if ( !(new = xmemdup(d->arch.cpu_policy)) ) + goto out; + ++ old_new = (struct old_cpu_policy){ new, new }; ++ + /* Merge the toolstack provided data. */ + if ( (ret = x86_cpuid_copy_from_buffer( +- new.cpuid, xdpc->leaves, xdpc->nr_leaves, ++ new, xdpc->leaves, xdpc->nr_leaves, + &err.leaf, &err.subleaf)) || + (ret = x86_msr_copy_from_buffer( +- new.msr, xdpc->msrs, xdpc->nr_msrs, &err.msr)) ) ++ new, xdpc->msrs, xdpc->nr_msrs, &err.msr)) ) + goto out; + + /* Trim any newly-stale out-of-range leaves. */ +- x86_cpuid_policy_clear_out_of_range_leaves(new.cpuid); ++ x86_cpuid_policy_clear_out_of_range_leaves(new); + + /* Audit the combined dataset. 
*/ +- ret = x86_cpu_policies_are_compatible(&old_sys, &new, &err); ++ ret = x86_cpu_policies_are_compatible(&old_sys, &old_new, &err); + if ( ret ) + goto out; + + /* +- * Audit was successful. Replace existing policies, leaving the old +- * policies to be freed. ++ * Audit was successful. Replace the existing policy, leaving the old one ++ * to be freed. + */ +- SWAP(new.cpuid, d->arch.cpuid); +- SWAP(new.msr, d->arch.msr); ++ SWAP(new, d->arch.cpu_policy); + + /* TODO: Drop when x86_cpu_policies_are_compatible() is completed. */ + recalculate_cpuid_policy(d); +@@ -90,9 +90,8 @@ static int update_domain_cpu_policy(struct domain *d, + domain_cpu_policy_changed(d); + + out: +- /* Free whichever cpuid/msr structs are not installed in struct domain. */ +- xfree(new.cpuid); +- xfree(new.msr); ++ /* Free whichever struct is not installed in struct domain. */ ++ xfree(new); + + if ( ret ) + { +@@ -1328,7 +1327,7 @@ long arch_do_domctl( + if ( guest_handle_is_null(domctl->u.cpu_policy.leaves) ) + domctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; + else if ( (ret = x86_cpuid_copy_to_buffer( +- d->arch.cpuid, ++ d->arch.cpu_policy, + domctl->u.cpu_policy.leaves, + &domctl->u.cpu_policy.nr_leaves)) ) + break; +@@ -1337,7 +1336,7 @@ long arch_do_domctl( + if ( guest_handle_is_null(domctl->u.cpu_policy.msrs) ) + domctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; + else if ( (ret = x86_msr_copy_to_buffer( +- d->arch.msr, ++ d->arch.cpu_policy, + domctl->u.cpu_policy.msrs, + &domctl->u.cpu_policy.nr_msrs)) ) + break; +diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h +index eef14bb4267e..9ba34bbf5ea1 100644 +--- a/xen/arch/x86/include/asm/cpu-policy.h ++++ b/xen/arch/x86/include/asm/cpu-policy.h +@@ -3,6 +3,7 @@ + #define X86_CPU_POLICY_H + + struct cpu_policy; ++struct domain; + + extern struct cpu_policy raw_cpu_policy; + extern struct cpu_policy host_cpu_policy; +@@ -11,4 +12,7 @@ extern struct cpu_policy pv_def_cpu_policy; + extern struct cpu_policy hvm_max_cpu_policy; + extern struct cpu_policy hvm_def_cpu_policy; + ++/* Allocate and initialise a CPU policy suitable for the domain. */ ++int init_domain_cpu_policy(struct domain *d); ++ + #endif /* X86_CPU_POLICY_H */ +diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h +index ea0586277331..7f81b998ce01 100644 +--- a/xen/arch/x86/include/asm/cpuid.h ++++ b/xen/arch/x86/include/asm/cpuid.h +@@ -49,9 +49,6 @@ extern struct cpuidmasks cpuidmask_defaults; + /* Check that all previously present features are still available. */ + bool recheck_cpu_features(unsigned int cpu); + +-/* Allocate and initialise a CPUID policy suitable for the domain. */ +-int init_domain_cpuid_policy(struct domain *d); +- + /* Apply dom0-specific tweaks to the CPUID policy. */ + void init_dom0_cpuid_policy(struct domain *d); + +diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h +index 4e59ca8c4e14..5293c0cde405 100644 +--- a/xen/arch/x86/include/asm/domain.h ++++ b/xen/arch/x86/include/asm/domain.h +@@ -384,9 +384,16 @@ struct arch_domain + */ + uint8_t x87_fip_width; + +- /* CPUID and MSR policy objects. */ +- struct cpuid_policy *cpuid; +- struct msr_policy *msr; ++ /* ++ * The domain's CPU Policy. "cpu_policy" is considered the canonical ++ * pointer, but the "cpuid" and "msr" aliases exist so the most ++ * appropriate one can be used for local code clarity. 
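++ * (For instance, MSR-handling code can use d->arch.msr while CPUID
++ * handling code uses d->arch.cpuid; all three names refer to the same
++ * single allocation.)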
++ */ ++ union { ++ struct cpu_policy *cpu_policy; ++ struct cpu_policy *cpuid; ++ struct cpu_policy *msr; ++ }; + + struct PITState vpit; + +diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h +index 8a4da50c500a..a174bc6e892b 100644 +--- a/xen/arch/x86/include/asm/msr.h ++++ b/xen/arch/x86/include/asm/msr.h +@@ -398,7 +398,6 @@ struct vcpu_msrs + }; + + void init_guest_msr_policy(void); +-int init_domain_msr_policy(struct domain *d); + int init_vcpu_msr_policy(struct vcpu *v); + + /* +diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c +index 649d93dc5444..5b3449db7a11 100644 +--- a/xen/arch/x86/mm/mem_sharing.c ++++ b/xen/arch/x86/mm/mem_sharing.c +@@ -1902,8 +1902,7 @@ static int fork(struct domain *cd, struct domain *d) + + domain_pause(d); + cd->max_pages = d->max_pages; +- *cd->arch.cpuid = *d->arch.cpuid; +- *cd->arch.msr = *d->arch.msr; ++ *cd->arch.cpu_policy = *d->arch.cpu_policy; + cd->vmtrace_size = d->vmtrace_size; + cd->parent = d; + } +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 01f95603e297..d3ca861454a7 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -122,50 +122,6 @@ void __init init_guest_msr_policy(void) + } + } + +-int init_domain_msr_policy(struct domain *d) +-{ +- struct msr_policy *mp = is_pv_domain(d) +- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) +- : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpu_policy : NULL); +- +- if ( !mp ) +- { +- ASSERT_UNREACHABLE(); +- return -EOPNOTSUPP; +- } +- +- mp = xmemdup(mp); +- if ( !mp ) +- return -ENOMEM; +- +- /* See comment in ctxt_switch_levelling() */ +- if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) ) +- mp->platform_info.cpuid_faulting = false; +- +- /* +- * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, +- * so dom0 can turn off workarounds as appropriate. Temporary, until the +- * domain policy logic gains a better understanding of MSRs. +- */ +- if ( is_hardware_domain(d) && cpu_has_arch_caps ) +- { +- uint64_t val; +- +- rdmsrl(MSR_ARCH_CAPABILITIES, val); +- +- mp->arch_caps.raw = val & +- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | +- ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | +- ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | +- ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | +- ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); +- } +- +- d->arch.msr = mp; +- +- return 0; +-} +- + int init_vcpu_msr_policy(struct vcpu *v) + { + struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs); +-- +2.39.2 + diff --git a/0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch b/0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch new file mode 100644 index 00000000..9f85071a --- /dev/null +++ b/0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch @@ -0,0 +1,367 @@ +From c10387a42b51e6ec8239203d6449aa5f546dc324 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 29 Mar 2023 12:37:33 +0100 +Subject: [PATCH 09/35] x86: Merge xc_cpu_policy's cpuid and msr objects + +Right now, they're the same underlying type, containing disjoint information. + +Use a single object instead. Also take the opportunity to rename 'entries' to +'msrs' which is more descriptive, and more in line with nr_msrs being the +count of MSR entries in the API. + +test-tsx uses xg_private.h to access the internals of xc_cpu_policy, so needs +updating at the same time. 
Take the opportunity to improve the code clarity +by passing a cpu_policy rather than an xc_cpu_policy into some functions. + +No practical change. This undoes the transient doubling of storage space from +earlier patches. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit c9985233ca663fea20fc8807cf509d2e3fef0dca) +--- + tools/libs/guest/xg_cpuid_x86.c | 36 ++++++++--------- + tools/libs/guest/xg_private.h | 5 +-- + tools/tests/tsx/test-tsx.c | 71 +++++++++++++++------------------ + 3 files changed, 53 insertions(+), 59 deletions(-) + +diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c +index 76d9522c3a7c..5133d59d8a1a 100644 +--- a/tools/libs/guest/xg_cpuid_x86.c ++++ b/tools/libs/guest/xg_cpuid_x86.c +@@ -431,7 +431,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, + xc_dominfo_t di; + unsigned int i, nr_leaves, nr_msrs; + xen_cpuid_leaf_t *leaves = NULL; +- struct cpuid_policy *p = NULL; ++ struct cpu_policy *p = NULL; + uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; + uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {}; + uint32_t len = ARRAY_SIZE(host_featureset); +@@ -692,7 +692,7 @@ static int deserialize_policy(xc_interface *xch, xc_cpu_policy_t *policy, + uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; + int rc; + +- rc = x86_cpuid_copy_from_buffer(&policy->cpuid, policy->leaves, ++ rc = x86_cpuid_copy_from_buffer(&policy->policy, policy->leaves, + nr_leaves, &err_leaf, &err_subleaf); + if ( rc ) + { +@@ -702,7 +702,7 @@ static int deserialize_policy(xc_interface *xch, xc_cpu_policy_t *policy, + return rc; + } + +- rc = x86_msr_copy_from_buffer(&policy->msr, policy->entries, ++ rc = x86_msr_copy_from_buffer(&policy->policy, policy->msrs, + nr_entries, &err_msr); + if ( rc ) + { +@@ -719,18 +719,18 @@ int xc_cpu_policy_get_system(xc_interface *xch, unsigned int policy_idx, + xc_cpu_policy_t *policy) + { + unsigned int nr_leaves = ARRAY_SIZE(policy->leaves); +- unsigned int nr_entries = ARRAY_SIZE(policy->entries); ++ unsigned int nr_msrs = ARRAY_SIZE(policy->msrs); + int rc; + + rc = get_system_cpu_policy(xch, policy_idx, &nr_leaves, policy->leaves, +- &nr_entries, policy->entries); ++ &nr_msrs, policy->msrs); + if ( rc ) + { + PERROR("Failed to obtain %u policy", policy_idx); + return rc; + } + +- rc = deserialize_policy(xch, policy, nr_leaves, nr_entries); ++ rc = deserialize_policy(xch, policy, nr_leaves, nr_msrs); + if ( rc ) + { + errno = -rc; +@@ -744,18 +744,18 @@ int xc_cpu_policy_get_domain(xc_interface *xch, uint32_t domid, + xc_cpu_policy_t *policy) + { + unsigned int nr_leaves = ARRAY_SIZE(policy->leaves); +- unsigned int nr_entries = ARRAY_SIZE(policy->entries); ++ unsigned int nr_msrs = ARRAY_SIZE(policy->msrs); + int rc; + + rc = get_domain_cpu_policy(xch, domid, &nr_leaves, policy->leaves, +- &nr_entries, policy->entries); ++ &nr_msrs, policy->msrs); + if ( rc ) + { + PERROR("Failed to obtain domain %u policy", domid); + return rc; + } + +- rc = deserialize_policy(xch, policy, nr_leaves, nr_entries); ++ rc = deserialize_policy(xch, policy, nr_leaves, nr_msrs); + if ( rc ) + { + errno = -rc; +@@ -770,16 +770,16 @@ int xc_cpu_policy_set_domain(xc_interface *xch, uint32_t domid, + { + uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; + unsigned int nr_leaves = ARRAY_SIZE(policy->leaves); +- unsigned int nr_entries = ARRAY_SIZE(policy->entries); ++ unsigned int nr_msrs = ARRAY_SIZE(policy->msrs); + int rc; + + rc = xc_cpu_policy_serialise(xch, policy, 
policy->leaves, &nr_leaves, +- policy->entries, &nr_entries); ++ policy->msrs, &nr_msrs); + if ( rc ) + return rc; + + rc = xc_set_domain_cpu_policy(xch, domid, nr_leaves, policy->leaves, +- nr_entries, policy->entries, ++ nr_msrs, policy->msrs, + &err_leaf, &err_subleaf, &err_msr); + if ( rc ) + { +@@ -802,7 +802,7 @@ int xc_cpu_policy_serialise(xc_interface *xch, const xc_cpu_policy_t *p, + + if ( leaves ) + { +- rc = x86_cpuid_copy_to_buffer(&p->cpuid, leaves, nr_leaves); ++ rc = x86_cpuid_copy_to_buffer(&p->policy, leaves, nr_leaves); + if ( rc ) + { + ERROR("Failed to serialize CPUID policy"); +@@ -813,7 +813,7 @@ int xc_cpu_policy_serialise(xc_interface *xch, const xc_cpu_policy_t *p, + + if ( msrs ) + { +- rc = x86_msr_copy_to_buffer(&p->msr, msrs, nr_msrs); ++ rc = x86_msr_copy_to_buffer(&p->policy, msrs, nr_msrs); + if ( rc ) + { + ERROR("Failed to serialize MSR policy"); +@@ -831,7 +831,7 @@ int xc_cpu_policy_update_cpuid(xc_interface *xch, xc_cpu_policy_t *policy, + uint32_t nr) + { + unsigned int err_leaf = -1, err_subleaf = -1; +- int rc = x86_cpuid_copy_from_buffer(&policy->cpuid, leaves, nr, ++ int rc = x86_cpuid_copy_from_buffer(&policy->policy, leaves, nr, + &err_leaf, &err_subleaf); + + if ( rc ) +@@ -850,7 +850,7 @@ int xc_cpu_policy_update_msrs(xc_interface *xch, xc_cpu_policy_t *policy, + const xen_msr_entry_t *msrs, uint32_t nr) + { + unsigned int err_msr = -1; +- int rc = x86_msr_copy_from_buffer(&policy->msr, msrs, nr, &err_msr); ++ int rc = x86_msr_copy_from_buffer(&policy->policy, msrs, nr, &err_msr); + + if ( rc ) + { +@@ -868,8 +868,8 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host, + xc_cpu_policy_t *guest) + { + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; +- struct old_cpu_policy h = { &host->cpuid, &host->msr }; +- struct old_cpu_policy g = { &guest->cpuid, &guest->msr }; ++ struct old_cpu_policy h = { &host->policy, &host->policy }; ++ struct old_cpu_policy g = { &guest->policy, &guest->policy }; + int rc = x86_cpu_policies_are_compatible(&h, &g, &err); + + if ( !rc ) +diff --git a/tools/libs/guest/xg_private.h b/tools/libs/guest/xg_private.h +index 09e24f122760..e729a8106c3e 100644 +--- a/tools/libs/guest/xg_private.h ++++ b/tools/libs/guest/xg_private.h +@@ -173,10 +173,9 @@ int pin_table(xc_interface *xch, unsigned int type, unsigned long mfn, + #include + + struct xc_cpu_policy { +- struct cpuid_policy cpuid; +- struct msr_policy msr; ++ struct cpu_policy policy; + xen_cpuid_leaf_t leaves[CPUID_MAX_SERIALISED_LEAVES]; +- xen_msr_entry_t entries[MSR_MAX_SERIALISED_ENTRIES]; ++ xen_msr_entry_t msrs[MSR_MAX_SERIALISED_ENTRIES]; + }; + #endif /* x86 */ + +diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c +index f11e8c54e0de..0f4ea5f9c462 100644 +--- a/tools/tests/tsx/test-tsx.c ++++ b/tools/tests/tsx/test-tsx.c +@@ -151,15 +151,15 @@ static void test_tsx_msrs(void) + { + printf("Testing MSR_TSX_FORCE_ABORT consistency\n"); + test_tsx_msr_consistency( +- MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort); ++ MSR_TSX_FORCE_ABORT, host.policy.feat.tsx_force_abort); + + printf("Testing MSR_TSX_CTRL consistency\n"); + test_tsx_msr_consistency( +- MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl); ++ MSR_TSX_CTRL, host.policy.arch_caps.tsx_ctrl); + + printf("Testing MSR_MCU_OPT_CTRL consistency\n"); + test_tsx_msr_consistency( +- MSR_MCU_OPT_CTRL, host.cpuid.feat.srbds_ctrl); ++ MSR_MCU_OPT_CTRL, host.policy.feat.srbds_ctrl); + } + + /* +@@ -281,7 +281,7 @@ static void test_rtm_behaviour(void) + else + return 
fail(" Got unexpected behaviour %d\n", rtm_behaviour); + +- if ( host.cpuid.feat.rtm ) ++ if ( host.policy.feat.rtm ) + { + if ( rtm_behaviour == RTM_UD ) + fail(" Host reports RTM, but appears unavailable\n"); +@@ -293,57 +293,52 @@ static void test_rtm_behaviour(void) + } + } + +-static void dump_tsx_details(const struct xc_cpu_policy *p, const char *pref) ++static void dump_tsx_details(const struct cpu_policy *p, const char *pref) + { + printf(" %s RTM %u, HLE %u, TSX_FORCE_ABORT %u, RTM_ALWAYS_ABORT %u, TSX_CTRL %u\n", + pref, +- p->cpuid.feat.rtm, +- p->cpuid.feat.hle, +- p->cpuid.feat.tsx_force_abort, +- p->cpuid.feat.rtm_always_abort, +- p->msr.arch_caps.tsx_ctrl); ++ p->feat.rtm, ++ p->feat.hle, ++ p->feat.tsx_force_abort, ++ p->feat.rtm_always_abort, ++ p->arch_caps.tsx_ctrl); + } + + /* Sanity test various invariants we expect in the default/max policies. */ +-static void test_guest_policies(const struct xc_cpu_policy *max, +- const struct xc_cpu_policy *def) ++static void test_guest_policies(const struct cpu_policy *max, ++ const struct cpu_policy *def) + { +- const struct cpuid_policy *cm = &max->cpuid; +- const struct cpuid_policy *cd = &def->cpuid; +- const struct msr_policy *mm = &max->msr; +- const struct msr_policy *md = &def->msr; +- + dump_tsx_details(max, "Max:"); + dump_tsx_details(def, "Def:"); + +- if ( ((cm->feat.raw[0].d | cd->feat.raw[0].d) & ++ if ( ((max->feat.raw[0].d | def->feat.raw[0].d) & + (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) | + bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT) | + bitmaskof(X86_FEATURE_SRBDS_CTRL))) || +- ((mm->arch_caps.raw | md->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) ) ++ ((max->arch_caps.raw | def->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) ) + fail(" Xen-only TSX controls offered to guest\n"); + + switch ( rtm_behaviour ) + { + case RTM_UD: +- if ( (cm->feat.raw[0].b | cd->feat.raw[0].b) & ++ if ( (max->feat.raw[0].b | def->feat.raw[0].b) & + (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) + fail(" HLE/RTM offered to guests despite not being available\n"); + break; + + case RTM_ABORT: +- if ( cd->feat.raw[0].b & ++ if ( def->feat.raw[0].b & + (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) + fail(" HLE/RTM offered to guests by default despite not being usable\n"); + break; + + case RTM_OK: +- if ( !cm->feat.rtm || !cd->feat.rtm ) ++ if ( !max->feat.rtm || !def->feat.rtm ) + fail(" RTM not offered to guests despite being available\n"); + break; + } + +- if ( cd->feat.hle ) ++ if ( def->feat.hle ) + fail(" Fail: HLE offered in default policy\n"); + } + +@@ -352,13 +347,13 @@ static void test_def_max_policies(void) + if ( xen_has_pv ) + { + printf("Testing PV default/max policies\n"); +- test_guest_policies(&pv_max, &pv_default); ++ test_guest_policies(&pv_max.policy, &pv_default.policy); + } + + if ( xen_has_hvm ) + { + printf("Testing HVM default/max policies\n"); +- test_guest_policies(&hvm_max, &hvm_default); ++ test_guest_policies(&hvm_max.policy, &hvm_default.policy); + } + } + +@@ -382,23 +377,23 @@ static void test_guest(struct xen_domctl_createdomain *c) + goto out; + } + +- dump_tsx_details(&guest_policy, "Cur:"); ++ dump_tsx_details(&guest_policy.policy, "Cur:"); + + /* + * Check defaults given to the guest. 
+ */ +- if ( guest_policy.cpuid.feat.rtm != (rtm_behaviour == RTM_OK) ) ++ if ( guest_policy.policy.feat.rtm != (rtm_behaviour == RTM_OK) ) + fail(" RTM %u in guest, despite rtm behaviour\n", +- guest_policy.cpuid.feat.rtm); ++ guest_policy.policy.feat.rtm); + +- if ( guest_policy.cpuid.feat.hle || +- guest_policy.cpuid.feat.tsx_force_abort || +- guest_policy.cpuid.feat.rtm_always_abort || +- guest_policy.cpuid.feat.srbds_ctrl || +- guest_policy.msr.arch_caps.tsx_ctrl ) ++ if ( guest_policy.policy.feat.hle || ++ guest_policy.policy.feat.tsx_force_abort || ++ guest_policy.policy.feat.rtm_always_abort || ++ guest_policy.policy.feat.srbds_ctrl || ++ guest_policy.policy.arch_caps.tsx_ctrl ) + fail(" Unexpected features advertised\n"); + +- if ( host.cpuid.feat.rtm ) ++ if ( host.policy.feat.rtm ) + { + unsigned int _7b0; + +@@ -406,7 +401,7 @@ static void test_guest(struct xen_domctl_createdomain *c) + * If host RTM is available, all combinations of guest flags should be + * possible. Flip both HLE/RTM to check non-default settings. + */ +- _7b0 = (guest_policy.cpuid.feat.raw[0].b ^= ++ _7b0 = (guest_policy.policy.feat.raw[0].b ^= + (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM))); + + /* Set the new policy. */ +@@ -427,12 +422,12 @@ static void test_guest(struct xen_domctl_createdomain *c) + goto out; + } + +- dump_tsx_details(&guest_policy, "Cur:"); ++ dump_tsx_details(&guest_policy.policy, "Cur:"); + +- if ( guest_policy.cpuid.feat.raw[0].b != _7b0 ) ++ if ( guest_policy.policy.feat.raw[0].b != _7b0 ) + { + fail(" Expected CPUID.7[1].b 0x%08x differs from actual 0x%08x\n", +- _7b0, guest_policy.cpuid.feat.raw[0].b); ++ _7b0, guest_policy.policy.feat.raw[0].b); + goto out; + } + } +-- +2.39.2 + diff --git a/0330-x86-Drop-struct-old_cpu_policy.patch b/0330-x86-Drop-struct-old_cpu_policy.patch new file mode 100644 index 00000000..f9efb336 --- /dev/null +++ b/0330-x86-Drop-struct-old_cpu_policy.patch @@ -0,0 +1,292 @@ +From ddae2880f7efbaff9126cf7169d25c1bac1c020a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 29 Mar 2023 12:01:33 +0100 +Subject: [PATCH 10/35] x86: Drop struct old_cpu_policy + +With all the complicated callers of x86_cpu_policies_are_compatible() updated +to use a single cpu_policy object, we can drop the final user of struct +old_cpu_policy. + +Update x86_cpu_policies_are_compatible() to take (new) cpu_policy pointers, +reducing the amount of internal pointer chasing, and update all callers to +pass their cpu_policy objects directly. 
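+
+As a sketch of the resulting call shape (x86_cpu_policies_are_compatible(),
+struct cpu_policy_errors and INIT_CPU_POLICY_ERRORS are the names used by
+this patch; the wrapper function and the include path are illustrative
+assumptions):
+
+    #include <xen/lib/x86/cpu-policy.h>
+
+    /* Returns 0 when 'guest' is compatible with 'host'; otherwise the
+     * first conflict is recorded in err. */
+    static int check_policies(const struct cpu_policy *host,
+                              const struct cpu_policy *guest)
+    {
+        struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS;
+        int rc = x86_cpu_policies_are_compatible(host, guest, &err);
+
+        if ( rc )
+            /* err.leaf / err.subleaf / err.msr locate the conflict. */
+            return rc;
+
+        return 0;
+    }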
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 66c5c99656314451ff9520f91cff5bb39fee9fed) +--- + tools/libs/guest/xg_cpuid_x86.c | 4 +- + tools/tests/cpu-policy/test-cpu-policy.c | 50 +++++++----------------- + xen/arch/x86/domctl.c | 7 +--- + xen/include/xen/lib/x86/cpu-policy.h | 12 ++---- + xen/lib/x86/policy.c | 12 +++--- + 5 files changed, 27 insertions(+), 58 deletions(-) + +diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c +index 5133d59d8a1a..3be9c5e85587 100644 +--- a/tools/libs/guest/xg_cpuid_x86.c ++++ b/tools/libs/guest/xg_cpuid_x86.c +@@ -868,9 +868,7 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host, + xc_cpu_policy_t *guest) + { + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; +- struct old_cpu_policy h = { &host->policy, &host->policy }; +- struct old_cpu_policy g = { &guest->policy, &guest->policy }; +- int rc = x86_cpu_policies_are_compatible(&h, &g, &err); ++ int rc = x86_cpu_policies_are_compatible(&host->policy, &guest->policy, &err); + + if ( !rc ) + return true; +diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c +index 8a223fddb3da..4f3d09f1b780 100644 +--- a/tools/tests/cpu-policy/test-cpu-policy.c ++++ b/tools/tests/cpu-policy/test-cpu-policy.c +@@ -98,7 +98,7 @@ static bool msrs_are_sorted(const xen_msr_entry_t *entries, unsigned int nr) + + static void test_cpuid_current(void) + { +- struct cpuid_policy p; ++ struct cpu_policy p; + xen_cpuid_leaf_t leaves[CPUID_MAX_SERIALISED_LEAVES]; + unsigned int nr = ARRAY_SIZE(leaves); + int rc; +@@ -118,7 +118,7 @@ static void test_cpuid_current(void) + static void test_cpuid_serialise_success(void) + { + static const struct test { +- struct cpuid_policy p; ++ struct cpu_policy p; + const char *name; + unsigned int nr_leaves; + } tests[] = { +@@ -242,7 +242,7 @@ static void test_cpuid_serialise_success(void) + static void test_msr_serialise_success(void) + { + static const struct test { +- struct msr_policy p; ++ struct cpu_policy p; + const char *name; + unsigned int nr_msrs; + } tests[] = { +@@ -430,7 +430,7 @@ static void test_cpuid_out_of_range_clearing(void) + static const struct test { + const char *name; + unsigned int nr_markers; +- struct cpuid_policy p; ++ struct cpu_policy p; + } tests[] = { + { + .name = "basic", +@@ -550,7 +550,7 @@ static void test_cpuid_out_of_range_clearing(void) + for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) + { + const struct test *t = &tests[i]; +- struct cpuid_policy *p = memdup(&t->p); ++ struct cpu_policy *p = memdup(&t->p); + void *ptr; + unsigned int nr_markers; + +@@ -574,23 +574,20 @@ static void test_is_compatible_success(void) + { + static struct test { + const char *name; +- struct cpuid_policy host_cpuid; +- struct cpuid_policy guest_cpuid; +- struct msr_policy host_msr; +- struct msr_policy guest_msr; ++ struct cpu_policy host, guest; + } tests[] = { + { + .name = "Host CPUID faulting, Guest not", +- .host_msr = { ++ .host = { + .platform_info.cpuid_faulting = true, + }, + }, + { + .name = "Host CPUID faulting, Guest wanted", +- .host_msr = { ++ .host = { + .platform_info.cpuid_faulting = true, + }, +- .guest_msr = { ++ .guest = { + .platform_info.cpuid_faulting = true, + }, + }, +@@ -602,15 +599,8 @@ static void test_is_compatible_success(void) + for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) + { + struct test *t = &tests[i]; +- struct old_cpu_policy sys = { +- &t->host_cpuid, +- &t->host_msr, +- }, new = { +- 
&t->guest_cpuid, +- &t->guest_msr, +- }; + struct cpu_policy_errors e; +- int res = x86_cpu_policies_are_compatible(&sys, &new, &e); ++ int res = x86_cpu_policies_are_compatible(&t->host, &t->guest, &e); + + /* Check the expected error output. */ + if ( res != 0 || memcmp(&no_errors, &e, sizeof(no_errors)) ) +@@ -624,25 +614,22 @@ static void test_is_compatible_failure(void) + { + static struct test { + const char *name; +- struct cpuid_policy host_cpuid; +- struct cpuid_policy guest_cpuid; +- struct msr_policy host_msr; +- struct msr_policy guest_msr; ++ struct cpu_policy host, guest; + struct cpu_policy_errors e; + } tests[] = { + { + .name = "Host basic.max_leaf out of range", +- .guest_cpuid.basic.max_leaf = 1, ++ .guest.basic.max_leaf = 1, + .e = { 0, -1, -1 }, + }, + { + .name = "Host extd.max_leaf out of range", +- .guest_cpuid.extd.max_leaf = 1, ++ .guest.extd.max_leaf = 1, + .e = { 0x80000000, -1, -1 }, + }, + { + .name = "Host no CPUID faulting, Guest wanted", +- .guest_msr = { ++ .guest = { + .platform_info.cpuid_faulting = true, + }, + .e = { -1, -1, 0xce }, +@@ -654,15 +641,8 @@ static void test_is_compatible_failure(void) + for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) + { + struct test *t = &tests[i]; +- struct old_cpu_policy sys = { +- &t->host_cpuid, +- &t->host_msr, +- }, new = { +- &t->guest_cpuid, +- &t->guest_msr, +- }; + struct cpu_policy_errors e; +- int res = x86_cpu_policies_are_compatible(&sys, &new, &e); ++ int res = x86_cpu_policies_are_compatible(&t->host, &t->guest, &e); + + /* Check the expected error output. */ + if ( res == 0 || memcmp(&t->e, &e, sizeof(t->e)) ) +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 857d0abe323e..6d15d0c29c4e 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -42,10 +42,9 @@ static int update_domain_cpu_policy(struct domain *d, + xen_domctl_cpu_policy_t *xdpc) + { + struct cpu_policy *new; +- struct cpu_policy *sys = is_pv_domain(d) ++ const struct cpu_policy *sys = is_pv_domain(d) + ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) + : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); +- struct old_cpu_policy old_sys = { sys, sys }, old_new; + struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; + int ret = -ENOMEM; + +@@ -59,8 +58,6 @@ static int update_domain_cpu_policy(struct domain *d, + if ( !(new = xmemdup(d->arch.cpu_policy)) ) + goto out; + +- old_new = (struct old_cpu_policy){ new, new }; +- + /* Merge the toolstack provided data. */ + if ( (ret = x86_cpuid_copy_from_buffer( + new, xdpc->leaves, xdpc->nr_leaves, +@@ -73,7 +70,7 @@ static int update_domain_cpu_policy(struct domain *d, + x86_cpuid_policy_clear_out_of_range_leaves(new); + + /* Audit the combined dataset. 
*/ +- ret = x86_cpu_policies_are_compatible(&old_sys, &old_new, &err); ++ ret = x86_cpu_policies_are_compatible(sys, new, &err); + if ( ret ) + goto out; + +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index 53fffca55211..8b27a0725b8e 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -379,12 +379,6 @@ struct cpu_policy + #define cpuid_policy cpu_policy + #define msr_policy cpu_policy + +-struct old_cpu_policy +-{ +- struct cpuid_policy *cpuid; +- struct msr_policy *msr; +-}; +- + struct cpu_policy_errors + { + uint32_t leaf, subleaf; +@@ -559,7 +553,7 @@ int x86_msr_copy_from_buffer(struct msr_policy *policy, + const msr_entry_buffer_t msrs, uint32_t nr_entries, + uint32_t *err_msr); + +-/* ++/** + * Calculate whether two policies are compatible. + * + * i.e. Can a VM configured with @guest run on a CPU supporting @host. +@@ -573,8 +567,8 @@ int x86_msr_copy_from_buffer(struct msr_policy *policy, + * incompatibility is detected, the optional err pointer may identify the + * problematic leaf/subleaf and/or MSR. + */ +-int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, +- const struct old_cpu_policy *guest, ++int x86_cpu_policies_are_compatible(const struct cpu_policy *host, ++ const struct cpu_policy *guest, + struct cpu_policy_errors *err); + + #endif /* !XEN_LIB_X86_POLICIES_H */ +diff --git a/xen/lib/x86/policy.c b/xen/lib/x86/policy.c +index 2975711d7c6c..a9c60000af9d 100644 +--- a/xen/lib/x86/policy.c ++++ b/xen/lib/x86/policy.c +@@ -2,8 +2,8 @@ + + #include + +-int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, +- const struct old_cpu_policy *guest, ++int x86_cpu_policies_are_compatible(const struct cpu_policy *host, ++ const struct cpu_policy *guest, + struct cpu_policy_errors *err) + { + struct cpu_policy_errors e = INIT_CPU_POLICY_ERRORS; +@@ -15,18 +15,18 @@ int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, + #define FAIL_MSR(m) \ + do { e.msr = (m); goto out; } while ( 0 ) + +- if ( guest->cpuid->basic.max_leaf > host->cpuid->basic.max_leaf ) ++ if ( guest->basic.max_leaf > host->basic.max_leaf ) + FAIL_CPUID(0, NA); + +- if ( guest->cpuid->feat.max_subleaf > host->cpuid->feat.max_subleaf ) ++ if ( guest->feat.max_subleaf > host->feat.max_subleaf ) + FAIL_CPUID(7, 0); + +- if ( guest->cpuid->extd.max_leaf > host->cpuid->extd.max_leaf ) ++ if ( guest->extd.max_leaf > host->extd.max_leaf ) + FAIL_CPUID(0x80000000, NA); + + /* TODO: Audit more CPUID data. */ + +- if ( ~host->msr->platform_info.raw & guest->msr->platform_info.raw ) ++ if ( ~host->platform_info.raw & guest->platform_info.raw ) + FAIL_MSR(MSR_INTEL_PLATFORM_INFO); + + #undef FAIL_MSR +-- +2.39.2 + diff --git a/0331-x86-Out-of-inline-the-policy-featureset-convertors.patch b/0331-x86-Out-of-inline-the-policy-featureset-convertors.patch new file mode 100644 index 00000000..3e521ec6 --- /dev/null +++ b/0331-x86-Out-of-inline-the-policy-featureset-convertors.patch @@ -0,0 +1,293 @@ +From fccc0212b28fc9a16dc469391a65ef9bd585d00b Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 30 Mar 2023 18:21:01 +0100 +Subject: [PATCH 11/35] x86: Out-of-inline the policy<->featureset convertors + +These are already getting over-large for being inline functions, and are only +going to grow further over time. 
Out of line them, yielding the following net +delta from bloat-o-meter: + + add/remove: 2/0 grow/shrink: 0/4 up/down: 276/-1877 (-1601) + +Switch to the newer cpu_policy terminology while doing so. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 1027df4c00823f8b448e3a6861cc7b6ce61ba4e4) +--- + tools/libs/guest/xg_cpuid_x86.c | 2 +- + xen/arch/x86/cpuid.c | 28 +++++++-------- + xen/arch/x86/sysctl.c | 2 +- + xen/include/xen/lib/x86/cpu-policy.h | 52 ++++++---------------------- + xen/lib/x86/cpuid.c | 42 ++++++++++++++++++++++ + 5 files changed, 68 insertions(+), 58 deletions(-) + +diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c +index 3be9c5e85587..b38e3a9de350 100644 +--- a/tools/libs/guest/xg_cpuid_x86.c ++++ b/tools/libs/guest/xg_cpuid_x86.c +@@ -565,7 +565,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, + } + } + +- cpuid_featureset_to_policy(feat, p); ++ x86_cpu_featureset_to_policy(feat, p); + } + else + { +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index e074befb721d..ef96af738af0 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -368,7 +368,7 @@ static void __init calculate_host_policy(void) + p->extd.max_leaf = 0x80000000 | min_t(uint32_t, max_extd_leaf & 0xffff, + ARRAY_SIZE(p->extd.raw) - 1); + +- cpuid_featureset_to_policy(boot_cpu_data.x86_capability, p); ++ x86_cpu_featureset_to_policy(boot_cpu_data.x86_capability, p); + recalculate_xstate(p); + recalculate_misc(p); + +@@ -450,7 +450,7 @@ static void __init calculate_pv_max_policy(void) + unsigned int i; + + *p = host_cpu_policy; +- cpuid_policy_to_featureset(p, pv_featureset); ++ x86_cpu_policy_to_featureset(p, pv_featureset); + + for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) + pv_featureset[i] &= pv_max_featuremask[i]; +@@ -468,7 +468,7 @@ static void __init calculate_pv_max_policy(void) + guest_common_feature_adjustments(pv_featureset); + + sanitise_featureset(pv_featureset); +- cpuid_featureset_to_policy(pv_featureset, p); ++ x86_cpu_featureset_to_policy(pv_featureset, p); + recalculate_xstate(p); + + p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ +@@ -481,7 +481,7 @@ static void __init calculate_pv_def_policy(void) + unsigned int i; + + *p = pv_max_cpu_policy; +- cpuid_policy_to_featureset(p, pv_featureset); ++ x86_cpu_policy_to_featureset(p, pv_featureset); + + for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) + pv_featureset[i] &= pv_def_featuremask[i]; +@@ -490,7 +490,7 @@ static void __init calculate_pv_def_policy(void) + guest_common_default_feature_adjustments(pv_featureset); + + sanitise_featureset(pv_featureset); +- cpuid_featureset_to_policy(pv_featureset, p); ++ x86_cpu_featureset_to_policy(pv_featureset, p); + recalculate_xstate(p); + } + +@@ -502,7 +502,7 @@ static void __init calculate_hvm_max_policy(void) + const uint32_t *hvm_featuremask; + + *p = host_cpu_policy; +- cpuid_policy_to_featureset(p, hvm_featureset); ++ x86_cpu_policy_to_featureset(p, hvm_featureset); + + hvm_featuremask = hvm_hap_supported() ? 
+ hvm_hap_max_featuremask : hvm_shadow_max_featuremask; +@@ -572,7 +572,7 @@ static void __init calculate_hvm_max_policy(void) + guest_common_feature_adjustments(hvm_featureset); + + sanitise_featureset(hvm_featureset); +- cpuid_featureset_to_policy(hvm_featureset, p); ++ x86_cpu_featureset_to_policy(hvm_featureset, p); + recalculate_xstate(p); + } + +@@ -584,7 +584,7 @@ static void __init calculate_hvm_def_policy(void) + const uint32_t *hvm_featuremask; + + *p = hvm_max_cpu_policy; +- cpuid_policy_to_featureset(p, hvm_featureset); ++ x86_cpu_policy_to_featureset(p, hvm_featureset); + + hvm_featuremask = hvm_hap_supported() ? + hvm_hap_def_featuremask : hvm_shadow_def_featuremask; +@@ -603,7 +603,7 @@ static void __init calculate_hvm_def_policy(void) + __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); + + sanitise_featureset(hvm_featureset); +- cpuid_featureset_to_policy(hvm_featureset, p); ++ x86_cpu_featureset_to_policy(hvm_featureset, p); + recalculate_xstate(p); + } + +@@ -673,8 +673,8 @@ void recalculate_cpuid_policy(struct domain *d) + ? CPUID_GUEST_NR_EXTD_AMD + : CPUID_GUEST_NR_EXTD_INTEL) - 1); + +- cpuid_policy_to_featureset(p, fs); +- cpuid_policy_to_featureset(max, max_fs); ++ x86_cpu_policy_to_featureset(p, fs); ++ x86_cpu_policy_to_featureset(max, max_fs); + + if ( is_hvm_domain(d) ) + { +@@ -731,7 +731,7 @@ void recalculate_cpuid_policy(struct domain *d) + (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | + cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); + +- cpuid_featureset_to_policy(fs, p); ++ x86_cpu_featureset_to_policy(fs, p); + + /* Pass host cacheline size through to guests. */ + p->basic.clflush_size = max->basic.clflush_size; +@@ -797,7 +797,7 @@ void __init init_dom0_cpuid_policy(struct domain *d) + uint32_t fs[FSCAPINTS]; + unsigned int i; + +- cpuid_policy_to_featureset(p, fs); ++ x86_cpu_policy_to_featureset(p, fs); + + for ( i = 0; i < ARRAY_SIZE(fs); ++i ) + { +@@ -805,7 +805,7 @@ void __init init_dom0_cpuid_policy(struct domain *d) + fs[i] &= ~dom0_disable_feat[i]; + } + +- cpuid_featureset_to_policy(fs, p); ++ x86_cpu_featureset_to_policy(fs, p); + + recalculate_cpuid_policy(d); + } +diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c +index c68242e5bcaf..42dc360ad6e9 100644 +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c +@@ -339,7 +339,7 @@ long arch_do_sysctl( + ret = -EINVAL; + + if ( !ret ) +- cpuid_policy_to_featureset(p, featureset); ++ x86_cpu_policy_to_featureset(p, featureset); + + /* Copy the requested featureset into place. */ + if ( !ret && copy_to_guest(sysctl->u.cpu_featureset.features, +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index 8b27a0725b8e..57b4633c861e 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -387,49 +387,17 @@ struct cpu_policy_errors + + #define INIT_CPU_POLICY_ERRORS { -1, -1, -1 } + +-/* Fill in a featureset bitmap from a CPUID policy. 
*/ +-static inline void cpuid_policy_to_featureset( +- const struct cpuid_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) +-{ +- fs[FEATURESET_1d] = p->basic._1d; +- fs[FEATURESET_1c] = p->basic._1c; +- fs[FEATURESET_e1d] = p->extd.e1d; +- fs[FEATURESET_e1c] = p->extd.e1c; +- fs[FEATURESET_Da1] = p->xstate.Da1; +- fs[FEATURESET_7b0] = p->feat._7b0; +- fs[FEATURESET_7c0] = p->feat._7c0; +- fs[FEATURESET_e7d] = p->extd.e7d; +- fs[FEATURESET_e8b] = p->extd.e8b; +- fs[FEATURESET_7d0] = p->feat._7d0; +- fs[FEATURESET_7a1] = p->feat._7a1; +- fs[FEATURESET_e21a] = p->extd.e21a; +- fs[FEATURESET_7b1] = p->feat._7b1; +- fs[FEATURESET_7d2] = p->feat._7d2; +- fs[FEATURESET_7c1] = p->feat._7c1; +- fs[FEATURESET_7d1] = p->feat._7d1; +-} ++/** ++ * Copy the featureset words out of a cpu_policy object. ++ */ ++void x86_cpu_policy_to_featureset(const struct cpu_policy *p, ++ uint32_t fs[FEATURESET_NR_ENTRIES]); + +-/* Fill in a CPUID policy from a featureset bitmap. */ +-static inline void cpuid_featureset_to_policy( +- const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpuid_policy *p) +-{ +- p->basic._1d = fs[FEATURESET_1d]; +- p->basic._1c = fs[FEATURESET_1c]; +- p->extd.e1d = fs[FEATURESET_e1d]; +- p->extd.e1c = fs[FEATURESET_e1c]; +- p->xstate.Da1 = fs[FEATURESET_Da1]; +- p->feat._7b0 = fs[FEATURESET_7b0]; +- p->feat._7c0 = fs[FEATURESET_7c0]; +- p->extd.e7d = fs[FEATURESET_e7d]; +- p->extd.e8b = fs[FEATURESET_e8b]; +- p->feat._7d0 = fs[FEATURESET_7d0]; +- p->feat._7a1 = fs[FEATURESET_7a1]; +- p->extd.e21a = fs[FEATURESET_e21a]; +- p->feat._7b1 = fs[FEATURESET_7b1]; +- p->feat._7d2 = fs[FEATURESET_7d2]; +- p->feat._7c1 = fs[FEATURESET_7c1]; +- p->feat._7d1 = fs[FEATURESET_7d1]; +-} ++/** ++ * Copy the featureset words back into a cpu_policy object. ++ */ ++void x86_cpu_featureset_to_policy(const uint32_t fs[FEATURESET_NR_ENTRIES], ++ struct cpu_policy *p); + + static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) + { +diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c +index e81f76c779c0..734e90823a63 100644 +--- a/xen/lib/x86/cpuid.c ++++ b/xen/lib/x86/cpuid.c +@@ -60,6 +60,48 @@ const char *x86_cpuid_vendor_to_str(unsigned int vendor) + } + } + ++void x86_cpu_policy_to_featureset( ++ const struct cpu_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) ++{ ++ fs[FEATURESET_1d] = p->basic._1d; ++ fs[FEATURESET_1c] = p->basic._1c; ++ fs[FEATURESET_e1d] = p->extd.e1d; ++ fs[FEATURESET_e1c] = p->extd.e1c; ++ fs[FEATURESET_Da1] = p->xstate.Da1; ++ fs[FEATURESET_7b0] = p->feat._7b0; ++ fs[FEATURESET_7c0] = p->feat._7c0; ++ fs[FEATURESET_e7d] = p->extd.e7d; ++ fs[FEATURESET_e8b] = p->extd.e8b; ++ fs[FEATURESET_7d0] = p->feat._7d0; ++ fs[FEATURESET_7a1] = p->feat._7a1; ++ fs[FEATURESET_e21a] = p->extd.e21a; ++ fs[FEATURESET_7b1] = p->feat._7b1; ++ fs[FEATURESET_7d2] = p->feat._7d2; ++ fs[FEATURESET_7c1] = p->feat._7c1; ++ fs[FEATURESET_7d1] = p->feat._7d1; ++} ++ ++void x86_cpu_featureset_to_policy( ++ const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpu_policy *p) ++{ ++ p->basic._1d = fs[FEATURESET_1d]; ++ p->basic._1c = fs[FEATURESET_1c]; ++ p->extd.e1d = fs[FEATURESET_e1d]; ++ p->extd.e1c = fs[FEATURESET_e1c]; ++ p->xstate.Da1 = fs[FEATURESET_Da1]; ++ p->feat._7b0 = fs[FEATURESET_7b0]; ++ p->feat._7c0 = fs[FEATURESET_7c0]; ++ p->extd.e7d = fs[FEATURESET_e7d]; ++ p->extd.e8b = fs[FEATURESET_e8b]; ++ p->feat._7d0 = fs[FEATURESET_7d0]; ++ p->feat._7a1 = fs[FEATURESET_7a1]; ++ p->extd.e21a = fs[FEATURESET_e21a]; ++ p->feat._7b1 = fs[FEATURESET_7b1]; ++ p->feat._7d2 = fs[FEATURESET_7d2]; ++ 
p->feat._7c1 = fs[FEATURESET_7c1]; ++ p->feat._7d1 = fs[FEATURESET_7d1]; ++} ++ + void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p) + { + p->x86_vendor = x86_cpuid_lookup_vendor( +-- +2.39.2 + diff --git a/0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch b/0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch new file mode 100644 index 00000000..b56678e8 --- /dev/null +++ b/0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch @@ -0,0 +1,261 @@ +From ac2df7a2193262972e76149b2bd01ccdce908133 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 3 Apr 2023 17:48:43 +0100 +Subject: [PATCH 12/35] x86/boot: Move MSR policy initialisation logic into + cpu-policy.c + +Switch to the newer cpu_policy nomenclature. + +No practical change. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 4f20f596ce9bd95bde077a1ae0d7e07d20a5f6be) +--- + xen/arch/x86/cpu-policy.c | 84 +++++++++++++++++++++++++++ + xen/arch/x86/include/asm/cpu-policy.h | 3 + + xen/arch/x86/include/asm/msr.h | 1 - + xen/arch/x86/msr.c | 84 --------------------------- + xen/arch/x86/setup.c | 3 +- + 5 files changed, 89 insertions(+), 86 deletions(-) + +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index e9ac1269c35a..f6a2317ed7bd 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -20,6 +20,90 @@ struct cpu_policy __ro_after_init hvm_max_cpu_policy; + struct cpu_policy __ro_after_init hvm_def_cpu_policy; + #endif + ++static void __init calculate_raw_policy(void) ++{ ++ struct cpu_policy *p = &raw_cpu_policy; ++ ++ /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ ++ /* Was already added by probe_cpuid_faulting() */ ++ ++ if ( cpu_has_arch_caps ) ++ rdmsrl(MSR_ARCH_CAPABILITIES, p->arch_caps.raw); ++} ++ ++static void __init calculate_host_policy(void) ++{ ++ struct cpu_policy *p = &host_cpu_policy; ++ ++ *p = raw_cpu_policy; ++ ++ /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ ++ /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ ++ p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; ++ ++ /* Temporary, until we have known_features[] for feature bits in MSRs. */ ++ p->arch_caps.raw &= ++ (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | ++ ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ++ ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO | ++ ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO | ++ ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO | ++ ARCH_CAPS_PBRSB_NO); ++} ++ ++static void __init calculate_pv_max_policy(void) ++{ ++ struct cpu_policy *p = &pv_max_cpu_policy; ++ ++ *p = host_cpu_policy; ++ ++ p->arch_caps.raw = 0; /* Not supported yet. */ ++} ++ ++static void __init calculate_pv_def_policy(void) ++{ ++ struct cpu_policy *p = &pv_def_cpu_policy; ++ ++ *p = pv_max_cpu_policy; ++} ++ ++static void __init calculate_hvm_max_policy(void) ++{ ++ struct cpu_policy *p = &hvm_max_cpu_policy; ++ ++ *p = host_cpu_policy; ++ ++ /* It's always possible to emulate CPUID faulting for HVM guests */ ++ p->platform_info.cpuid_faulting = true; ++ ++ p->arch_caps.raw = 0; /* Not supported yet. 
*/ ++} ++ ++static void __init calculate_hvm_def_policy(void) ++{ ++ struct cpu_policy *p = &hvm_def_cpu_policy; ++ ++ *p = hvm_max_cpu_policy; ++} ++ ++void __init init_guest_cpu_policies(void) ++{ ++ calculate_raw_policy(); ++ calculate_host_policy(); ++ ++ if ( IS_ENABLED(CONFIG_PV) ) ++ { ++ calculate_pv_max_policy(); ++ calculate_pv_def_policy(); ++ } ++ ++ if ( hvm_enabled ) ++ { ++ calculate_hvm_max_policy(); ++ calculate_hvm_def_policy(); ++ } ++} ++ + int init_domain_cpu_policy(struct domain *d) + { + struct cpu_policy *p = is_pv_domain(d) +diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h +index 9ba34bbf5ea1..13e2a1f86d13 100644 +--- a/xen/arch/x86/include/asm/cpu-policy.h ++++ b/xen/arch/x86/include/asm/cpu-policy.h +@@ -12,6 +12,9 @@ extern struct cpu_policy pv_def_cpu_policy; + extern struct cpu_policy hvm_max_cpu_policy; + extern struct cpu_policy hvm_def_cpu_policy; + ++/* Initialise the guest cpu_policy objects. */ ++void init_guest_cpu_policies(void); ++ + /* Allocate and initialise a CPU policy suitable for the domain. */ + int init_domain_cpu_policy(struct domain *d); + +diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h +index a174bc6e892b..b51d92e27c74 100644 +--- a/xen/arch/x86/include/asm/msr.h ++++ b/xen/arch/x86/include/asm/msr.h +@@ -397,7 +397,6 @@ struct vcpu_msrs + uint32_t dr_mask[4]; + }; + +-void init_guest_msr_policy(void); + int init_vcpu_msr_policy(struct vcpu *v); + + /* +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index d3ca861454a7..14bcb8261c47 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -38,90 +38,6 @@ + + DEFINE_PER_CPU(uint32_t, tsc_aux); + +-static void __init calculate_raw_policy(void) +-{ +- struct msr_policy *mp = &raw_cpu_policy; +- +- /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ +- /* Was already added by probe_cpuid_faulting() */ +- +- if ( cpu_has_arch_caps ) +- rdmsrl(MSR_ARCH_CAPABILITIES, mp->arch_caps.raw); +-} +- +-static void __init calculate_host_policy(void) +-{ +- struct msr_policy *mp = &host_cpu_policy; +- +- *mp = raw_cpu_policy; +- +- /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ +- /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ +- mp->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; +- +- /* Temporary, until we have known_features[] for feature bits in MSRs. */ +- mp->arch_caps.raw &= +- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | +- ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | +- ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO | +- ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO | +- ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO | +- ARCH_CAPS_PBRSB_NO); +-} +- +-static void __init calculate_pv_max_policy(void) +-{ +- struct msr_policy *mp = &pv_max_cpu_policy; +- +- *mp = host_cpu_policy; +- +- mp->arch_caps.raw = 0; /* Not supported yet. */ +-} +- +-static void __init calculate_pv_def_policy(void) +-{ +- struct msr_policy *mp = &pv_def_cpu_policy; +- +- *mp = pv_max_cpu_policy; +-} +- +-static void __init calculate_hvm_max_policy(void) +-{ +- struct msr_policy *mp = &hvm_max_cpu_policy; +- +- *mp = host_cpu_policy; +- +- /* It's always possible to emulate CPUID faulting for HVM guests */ +- mp->platform_info.cpuid_faulting = true; +- +- mp->arch_caps.raw = 0; /* Not supported yet. 
*/ +-} +- +-static void __init calculate_hvm_def_policy(void) +-{ +- struct msr_policy *mp = &hvm_def_cpu_policy; +- +- *mp = hvm_max_cpu_policy; +-} +- +-void __init init_guest_msr_policy(void) +-{ +- calculate_raw_policy(); +- calculate_host_policy(); +- +- if ( IS_ENABLED(CONFIG_PV) ) +- { +- calculate_pv_max_policy(); +- calculate_pv_def_policy(); +- } +- +- if ( hvm_enabled ) +- { +- calculate_hvm_max_policy(); +- calculate_hvm_def_policy(); +- } +-} +- + int init_vcpu_msr_policy(struct vcpu *v) + { + struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs); +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 09c17b10167c..1d62ea1ad9d9 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1943,7 +1944,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) + panic("Could not protect TXT memory regions\n"); + + init_guest_cpuid(); +- init_guest_msr_policy(); ++ init_guest_cpu_policies(); + + if ( xen_cpuidle ) + xen_processor_pmbits |= XEN_PROCESSOR_PM_CX; +-- +2.39.2 + diff --git a/0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch b/0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch new file mode 100644 index 00000000..711fae6d --- /dev/null +++ b/0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch @@ -0,0 +1,1780 @@ +From a7e07cd930532e75552fd20b14a4a9c301c723d4 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 3 Apr 2023 19:06:02 +0100 +Subject: [PATCH 13/35] x86/boot: Merge CPUID policy initialisation logic into + cpu-policy.c + +Switch to the newer cpu_policy nomenclature. Do some easy cleanup of +includes. + +No practical change. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 8eb56eb959a50bf9afd0fd590ec394e9145970a4) +--- + xen/arch/x86/cpu-policy.c | 743 +++++++++++++++++++++++ + xen/arch/x86/cpuid.c | 808 +------------------------- + xen/arch/x86/hvm/hvm.c | 1 - + xen/arch/x86/include/asm/cpu-policy.h | 6 + + xen/arch/x86/include/asm/cpuid.h | 11 +- + xen/arch/x86/pv/domain.c | 1 + + xen/arch/x86/setup.c | 2 - + 7 files changed, 755 insertions(+), 817 deletions(-) + +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index f6a2317ed7bd..9c506b6614f2 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -1,13 +1,19 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ + #include + #include ++#include + #include + + #include + ++#include + #include ++#include ++#include + #include ++#include + #include ++#include + + struct cpu_policy __ro_after_init raw_cpu_policy; + struct cpu_policy __ro_after_init host_cpu_policy; +@@ -20,10 +26,332 @@ struct cpu_policy __ro_after_init hvm_max_cpu_policy; + struct cpu_policy __ro_after_init hvm_def_cpu_policy; + #endif + ++const uint32_t known_features[] = INIT_KNOWN_FEATURES; ++ ++static const uint32_t __initconst pv_max_featuremask[] = INIT_PV_MAX_FEATURES; ++static const uint32_t hvm_shadow_max_featuremask[] = INIT_HVM_SHADOW_MAX_FEATURES; ++static const uint32_t __initconst hvm_hap_max_featuremask[] = ++ INIT_HVM_HAP_MAX_FEATURES; ++static const uint32_t __initconst pv_def_featuremask[] = INIT_PV_DEF_FEATURES; ++static const uint32_t __initconst hvm_shadow_def_featuremask[] = ++ INIT_HVM_SHADOW_DEF_FEATURES; ++static const uint32_t __initconst hvm_hap_def_featuremask[] = ++ INIT_HVM_HAP_DEF_FEATURES; ++static const uint32_t deep_features[] = INIT_DEEP_FEATURES; ++ ++static const struct feature_name { 
++ const char *name; ++ unsigned int bit; ++} feature_names[] __initconstrel = INIT_FEATURE_NAMES; ++ ++/* ++ * Parse a list of cpuid feature names -> bool, calling the callback for any ++ * matches found. ++ * ++ * always_inline, because this is init code only and we really don't want a ++ * function pointer call in the middle of the loop. ++ */ ++static int __init always_inline parse_cpuid( ++ const char *s, void (*callback)(unsigned int feat, bool val)) ++{ ++ const char *ss; ++ int val, rc = 0; ++ ++ do { ++ const struct feature_name *lhs, *rhs, *mid = NULL /* GCC... */; ++ const char *feat; ++ ++ ss = strchr(s, ','); ++ if ( !ss ) ++ ss = strchr(s, '\0'); ++ ++ /* Skip the 'no-' prefix for name comparisons. */ ++ feat = s; ++ if ( strncmp(s, "no-", 3) == 0 ) ++ feat += 3; ++ ++ /* (Re)initalise lhs and rhs for binary search. */ ++ lhs = feature_names; ++ rhs = feature_names + ARRAY_SIZE(feature_names); ++ ++ while ( lhs < rhs ) ++ { ++ int res; ++ ++ mid = lhs + (rhs - lhs) / 2; ++ res = cmdline_strcmp(feat, mid->name); ++ ++ if ( res < 0 ) ++ { ++ rhs = mid; ++ continue; ++ } ++ if ( res > 0 ) ++ { ++ lhs = mid + 1; ++ continue; ++ } ++ ++ if ( (val = parse_boolean(mid->name, s, ss)) >= 0 ) ++ { ++ callback(mid->bit, val); ++ mid = NULL; ++ } ++ ++ break; ++ } ++ ++ /* ++ * Mid being NULL means that the name and boolean were successfully ++ * identified. Everything else is an error. ++ */ ++ if ( mid ) ++ rc = -EINVAL; ++ ++ s = ss + 1; ++ } while ( *ss ); ++ ++ return rc; ++} ++ ++static void __init cf_check _parse_xen_cpuid(unsigned int feat, bool val) ++{ ++ if ( !val ) ++ setup_clear_cpu_cap(feat); ++ else if ( feat == X86_FEATURE_RDRAND && ++ (cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_RDRAND)) ) ++ setup_force_cpu_cap(X86_FEATURE_RDRAND); ++} ++ ++static int __init cf_check parse_xen_cpuid(const char *s) ++{ ++ return parse_cpuid(s, _parse_xen_cpuid); ++} ++custom_param("cpuid", parse_xen_cpuid); ++ ++static bool __initdata dom0_cpuid_cmdline; ++static uint32_t __initdata dom0_enable_feat[FSCAPINTS]; ++static uint32_t __initdata dom0_disable_feat[FSCAPINTS]; ++ ++static void __init cf_check _parse_dom0_cpuid(unsigned int feat, bool val) ++{ ++ __set_bit (feat, val ? dom0_enable_feat : dom0_disable_feat); ++ __clear_bit(feat, val ? dom0_disable_feat : dom0_enable_feat ); ++} ++ ++static int __init cf_check parse_dom0_cpuid(const char *s) ++{ ++ dom0_cpuid_cmdline = true; ++ ++ return parse_cpuid(s, _parse_dom0_cpuid); ++} ++custom_param("dom0-cpuid", parse_dom0_cpuid); ++ ++#define EMPTY_LEAF ((struct cpuid_leaf){}) ++static void zero_leaves(struct cpuid_leaf *l, ++ unsigned int first, unsigned int last) ++{ ++ memset(&l[first], 0, sizeof(*l) * (last - first + 1)); ++} ++ ++static void sanitise_featureset(uint32_t *fs) ++{ ++ /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ ++ uint32_t disabled_features[ ++ ROUNDUP(FSCAPINTS, sizeof(unsigned long)/sizeof(uint32_t))] = {}; ++ unsigned int i; ++ ++ for ( i = 0; i < FSCAPINTS; ++i ) ++ { ++ /* Clamp to known mask. */ ++ fs[i] &= known_features[i]; ++ ++ /* ++ * Identify which features with deep dependencies have been ++ * disabled. ++ */ ++ disabled_features[i] = ~fs[i] & deep_features[i]; ++ } ++ ++ for_each_set_bit(i, (void *)disabled_features, ++ sizeof(disabled_features) * 8) ++ { ++ const uint32_t *dfs = x86_cpuid_lookup_deep_deps(i); ++ unsigned int j; ++ ++ ASSERT(dfs); /* deep_features[] should guarentee this. 
*/ ++ ++ for ( j = 0; j < FSCAPINTS; ++j ) ++ { ++ fs[j] &= ~dfs[j]; ++ disabled_features[j] &= ~dfs[j]; ++ } ++ } ++} ++ ++static void recalculate_xstate(struct cpu_policy *p) ++{ ++ uint64_t xstates = XSTATE_FP_SSE; ++ uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; ++ unsigned int i, Da1 = p->xstate.Da1; ++ ++ /* ++ * The Da1 leaf is the only piece of information preserved in the common ++ * case. Everything else is derived from other feature state. ++ */ ++ memset(&p->xstate, 0, sizeof(p->xstate)); ++ ++ if ( !p->basic.xsave ) ++ return; ++ ++ if ( p->basic.avx ) ++ { ++ xstates |= X86_XCR0_YMM; ++ xstate_size = max(xstate_size, ++ xstate_offsets[X86_XCR0_YMM_POS] + ++ xstate_sizes[X86_XCR0_YMM_POS]); ++ } ++ ++ if ( p->feat.mpx ) ++ { ++ xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR; ++ xstate_size = max(xstate_size, ++ xstate_offsets[X86_XCR0_BNDCSR_POS] + ++ xstate_sizes[X86_XCR0_BNDCSR_POS]); ++ } ++ ++ if ( p->feat.avx512f ) ++ { ++ xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM; ++ xstate_size = max(xstate_size, ++ xstate_offsets[X86_XCR0_HI_ZMM_POS] + ++ xstate_sizes[X86_XCR0_HI_ZMM_POS]); ++ } ++ ++ if ( p->feat.pku ) ++ { ++ xstates |= X86_XCR0_PKRU; ++ xstate_size = max(xstate_size, ++ xstate_offsets[X86_XCR0_PKRU_POS] + ++ xstate_sizes[X86_XCR0_PKRU_POS]); ++ } ++ ++ p->xstate.max_size = xstate_size; ++ p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; ++ p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; ++ ++ p->xstate.Da1 = Da1; ++ if ( p->xstate.xsaves ) ++ { ++ p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; ++ p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; ++ } ++ else ++ xstates &= ~XSTATE_XSAVES_ONLY; ++ ++ for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.comp)); ++i ) ++ { ++ uint64_t curr_xstate = 1ul << i; ++ ++ if ( !(xstates & curr_xstate) ) ++ continue; ++ ++ p->xstate.comp[i].size = xstate_sizes[i]; ++ p->xstate.comp[i].offset = xstate_offsets[i]; ++ p->xstate.comp[i].xss = curr_xstate & XSTATE_XSAVES_ONLY; ++ p->xstate.comp[i].align = curr_xstate & xstate_align; ++ } ++} ++ ++/* ++ * Misc adjustments to the policy. Mostly clobbering reserved fields and ++ * duplicating shared fields. Intentionally hidden fields are annotated. ++ */ ++static void recalculate_misc(struct cpu_policy *p) ++{ ++ p->basic.raw_fms &= 0x0fff0fff; /* Clobber Processor Type on Intel. */ ++ p->basic.apic_id = 0; /* Dynamic. */ ++ ++ p->basic.raw[0x5] = EMPTY_LEAF; /* MONITOR not exposed to guests. */ ++ p->basic.raw[0x6] = EMPTY_LEAF; /* Therm/Power not exposed to guests. */ ++ ++ p->basic.raw[0x8] = EMPTY_LEAF; ++ ++ /* TODO: Rework topology logic. */ ++ memset(p->topo.raw, 0, sizeof(p->topo.raw)); ++ ++ p->basic.raw[0xc] = EMPTY_LEAF; ++ ++ p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; ++ ++ /* Most of Power/RAS hidden from guests. */ ++ p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0; ++ ++ p->extd.raw[0x8].d = 0; ++ ++ switch ( p->x86_vendor ) ++ { ++ case X86_VENDOR_INTEL: ++ p->basic.l2_nr_queries = 1; /* Fixed to 1 query. */ ++ p->basic.raw[0x3] = EMPTY_LEAF; /* PSN - always hidden. */ ++ p->basic.raw[0x9] = EMPTY_LEAF; /* DCA - always hidden. 
*/ ++ ++ p->extd.vendor_ebx = 0; ++ p->extd.vendor_ecx = 0; ++ p->extd.vendor_edx = 0; ++ ++ p->extd.raw[0x1].a = p->extd.raw[0x1].b = 0; ++ ++ p->extd.raw[0x5] = EMPTY_LEAF; ++ p->extd.raw[0x6].a = p->extd.raw[0x6].b = p->extd.raw[0x6].d = 0; ++ ++ p->extd.raw[0x8].a &= 0x0000ffff; ++ p->extd.raw[0x8].c = 0; ++ break; ++ ++ case X86_VENDOR_AMD: ++ case X86_VENDOR_HYGON: ++ zero_leaves(p->basic.raw, 0x2, 0x3); ++ memset(p->cache.raw, 0, sizeof(p->cache.raw)); ++ zero_leaves(p->basic.raw, 0x9, 0xa); ++ ++ p->extd.vendor_ebx = p->basic.vendor_ebx; ++ p->extd.vendor_ecx = p->basic.vendor_ecx; ++ p->extd.vendor_edx = p->basic.vendor_edx; ++ ++ p->extd.raw_fms = p->basic.raw_fms; ++ p->extd.raw[0x1].b &= 0xff00ffff; ++ p->extd.e1d |= p->basic._1d & CPUID_COMMON_1D_FEATURES; ++ ++ p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. */ ++ p->extd.raw[0x8].c &= 0x0003f0ff; ++ ++ p->extd.raw[0x9] = EMPTY_LEAF; ++ ++ zero_leaves(p->extd.raw, 0xb, 0x18); ++ ++ /* 0x19 - TLB details. Pass through. */ ++ /* 0x1a - Perf hints. Pass through. */ ++ ++ p->extd.raw[0x1b] = EMPTY_LEAF; /* IBS - not supported. */ ++ p->extd.raw[0x1c] = EMPTY_LEAF; /* LWP - not supported. */ ++ p->extd.raw[0x1d] = EMPTY_LEAF; /* TopoExt Cache */ ++ p->extd.raw[0x1e] = EMPTY_LEAF; /* TopoExt APIC ID/Core/Node */ ++ p->extd.raw[0x1f] = EMPTY_LEAF; /* SEV */ ++ p->extd.raw[0x20] = EMPTY_LEAF; /* Platform QoS */ ++ break; ++ } ++} ++ + static void __init calculate_raw_policy(void) + { + struct cpu_policy *p = &raw_cpu_policy; + ++ x86_cpuid_policy_fill_native(p); ++ ++ /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ ++ ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); ++ + /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ + /* Was already added by probe_cpuid_faulting() */ + +@@ -34,9 +362,50 @@ static void __init calculate_raw_policy(void) + static void __init calculate_host_policy(void) + { + struct cpu_policy *p = &host_cpu_policy; ++ unsigned int max_extd_leaf; + + *p = raw_cpu_policy; + ++ p->basic.max_leaf = ++ min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); ++ p->feat.max_subleaf = ++ min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1); ++ ++ max_extd_leaf = p->extd.max_leaf; ++ ++ /* ++ * For AMD/Hygon hardware before Zen3, we unilaterally modify LFENCE to be ++ * dispatch serialising for Spectre mitigations. Extend max_extd_leaf ++ * beyond what hardware supports, to include the feature leaf containing ++ * this information. ++ */ ++ if ( cpu_has_lfence_dispatch ) ++ max_extd_leaf = max(max_extd_leaf, 0x80000021); ++ ++ p->extd.max_leaf = 0x80000000 | min_t(uint32_t, max_extd_leaf & 0xffff, ++ ARRAY_SIZE(p->extd.raw) - 1); ++ ++ x86_cpu_featureset_to_policy(boot_cpu_data.x86_capability, p); ++ recalculate_xstate(p); ++ recalculate_misc(p); ++ ++ /* When vPMU is disabled, drop it from the host policy. */ ++ if ( vpmu_mode == XENPMU_MODE_OFF ) ++ p->basic.raw[0xa] = EMPTY_LEAF; ++ ++ if ( p->extd.svm ) ++ { ++ /* Clamp to implemented features which require hardware support. */ ++ p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | ++ (1u << SVM_FEATURE_LBRV) | ++ (1u << SVM_FEATURE_NRIPS) | ++ (1u << SVM_FEATURE_PAUSEFILTER) | ++ (1u << SVM_FEATURE_DECODEASSISTS)); ++ /* Enable features which are always emulated. 
*/ ++ p->extd.raw[0xa].d |= ((1u << SVM_FEATURE_VMCBCLEAN) | ++ (1u << SVM_FEATURE_TSCRATEMSR)); ++ } ++ + /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ + /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ + p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; +@@ -51,11 +420,88 @@ static void __init calculate_host_policy(void) + ARCH_CAPS_PBRSB_NO); + } + ++static void __init guest_common_default_feature_adjustments(uint32_t *fs) ++{ ++ /* ++ * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS ++ * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to ++ * compensate. ++ * ++ * Mitigate by hiding RDRAND from guests by default, unless explicitly ++ * overridden on the Xen command line (cpuid=rdrand). Irrespective of the ++ * default setting, guests can use RDRAND if explicitly enabled ++ * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were ++ * previously using RDRAND can migrate in. ++ */ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && ++ boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x3a && ++ cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) ++ __clear_bit(X86_FEATURE_RDRAND, fs); ++ ++ /* ++ * On certain hardware, speculative or errata workarounds can result in ++ * TSX being placed in "force-abort" mode, where it doesn't actually ++ * function as expected, but is technically compatible with the ISA. ++ * ++ * Do not advertise RTM to guests by default if it won't actually work. ++ */ ++ if ( rtm_disabled ) ++ __clear_bit(X86_FEATURE_RTM, fs); ++} ++ ++static void __init guest_common_feature_adjustments(uint32_t *fs) ++{ ++ /* Unconditionally claim to be able to set the hypervisor bit. */ ++ __set_bit(X86_FEATURE_HYPERVISOR, fs); ++ ++ /* ++ * If IBRS is offered to the guest, unconditionally offer STIBP. It is a ++ * nop on non-HT hardware, and has this behaviour to make heterogeneous ++ * setups easier to manage. ++ */ ++ if ( test_bit(X86_FEATURE_IBRSB, fs) ) ++ __set_bit(X86_FEATURE_STIBP, fs); ++ if ( test_bit(X86_FEATURE_IBRS, fs) ) ++ __set_bit(X86_FEATURE_AMD_STIBP, fs); ++ ++ /* ++ * On hardware which supports IBRS/IBPB, we can offer IBPB independently ++ * of IBRS by using the AMD feature bit. An administrator may wish for ++ * performance reasons to offer IBPB without IBRS. ++ */ ++ if ( host_cpu_policy.feat.ibrsb ) ++ __set_bit(X86_FEATURE_IBPB, fs); ++} ++ + static void __init calculate_pv_max_policy(void) + { + struct cpu_policy *p = &pv_max_cpu_policy; ++ uint32_t fs[FSCAPINTS]; ++ unsigned int i; + + *p = host_cpu_policy; ++ x86_cpu_policy_to_featureset(p, fs); ++ ++ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) ++ fs[i] &= pv_max_featuremask[i]; ++ ++ /* ++ * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional ++ * availability, or admin choice), hide the feature. ++ */ ++ if ( !boot_cpu_has(X86_FEATURE_SC_MSR_PV) ) ++ { ++ __clear_bit(X86_FEATURE_IBRSB, fs); ++ __clear_bit(X86_FEATURE_IBRS, fs); ++ } ++ ++ guest_common_feature_adjustments(fs); ++ ++ sanitise_featureset(fs); ++ x86_cpu_featureset_to_policy(fs, p); ++ recalculate_xstate(p); ++ ++ p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ + + p->arch_caps.raw = 0; /* Not supported yet. 
*/ + } +@@ -63,15 +509,103 @@ static void __init calculate_pv_max_policy(void) + static void __init calculate_pv_def_policy(void) + { + struct cpu_policy *p = &pv_def_cpu_policy; ++ uint32_t fs[FSCAPINTS]; ++ unsigned int i; + + *p = pv_max_cpu_policy; ++ x86_cpu_policy_to_featureset(p, fs); ++ ++ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) ++ fs[i] &= pv_def_featuremask[i]; ++ ++ guest_common_feature_adjustments(fs); ++ guest_common_default_feature_adjustments(fs); ++ ++ sanitise_featureset(fs); ++ x86_cpu_featureset_to_policy(fs, p); ++ recalculate_xstate(p); + } + + static void __init calculate_hvm_max_policy(void) + { + struct cpu_policy *p = &hvm_max_cpu_policy; ++ uint32_t fs[FSCAPINTS]; ++ unsigned int i; ++ const uint32_t *mask; + + *p = host_cpu_policy; ++ x86_cpu_policy_to_featureset(p, fs); ++ ++ mask = hvm_hap_supported() ? ++ hvm_hap_max_featuremask : hvm_shadow_max_featuremask; ++ ++ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) ++ fs[i] &= mask[i]; ++ ++ /* ++ * Xen can provide an (x2)APIC emulation to HVM guests even if the host's ++ * (x2)APIC isn't enabled. ++ */ ++ __set_bit(X86_FEATURE_APIC, fs); ++ __set_bit(X86_FEATURE_X2APIC, fs); ++ ++ /* ++ * We don't support EFER.LMSLE at all. AMD has dropped the feature from ++ * hardware and allocated a CPUID bit to indicate its absence. ++ */ ++ __set_bit(X86_FEATURE_NO_LMSL, fs); ++ ++ /* ++ * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in ++ * long mode (and init_amd() has cleared it out of host capabilities), but ++ * HVM guests are able if running in protected mode. ++ */ ++ if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) && ++ raw_cpu_policy.basic.sep ) ++ __set_bit(X86_FEATURE_SEP, fs); ++ ++ /* ++ * VIRT_SSBD is exposed in the default policy as a result of ++ * amd_virt_spec_ctrl being set, it also needs exposing in the max policy. ++ */ ++ if ( amd_virt_spec_ctrl ) ++ __set_bit(X86_FEATURE_VIRT_SSBD, fs); ++ ++ /* ++ * If Xen isn't virtualising MSR_SPEC_CTRL for HVM guests (functional ++ * availability, or admin choice), hide the feature. ++ */ ++ if ( !boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ) ++ { ++ __clear_bit(X86_FEATURE_IBRSB, fs); ++ __clear_bit(X86_FEATURE_IBRS, fs); ++ } ++ else if ( boot_cpu_has(X86_FEATURE_AMD_SSBD) ) ++ /* ++ * If SPEC_CTRL.SSBD is available VIRT_SPEC_CTRL.SSBD can be exposed ++ * and implemented using the former. Expose in the max policy only as ++ * the preference is for guests to use SPEC_CTRL.SSBD if available. ++ */ ++ __set_bit(X86_FEATURE_VIRT_SSBD, fs); ++ ++ /* ++ * With VT-x, some features are only supported by Xen if dedicated ++ * hardware support is also available. ++ */ ++ if ( cpu_has_vmx ) ++ { ++ if ( !cpu_has_vmx_mpx ) ++ __clear_bit(X86_FEATURE_MPX, fs); ++ ++ if ( !cpu_has_vmx_xsaves ) ++ __clear_bit(X86_FEATURE_XSAVES, fs); ++ } ++ ++ guest_common_feature_adjustments(fs); ++ ++ sanitise_featureset(fs); ++ x86_cpu_featureset_to_policy(fs, p); ++ recalculate_xstate(p); + + /* It's always possible to emulate CPUID faulting for HVM guests */ + p->platform_info.cpuid_faulting = true; +@@ -82,8 +616,32 @@ static void __init calculate_hvm_max_policy(void) + static void __init calculate_hvm_def_policy(void) + { + struct cpu_policy *p = &hvm_def_cpu_policy; ++ uint32_t fs[FSCAPINTS]; ++ unsigned int i; ++ const uint32_t *mask; + + *p = hvm_max_cpu_policy; ++ x86_cpu_policy_to_featureset(p, fs); ++ ++ mask = hvm_hap_supported() ? 
++ hvm_hap_def_featuremask : hvm_shadow_def_featuremask; ++ ++ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) ++ fs[i] &= mask[i]; ++ ++ guest_common_feature_adjustments(fs); ++ guest_common_default_feature_adjustments(fs); ++ ++ /* ++ * Only expose VIRT_SSBD if AMD_SSBD is not available, and thus ++ * amd_virt_spec_ctrl is set. ++ */ ++ if ( amd_virt_spec_ctrl ) ++ __set_bit(X86_FEATURE_VIRT_SSBD, fs); ++ ++ sanitise_featureset(fs); ++ x86_cpu_featureset_to_policy(fs, p); ++ recalculate_xstate(p); + } + + void __init init_guest_cpu_policies(void) +@@ -149,3 +707,188 @@ int init_domain_cpu_policy(struct domain *d) + + return 0; + } ++ ++void recalculate_cpuid_policy(struct domain *d) ++{ ++ struct cpu_policy *p = d->arch.cpuid; ++ const struct cpu_policy *max = is_pv_domain(d) ++ ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) ++ : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); ++ uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; ++ unsigned int i; ++ ++ if ( !max ) ++ { ++ ASSERT_UNREACHABLE(); ++ return; ++ } ++ ++ p->x86_vendor = x86_cpuid_lookup_vendor( ++ p->basic.vendor_ebx, p->basic.vendor_ecx, p->basic.vendor_edx); ++ ++ p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf); ++ p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf); ++ p->extd.max_leaf = 0x80000000 | min(p->extd.max_leaf & 0xffff, ++ ((p->x86_vendor & (X86_VENDOR_AMD | ++ X86_VENDOR_HYGON)) ++ ? CPUID_GUEST_NR_EXTD_AMD ++ : CPUID_GUEST_NR_EXTD_INTEL) - 1); ++ ++ x86_cpu_policy_to_featureset(p, fs); ++ x86_cpu_policy_to_featureset(max, max_fs); ++ ++ if ( is_hvm_domain(d) ) ++ { ++ /* ++ * HVM domains using Shadow paging have further restrictions on their ++ * available paging features. ++ */ ++ if ( !hap_enabled(d) ) ++ { ++ for ( i = 0; i < ARRAY_SIZE(max_fs); i++ ) ++ max_fs[i] &= hvm_shadow_max_featuremask[i]; ++ } ++ ++ /* Hide nested-virt if it hasn't been explicitly configured. */ ++ if ( !nestedhvm_enabled(d) ) ++ { ++ __clear_bit(X86_FEATURE_VMX, max_fs); ++ __clear_bit(X86_FEATURE_SVM, max_fs); ++ } ++ } ++ ++ /* ++ * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits ++ * affect how to interpret topology information in other cpuid leaves. ++ */ ++ __set_bit(X86_FEATURE_HTT, max_fs); ++ __set_bit(X86_FEATURE_X2APIC, max_fs); ++ __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); ++ ++ /* ++ * 32bit PV domains can't use any Long Mode features, and cannot use ++ * SYSCALL on non-AMD hardware. ++ */ ++ if ( is_pv_32bit_domain(d) ) ++ { ++ __clear_bit(X86_FEATURE_LM, max_fs); ++ if ( !(boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) ++ __clear_bit(X86_FEATURE_SYSCALL, max_fs); ++ } ++ ++ /* Clamp the toolstacks choices to reality. */ ++ for ( i = 0; i < ARRAY_SIZE(fs); i++ ) ++ fs[i] &= max_fs[i]; ++ ++ if ( p->basic.max_leaf < XSTATE_CPUID ) ++ __clear_bit(X86_FEATURE_XSAVE, fs); ++ ++ sanitise_featureset(fs); ++ ++ /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ ++ fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | ++ cpufeat_mask(X86_FEATURE_NO_FPU_SEL)); ++ fs[FEATURESET_7b0] |= (host_cpu_policy.feat._7b0 & ++ (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | ++ cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); ++ ++ x86_cpu_featureset_to_policy(fs, p); ++ ++ /* Pass host cacheline size through to guests. 
*/ ++ p->basic.clflush_size = max->basic.clflush_size; ++ ++ p->extd.maxphysaddr = min(p->extd.maxphysaddr, max->extd.maxphysaddr); ++ p->extd.maxphysaddr = min_t(uint8_t, p->extd.maxphysaddr, ++ paging_max_paddr_bits(d)); ++ p->extd.maxphysaddr = max_t(uint8_t, p->extd.maxphysaddr, ++ (p->basic.pae || p->basic.pse36) ? 36 : 32); ++ ++ p->extd.maxlinaddr = p->extd.lm ? 48 : 32; ++ ++ recalculate_xstate(p); ++ recalculate_misc(p); ++ ++ for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) ++ { ++ if ( p->cache.subleaf[i].type >= 1 && ++ p->cache.subleaf[i].type <= 3 ) ++ { ++ /* Subleaf has a valid cache type. Zero reserved fields. */ ++ p->cache.raw[i].a &= 0xffffc3ffu; ++ p->cache.raw[i].d &= 0x00000007u; ++ } ++ else ++ { ++ /* Subleaf is not valid. Zero the rest of the union. */ ++ zero_leaves(p->cache.raw, i, ARRAY_SIZE(p->cache.raw) - 1); ++ break; ++ } ++ } ++ ++ if ( vpmu_mode == XENPMU_MODE_OFF || ++ ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) ++ p->basic.raw[0xa] = EMPTY_LEAF; ++ ++ if ( !p->extd.svm ) ++ p->extd.raw[0xa] = EMPTY_LEAF; ++ ++ if ( !p->extd.page1gb ) ++ p->extd.raw[0x19] = EMPTY_LEAF; ++} ++ ++void __init init_dom0_cpuid_policy(struct domain *d) ++{ ++ struct cpu_policy *p = d->arch.cpuid; ++ ++ /* dom0 can't migrate. Give it ITSC if available. */ ++ if ( cpu_has_itsc ) ++ p->extd.itsc = true; ++ ++ /* ++ * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, ++ * so dom0 can turn off workarounds as appropriate. Temporary, until the ++ * domain policy logic gains a better understanding of MSRs. ++ */ ++ if ( cpu_has_arch_caps ) ++ p->feat.arch_caps = true; ++ ++ /* Apply dom0-cpuid= command line settings, if provided. */ ++ if ( dom0_cpuid_cmdline ) ++ { ++ uint32_t fs[FSCAPINTS]; ++ unsigned int i; ++ ++ x86_cpu_policy_to_featureset(p, fs); ++ ++ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) ++ { ++ fs[i] |= dom0_enable_feat [i]; ++ fs[i] &= ~dom0_disable_feat[i]; ++ } ++ ++ x86_cpu_featureset_to_policy(fs, p); ++ ++ recalculate_cpuid_policy(d); ++ } ++} ++ ++static void __init __maybe_unused build_assertions(void) ++{ ++ BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS); ++ BUILD_BUG_ON(ARRAY_SIZE(pv_max_featuremask) != FSCAPINTS); ++ BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_max_featuremask) != FSCAPINTS); ++ BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_max_featuremask) != FSCAPINTS); ++ BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS); ++ ++ /* Find some more clever allocation scheme if this trips. 
*/ ++ BUILD_BUG_ON(sizeof(struct cpu_policy) > PAGE_SIZE); ++ ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.basic) != ++ sizeof(raw_cpu_policy.basic.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.feat) != ++ sizeof(raw_cpu_policy.feat.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) != ++ sizeof(raw_cpu_policy.xstate.raw)); ++ BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) != ++ sizeof(raw_cpu_policy.extd.raw)); ++} +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index ef96af738af0..3f20c342fde8 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -1,629 +1,14 @@ +-#include +-#include +-#include + #include +-#include +-#include ++#include ++ ++#include ++ + #include + #include +-#include +-#include +-#include + #include +-#include +-#include +-#include + #include + +-const uint32_t known_features[] = INIT_KNOWN_FEATURES; +- +-static const uint32_t __initconst pv_max_featuremask[] = INIT_PV_MAX_FEATURES; +-static const uint32_t hvm_shadow_max_featuremask[] = INIT_HVM_SHADOW_MAX_FEATURES; +-static const uint32_t __initconst hvm_hap_max_featuremask[] = +- INIT_HVM_HAP_MAX_FEATURES; +-static const uint32_t __initconst pv_def_featuremask[] = INIT_PV_DEF_FEATURES; +-static const uint32_t __initconst hvm_shadow_def_featuremask[] = +- INIT_HVM_SHADOW_DEF_FEATURES; +-static const uint32_t __initconst hvm_hap_def_featuremask[] = +- INIT_HVM_HAP_DEF_FEATURES; +-static const uint32_t deep_features[] = INIT_DEEP_FEATURES; +- +-static const struct feature_name { +- const char *name; +- unsigned int bit; +-} feature_names[] __initconstrel = INIT_FEATURE_NAMES; +- +-/* +- * Parse a list of cpuid feature names -> bool, calling the callback for any +- * matches found. +- * +- * always_inline, because this is init code only and we really don't want a +- * function pointer call in the middle of the loop. +- */ +-static int __init always_inline parse_cpuid( +- const char *s, void (*callback)(unsigned int feat, bool val)) +-{ +- const char *ss; +- int val, rc = 0; +- +- do { +- const struct feature_name *lhs, *rhs, *mid = NULL /* GCC... */; +- const char *feat; +- +- ss = strchr(s, ','); +- if ( !ss ) +- ss = strchr(s, '\0'); +- +- /* Skip the 'no-' prefix for name comparisons. */ +- feat = s; +- if ( strncmp(s, "no-", 3) == 0 ) +- feat += 3; +- +- /* (Re)initalise lhs and rhs for binary search. */ +- lhs = feature_names; +- rhs = feature_names + ARRAY_SIZE(feature_names); +- +- while ( lhs < rhs ) +- { +- int res; +- +- mid = lhs + (rhs - lhs) / 2; +- res = cmdline_strcmp(feat, mid->name); +- +- if ( res < 0 ) +- { +- rhs = mid; +- continue; +- } +- if ( res > 0 ) +- { +- lhs = mid + 1; +- continue; +- } +- +- if ( (val = parse_boolean(mid->name, s, ss)) >= 0 ) +- { +- callback(mid->bit, val); +- mid = NULL; +- } +- +- break; +- } +- +- /* +- * Mid being NULL means that the name and boolean were successfully +- * identified. Everything else is an error. 
+- */ +- if ( mid ) +- rc = -EINVAL; +- +- s = ss + 1; +- } while ( *ss ); +- +- return rc; +-} +- +-static void __init cf_check _parse_xen_cpuid(unsigned int feat, bool val) +-{ +- if ( !val ) +- setup_clear_cpu_cap(feat); +- else if ( feat == X86_FEATURE_RDRAND && +- (cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_RDRAND)) ) +- setup_force_cpu_cap(X86_FEATURE_RDRAND); +-} +- +-static int __init cf_check parse_xen_cpuid(const char *s) +-{ +- return parse_cpuid(s, _parse_xen_cpuid); +-} +-custom_param("cpuid", parse_xen_cpuid); +- +-static bool __initdata dom0_cpuid_cmdline; +-static uint32_t __initdata dom0_enable_feat[FSCAPINTS]; +-static uint32_t __initdata dom0_disable_feat[FSCAPINTS]; +- +-static void __init cf_check _parse_dom0_cpuid(unsigned int feat, bool val) +-{ +- __set_bit (feat, val ? dom0_enable_feat : dom0_disable_feat); +- __clear_bit(feat, val ? dom0_disable_feat : dom0_enable_feat ); +-} +- +-static int __init cf_check parse_dom0_cpuid(const char *s) +-{ +- dom0_cpuid_cmdline = true; +- +- return parse_cpuid(s, _parse_dom0_cpuid); +-} +-custom_param("dom0-cpuid", parse_dom0_cpuid); +- + #define EMPTY_LEAF ((struct cpuid_leaf){}) +-static void zero_leaves(struct cpuid_leaf *l, +- unsigned int first, unsigned int last) +-{ +- memset(&l[first], 0, sizeof(*l) * (last - first + 1)); +-} +- +-static void sanitise_featureset(uint32_t *fs) +-{ +- /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ +- uint32_t disabled_features[ +- ROUNDUP(FSCAPINTS, sizeof(unsigned long)/sizeof(uint32_t))] = {}; +- unsigned int i; +- +- for ( i = 0; i < FSCAPINTS; ++i ) +- { +- /* Clamp to known mask. */ +- fs[i] &= known_features[i]; +- +- /* +- * Identify which features with deep dependencies have been +- * disabled. +- */ +- disabled_features[i] = ~fs[i] & deep_features[i]; +- } +- +- for_each_set_bit(i, (void *)disabled_features, +- sizeof(disabled_features) * 8) +- { +- const uint32_t *dfs = x86_cpuid_lookup_deep_deps(i); +- unsigned int j; +- +- ASSERT(dfs); /* deep_features[] should guarentee this. */ +- +- for ( j = 0; j < FSCAPINTS; ++j ) +- { +- fs[j] &= ~dfs[j]; +- disabled_features[j] &= ~dfs[j]; +- } +- } +-} +- +-static void recalculate_xstate(struct cpuid_policy *p) +-{ +- uint64_t xstates = XSTATE_FP_SSE; +- uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; +- unsigned int i, Da1 = p->xstate.Da1; +- +- /* +- * The Da1 leaf is the only piece of information preserved in the common +- * case. Everything else is derived from other feature state. 
+- */ +- memset(&p->xstate, 0, sizeof(p->xstate)); +- +- if ( !p->basic.xsave ) +- return; +- +- if ( p->basic.avx ) +- { +- xstates |= X86_XCR0_YMM; +- xstate_size = max(xstate_size, +- xstate_offsets[X86_XCR0_YMM_POS] + +- xstate_sizes[X86_XCR0_YMM_POS]); +- } +- +- if ( p->feat.mpx ) +- { +- xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR; +- xstate_size = max(xstate_size, +- xstate_offsets[X86_XCR0_BNDCSR_POS] + +- xstate_sizes[X86_XCR0_BNDCSR_POS]); +- } +- +- if ( p->feat.avx512f ) +- { +- xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM; +- xstate_size = max(xstate_size, +- xstate_offsets[X86_XCR0_HI_ZMM_POS] + +- xstate_sizes[X86_XCR0_HI_ZMM_POS]); +- } +- +- if ( p->feat.pku ) +- { +- xstates |= X86_XCR0_PKRU; +- xstate_size = max(xstate_size, +- xstate_offsets[X86_XCR0_PKRU_POS] + +- xstate_sizes[X86_XCR0_PKRU_POS]); +- } +- +- p->xstate.max_size = xstate_size; +- p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; +- p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; +- +- p->xstate.Da1 = Da1; +- if ( p->xstate.xsaves ) +- { +- p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; +- p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; +- } +- else +- xstates &= ~XSTATE_XSAVES_ONLY; +- +- for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.comp)); ++i ) +- { +- uint64_t curr_xstate = 1ul << i; +- +- if ( !(xstates & curr_xstate) ) +- continue; +- +- p->xstate.comp[i].size = xstate_sizes[i]; +- p->xstate.comp[i].offset = xstate_offsets[i]; +- p->xstate.comp[i].xss = curr_xstate & XSTATE_XSAVES_ONLY; +- p->xstate.comp[i].align = curr_xstate & xstate_align; +- } +-} +- +-/* +- * Misc adjustments to the policy. Mostly clobbering reserved fields and +- * duplicating shared fields. Intentionally hidden fields are annotated. +- */ +-static void recalculate_misc(struct cpuid_policy *p) +-{ +- p->basic.raw_fms &= 0x0fff0fff; /* Clobber Processor Type on Intel. */ +- p->basic.apic_id = 0; /* Dynamic. */ +- +- p->basic.raw[0x5] = EMPTY_LEAF; /* MONITOR not exposed to guests. */ +- p->basic.raw[0x6] = EMPTY_LEAF; /* Therm/Power not exposed to guests. */ +- +- p->basic.raw[0x8] = EMPTY_LEAF; +- +- /* TODO: Rework topology logic. */ +- memset(p->topo.raw, 0, sizeof(p->topo.raw)); +- +- p->basic.raw[0xc] = EMPTY_LEAF; +- +- p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; +- +- /* Most of Power/RAS hidden from guests. */ +- p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0; +- +- p->extd.raw[0x8].d = 0; +- +- switch ( p->x86_vendor ) +- { +- case X86_VENDOR_INTEL: +- p->basic.l2_nr_queries = 1; /* Fixed to 1 query. */ +- p->basic.raw[0x3] = EMPTY_LEAF; /* PSN - always hidden. */ +- p->basic.raw[0x9] = EMPTY_LEAF; /* DCA - always hidden. */ +- +- p->extd.vendor_ebx = 0; +- p->extd.vendor_ecx = 0; +- p->extd.vendor_edx = 0; +- +- p->extd.raw[0x1].a = p->extd.raw[0x1].b = 0; +- +- p->extd.raw[0x5] = EMPTY_LEAF; +- p->extd.raw[0x6].a = p->extd.raw[0x6].b = p->extd.raw[0x6].d = 0; +- +- p->extd.raw[0x8].a &= 0x0000ffff; +- p->extd.raw[0x8].c = 0; +- break; +- +- case X86_VENDOR_AMD: +- case X86_VENDOR_HYGON: +- zero_leaves(p->basic.raw, 0x2, 0x3); +- memset(p->cache.raw, 0, sizeof(p->cache.raw)); +- zero_leaves(p->basic.raw, 0x9, 0xa); +- +- p->extd.vendor_ebx = p->basic.vendor_ebx; +- p->extd.vendor_ecx = p->basic.vendor_ecx; +- p->extd.vendor_edx = p->basic.vendor_edx; +- +- p->extd.raw_fms = p->basic.raw_fms; +- p->extd.raw[0x1].b &= 0xff00ffff; +- p->extd.e1d |= p->basic._1d & CPUID_COMMON_1D_FEATURES; +- +- p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. 
*/ +- p->extd.raw[0x8].c &= 0x0003f0ff; +- +- p->extd.raw[0x9] = EMPTY_LEAF; +- +- zero_leaves(p->extd.raw, 0xb, 0x18); +- +- /* 0x19 - TLB details. Pass through. */ +- /* 0x1a - Perf hints. Pass through. */ +- +- p->extd.raw[0x1b] = EMPTY_LEAF; /* IBS - not supported. */ +- p->extd.raw[0x1c] = EMPTY_LEAF; /* LWP - not supported. */ +- p->extd.raw[0x1d] = EMPTY_LEAF; /* TopoExt Cache */ +- p->extd.raw[0x1e] = EMPTY_LEAF; /* TopoExt APIC ID/Core/Node */ +- p->extd.raw[0x1f] = EMPTY_LEAF; /* SEV */ +- p->extd.raw[0x20] = EMPTY_LEAF; /* Platform QoS */ +- break; +- } +-} +- +-static void __init calculate_raw_policy(void) +-{ +- struct cpuid_policy *p = &raw_cpu_policy; +- +- x86_cpuid_policy_fill_native(p); +- +- /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ +- ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); +-} +- +-static void __init calculate_host_policy(void) +-{ +- struct cpuid_policy *p = &host_cpu_policy; +- unsigned int max_extd_leaf; +- +- *p = raw_cpu_policy; +- +- p->basic.max_leaf = +- min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); +- p->feat.max_subleaf = +- min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1); +- +- max_extd_leaf = p->extd.max_leaf; +- +- /* +- * For AMD/Hygon hardware before Zen3, we unilaterally modify LFENCE to be +- * dispatch serialising for Spectre mitigations. Extend max_extd_leaf +- * beyond what hardware supports, to include the feature leaf containing +- * this information. +- */ +- if ( cpu_has_lfence_dispatch ) +- max_extd_leaf = max(max_extd_leaf, 0x80000021); +- +- p->extd.max_leaf = 0x80000000 | min_t(uint32_t, max_extd_leaf & 0xffff, +- ARRAY_SIZE(p->extd.raw) - 1); +- +- x86_cpu_featureset_to_policy(boot_cpu_data.x86_capability, p); +- recalculate_xstate(p); +- recalculate_misc(p); +- +- /* When vPMU is disabled, drop it from the host policy. */ +- if ( vpmu_mode == XENPMU_MODE_OFF ) +- p->basic.raw[0xa] = EMPTY_LEAF; +- +- if ( p->extd.svm ) +- { +- /* Clamp to implemented features which require hardware support. */ +- p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | +- (1u << SVM_FEATURE_LBRV) | +- (1u << SVM_FEATURE_NRIPS) | +- (1u << SVM_FEATURE_PAUSEFILTER) | +- (1u << SVM_FEATURE_DECODEASSISTS)); +- /* Enable features which are always emulated. */ +- p->extd.raw[0xa].d |= ((1u << SVM_FEATURE_VMCBCLEAN) | +- (1u << SVM_FEATURE_TSCRATEMSR)); +- } +-} +- +-static void __init guest_common_default_feature_adjustments(uint32_t *fs) +-{ +- /* +- * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS +- * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to +- * compensate. +- * +- * Mitigate by hiding RDRAND from guests by default, unless explicitly +- * overridden on the Xen command line (cpuid=rdrand). Irrespective of the +- * default setting, guests can use RDRAND if explicitly enabled +- * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were +- * previously using RDRAND can migrate in. +- */ +- if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && +- boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x3a && +- cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) +- __clear_bit(X86_FEATURE_RDRAND, fs); +- +- /* +- * On certain hardware, speculative or errata workarounds can result in +- * TSX being placed in "force-abort" mode, where it doesn't actually +- * function as expected, but is technically compatible with the ISA. +- * +- * Do not advertise RTM to guests by default if it won't actually work. 
+- */ +- if ( rtm_disabled ) +- __clear_bit(X86_FEATURE_RTM, fs); +-} +- +-static void __init guest_common_feature_adjustments(uint32_t *fs) +-{ +- /* Unconditionally claim to be able to set the hypervisor bit. */ +- __set_bit(X86_FEATURE_HYPERVISOR, fs); +- +- /* +- * If IBRS is offered to the guest, unconditionally offer STIBP. It is a +- * nop on non-HT hardware, and has this behaviour to make heterogeneous +- * setups easier to manage. +- */ +- if ( test_bit(X86_FEATURE_IBRSB, fs) ) +- __set_bit(X86_FEATURE_STIBP, fs); +- if ( test_bit(X86_FEATURE_IBRS, fs) ) +- __set_bit(X86_FEATURE_AMD_STIBP, fs); +- +- /* +- * On hardware which supports IBRS/IBPB, we can offer IBPB independently +- * of IBRS by using the AMD feature bit. An administrator may wish for +- * performance reasons to offer IBPB without IBRS. +- */ +- if ( host_cpu_policy.feat.ibrsb ) +- __set_bit(X86_FEATURE_IBPB, fs); +-} +- +-static void __init calculate_pv_max_policy(void) +-{ +- struct cpuid_policy *p = &pv_max_cpu_policy; +- uint32_t pv_featureset[FSCAPINTS]; +- unsigned int i; +- +- *p = host_cpu_policy; +- x86_cpu_policy_to_featureset(p, pv_featureset); +- +- for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) +- pv_featureset[i] &= pv_max_featuremask[i]; +- +- /* +- * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional +- * availability, or admin choice), hide the feature. +- */ +- if ( !boot_cpu_has(X86_FEATURE_SC_MSR_PV) ) +- { +- __clear_bit(X86_FEATURE_IBRSB, pv_featureset); +- __clear_bit(X86_FEATURE_IBRS, pv_featureset); +- } +- +- guest_common_feature_adjustments(pv_featureset); +- +- sanitise_featureset(pv_featureset); +- x86_cpu_featureset_to_policy(pv_featureset, p); +- recalculate_xstate(p); +- +- p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ +-} +- +-static void __init calculate_pv_def_policy(void) +-{ +- struct cpuid_policy *p = &pv_def_cpu_policy; +- uint32_t pv_featureset[FSCAPINTS]; +- unsigned int i; +- +- *p = pv_max_cpu_policy; +- x86_cpu_policy_to_featureset(p, pv_featureset); +- +- for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) +- pv_featureset[i] &= pv_def_featuremask[i]; +- +- guest_common_feature_adjustments(pv_featureset); +- guest_common_default_feature_adjustments(pv_featureset); +- +- sanitise_featureset(pv_featureset); +- x86_cpu_featureset_to_policy(pv_featureset, p); +- recalculate_xstate(p); +-} +- +-static void __init calculate_hvm_max_policy(void) +-{ +- struct cpuid_policy *p = &hvm_max_cpu_policy; +- uint32_t hvm_featureset[FSCAPINTS]; +- unsigned int i; +- const uint32_t *hvm_featuremask; +- +- *p = host_cpu_policy; +- x86_cpu_policy_to_featureset(p, hvm_featureset); +- +- hvm_featuremask = hvm_hap_supported() ? +- hvm_hap_max_featuremask : hvm_shadow_max_featuremask; +- +- for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i ) +- hvm_featureset[i] &= hvm_featuremask[i]; +- +- /* +- * Xen can provide an (x2)APIC emulation to HVM guests even if the host's +- * (x2)APIC isn't enabled. +- */ +- __set_bit(X86_FEATURE_APIC, hvm_featureset); +- __set_bit(X86_FEATURE_X2APIC, hvm_featureset); +- +- /* +- * We don't support EFER.LMSLE at all. AMD has dropped the feature from +- * hardware and allocated a CPUID bit to indicate its absence. +- */ +- __set_bit(X86_FEATURE_NO_LMSL, hvm_featureset); +- +- /* +- * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in +- * long mode (and init_amd() has cleared it out of host capabilities), but +- * HVM guests are able if running in protected mode. 
+- */ +- if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) && +- raw_cpu_policy.basic.sep ) +- __set_bit(X86_FEATURE_SEP, hvm_featureset); +- +- /* +- * VIRT_SSBD is exposed in the default policy as a result of +- * amd_virt_spec_ctrl being set, it also needs exposing in the max policy. +- */ +- if ( amd_virt_spec_ctrl ) +- __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); +- +- /* +- * If Xen isn't virtualising MSR_SPEC_CTRL for HVM guests (functional +- * availability, or admin choice), hide the feature. +- */ +- if ( !boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ) +- { +- __clear_bit(X86_FEATURE_IBRSB, hvm_featureset); +- __clear_bit(X86_FEATURE_IBRS, hvm_featureset); +- } +- else if ( boot_cpu_has(X86_FEATURE_AMD_SSBD) ) +- /* +- * If SPEC_CTRL.SSBD is available VIRT_SPEC_CTRL.SSBD can be exposed +- * and implemented using the former. Expose in the max policy only as +- * the preference is for guests to use SPEC_CTRL.SSBD if available. +- */ +- __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); +- +- /* +- * With VT-x, some features are only supported by Xen if dedicated +- * hardware support is also available. +- */ +- if ( cpu_has_vmx ) +- { +- if ( !cpu_has_vmx_mpx ) +- __clear_bit(X86_FEATURE_MPX, hvm_featureset); +- +- if ( !cpu_has_vmx_xsaves ) +- __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); +- } +- +- guest_common_feature_adjustments(hvm_featureset); +- +- sanitise_featureset(hvm_featureset); +- x86_cpu_featureset_to_policy(hvm_featureset, p); +- recalculate_xstate(p); +-} +- +-static void __init calculate_hvm_def_policy(void) +-{ +- struct cpuid_policy *p = &hvm_def_cpu_policy; +- uint32_t hvm_featureset[FSCAPINTS]; +- unsigned int i; +- const uint32_t *hvm_featuremask; +- +- *p = hvm_max_cpu_policy; +- x86_cpu_policy_to_featureset(p, hvm_featureset); +- +- hvm_featuremask = hvm_hap_supported() ? +- hvm_hap_def_featuremask : hvm_shadow_def_featuremask; +- +- for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i ) +- hvm_featureset[i] &= hvm_featuremask[i]; +- +- guest_common_feature_adjustments(hvm_featureset); +- guest_common_default_feature_adjustments(hvm_featureset); +- +- /* +- * Only expose VIRT_SSBD if AMD_SSBD is not available, and thus +- * amd_virt_spec_ctrl is set. +- */ +- if ( amd_virt_spec_ctrl ) +- __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); +- +- sanitise_featureset(hvm_featureset); +- x86_cpu_featureset_to_policy(hvm_featureset, p); +- recalculate_xstate(p); +-} +- +-void __init init_guest_cpuid(void) +-{ +- calculate_raw_policy(); +- calculate_host_policy(); +- +- if ( IS_ENABLED(CONFIG_PV) ) +- { +- calculate_pv_max_policy(); +- calculate_pv_def_policy(); +- } +- +- if ( hvm_enabled ) +- { +- calculate_hvm_max_policy(); +- calculate_hvm_def_policy(); +- } +-} + + bool recheck_cpu_features(unsigned int cpu) + { +@@ -647,170 +32,6 @@ bool recheck_cpu_features(unsigned int cpu) + return okay; + } + +-void recalculate_cpuid_policy(struct domain *d) +-{ +- struct cpuid_policy *p = d->arch.cpuid; +- const struct cpuid_policy *max = is_pv_domain(d) +- ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) +- : (IS_ENABLED(CONFIG_HVM) ? 
&hvm_max_cpu_policy : NULL); +- uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; +- unsigned int i; +- +- if ( !max ) +- { +- ASSERT_UNREACHABLE(); +- return; +- } +- +- p->x86_vendor = x86_cpuid_lookup_vendor( +- p->basic.vendor_ebx, p->basic.vendor_ecx, p->basic.vendor_edx); +- +- p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf); +- p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf); +- p->extd.max_leaf = 0x80000000 | min(p->extd.max_leaf & 0xffff, +- ((p->x86_vendor & (X86_VENDOR_AMD | +- X86_VENDOR_HYGON)) +- ? CPUID_GUEST_NR_EXTD_AMD +- : CPUID_GUEST_NR_EXTD_INTEL) - 1); +- +- x86_cpu_policy_to_featureset(p, fs); +- x86_cpu_policy_to_featureset(max, max_fs); +- +- if ( is_hvm_domain(d) ) +- { +- /* +- * HVM domains using Shadow paging have further restrictions on their +- * available paging features. +- */ +- if ( !hap_enabled(d) ) +- { +- for ( i = 0; i < ARRAY_SIZE(max_fs); i++ ) +- max_fs[i] &= hvm_shadow_max_featuremask[i]; +- } +- +- /* Hide nested-virt if it hasn't been explicitly configured. */ +- if ( !nestedhvm_enabled(d) ) +- { +- __clear_bit(X86_FEATURE_VMX, max_fs); +- __clear_bit(X86_FEATURE_SVM, max_fs); +- } +- } +- +- /* +- * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits +- * affect how to interpret topology information in other cpuid leaves. +- */ +- __set_bit(X86_FEATURE_HTT, max_fs); +- __set_bit(X86_FEATURE_X2APIC, max_fs); +- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); +- +- /* +- * 32bit PV domains can't use any Long Mode features, and cannot use +- * SYSCALL on non-AMD hardware. +- */ +- if ( is_pv_32bit_domain(d) ) +- { +- __clear_bit(X86_FEATURE_LM, max_fs); +- if ( !(boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) +- __clear_bit(X86_FEATURE_SYSCALL, max_fs); +- } +- +- /* Clamp the toolstacks choices to reality. */ +- for ( i = 0; i < ARRAY_SIZE(fs); i++ ) +- fs[i] &= max_fs[i]; +- +- if ( p->basic.max_leaf < XSTATE_CPUID ) +- __clear_bit(X86_FEATURE_XSAVE, fs); +- +- sanitise_featureset(fs); +- +- /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ +- fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | +- cpufeat_mask(X86_FEATURE_NO_FPU_SEL)); +- fs[FEATURESET_7b0] |= (host_cpu_policy.feat._7b0 & +- (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | +- cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); +- +- x86_cpu_featureset_to_policy(fs, p); +- +- /* Pass host cacheline size through to guests. */ +- p->basic.clflush_size = max->basic.clflush_size; +- +- p->extd.maxphysaddr = min(p->extd.maxphysaddr, max->extd.maxphysaddr); +- p->extd.maxphysaddr = min_t(uint8_t, p->extd.maxphysaddr, +- paging_max_paddr_bits(d)); +- p->extd.maxphysaddr = max_t(uint8_t, p->extd.maxphysaddr, +- (p->basic.pae || p->basic.pse36) ? 36 : 32); +- +- p->extd.maxlinaddr = p->extd.lm ? 48 : 32; +- +- recalculate_xstate(p); +- recalculate_misc(p); +- +- for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) +- { +- if ( p->cache.subleaf[i].type >= 1 && +- p->cache.subleaf[i].type <= 3 ) +- { +- /* Subleaf has a valid cache type. Zero reserved fields. */ +- p->cache.raw[i].a &= 0xffffc3ffu; +- p->cache.raw[i].d &= 0x00000007u; +- } +- else +- { +- /* Subleaf is not valid. Zero the rest of the union. 
*/ +- zero_leaves(p->cache.raw, i, ARRAY_SIZE(p->cache.raw) - 1); +- break; +- } +- } +- +- if ( vpmu_mode == XENPMU_MODE_OFF || +- ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) +- p->basic.raw[0xa] = EMPTY_LEAF; +- +- if ( !p->extd.svm ) +- p->extd.raw[0xa] = EMPTY_LEAF; +- +- if ( !p->extd.page1gb ) +- p->extd.raw[0x19] = EMPTY_LEAF; +-} +- +-void __init init_dom0_cpuid_policy(struct domain *d) +-{ +- struct cpuid_policy *p = d->arch.cpuid; +- +- /* dom0 can't migrate. Give it ITSC if available. */ +- if ( cpu_has_itsc ) +- p->extd.itsc = true; +- +- /* +- * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, +- * so dom0 can turn off workarounds as appropriate. Temporary, until the +- * domain policy logic gains a better understanding of MSRs. +- */ +- if ( cpu_has_arch_caps ) +- p->feat.arch_caps = true; +- +- /* Apply dom0-cpuid= command line settings, if provided. */ +- if ( dom0_cpuid_cmdline ) +- { +- uint32_t fs[FSCAPINTS]; +- unsigned int i; +- +- x86_cpu_policy_to_featureset(p, fs); +- +- for ( i = 0; i < ARRAY_SIZE(fs); ++i ) +- { +- fs[i] |= dom0_enable_feat [i]; +- fs[i] &= ~dom0_disable_feat[i]; +- } +- +- x86_cpu_featureset_to_policy(fs, p); +- +- recalculate_cpuid_policy(d); +- } +-} +- + void guest_cpuid(const struct vcpu *v, uint32_t leaf, + uint32_t subleaf, struct cpuid_leaf *res) + { +@@ -1181,27 +402,6 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, + } + } + +-static void __init __maybe_unused build_assertions(void) +-{ +- BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS); +- BUILD_BUG_ON(ARRAY_SIZE(pv_max_featuremask) != FSCAPINTS); +- BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_max_featuremask) != FSCAPINTS); +- BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_max_featuremask) != FSCAPINTS); +- BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS); +- +- /* Find some more clever allocation scheme if this trips. */ +- BUILD_BUG_ON(sizeof(struct cpuid_policy) > PAGE_SIZE); +- +- BUILD_BUG_ON(sizeof(raw_cpu_policy.basic) != +- sizeof(raw_cpu_policy.basic.raw)); +- BUILD_BUG_ON(sizeof(raw_cpu_policy.feat) != +- sizeof(raw_cpu_policy.feat.raw)); +- BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) != +- sizeof(raw_cpu_policy.xstate.raw)); +- BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) != +- sizeof(raw_cpu_policy.extd.raw)); +-} +- + /* + * Local variables: + * mode: C +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index 2f20df787425..b486c0efe061 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -77,7 +77,6 @@ + #include + #include + #include +-#include + + #include + +diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h +index 13e2a1f86d13..b361537a602b 100644 +--- a/xen/arch/x86/include/asm/cpu-policy.h ++++ b/xen/arch/x86/include/asm/cpu-policy.h +@@ -18,4 +18,10 @@ void init_guest_cpu_policies(void); + /* Allocate and initialise a CPU policy suitable for the domain. */ + int init_domain_cpu_policy(struct domain *d); + ++/* Apply dom0-specific tweaks to the CPUID policy. */ ++void init_dom0_cpuid_policy(struct domain *d); ++ ++/* Clamp the CPUID policy to reality. 
*/ ++void recalculate_cpuid_policy(struct domain *d); ++ + #endif /* X86_CPU_POLICY_H */ +diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h +index 7f81b998ce01..b32ba0bbfe5c 100644 +--- a/xen/arch/x86/include/asm/cpuid.h ++++ b/xen/arch/x86/include/asm/cpuid.h +@@ -8,14 +8,10 @@ + #include + #include + +-#include +- + #include + + extern const uint32_t known_features[FSCAPINTS]; + +-void init_guest_cpuid(void); +- + /* + * Expected levelling capabilities (given cpuid vendor/family information), + * and levelling capabilities actually available (given MSR probing). +@@ -49,13 +45,8 @@ extern struct cpuidmasks cpuidmask_defaults; + /* Check that all previously present features are still available. */ + bool recheck_cpu_features(unsigned int cpu); + +-/* Apply dom0-specific tweaks to the CPUID policy. */ +-void init_dom0_cpuid_policy(struct domain *d); +- +-/* Clamp the CPUID policy to reality. */ +-void recalculate_cpuid_policy(struct domain *d); +- + struct vcpu; ++struct cpuid_leaf; + void guest_cpuid(const struct vcpu *v, uint32_t leaf, + uint32_t subleaf, struct cpuid_leaf *res); + +diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c +index f94f28c8e271..95492715d8ad 100644 +--- a/xen/arch/x86/pv/domain.c ++++ b/xen/arch/x86/pv/domain.c +@@ -10,6 +10,7 @@ + #include + #include + ++#include + #include + #include + #include +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 1d62ea1ad9d9..0c00ea875d6f 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -51,7 +51,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -1943,7 +1942,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) + if ( !tboot_protect_mem_regions() ) + panic("Could not protect TXT memory regions\n"); + +- init_guest_cpuid(); + init_guest_cpu_policies(); + + if ( xen_cpuidle ) +-- +2.39.2 + diff --git a/0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch b/0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch new file mode 100644 index 00000000..0e1071ac --- /dev/null +++ b/0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch @@ -0,0 +1,142 @@ +From 42b3d10d5d0e10586a4e3cbb5b428fd4ecac570b Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 3 Apr 2023 20:03:57 +0100 +Subject: [PATCH 14/35] x86/emul: Switch x86_emulate_ctxt to cpu_policy + +As with struct domain, retain cpuid as a valid alias for local code clarity. 
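+
+As a self-contained sketch of the aliasing idiom (illustrative names only,
+not the full x86_emulate_ctxt):
+
+  #include <assert.h>
+
+  struct cpu_policy { unsigned int x86_vendor; };
+
+  struct demo_ctxt {
+      /* Anonymous union: both names refer to the same pointer. */
+      union {
+          struct cpu_policy *cpu_policy;
+          struct cpu_policy *cpuid;      /* legacy alias */
+      };
+  };
+
+  int main(void)
+  {
+      struct cpu_policy p = { .x86_vendor = 1 };
+      struct demo_ctxt ctxt = { .cpu_policy = &p };
+
+      /* Pre-existing code reading ctxt.cpuid sees the same object. */
+      assert(ctxt.cpuid == ctxt.cpu_policy);
+      return 0;
+  }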
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 441b1b2a50ea3656954d75e06d42c96d619ea0fc) +--- + tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 2 +- + tools/tests/x86_emulator/test_x86_emulator.c | 2 +- + tools/tests/x86_emulator/x86-emulate.c | 2 +- + xen/arch/x86/hvm/emulate.c | 4 ++-- + xen/arch/x86/mm/shadow/hvm.c | 2 +- + xen/arch/x86/pv/emul-priv-op.c | 2 +- + xen/arch/x86/pv/ro-page-fault.c | 2 +- + xen/arch/x86/x86_emulate/x86_emulate.h | 7 +++++-- + 8 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c +index 966e46bee199..4885a68210d0 100644 +--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c ++++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c +@@ -893,7 +893,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size) + struct x86_emulate_ctxt ctxt = { + .data = &state, + .regs = &input.regs, +- .cpuid = &cp, ++ .cpu_policy = &cp, + .addr_size = 8 * sizeof(void *), + .sp_size = 8 * sizeof(void *), + }; +diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c +index 31586f805726..7b7fbaaf45ec 100644 +--- a/tools/tests/x86_emulator/test_x86_emulator.c ++++ b/tools/tests/x86_emulator/test_x86_emulator.c +@@ -909,7 +909,7 @@ int main(int argc, char **argv) + + ctxt.regs = ®s; + ctxt.force_writeback = 0; +- ctxt.cpuid = &cp; ++ ctxt.cpu_policy = &cp; + ctxt.lma = sizeof(void *) == 8; + ctxt.addr_size = 8 * sizeof(void *); + ctxt.sp_size = 8 * sizeof(void *); +diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c +index ea286d6ad87b..5ad282b57545 100644 +--- a/tools/tests/x86_emulator/x86-emulate.c ++++ b/tools/tests/x86_emulator/x86-emulate.c +@@ -38,7 +38,7 @@ + #define put_stub(stb) ((stb).addr = 0) + + uint32_t mxcsr_mask = 0x0000ffbf; +-struct cpuid_policy cp; ++struct cpu_policy cp; + + static char fpu_save_area[0x4000] __attribute__((__aligned__((64)))); + static bool use_xsave; +diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c +index cb221f70e8f0..275451dd3615 100644 +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -2772,7 +2772,7 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla) + void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr, + unsigned int errcode) + { +- struct hvm_emulate_ctxt ctx = {{ 0 }}; ++ struct hvm_emulate_ctxt ctx = {}; + int rc; + + hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs()); +@@ -2847,7 +2847,7 @@ void hvm_emulate_init_once( + + hvmemul_ctxt->validate = validate; + hvmemul_ctxt->ctxt.regs = regs; +- hvmemul_ctxt->ctxt.cpuid = curr->domain->arch.cpuid; ++ hvmemul_ctxt->ctxt.cpu_policy = curr->domain->arch.cpu_policy; + hvmemul_ctxt->ctxt.force_writeback = true; + } + +diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c +index 39abf4732d91..c00ce550a1da 100644 +--- a/xen/arch/x86/mm/shadow/hvm.c ++++ b/xen/arch/x86/mm/shadow/hvm.c +@@ -319,7 +319,7 @@ const struct x86_emulate_ops *shadow_init_emulation( + memset(sh_ctxt, 0, sizeof(*sh_ctxt)); + + sh_ctxt->ctxt.regs = regs; +- sh_ctxt->ctxt.cpuid = curr->domain->arch.cpuid; ++ sh_ctxt->ctxt.cpu_policy = curr->domain->arch.cpu_policy; + sh_ctxt->ctxt.lma = hvm_long_mode_active(curr); + + /* Segment cache initialisation. Primed with CS. 
*/ +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index 5da00e24e4ff..ab52768271c5 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -1327,7 +1327,7 @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) + struct domain *currd = curr->domain; + struct priv_op_ctxt ctxt = { + .ctxt.regs = regs, +- .ctxt.cpuid = currd->arch.cpuid, ++ .ctxt.cpu_policy = currd->arch.cpu_policy, + .ctxt.lma = !is_pv_32bit_domain(currd), + }; + int rc; +diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c +index 5963f5ee2d51..0d02c7d2ab10 100644 +--- a/xen/arch/x86/pv/ro-page-fault.c ++++ b/xen/arch/x86/pv/ro-page-fault.c +@@ -356,7 +356,7 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs) + unsigned int addr_size = is_pv_32bit_domain(currd) ? 32 : BITS_PER_LONG; + struct x86_emulate_ctxt ctxt = { + .regs = regs, +- .cpuid = currd->arch.cpuid, ++ .cpu_policy = currd->arch.cpu_policy, + .addr_size = addr_size, + .sp_size = addr_size, + .lma = addr_size > 32, +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h +index c89c53e83bfe..e1be0435f891 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.h ++++ b/xen/arch/x86/x86_emulate/x86_emulate.h +@@ -565,8 +565,11 @@ struct x86_emulate_ctxt + * Input-only state: + */ + +- /* CPUID Policy for the domain. */ +- const struct cpuid_policy *cpuid; ++ /* CPU policy for the domain. Allow aliases for local code clarity. */ ++ union { ++ struct cpu_policy *cpu_policy; ++ struct cpu_policy *cpuid; ++ }; + + /* Set this if writes may have side effects. */ + bool force_writeback; +-- +2.39.2 + diff --git a/0335-tools-fuzz-Rework-afl-policy-fuzzer.patch b/0335-tools-fuzz-Rework-afl-policy-fuzzer.patch new file mode 100644 index 00000000..08a089c1 --- /dev/null +++ b/0335-tools-fuzz-Rework-afl-policy-fuzzer.patch @@ -0,0 +1,133 @@ +From eb84c7783e997e4b4bca0e000c7827a556e57d58 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 3 Apr 2023 17:14:14 +0100 +Subject: [PATCH 15/35] tools/fuzz: Rework afl-policy-fuzzer + +With cpuid_policy and msr_policy merged to form cpu_policy, merge the +respective fuzzing logic. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit a16dcd48c2db3f6820a15ea482551d289bd9cdec) +--- + tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 57 ++++++++--------------- + 1 file changed, 20 insertions(+), 37 deletions(-) + +diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +index 316eb0efe034..585324e41774 100644 +--- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c ++++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +@@ -16,16 +16,19 @@ static bool debug; + + #define EMPTY_LEAF ((struct cpuid_leaf){}) + +-static void check_cpuid(struct cpuid_policy *cp) ++static void check_policy(struct cpu_policy *cp) + { +- struct cpuid_policy new = {}; ++ struct cpu_policy new = {}; + size_t data_end; + xen_cpuid_leaf_t *leaves = malloc(CPUID_MAX_SERIALISED_LEAVES * + sizeof(xen_cpuid_leaf_t)); +- unsigned int nr = CPUID_MAX_SERIALISED_LEAVES; ++ xen_msr_entry_t *msrs = malloc(MSR_MAX_SERIALISED_ENTRIES * ++ sizeof(xen_cpuid_leaf_t)); ++ unsigned int nr_leaves = CPUID_MAX_SERIALISED_LEAVES; ++ unsigned int nr_msrs = MSR_MAX_SERIALISED_ENTRIES; + int rc; + +- if ( !leaves ) ++ if ( !leaves || !msrs ) + return; + + /* +@@ -49,12 +52,19 @@ static void check_cpuid(struct cpuid_policy *cp) + x86_cpuid_policy_recalc_synth(cp); + + /* Serialise... 
*/ +- rc = x86_cpuid_copy_to_buffer(cp, leaves, &nr); ++ rc = x86_cpuid_copy_to_buffer(cp, leaves, &nr_leaves); ++ assert(rc == 0); ++ assert(nr_leaves <= CPUID_MAX_SERIALISED_LEAVES); ++ ++ rc = x86_msr_copy_to_buffer(cp, msrs, &nr_msrs); + assert(rc == 0); +- assert(nr <= CPUID_MAX_SERIALISED_LEAVES); ++ assert(nr_msrs <= MSR_MAX_SERIALISED_ENTRIES); + + /* ... and deserialise. */ +- rc = x86_cpuid_copy_from_buffer(&new, leaves, nr, NULL, NULL); ++ rc = x86_cpuid_copy_from_buffer(&new, leaves, nr_leaves, NULL, NULL); ++ assert(rc == 0); ++ ++ rc = x86_msr_copy_from_buffer(&new, msrs, nr_msrs, NULL); + assert(rc == 0); + + /* The result after serialisation/deserialisaion should be identical... */ +@@ -76,28 +86,6 @@ static void check_cpuid(struct cpuid_policy *cp) + free(leaves); + } + +-static void check_msr(struct msr_policy *mp) +-{ +- struct msr_policy new = {}; +- xen_msr_entry_t *msrs = malloc(MSR_MAX_SERIALISED_ENTRIES * +- sizeof(xen_msr_entry_t)); +- unsigned int nr = MSR_MAX_SERIALISED_ENTRIES; +- int rc; +- +- if ( !msrs ) +- return; +- +- rc = x86_msr_copy_to_buffer(mp, msrs, &nr); +- assert(rc == 0); +- assert(nr <= MSR_MAX_SERIALISED_ENTRIES); +- +- rc = x86_msr_copy_from_buffer(&new, msrs, nr, NULL); +- assert(rc == 0); +- assert(memcmp(mp, &new, sizeof(*mp)) == 0); +- +- free(msrs); +-} +- + int main(int argc, char **argv) + { + FILE *fp = NULL; +@@ -144,8 +132,7 @@ int main(int argc, char **argv) + while ( __AFL_LOOP(1000) ) + #endif + { +- struct cpuid_policy *cp = NULL; +- struct msr_policy *mp = NULL; ++ struct cpu_policy *cp = NULL; + + if ( fp != stdin ) + { +@@ -160,22 +147,18 @@ int main(int argc, char **argv) + } + + cp = calloc(1, sizeof(*cp)); +- mp = calloc(1, sizeof(*mp)); +- if ( !cp || !mp ) ++ if ( !cp ) + goto skip; + + fread(cp, sizeof(*cp), 1, fp); +- fread(mp, sizeof(*mp), 1, fp); + + if ( !feof(fp) ) + goto skip; + +- check_cpuid(cp); +- check_msr(mp); ++ check_policy(cp); + + skip: + free(cp); +- free(mp); + + if ( fp != stdin ) + { +-- +2.39.2 + diff --git a/0336-libx86-Update-library-API-for-cpu_policy.patch b/0336-libx86-Update-library-API-for-cpu_policy.patch new file mode 100644 index 00000000..381e659f --- /dev/null +++ b/0336-libx86-Update-library-API-for-cpu_policy.patch @@ -0,0 +1,448 @@ +From 34e02246efaac0038fa5c57cb810c4e5f523a80f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 3 Apr 2023 14:18:43 +0100 +Subject: [PATCH 16/35] libx86: Update library API for cpu_policy +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Adjust the API and comments appropriately. + +x86_cpu_policy_fill_native() will eventually contain MSR reads, but leave a +TODO in the short term. + +No practical change. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 1b67fccf3b02825f6a036bad06cd17963d0972d2) + +tools/libs/guest: Fix build following libx86 changes + +I appear to have lost this hunk somewhere... 
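+
+For reference, a sketch of how the renamed entry points are called on the
+tools side (derived from the call sites below; assumes the usual tools
+include path, and trims error handling):
+
+  #include <xen/lib/x86/cpu-policy.h>
+
+  static int serialise_native_cpuid(xen_cpuid_leaf_t *leaves,
+                                    uint32_t *nr_leaves)
+  {
+      struct cpu_policy p = {};
+
+      /* CPUID data only for now; the MSR side is still a TODO. */
+      x86_cpu_policy_fill_native(&p);
+
+      /* On success, *nr_leaves is updated to the number written. */
+      return x86_cpuid_copy_to_buffer(&p, leaves, nr_leaves);
+  }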
+ +Fixes: 1b67fccf3b02 ("libx86: Update library API for cpu_policy") +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné +(cherry picked from commit 48d76e6da92f9ef76c8468e299349a2f698362fa) +--- + tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 4 +- + tools/libs/guest/xg_cpuid_x86.c | 2 +- + tools/tests/cpu-policy/test-cpu-policy.c | 4 +- + tools/tests/x86_emulator/x86-emulate.c | 2 +- + xen/arch/x86/cpu-policy.c | 4 +- + xen/arch/x86/cpu/common.c | 2 +- + xen/arch/x86/domctl.c | 2 +- + xen/arch/x86/xstate.c | 4 +- + xen/include/xen/lib/x86/cpu-policy.h | 49 +++++++++++++---------- + xen/lib/x86/cpuid.c | 26 ++++++------ + xen/lib/x86/msr.c | 4 +- + 11 files changed, 56 insertions(+), 47 deletions(-) + +diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +index 585324e41774..11df2f780234 100644 +--- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c ++++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c +@@ -48,8 +48,8 @@ static void check_policy(struct cpu_policy *cp) + * Fix up the data in the source policy which isn't expected to survive + * serialisation. + */ +- x86_cpuid_policy_clear_out_of_range_leaves(cp); +- x86_cpuid_policy_recalc_synth(cp); ++ x86_cpu_policy_clear_out_of_range_leaves(cp); ++ x86_cpu_policy_recalc_synth(cp); + + /* Serialise... */ + rc = x86_cpuid_copy_to_buffer(cp, leaves, &nr_leaves); +diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c +index b38e3a9de350..5d658534ef6e 100644 +--- a/tools/libs/guest/xg_cpuid_x86.c ++++ b/tools/libs/guest/xg_cpuid_x86.c +@@ -555,7 +555,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, + const uint32_t *dfs; + + if ( !test_bit(b, disabled_features) || +- !(dfs = x86_cpuid_lookup_deep_deps(b)) ) ++ !(dfs = x86_cpu_policy_lookup_deep_deps(b)) ) + continue; + + for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i ) +diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c +index 4f3d09f1b780..fea0eb8c3549 100644 +--- a/tools/tests/cpu-policy/test-cpu-policy.c ++++ b/tools/tests/cpu-policy/test-cpu-policy.c +@@ -105,7 +105,7 @@ static void test_cpuid_current(void) + + printf("Testing CPUID on current CPU\n"); + +- x86_cpuid_policy_fill_native(&p); ++ x86_cpu_policy_fill_native(&p); + + rc = x86_cpuid_copy_to_buffer(&p, leaves, &nr); + if ( rc != 0 ) +@@ -554,7 +554,7 @@ static void test_cpuid_out_of_range_clearing(void) + void *ptr; + unsigned int nr_markers; + +- x86_cpuid_policy_clear_out_of_range_leaves(p); ++ x86_cpu_policy_clear_out_of_range_leaves(p); + + /* Count the number of 0xc2's still remaining. 
*/ + for ( ptr = p, nr_markers = 0; +diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c +index 5ad282b57545..3a092ea3ec7e 100644 +--- a/tools/tests/x86_emulator/x86-emulate.c ++++ b/tools/tests/x86_emulator/x86-emulate.c +@@ -85,7 +85,7 @@ bool emul_test_init(void) + + unsigned long sp; + +- x86_cpuid_policy_fill_native(&cp); ++ x86_cpu_policy_fill_native(&cp); + + /* + * The emulator doesn't use these instructions, so can always emulate +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index 9c506b6614f2..19766e87b68f 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -176,7 +176,7 @@ static void sanitise_featureset(uint32_t *fs) + for_each_set_bit(i, (void *)disabled_features, + sizeof(disabled_features) * 8) + { +- const uint32_t *dfs = x86_cpuid_lookup_deep_deps(i); ++ const uint32_t *dfs = x86_cpu_policy_lookup_deep_deps(i); + unsigned int j; + + ASSERT(dfs); /* deep_features[] should guarentee this. */ +@@ -347,7 +347,7 @@ static void __init calculate_raw_policy(void) + { + struct cpu_policy *p = &raw_cpu_policy; + +- x86_cpuid_policy_fill_native(p); ++ x86_cpu_policy_fill_native(p); + + /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ + ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); +diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c +index 665200db382f..ce692328086e 100644 +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -72,7 +72,7 @@ void __init setup_clear_cpu_cap(unsigned int cap) + __builtin_return_address(0), cap); + + __clear_bit(cap, boot_cpu_data.x86_capability); +- dfs = x86_cpuid_lookup_deep_deps(cap); ++ dfs = x86_cpu_policy_lookup_deep_deps(cap); + + if (!dfs) + return; +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 6d15d0c29c4e..1ce12b79e674 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -67,7 +67,7 @@ static int update_domain_cpu_policy(struct domain *d, + goto out; + + /* Trim any newly-stale out-of-range leaves. */ +- x86_cpuid_policy_clear_out_of_range_leaves(new); ++ x86_cpu_policy_clear_out_of_range_leaves(new); + + /* Audit the combined dataset. 
*/ + ret = x86_cpu_policies_are_compatible(sys, new, &err); +diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c +index 3b32bdc51cf4..cea3d0b81f0b 100644 +--- a/xen/arch/x86/xstate.c ++++ b/xen/arch/x86/xstate.c +@@ -685,7 +685,7 @@ void xstate_init(struct cpuinfo_x86 *c) + int validate_xstate(const struct domain *d, uint64_t xcr0, uint64_t xcr0_accum, + const struct xsave_hdr *hdr) + { +- uint64_t xcr0_max = cpuid_policy_xcr0_max(d->arch.cpuid); ++ uint64_t xcr0_max = cpu_policy_xcr0_max(d->arch.cpuid); + unsigned int i; + + if ( (hdr->xstate_bv & ~xcr0_accum) || +@@ -709,7 +709,7 @@ int validate_xstate(const struct domain *d, uint64_t xcr0, uint64_t xcr0_accum, + int handle_xsetbv(u32 index, u64 new_bv) + { + struct vcpu *curr = current; +- uint64_t xcr0_max = cpuid_policy_xcr0_max(curr->domain->arch.cpuid); ++ uint64_t xcr0_max = cpu_policy_xcr0_max(curr->domain->arch.cpuid); + u64 mask; + + if ( index != XCR_XFEATURE_ENABLED_MASK ) +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index 57b4633c861e..cf7de0f29ccd 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -399,33 +399,38 @@ void x86_cpu_policy_to_featureset(const struct cpu_policy *p, + void x86_cpu_featureset_to_policy(const uint32_t fs[FEATURESET_NR_ENTRIES], + struct cpu_policy *p); + +-static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) ++static inline uint64_t cpu_policy_xcr0_max(const struct cpu_policy *p) + { + return ((uint64_t)p->xstate.xcr0_high << 32) | p->xstate.xcr0_low; + } + +-static inline uint64_t cpuid_policy_xstates(const struct cpuid_policy *p) ++static inline uint64_t cpu_policy_xstates(const struct cpu_policy *p) + { + uint64_t val = p->xstate.xcr0_high | p->xstate.xss_high; + + return (val << 32) | p->xstate.xcr0_low | p->xstate.xss_low; + } + +-const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); ++/** ++ * For a specific feature, look up the dependent features. Returns NULL if ++ * this feature has no dependencies. Otherwise return a featureset of ++ * dependent features, which has been recursively flattened. ++ */ ++const uint32_t *x86_cpu_policy_lookup_deep_deps(uint32_t feature); + + /** +- * Recalculate the content in a CPUID policy which is derived from raw data. ++ * Recalculate the content in a CPU policy which is derived from raw data. + */ +-void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p); ++void x86_cpu_policy_recalc_synth(struct cpu_policy *p); + + /** +- * Fill a CPUID policy using the native CPUID instruction. ++ * Fill CPU policy using the native CPUID/RDMSR instruction. + * + * No sanitisation is performed, but synthesised values are calculated. + * Values may be influenced by a hypervisor or from masking/faulting + * configuration. + */ +-void x86_cpuid_policy_fill_native(struct cpuid_policy *p); ++void x86_cpu_policy_fill_native(struct cpu_policy *p); + + /** + * Clear leaf data beyond the policies max leaf/subleaf settings. +@@ -436,7 +441,7 @@ void x86_cpuid_policy_fill_native(struct cpuid_policy *p); + * with out-of-range leaves with stale content in them. This helper clears + * them. + */ +-void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); ++void x86_cpu_policy_clear_out_of_range_leaves(struct cpu_policy *p); + + #ifdef __XEN__ + #include +@@ -449,9 +454,10 @@ typedef xen_msr_entry_t msr_entry_buffer_t[]; + #endif + + /** +- * Serialise a cpuid_policy object into an array of cpuid leaves. 
++ * Serialise the CPUID leaves of a cpu_policy object into an array of cpuid ++ * leaves. + * +- * @param policy The cpuid_policy to serialise. ++ * @param policy The cpu_policy to serialise. + * @param leaves The array of leaves to serialise into. + * @param nr_entries The number of entries in 'leaves'. + * @returns -errno +@@ -460,13 +466,14 @@ typedef xen_msr_entry_t msr_entry_buffer_t[]; + * leaves array is too short. On success, nr_entries is updated with the + * actual number of leaves written. + */ +-int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, ++int x86_cpuid_copy_to_buffer(const struct cpu_policy *policy, + cpuid_leaf_buffer_t leaves, uint32_t *nr_entries); + + /** +- * Unserialise a cpuid_policy object from an array of cpuid leaves. ++ * Unserialise the CPUID leaves of a cpu_policy object into an array of cpuid ++ * leaves. + * +- * @param policy The cpuid_policy to unserialise into. ++ * @param policy The cpu_policy to unserialise into. + * @param leaves The array of leaves to unserialise from. + * @param nr_entries The number of entries in 'leaves'. + * @param err_leaf Optional hint for error diagnostics. +@@ -474,21 +481,21 @@ int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, + * @returns -errno + * + * Reads at most CPUID_MAX_SERIALISED_LEAVES. May return -ERANGE if an +- * incoming leaf is out of range of cpuid_policy, in which case the optional ++ * incoming leaf is out of range of cpu_policy, in which case the optional + * err_* pointers will identify the out-of-range indicies. + * + * No content validation of in-range leaves is performed. Synthesised data is + * recalculated. + */ +-int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, ++int x86_cpuid_copy_from_buffer(struct cpu_policy *policy, + const cpuid_leaf_buffer_t leaves, + uint32_t nr_entries, uint32_t *err_leaf, + uint32_t *err_subleaf); + + /** +- * Serialise an msr_policy object into an array. ++ * Serialise the MSRs of a cpu_policy object into an array. + * +- * @param policy The msr_policy to serialise. ++ * @param policy The cpu_policy to serialise. + * @param msrs The array of msrs to serialise into. + * @param nr_entries The number of entries in 'msrs'. + * @returns -errno +@@ -497,13 +504,13 @@ int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, + * buffer array is too short. On success, nr_entries is updated with the + * actual number of msrs written. + */ +-int x86_msr_copy_to_buffer(const struct msr_policy *policy, ++int x86_msr_copy_to_buffer(const struct cpu_policy *policy, + msr_entry_buffer_t msrs, uint32_t *nr_entries); + + /** +- * Unserialise an msr_policy object from an array of msrs. ++ * Unserialise the MSRs of a cpu_policy object from an array of msrs. + * +- * @param policy The msr_policy object to unserialise into. ++ * @param policy The cpu_policy object to unserialise into. + * @param msrs The array of msrs to unserialise from. + * @param nr_entries The number of entries in 'msrs'. + * @param err_msr Optional hint for error diagnostics. +@@ -517,7 +524,7 @@ int x86_msr_copy_to_buffer(const struct msr_policy *policy, + * + * No content validation is performed on the data stored in the policy object. 
+ */ +-int x86_msr_copy_from_buffer(struct msr_policy *policy, ++int x86_msr_copy_from_buffer(struct cpu_policy *policy, + const msr_entry_buffer_t msrs, uint32_t nr_entries, + uint32_t *err_msr); + +diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c +index 734e90823a63..68aafb404927 100644 +--- a/xen/lib/x86/cpuid.c ++++ b/xen/lib/x86/cpuid.c +@@ -102,13 +102,13 @@ void x86_cpu_featureset_to_policy( + p->feat._7d1 = fs[FEATURESET_7d1]; + } + +-void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p) ++void x86_cpu_policy_recalc_synth(struct cpu_policy *p) + { + p->x86_vendor = x86_cpuid_lookup_vendor( + p->basic.vendor_ebx, p->basic.vendor_ecx, p->basic.vendor_edx); + } + +-void x86_cpuid_policy_fill_native(struct cpuid_policy *p) ++void x86_cpu_policy_fill_native(struct cpu_policy *p) + { + unsigned int i; + +@@ -199,7 +199,7 @@ void x86_cpuid_policy_fill_native(struct cpuid_policy *p) + cpuid_count_leaf(0xd, 0, &p->xstate.raw[0]); + cpuid_count_leaf(0xd, 1, &p->xstate.raw[1]); + +- xstates = cpuid_policy_xstates(p); ++ xstates = cpu_policy_xstates(p); + + /* This logic will probably need adjusting when XCR0[63] gets used. */ + BUILD_BUG_ON(ARRAY_SIZE(p->xstate.raw) > 63); +@@ -222,10 +222,12 @@ void x86_cpuid_policy_fill_native(struct cpuid_policy *p) + p->hv_limit = 0; + p->hv2_limit = 0; + +- x86_cpuid_policy_recalc_synth(p); ++ /* TODO MSRs */ ++ ++ x86_cpu_policy_recalc_synth(p); + } + +-void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) ++void x86_cpu_policy_clear_out_of_range_leaves(struct cpu_policy *p) + { + unsigned int i; + +@@ -260,7 +262,7 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) + zero_leaves(p->topo.raw, i, ARRAY_SIZE(p->topo.raw) - 1); + } + +- if ( p->basic.max_leaf < 0xd || !cpuid_policy_xstates(p) ) ++ if ( p->basic.max_leaf < 0xd || !cpu_policy_xstates(p) ) + memset(p->xstate.raw, 0, sizeof(p->xstate.raw)); + else + { +@@ -268,7 +270,7 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) + BUILD_BUG_ON(ARRAY_SIZE(p->xstate.raw) > 63); + + /* First two leaves always valid. Rest depend on xstates. 
*/ +- i = max(2, 64 - __builtin_clzll(cpuid_policy_xstates(p))); ++ i = max(2, 64 - __builtin_clzll(cpu_policy_xstates(p))); + + zero_leaves(p->xstate.raw, i, + ARRAY_SIZE(p->xstate.raw) - 1); +@@ -278,7 +280,7 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) + ARRAY_SIZE(p->extd.raw) - 1); + } + +-const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature) ++const uint32_t *x86_cpu_policy_lookup_deep_deps(uint32_t feature) + { + static const uint32_t deep_features[] = INIT_DEEP_FEATURES; + static const struct { +@@ -333,7 +335,7 @@ static int copy_leaf_to_buffer(uint32_t leaf, uint32_t subleaf, + return 0; + } + +-int x86_cpuid_copy_to_buffer(const struct cpuid_policy *p, ++int x86_cpuid_copy_to_buffer(const struct cpu_policy *p, + cpuid_leaf_buffer_t leaves, uint32_t *nr_entries_p) + { + const uint32_t nr_entries = *nr_entries_p; +@@ -383,7 +385,7 @@ int x86_cpuid_copy_to_buffer(const struct cpuid_policy *p, + + case 0xd: + { +- uint64_t xstates = cpuid_policy_xstates(p); ++ uint64_t xstates = cpu_policy_xstates(p); + + COPY_LEAF(leaf, 0, &p->xstate.raw[0]); + COPY_LEAF(leaf, 1, &p->xstate.raw[1]); +@@ -419,7 +421,7 @@ int x86_cpuid_copy_to_buffer(const struct cpuid_policy *p, + return 0; + } + +-int x86_cpuid_copy_from_buffer(struct cpuid_policy *p, ++int x86_cpuid_copy_from_buffer(struct cpu_policy *p, + const cpuid_leaf_buffer_t leaves, + uint32_t nr_entries, uint32_t *err_leaf, + uint32_t *err_subleaf) +@@ -522,7 +524,7 @@ int x86_cpuid_copy_from_buffer(struct cpuid_policy *p, + } + } + +- x86_cpuid_policy_recalc_synth(p); ++ x86_cpu_policy_recalc_synth(p); + + return 0; + +diff --git a/xen/lib/x86/msr.c b/xen/lib/x86/msr.c +index c4d885e7b568..e04b9ca01302 100644 +--- a/xen/lib/x86/msr.c ++++ b/xen/lib/x86/msr.c +@@ -23,7 +23,7 @@ static int copy_msr_to_buffer(uint32_t idx, uint64_t val, + return 0; + } + +-int x86_msr_copy_to_buffer(const struct msr_policy *p, ++int x86_msr_copy_to_buffer(const struct cpu_policy *p, + msr_entry_buffer_t msrs, uint32_t *nr_entries_p) + { + const uint32_t nr_entries = *nr_entries_p; +@@ -48,7 +48,7 @@ int x86_msr_copy_to_buffer(const struct msr_policy *p, + return 0; + } + +-int x86_msr_copy_from_buffer(struct msr_policy *p, ++int x86_msr_copy_from_buffer(struct cpu_policy *p, + const msr_entry_buffer_t msrs, uint32_t nr_entries, + uint32_t *err_msr) + { +-- +2.39.2 + diff --git a/0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch b/0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch new file mode 100644 index 00000000..3db33b0e --- /dev/null +++ b/0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch @@ -0,0 +1,334 @@ +From c07eb947082104a75fb4b58b3f85f5076577c728 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 29 Mar 2023 13:07:03 +0100 +Subject: [PATCH 17/35] x86: Remove temporary {cpuid,msr}_policy defines + +With all code areas updated, drop the temporary defines and adjust all +remaining users. + +No practical change. 
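+
+The typical adjustment, shown here side by side (representative sketch of
+the hunks below):
+
+  /* Before: via the temporary aliases... */
+  const struct cpuid_policy *cp = d->arch.cpuid;
+  const struct msr_policy *mp = d->arch.msr;
+
+  /* ...after: one object, and the real type name. */
+  const struct cpu_policy *cp = d->arch.cpu_policy;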
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 994c1553a158ada9db5ab64c9178a0d23c0a42ce) +--- + xen/arch/x86/cpu/mcheck/mce_intel.c | 2 +- + xen/arch/x86/cpuid.c | 2 +- + xen/arch/x86/domain.c | 2 +- + xen/arch/x86/hvm/hvm.c | 4 ++-- + xen/arch/x86/hvm/svm/svm.c | 2 +- + xen/arch/x86/hvm/vlapic.c | 2 +- + xen/arch/x86/hvm/vmx/vmx.c | 4 ++-- + xen/arch/x86/include/asm/msr.h | 2 +- + xen/arch/x86/msr.c | 20 +++++++++----------- + xen/arch/x86/pv/domain.c | 2 +- + xen/arch/x86/pv/emul-priv-op.c | 4 ++-- + xen/arch/x86/traps.c | 2 +- + xen/arch/x86/x86_emulate/x86_emulate.c | 6 +++--- + xen/include/xen/lib/x86/cpu-policy.h | 4 ---- + 14 files changed, 26 insertions(+), 32 deletions(-) + +diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c +index 28a605a5cbc7..ce7678f242a8 100644 +--- a/xen/arch/x86/cpu/mcheck/mce_intel.c ++++ b/xen/arch/x86/cpu/mcheck/mce_intel.c +@@ -1008,7 +1008,7 @@ int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + + int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + { +- const struct cpuid_policy *cp = v->domain->arch.cpuid; ++ const struct cpu_policy *cp = v->domain->arch.cpu_policy; + unsigned int bank = msr - MSR_IA32_MC0_CTL2; + + switch ( msr ) +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index 3f20c342fde8..f311372cdf1f 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -36,7 +36,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, + uint32_t subleaf, struct cpuid_leaf *res) + { + const struct domain *d = v->domain; +- const struct cpuid_policy *p = d->arch.cpuid; ++ const struct cpu_policy *p = d->arch.cpu_policy; + + *res = EMPTY_LEAF; + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index faea542286c0..aca9fa310cc8 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -283,7 +283,7 @@ void update_guest_memory_policy(struct vcpu *v, + + void domain_cpu_policy_changed(struct domain *d) + { +- const struct cpuid_policy *p = d->arch.cpuid; ++ const struct cpu_policy *p = d->arch.cpu_policy; + struct vcpu *v; + + if ( is_pv_domain(d) ) +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index b486c0efe061..d6c6ab889757 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -905,7 +905,7 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value, + signed int cr0_pg) + { + const struct domain *d = v->domain; +- const struct cpuid_policy *p = d->arch.cpuid; ++ const struct cpu_policy *p = d->arch.cpu_policy; + + if ( value & ~EFER_KNOWN_MASK ) + return "Unknown bits set"; +@@ -942,7 +942,7 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value, + /* These bits in CR4 can be set by the guest. */ + unsigned long hvm_cr4_guest_valid_bits(const struct domain *d) + { +- const struct cpuid_policy *p = d->arch.cpuid; ++ const struct cpu_policy *p = d->arch.cpu_policy; + bool mce, vmxe, cet; + + /* Logic broken out simply to aid readability below. 
*/ +diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c +index fa7325720328..5fa945c526ec 100644 +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -596,7 +596,7 @@ static void cf_check svm_cpuid_policy_changed(struct vcpu *v) + { + struct svm_vcpu *svm = &v->arch.hvm.svm; + struct vmcb_struct *vmcb = svm->vmcb; +- const struct cpuid_policy *cp = v->domain->arch.cpuid; ++ const struct cpu_policy *cp = v->domain->arch.cpu_policy; + u32 bitmap = vmcb_get_exception_intercepts(vmcb); + + if ( opt_hvm_fep || +diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c +index eb32f12e2d14..5909935e0b3e 100644 +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -1083,7 +1083,7 @@ static void set_x2apic_id(struct vlapic *vlapic) + + int guest_wrmsr_apic_base(struct vcpu *v, uint64_t value) + { +- const struct cpuid_policy *cp = v->domain->arch.cpuid; ++ const struct cpu_policy *cp = v->domain->arch.cpu_policy; + struct vlapic *vlapic = vcpu_vlapic(v); + + if ( !has_vlapic(v->domain) ) +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index 64dbd501974b..8da6be33eeec 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -738,7 +738,7 @@ void vmx_update_exception_bitmap(struct vcpu *v) + + static void cf_check vmx_cpuid_policy_changed(struct vcpu *v) + { +- const struct cpuid_policy *cp = v->domain->arch.cpuid; ++ const struct cpu_policy *cp = v->domain->arch.cpu_policy; + int rc = 0; + + if ( opt_hvm_fep || +@@ -3465,7 +3465,7 @@ static int cf_check vmx_msr_write_intercept( + unsigned int msr, uint64_t msr_content) + { + struct vcpu *v = current; +- const struct cpuid_policy *cp = v->domain->arch.cpuid; ++ const struct cpu_policy *cp = v->domain->arch.cpu_policy; + + HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x, msr_value=%#"PRIx64, msr, msr_content); + +diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h +index b51d92e27c74..adda736efc4f 100644 +--- a/xen/arch/x86/include/asm/msr.h ++++ b/xen/arch/x86/include/asm/msr.h +@@ -278,7 +278,7 @@ static inline void wrmsr_tsc_aux(uint32_t val) + } + } + +-uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp); ++uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp); + + /* Container object for per-vCPU MSRs */ + struct vcpu_msrs +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 14bcb8261c47..a79021774b5c 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -54,8 +54,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) + { + const struct vcpu *curr = current; + const struct domain *d = v->domain; +- const struct cpuid_policy *cp = d->arch.cpuid; +- const struct msr_policy *mp = d->arch.msr; ++ const struct cpu_policy *cp = d->arch.cpu_policy; + const struct vcpu_msrs *msrs = v->arch.msrs; + int ret = X86EMUL_OKAY; + +@@ -139,13 +138,13 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) + goto get_reg; + + case MSR_INTEL_PLATFORM_INFO: +- *val = mp->platform_info.raw; ++ *val = cp->platform_info.raw; + break; + + case MSR_ARCH_CAPABILITIES: + if ( !cp->feat.arch_caps ) + goto gp_fault; +- *val = mp->arch_caps.raw; ++ *val = cp->arch_caps.raw; + break; + + case MSR_INTEL_MISC_FEATURES_ENABLES: +@@ -321,7 +320,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) + * separate CPUID features for this functionality, but only set will be + * active. 
+ */ +-uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp) ++uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) + { + bool ssbd = cp->feat.ssbd || cp->extd.amd_ssbd; + bool psfd = cp->feat.intel_psfd || cp->extd.psfd; +@@ -340,8 +339,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + { + const struct vcpu *curr = current; + struct domain *d = v->domain; +- const struct cpuid_policy *cp = d->arch.cpuid; +- const struct msr_policy *mp = d->arch.msr; ++ const struct cpu_policy *cp = d->arch.cpu_policy; + struct vcpu_msrs *msrs = v->arch.msrs; + int ret = X86EMUL_OKAY; + +@@ -382,7 +380,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + * for backwards compatiblity, the OS should write 0 to it before + * trying to access the current microcode version. + */ +- if ( d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL || val != 0 ) ++ if ( cp->x86_vendor != X86_VENDOR_INTEL || val != 0 ) + goto gp_fault; + break; + +@@ -392,7 +390,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + * to AMD CPUs as well (at least the architectural/CPUID part does). + */ + if ( is_pv_domain(d) || +- d->arch.cpuid->x86_vendor != X86_VENDOR_AMD ) ++ cp->x86_vendor != X86_VENDOR_AMD ) + goto gp_fault; + break; + +@@ -404,7 +402,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + * by any CPUID bit. + */ + if ( is_pv_domain(d) || +- d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL ) ++ cp->x86_vendor != X86_VENDOR_INTEL ) + goto gp_fault; + break; + +@@ -441,7 +439,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + bool old_cpuid_faulting = msrs->misc_features_enables.cpuid_faulting; + + rsvd = ~0ull; +- if ( mp->platform_info.cpuid_faulting ) ++ if ( cp->platform_info.cpuid_faulting ) + rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING; + + if ( val & rsvd ) +diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c +index 95492715d8ad..5c92812dc67a 100644 +--- a/xen/arch/x86/pv/domain.c ++++ b/xen/arch/x86/pv/domain.c +@@ -146,7 +146,7 @@ static void release_compat_l4(struct vcpu *v) + + unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4) + { +- const struct cpuid_policy *p = v->domain->arch.cpuid; ++ const struct cpu_policy *p = v->domain->arch.cpu_policy; + + /* Discard attempts to set guest controllable bits outside of the policy. */ + cr4 &= ~((p->basic.tsc ? 
0 : X86_CR4_TSD) | +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index ab52768271c5..04416f197951 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -885,7 +885,7 @@ static int cf_check read_msr( + { + struct vcpu *curr = current; + const struct domain *currd = curr->domain; +- const struct cpuid_policy *cp = currd->arch.cpuid; ++ const struct cpu_policy *cp = currd->arch.cpu_policy; + bool vpmu_msr = false, warn = false; + uint64_t tmp; + int ret; +@@ -1034,7 +1034,7 @@ static int cf_check write_msr( + { + struct vcpu *curr = current; + const struct domain *currd = curr->domain; +- const struct cpuid_policy *cp = currd->arch.cpuid; ++ const struct cpu_policy *cp = currd->arch.cpu_policy; + bool vpmu_msr = false; + int ret; + +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index cade9e12f8fa..d12004b1c6fc 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -1035,7 +1035,7 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf, + uint32_t subleaf, struct cpuid_leaf *res) + { + const struct domain *d = v->domain; +- const struct cpuid_policy *p = d->arch.cpuid; ++ const struct cpu_policy *p = d->arch.cpu_policy; + uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000; + uint32_t idx = leaf - base; + unsigned int limit = is_viridian_domain(d) ? p->hv2_limit : p->hv_limit; +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c +index 94dd72585ab9..7a4d3437dd62 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -1923,7 +1923,7 @@ in_protmode( + } + + static bool +-_amd_like(const struct cpuid_policy *cp) ++_amd_like(const struct cpu_policy *cp) + { + return cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON); + } +@@ -1931,7 +1931,7 @@ _amd_like(const struct cpuid_policy *cp) + static bool + amd_like(const struct x86_emulate_ctxt *ctxt) + { +- return _amd_like(ctxt->cpuid); ++ return _amd_like(ctxt->cpu_policy); + } + + #define vcpu_has_fpu() (ctxt->cpuid->basic.fpu) +@@ -2078,7 +2078,7 @@ protmode_load_seg( + struct x86_emulate_ctxt *ctxt, + const struct x86_emulate_ops *ops) + { +- const struct cpuid_policy *cp = ctxt->cpuid; ++ const struct cpu_policy *cp = ctxt->cpu_policy; + enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr; + struct { uint32_t a, b; } desc, desc_hi = {}; + uint8_t dpl, rpl; +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index cf7de0f29ccd..bfa425060464 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -375,10 +375,6 @@ struct cpu_policy + uint8_t x86_vendor; + }; + +-/* Temporary */ +-#define cpuid_policy cpu_policy +-#define msr_policy cpu_policy +- + struct cpu_policy_errors + { + uint32_t leaf, subleaf; +-- +2.39.2 + diff --git a/0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch b/0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch new file mode 100644 index 00000000..3b7523b1 --- /dev/null +++ b/0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch @@ -0,0 +1,110 @@ +From c72185f9463dd021f810c19ed025dc20ee1d0a6f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 10 May 2023 19:58:43 +0100 +Subject: [PATCH 18/35] x86/cpuid: Calculate FEATURESET_NR_ENTRIES more + helpfully + +When adding new featureset words, it is convenient to split the work into +several patches. 
However, GCC 12 spotted that the way we prefer to split the
+work results in a real (transient) breakage whereby the policy <-> featureset
+helpers perform out-of-bounds accesses on the featureset array.
+
+Fix this by having gen-cpuid.py calculate FEATURESET_NR_ENTRIES from the
+comments describing the word blocks, rather than from the XEN_CPUFEATURE()
+with the greatest value.
+
+For simplicity, require that the word blocks appear in order.  This can be
+revisited if we find a good reason to have blocks out of order.
+
+No functional change.
+
+Reported-by: Jan Beulich
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit 56e2c8e5860090a35d5f0cafe168223a2a7c0e62)
+---
+ xen/tools/gen-cpuid.py | 42 ++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 36 insertions(+), 6 deletions(-)
+
+diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
+index 4f7c8d78cce7..83b19c8515cd 100755
+--- a/xen/tools/gen-cpuid.py
++++ b/xen/tools/gen-cpuid.py
+@@ -50,13 +50,37 @@ def parse_definitions(state):
+         "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)"
+         "\s+/\*([\w!]*) .*$")
+ 
++    word_regex = re.compile(
++        r"^/\* .* word (\d*) \*/$")
++    last_word = -1
++
+     this = sys.modules[__name__]
+ 
+     for l in state.input.readlines():
+-        # Short circuit the regex...
+-        if not l.startswith("XEN_CPUFEATURE("):
++
++        # Short circuit the regexes...
++        if not (l.startswith("XEN_CPUFEATURE(") or
++                l.startswith("/* ")):
+             continue
+ 
++        # Handle /* ... word $N */ lines
++        if l.startswith("/* "):
++
++            res = word_regex.match(l)
++            if res is None:
++                continue # Some other comment
++
++            word = int(res.groups()[0])
++
++            if word != last_word + 1:
++                raise Fail("Featureset word %u out of order (last word %u)"
++                           % (word, last_word))
++
++            last_word = word
++            state.nr_entries = word + 1
++            continue
++
++        # Handle XEN_CPUFEATURE( lines
+         res = feat_regex.match(l)
+ 
+         if res is None:
+@@ -94,6 +118,15 @@ def parse_definitions(state):
+     if len(state.names) == 0:
+         raise Fail("No features found")
+ 
++    if state.nr_entries == 0:
++        raise Fail("No featureset word info found")
++
++    max_val = max(state.names.keys())
++    if (max_val >> 5) >= state.nr_entries:
++        max_name = state.names[max_val]
++        raise Fail("Feature %s (%d*32+%d) exceeds FEATURESET_NR_ENTRIES (%d)"
++                   % (max_name, max_val >> 5, max_val & 31, state.nr_entries))
++
+ def featureset_to_uint32s(fs, nr):
+     """ Represent a featureset as a list of C-compatible uint32_t's """
+ 
+@@ -122,9 +155,6 @@ def format_uint32s(state, featureset, indent):
+ 
+ def crunch_numbers(state):
+ 
+-    # Size of bitmaps
+-    state.nr_entries = nr_entries = (max(state.names.keys()) >> 5) + 1
+-
+     # Features common between 1d and e1d.
+     common_1d = (FPU, VME, DE, PSE, TSC, MSR, PAE, MCE, CX8, APIC,
+                  MTRR, PGE, MCA, CMOV, PAT, PSE36, MMX, FXSR)
+@@ -328,7 +358,7 @@ def crunch_numbers(state):
+     state.nr_deep_deps = len(state.deep_deps.keys())
+ 
+     # Calculate the bitfield name declarations
+-    for word in range(nr_entries):
++    for word in range(state.nr_entries):
+ 
+         names = []
+         for bit in range(32):
+-- 
+2.39.2
+
diff --git a/0339-x86-boot-Rework-dom0-feature-configuration.patch b/0339-x86-boot-Rework-dom0-feature-configuration.patch
new file mode 100644
index 00000000..007a30cf
--- /dev/null
+++ b/0339-x86-boot-Rework-dom0-feature-configuration.patch
@@ -0,0 +1,136 @@
+From e74dbbf0bdf6dc895ae0d6082cc2d764c88d6ec1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 12 May 2023 13:52:39 +0100
+Subject: [PATCH 19/35] x86/boot: Rework dom0 feature configuration
+
+Right now, dom0's feature configuration is split between the common
+path and a dom0-specific one.  This is mostly by accident, and causes some
+very subtle bugs.
+
+First, start by clearly defining init_dom0_cpuid_policy() to be the domain
+that Xen builds automatically.  The late hwdom case is still constructed in a
+mostly normal way, with the control domain having full discretion over the CPU
+policy.
+
+Identifying this highlights a latent bug - the two halves of the MSR_ARCH_CAPS
+bodge are asymmetric with respect to the hardware domain.  This means that
+shim, or a control-only dom0 sees the MSR_ARCH_CAPS CPUID bit but none of the
+MSR content.  This in turn declares the hardware to be retpoline-safe by
+failing to advertise the {R,}RSBA bits appropriately.  Restrict this logic to
+the hardware domain, although the special case will cease to exist shortly.
+
+For the CPUID Faulting adjustment, the comment in ctxt_switch_levelling()
+isn't actually relevant.  Provide a better explanation.
+
+Move the recalculate_cpuid_policy() call outside of the dom0-cpuid= case.
+This is no change for now, but will become necessary shortly.
+
+Finally, place the second half of the MSR_ARCH_CAPS bodge after the
+recalculate_cpuid_policy() call.  This is necessary to avoid transiently
+breaking the hardware domain's view while the handling is cleaned up.  This
+special case will cease to exist shortly.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit ef1987fcb0fdfaa7ee148024037cb5fa335a7b2d)
+---
+ xen/arch/x86/cpu-policy.c | 57 +++++++++++++++++++++------------------
+ 1 file changed, 31 insertions(+), 26 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 19766e87b68f..0f5182386227 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -678,29 +678,6 @@ int init_domain_cpu_policy(struct domain *d)
+     if ( !p )
+         return -ENOMEM;
+ 
+-    /* See comment in ctxt_switch_levelling() */
+-    if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) )
+-        p->platform_info.cpuid_faulting = false;
+-
+-    /*
+-     * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0,
+-     * so dom0 can turn off workarounds as appropriate.  Temporary, until the
+-     * domain policy logic gains a better understanding of MSRs.
+-     */
+-    if ( is_hardware_domain(d) && cpu_has_arch_caps )
+-    {
+-        uint64_t val;
+-
+-        rdmsrl(MSR_ARCH_CAPABILITIES, val);
+-
+-        p->arch_caps.raw = val &
+-            (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
+-             ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO |
+-             ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO |
+-             ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA |
+-             ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO);
+-    }
+-
+     d->arch.cpu_policy = p;
+ 
+     recalculate_cpuid_policy(d);
+@@ -836,11 +813,15 @@ void recalculate_cpuid_policy(struct domain *d)
+         p->extd.raw[0x19] = EMPTY_LEAF;
+ }
+ 
++/*
++ * Adjust the CPU policy for dom0.  Really, this is "the domain Xen builds
++ * automatically on boot", and might not have the domid 0 (e.g. pvshim).
++ */
+ void __init init_dom0_cpuid_policy(struct domain *d)
+ {
+     struct cpu_policy *p = d->arch.cpuid;
+ 
+-    /* dom0 can't migrate.  Give it ITSC if available. */
++    /* Dom0 doesn't migrate relative to Xen.  Give it ITSC if available. */
+     if ( cpu_has_itsc )
+         p->extd.itsc = true;
+ 
+@@ -849,7 +830,7 @@ void __init init_dom0_cpuid_policy(struct domain *d)
+      * so dom0 can turn off workarounds as appropriate.  Temporary, until the
+      * domain policy logic gains a better understanding of MSRs.
+      */
+-    if ( cpu_has_arch_caps )
++    if ( is_hardware_domain(d) && cpu_has_arch_caps )
+         p->feat.arch_caps = true;
+ 
+     /* Apply dom0-cpuid= command line settings, if provided. */
+@@ -867,8 +848,32 @@ void __init init_dom0_cpuid_policy(struct domain *d)
+         }
+ 
+         x86_cpu_featureset_to_policy(fs, p);
++    }
++
++    /*
++     * PV Control domains used to require unfiltered CPUID.  This was fixed in
++     * Xen 4.13, but there is an cmdline knob to restore the prior behaviour.
++     *
++     * If the domain is getting unfiltered CPUID, don't let the guest kernel
++     * play with CPUID faulting either, as Xen's CPUID path won't cope.
++     */
++    if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) )
++        p->platform_info.cpuid_faulting = false;
+ 
+-        recalculate_cpuid_policy(d);
++    recalculate_cpuid_policy(d);
++
++    if ( is_hardware_domain(d) && cpu_has_arch_caps )
++    {
++        uint64_t val;
++
++        rdmsrl(MSR_ARCH_CAPABILITIES, val);
++
++        p->arch_caps.raw = val &
++            (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
++             ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO |
++             ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO |
++             ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA |
++             ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO);
+     }
+ }
+ 
+-- 
+2.39.2
+
diff --git a/0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch b/0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch
new file mode 100644
index 00000000..fce1a78f
--- /dev/null
+++ b/0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch
@@ -0,0 +1,39 @@
+From 78543222faaecfcdb23239b7a686f06186e287bf Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Mon, 15 May 2023 14:14:53 +0100
+Subject: [PATCH 20/35] x86/boot: Adjust MSR_ARCH_CAPS handling for the Host
+ policy
+
+We are about to move MSR_ARCH_CAPS into featureset, but the order of
+operations (copy raw policy, then copy x86_capabilities[] in) will end up
+clobbering the ARCH_CAPS value.
+
+Some toolstacks use this information to handle TSX compatibility across the
+CPUs and microcode versions where support was removed.
+
+To avoid this transient breakage, read from raw_cpu_policy rather than
+modifying it in place.  This logic will be removed entirely in due course.
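To make the ordering hazard concrete, here is a minimal standalone sketch of the copy-then-overwrite problem (hypothetical types and mask; not the actual Xen code):

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical, cut-down policy: an MSR value plus featureset words. */
struct policy {
    uint64_t arch_caps;   /* MSR data */
    uint32_t fs[2];       /* featureset words */
};

struct policy raw_policy, host_policy;
uint32_t x86_capability[2];

void calculate_host(void)
{
    host_policy = raw_policy;                  /* step 1: copy the raw policy */
    memcpy(host_policy.fs, x86_capability,     /* step 2: overwrite featureset */
           sizeof(host_policy.fs));

    /*
     * Once arch_caps becomes featureset data, step 2 clobbers what step 1
     * copied in.  Masking explicitly from the raw policy sidesteps the
     * transient breakage (the mask value here is illustrative only):
     */
    host_policy.arch_caps = raw_policy.arch_caps & 0x1ffffffull;
}
```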
+
+Signed-off-by: Andrew Cooper
+Acked-by: Jan Beulich
+(cherry picked from commit 43912f8dbb1888ffd7f00adb10724c70e71927c4)
+---
+ xen/arch/x86/cpu-policy.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 0f5182386227..630c133daf08 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -411,7 +411,7 @@ static void __init calculate_host_policy(void)
+     p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
+ 
+     /* Temporary, until we have known_features[] for feature bits in MSRs. */
+-    p->arch_caps.raw &=
++    p->arch_caps.raw = raw_cpu_policy.arch_caps.raw &
+         (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
+          ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO |
+          ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO |
+-- 
+2.39.2
+
diff --git a/0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch b/0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch
new file mode 100644
index 00000000..a086ddfb
--- /dev/null
+++ b/0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch
@@ -0,0 +1,216 @@
+From b088cf5f3a9553bfc6ac1238fdcd32e854d62c3f Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 12 May 2023 17:55:21 +0100
+Subject: [PATCH 21/35] x86/cpu-policy: Infrastructure for MSR_ARCH_CAPS
+
+Bits through 24 are already defined, meaning that we're not far off needing
+the second word.  Put both in right away.
+
+As both halves are present now, the arch_caps field is full width.  Adjust the
+unit test, which notices.
+
+The bool bitfield names in the arch_caps union are unused, and somewhat out of
+date.  They'll shortly be automatically generated.
+
+Add CPUID and MSR prefixes to the ./xen-cpuid verbose output, now that there
+is a mix of the two.
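The shape of the new infrastructure can be illustrated with a self-contained sketch: one 64-bit MSR value exposed to 32-bit featureset machinery as a lo/hi pair (word indices 16 and 17 as in the patch; everything else here is hypothetical):

```c
#include <stdint.h>
#include <stdio.h>

/* Sketch: MSR_ARCH_CAPS (0x10a) as a 64-bit value split into two words. */
union arch_caps {
    uint64_t raw;
    struct { uint32_t lo, hi; };   /* .eax and .edx halves of the MSR */
};

int main(void)
{
    union arch_caps caps = { .raw = (1ull << 32) | 1 };
    uint32_t fs[18];

    fs[16] = caps.lo;   /* FEATURESET_m10Al: MSR_ARCH_CAPS 0x10a.eax */
    fs[17] = caps.hi;   /* FEATURESET_m10Ah: MSR_ARCH_CAPS 0x10a.edx */

    printf("word 16 = %#x, word 17 = %#x\n", fs[16], fs[17]);
    return 0;
}
```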
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit d9fe459ffad8a6eac2f695adb2331aff83c345d1) +--- + tools/misc/xen-cpuid.c | 44 +++++++++++------- + tools/tests/cpu-policy/test-cpu-policy.c | 5 --- + xen/include/public/arch-x86/cpufeatureset.h | 4 ++ + xen/include/xen/lib/x86/cpu-policy.h | 50 ++++++++++----------- + xen/lib/x86/cpuid.c | 4 ++ + 5 files changed, 59 insertions(+), 48 deletions(-) + +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index 859345ae8ab2..642e62efdf20 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -218,31 +218,41 @@ static const char *const str_7d2[32] = + [ 4] = "bhi-ctrl", [ 5] = "mcdt-no", + }; + ++static const char *const str_m10Al[32] = ++{ ++}; ++ ++static const char *const str_m10Ah[32] = ++{ ++}; ++ + static const struct { + const char *name; + const char *abbr; + const char *const *strs; + } decodes[] = + { +- { "0x00000001.edx", "1d", str_1d }, +- { "0x00000001.ecx", "1c", str_1c }, +- { "0x80000001.edx", "e1d", str_e1d }, +- { "0x80000001.ecx", "e1c", str_e1c }, +- { "0x0000000d:1.eax", "Da1", str_Da1 }, +- { "0x00000007:0.ebx", "7b0", str_7b0 }, +- { "0x00000007:0.ecx", "7c0", str_7c0 }, +- { "0x80000007.edx", "e7d", str_e7d }, +- { "0x80000008.ebx", "e8b", str_e8b }, +- { "0x00000007:0.edx", "7d0", str_7d0 }, +- { "0x00000007:1.eax", "7a1", str_7a1 }, +- { "0x80000021.eax", "e21a", str_e21a }, +- { "0x00000007:1.ebx", "7b1", str_7b1 }, +- { "0x00000007:2.edx", "7d2", str_7d2 }, +- { "0x00000007:1.ecx", "7c1", str_7c1 }, +- { "0x00000007:1.edx", "7d1", str_7d1 }, ++ { "CPUID 0x00000001.edx", "1d", str_1d }, ++ { "CPUID 0x00000001.ecx", "1c", str_1c }, ++ { "CPUID 0x80000001.edx", "e1d", str_e1d }, ++ { "CPUID 0x80000001.ecx", "e1c", str_e1c }, ++ { "CPUID 0x0000000d:1.eax", "Da1", str_Da1 }, ++ { "CPUID 0x00000007:0.ebx", "7b0", str_7b0 }, ++ { "CPUID 0x00000007:0.ecx", "7c0", str_7c0 }, ++ { "CPUID 0x80000007.edx", "e7d", str_e7d }, ++ { "CPUID 0x80000008.ebx", "e8b", str_e8b }, ++ { "CPUID 0x00000007:0.edx", "7d0", str_7d0 }, ++ { "CPUID 0x00000007:1.eax", "7a1", str_7a1 }, ++ { "CPUID 0x80000021.eax", "e21a", str_e21a }, ++ { "CPUID 0x00000007:1.ebx", "7b1", str_7b1 }, ++ { "CPUID 0x00000007:2.edx", "7d2", str_7d2 }, ++ { "CPUID 0x00000007:1.ecx", "7c1", str_7c1 }, ++ { "CPUID 0x00000007:1.edx", "7d1", str_7d1 }, ++ { "MSR_ARCH_CAPS.lo", "m10Al", str_m10Al }, ++ { "MSR_ARCH_CAPS.hi", "m10Ah", str_m10Ah }, + }; + +-#define COL_ALIGN "18" ++#define COL_ALIGN "24" + + static const char *const fs_names[] = { + [XEN_SYSCTL_cpu_featureset_raw] = "Raw", +diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c +index fea0eb8c3549..cac28c76256c 100644 +--- a/tools/tests/cpu-policy/test-cpu-policy.c ++++ b/tools/tests/cpu-policy/test-cpu-policy.c +@@ -391,11 +391,6 @@ static void test_msr_deserialise_failure(void) + .msr = { .idx = 0xce, .val = ~0ull }, + .rc = -EOVERFLOW, + }, +- { +- .name = "truncated val", +- .msr = { .idx = 0x10a, .val = ~0ull }, +- .rc = -EOVERFLOW, +- }, + }; + + printf("Testing MSR deserialise failure:\n"); +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 08600cfdc784..52e862b67810 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -300,6 +300,10 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ + /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */ + XEN_CPUFEATURE(CET_SSS, 
15*32+18) /* CET Supervisor Shadow Stacks safe to use */ + ++/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */ ++ ++/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ ++ + #endif /* XEN_CPUFEATURE */ + + /* Clean up from a default include. Close the enum (for C). */ +diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h +index bfa425060464..6d5e9edd269b 100644 +--- a/xen/include/xen/lib/x86/cpu-policy.h ++++ b/xen/include/xen/lib/x86/cpu-policy.h +@@ -4,22 +4,24 @@ + + #include + +-#define FEATURESET_1d 0 /* 0x00000001.edx */ +-#define FEATURESET_1c 1 /* 0x00000001.ecx */ +-#define FEATURESET_e1d 2 /* 0x80000001.edx */ +-#define FEATURESET_e1c 3 /* 0x80000001.ecx */ +-#define FEATURESET_Da1 4 /* 0x0000000d:1.eax */ +-#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ +-#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ +-#define FEATURESET_e7d 7 /* 0x80000007.edx */ +-#define FEATURESET_e8b 8 /* 0x80000008.ebx */ +-#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ +-#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ +-#define FEATURESET_e21a 11 /* 0x80000021.eax */ +-#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ +-#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ +-#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ +-#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ ++#define FEATURESET_1d 0 /* 0x00000001.edx */ ++#define FEATURESET_1c 1 /* 0x00000001.ecx */ ++#define FEATURESET_e1d 2 /* 0x80000001.edx */ ++#define FEATURESET_e1c 3 /* 0x80000001.ecx */ ++#define FEATURESET_Da1 4 /* 0x0000000d:1.eax */ ++#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ ++#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ ++#define FEATURESET_e7d 7 /* 0x80000007.edx */ ++#define FEATURESET_e8b 8 /* 0x80000008.ebx */ ++#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ ++#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ ++#define FEATURESET_e21a 11 /* 0x80000021.eax */ ++#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ ++#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ ++#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ ++#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ ++#define FEATURESET_m10Al 16 /* 0x0000010a.eax */ ++#define FEATURESET_m10Ah 17 /* 0x0000010a.edx */ + + struct cpuid_leaf + { +@@ -350,17 +352,13 @@ struct cpu_policy + * fixed in hardware. 
+ */ + union { +- uint32_t raw; ++ uint64_t raw; ++ struct { ++ uint32_t lo, hi; ++ }; + struct { +- bool rdcl_no:1; +- bool ibrs_all:1; +- bool rsba:1; +- bool skip_l1dfl:1; +- bool ssb_no:1; +- bool mds_no:1; +- bool if_pschange_mc_no:1; +- bool tsx_ctrl:1; +- bool taa_no:1; ++ DECL_BITFIELD(m10Al); ++ DECL_BITFIELD(m10Ah); + }; + } arch_caps; + +diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c +index 68aafb404927..e795ce375032 100644 +--- a/xen/lib/x86/cpuid.c ++++ b/xen/lib/x86/cpuid.c +@@ -79,6 +79,8 @@ void x86_cpu_policy_to_featureset( + fs[FEATURESET_7d2] = p->feat._7d2; + fs[FEATURESET_7c1] = p->feat._7c1; + fs[FEATURESET_7d1] = p->feat._7d1; ++ fs[FEATURESET_m10Al] = p->arch_caps.lo; ++ fs[FEATURESET_m10Ah] = p->arch_caps.hi; + } + + void x86_cpu_featureset_to_policy( +@@ -100,6 +102,8 @@ void x86_cpu_featureset_to_policy( + p->feat._7d2 = fs[FEATURESET_7d2]; + p->feat._7c1 = fs[FEATURESET_7c1]; + p->feat._7d1 = fs[FEATURESET_7d1]; ++ p->arch_caps.lo = fs[FEATURESET_m10Al]; ++ p->arch_caps.hi = fs[FEATURESET_m10Ah]; + } + + void x86_cpu_policy_recalc_synth(struct cpu_policy *p) +-- +2.39.2 + diff --git a/0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch b/0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch new file mode 100644 index 00000000..8a7368ba --- /dev/null +++ b/0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch @@ -0,0 +1,102 @@ +From 2d1e0ef2f13d913e8d4c2959cfb7f97be4116a1f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 12 May 2023 18:50:59 +0100 +Subject: [PATCH 22/35] x86/cpu-policy: MSR_ARCH_CAPS feature names + +Seed the default visibility from the dom0 special case, which for the most +part just exposes the *_NO bits. EIBRS is the one non-*_NO bit, which is +"just" a status bit to the guest indicating a change in implemention of IBRS +which is already fully supported. + +Insert a block dependency from the ARCH_CAPS CPUID bit to the entire content +of the MSR. This is because MSRs have no structure information similar to +CPUID, and used by x86_cpu_policy_clear_out_of_range_leaves(), in order to +bulk-clear inaccessable words. + +The overall CPUID bit is still max-only, so all of MSR_ARCH_CAPS is hidden in +the default policies. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit ce8c930851a5ca21c4e70f83be7e8b290ce1b519) +--- + tools/misc/xen-cpuid.c | 13 ++++++++++++ + xen/include/public/arch-x86/cpufeatureset.h | 23 +++++++++++++++++++++ + xen/tools/gen-cpuid.py | 3 +++ + 3 files changed, 39 insertions(+) + +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index 642e62efdf20..9eaa6c920976 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -220,6 +220,19 @@ static const char *const str_7d2[32] = + + static const char *const str_m10Al[32] = + { ++ [ 0] = "rdcl-no", [ 1] = "eibrs", ++ [ 2] = "rsba", [ 3] = "skip-l1dfl", ++ [ 4] = "intel-ssb-no", [ 5] = "mds-no", ++ [ 6] = "if-pschange-mc-no", [ 7] = "tsx-ctrl", ++ [ 8] = "taa-no", [ 9] = "mcu-ctrl", ++ [10] = "misc-pkg-ctrl", [11] = "energy-ctrl", ++ [12] = "doitm", [13] = "sbdr-ssdp-no", ++ [14] = "fbsdp-no", [15] = "psdp-no", ++ /* 16 */ [17] = "fb-clear", ++ [18] = "fb-clear-ctrl", [19] = "rrsba", ++ [20] = "bhi-no", [21] = "xapic-status", ++ /* 22 */ [23] = "ovrclk-status", ++ [24] = "pbrsb-no", + }; + + static const char *const str_m10Ah[32] = +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 52e862b67810..23b72094c64f 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -301,6 +301,29 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ + XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */ + + /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */ ++XEN_CPUFEATURE(RDCL_NO, 16*32+ 0) /*A No Rogue Data Cache Load (Meltdown) */ ++XEN_CPUFEATURE(EIBRS, 16*32+ 1) /*A Enhanced IBRS */ ++XEN_CPUFEATURE(RSBA, 16*32+ 2) /*!A RSB Alternative (Retpoline not safe) */ ++XEN_CPUFEATURE(SKIP_L1DFL, 16*32+ 3) /* Don't need to flush L1D on VMEntry */ ++XEN_CPUFEATURE(INTEL_SSB_NO, 16*32+ 4) /*A No Speculative Store Bypass */ ++XEN_CPUFEATURE(MDS_NO, 16*32+ 5) /*A No Microarchitectural Data Sampling */ ++XEN_CPUFEATURE(IF_PSCHANGE_MC_NO, 16*32+ 6) /*A No Instruction fetch #MC */ ++XEN_CPUFEATURE(TSX_CTRL, 16*32+ 7) /* MSR_TSX_CTRL */ ++XEN_CPUFEATURE(TAA_NO, 16*32+ 8) /*A No TSX Async Abort */ ++XEN_CPUFEATURE(MCU_CTRL, 16*32+ 9) /* MSR_MCU_CTRL */ ++XEN_CPUFEATURE(MISC_PKG_CTRL, 16*32+10) /* MSR_MISC_PKG_CTRL */ ++XEN_CPUFEATURE(ENERGY_FILTERING, 16*32+11) /* MSR_MISC_PKG_CTRL.ENERGY_FILTERING */ ++XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing Mode */ ++XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ ++XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ ++XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ ++XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ ++XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ ++XEN_CPUFEATURE(RRSBA, 16*32+19) /*!A Restricted RSB Alternative */ ++XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ ++XEN_CPUFEATURE(XAPIC_STATUS, 16*32+21) /* MSR_XAPIC_DISABLE_STATUS */ ++XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ ++XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ + + /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ + +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 83b19c8515cd..72497b3cb0a1 100755 +--- a/xen/tools/gen-cpuid.py ++++ 
b/xen/tools/gen-cpuid.py
+@@ -325,6 +325,9 @@ def crunch_numbers(state):
+ 
+         # In principle the TSXLDTRK insns could also be considered independent.
+         RTM: [TSXLDTRK],
++
++        # The ARCH_CAPS CPUID bit enumerates the availability of the whole register.
++        ARCH_CAPS: list(range(RDCL_NO, RDCL_NO + 64)),
+     }
+ 
+     deep_features = tuple(sorted(deps.keys()))
+-- 
+2.39.2
+
diff --git a/0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch b/0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch
new file mode 100644
index 00000000..035b5af0
--- /dev/null
+++ b/0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch
@@ -0,0 +1,96 @@
+From fe363a4e142f8849d2a8e7e84a95e148bc494930 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 12 May 2023 15:37:02 +0100
+Subject: [PATCH 23/35] x86/boot: Record MSR_ARCH_CAPS for the Raw and Host CPU
+ policy
+
+Extend x86_cpu_policy_fill_native() with a read of ARCH_CAPS based on the
+CPUID information just read, removing the special handling in
+calculate_raw_cpu_policy().
+
+Right now, the only use of x86_cpu_policy_fill_native() outside of Xen is the
+unit tests.  Getting MSR data in this context is left to whoever first
+encounters a genuine need to have it.
+
+Extend generic_identify() to read ARCH_CAPS into x86_capability[], which is
+fed into the Host Policy.  This in turn means there's no need to special case
+arch_caps in calculate_host_policy().
+
+No practical change.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit 70553000d6b44dd7c271a35932b0b3e1f22c5532)
+---
+ xen/arch/x86/cpu-policy.c | 12 ------------
+ xen/arch/x86/cpu/common.c |  5 +++++
+ xen/lib/x86/cpuid.c       |  7 ++++++-
+ 3 files changed, 11 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 630c133daf08..db04ffb8992d 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -354,9 +354,6 @@ static void __init calculate_raw_policy(void)
+ 
+     /* 0x000000ce  MSR_INTEL_PLATFORM_INFO */
+     /* Was already added by probe_cpuid_faulting() */
+-
+-    if ( cpu_has_arch_caps )
+-        rdmsrl(MSR_ARCH_CAPABILITIES, p->arch_caps.raw);
+ }
+ 
+ static void __init calculate_host_policy(void)
+@@ -409,15 +406,6 @@ static void __init calculate_host_policy(void)
+     /* 0x000000ce  MSR_INTEL_PLATFORM_INFO */
+     /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */
+     p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
+-
+-    /* Temporary, until we have known_features[] for feature bits in MSRs. */
+-    p->arch_caps.raw = raw_cpu_policy.arch_caps.raw &
+-        (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
+-         ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO |
+-         ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO |
+-         ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO |
+-         ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO |
+-         ARCH_CAPS_PBRSB_NO);
+ }
+ 
+ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
+index ce692328086e..ffa609930766 100644
+--- a/xen/arch/x86/cpu/common.c
++++ b/xen/arch/x86/cpu/common.c
+@@ -471,6 +471,11 @@ static void generic_identify(struct cpuinfo_x86 *c)
+ 		cpuid_count(0xd, 1,
+ 			    &c->x86_capability[FEATURESET_Da1],
+ 			    &tmp, &tmp, &tmp);
++
++	if (test_bit(X86_FEATURE_ARCH_CAPS, c->x86_capability))
++		rdmsr(MSR_ARCH_CAPABILITIES,
++		      c->x86_capability[FEATURESET_m10Al],
++		      c->x86_capability[FEATURESET_m10Ah]);
+ }
+ 
+ /*
+diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c
+index e795ce375032..07e550191448 100644
+--- a/xen/lib/x86/cpuid.c
++++ b/xen/lib/x86/cpuid.c
+@@ -226,7 +226,12 @@ void x86_cpu_policy_fill_native(struct cpu_policy *p)
+     p->hv_limit = 0;
+     p->hv2_limit = 0;
+ 
+-    /* TODO MSRs */
++#ifdef __XEN__
++    /* TODO MSR_PLATFORM_INFO */
++
++    if ( p->feat.arch_caps )
++        rdmsrl(MSR_ARCH_CAPABILITIES, p->arch_caps.raw);
++#endif
+ 
+     x86_cpu_policy_recalc_synth(p);
+ }
+-- 
+2.39.2
+
diff --git a/0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch b/0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch
new file mode 100644
index 00000000..b3de21e4
--- /dev/null
+++ b/0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch
@@ -0,0 +1,138 @@
+From fb06e624f268c5fc4b8bd7c4e5ed7b1b6c9032f9 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 12 May 2023 15:53:35 +0100
+Subject: [PATCH 24/35] x86/boot: Expose MSR_ARCH_CAPS data in guest max
+ policies
+
+We already have common and default feature adjustment helpers.  Introduce one
+for max featuresets too.
+
+Offer MSR_ARCH_CAPS unconditionally in the max policy, and stop clobbering the
+data inherited from the Host policy.  This will be necessary to level a VM
+safely for migration.  Annotate the ARCH_CAPS CPUID bit as special.  Note:
+ARCH_CAPS is still max-only for now, so will not be inherited by the default
+policies.
+
+With this done, the special case for dom0 can be shrunk to just resampling the
+Host policy (as ARCH_CAPS isn't visible by default yet).
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit bbb289f3d5bdd3358af748d7c567343532ac45b5)
+---
+ xen/arch/x86/cpu-policy.c                   | 42 ++++++++++++---------
+ xen/include/public/arch-x86/cpufeatureset.h |  2 +-
+ 2 files changed, 25 insertions(+), 19 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index db04ffb8992d..d76b544816dd 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -408,6 +408,25 @@ static void __init calculate_host_policy(void)
+     p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
+ }
+ 
++static void __init guest_common_max_feature_adjustments(uint32_t *fs)
++{
++    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
++    {
++        /*
++         * MSR_ARCH_CAPS is just feature data, and we can offer it to guests
++         * unconditionally, although limit it to Intel systems as it is highly
++         * uarch-specific.
++ * ++ * In particular, the RSBA and RRSBA bits mean "you might migrate to a ++ * system where RSB underflow uses alternative predictors (a.k.a ++ * Retpoline not safe)", so these need to be visible to a guest in all ++ * cases, even when it's only some other server in the pool which ++ * suffers the identified behaviour. ++ */ ++ __set_bit(X86_FEATURE_ARCH_CAPS, fs); ++ } ++} ++ + static void __init guest_common_default_feature_adjustments(uint32_t *fs) + { + /* +@@ -483,6 +502,7 @@ static void __init calculate_pv_max_policy(void) + __clear_bit(X86_FEATURE_IBRS, fs); + } + ++ guest_common_max_feature_adjustments(fs); + guest_common_feature_adjustments(fs); + + sanitise_featureset(fs); +@@ -490,8 +510,6 @@ static void __init calculate_pv_max_policy(void) + recalculate_xstate(p); + + p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ +- +- p->arch_caps.raw = 0; /* Not supported yet. */ + } + + static void __init calculate_pv_def_policy(void) +@@ -589,6 +607,7 @@ static void __init calculate_hvm_max_policy(void) + __clear_bit(X86_FEATURE_XSAVES, fs); + } + ++ guest_common_max_feature_adjustments(fs); + guest_common_feature_adjustments(fs); + + sanitise_featureset(fs); +@@ -597,8 +616,6 @@ static void __init calculate_hvm_max_policy(void) + + /* It's always possible to emulate CPUID faulting for HVM guests */ + p->platform_info.cpuid_faulting = true; +- +- p->arch_caps.raw = 0; /* Not supported yet. */ + } + + static void __init calculate_hvm_def_policy(void) +@@ -819,7 +836,10 @@ void __init init_dom0_cpuid_policy(struct domain *d) + * domain policy logic gains a better understanding of MSRs. + */ + if ( is_hardware_domain(d) && cpu_has_arch_caps ) ++ { + p->feat.arch_caps = true; ++ p->arch_caps.raw = host_cpu_policy.arch_caps.raw; ++ } + + /* Apply dom0-cpuid= command line settings, if provided. */ + if ( dom0_cpuid_cmdline ) +@@ -849,20 +869,6 @@ void __init init_dom0_cpuid_policy(struct domain *d) + p->platform_info.cpuid_faulting = false; + + recalculate_cpuid_policy(d); +- +- if ( is_hardware_domain(d) && cpu_has_arch_caps ) +- { +- uint64_t val; +- +- rdmsrl(MSR_ARCH_CAPABILITIES, val); +- +- p->arch_caps.raw = val & +- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | +- ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | +- ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | +- ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | +- ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); +- } + } + + static void __init __maybe_unused build_assertions(void) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 23b72094c64f..02a80b0c0c35 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -270,7 +270,7 @@ XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */ + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ + XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. 
*/
+-XEN_CPUFEATURE(ARCH_CAPS,     9*32+29) /*a  IA32_ARCH_CAPABILITIES MSR */
++XEN_CPUFEATURE(ARCH_CAPS,     9*32+29) /*!a IA32_ARCH_CAPABILITIES MSR */
+ XEN_CPUFEATURE(CORE_CAPS,     9*32+30) /*   IA32_CORE_CAPABILITIES MSR */
+ XEN_CPUFEATURE(SSBD,          9*32+31) /*A  MSR_SPEC_CTRL.SSBD available */
+ 
+-- 
+2.39.2
+
diff --git a/0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch b/0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
new file mode 100644
index 00000000..01a501bb
--- /dev/null
+++ b/0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
@@ -0,0 +1,58 @@
+From f2c655a3dc286d287b7aa8974e15a712db9bf713 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Mon, 15 May 2023 16:59:25 +0100
+Subject: [PATCH 25/35] x86/vtx: Remove opencoded MSR_ARCH_CAPS check
+
+MSR_ARCH_CAPS data is now included in featureset information.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit 8f6bc7f9b72eb7cf0c8c5ae5d80498a58ba0b7c3)
+---
+ xen/arch/x86/hvm/vmx/vmx.c            | 8 ++------
+ xen/arch/x86/include/asm/cpufeature.h | 3 +++
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 8da6be33eeec..f256dc2635f5 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -2808,8 +2808,6 @@ static void __init ler_to_fixup_check(void);
+  */
+ static bool __init has_if_pschange_mc(void)
+ {
+-    uint64_t caps = 0;
+-
+     /*
+      * If we are virtualised, there is nothing we can do.  Our EPT tables are
+      * shadowed by our hypervisor, and not walked by hardware.
+@@ -2817,10 +2815,8 @@ static bool __init has_if_pschange_mc(void)
+     if ( cpu_has_hypervisor )
+         return false;
+ 
+-    if ( cpu_has_arch_caps )
+-        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
+-
+-    if ( caps & ARCH_CAPS_IF_PSCHANGE_MC_NO )
++    /* Hardware reports itself as fixed. */
++    if ( cpu_has_if_pschange_mc_no )
+         return false;
+ 
+     /*
+diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
+index a3ad9ebee4e9..448d5c1e0560 100644
+--- a/xen/arch/x86/include/asm/cpufeature.h
++++ b/xen/arch/x86/include/asm/cpufeature.h
+@@ -145,6 +145,9 @@
+ #define cpu_has_avx_vnni        boot_cpu_has(X86_FEATURE_AVX_VNNI)
+ #define cpu_has_avx512_bf16     boot_cpu_has(X86_FEATURE_AVX512_BF16)
+ 
++/* MSR_ARCH_CAPS */
++#define cpu_has_if_pschange_mc_no boot_cpu_has(X86_FEATURE_IF_PSCHANGE_MC_NO)
++
+ /* Synthesized. */
+ #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
+ #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
+-- 
+2.39.2
+
diff --git a/0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch b/0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
new file mode 100644
index 00000000..8a9439f9
--- /dev/null
+++ b/0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
@@ -0,0 +1,95 @@
+From d5b1cad4fa54bc67d5743ff21cd3d1eadafc6e72 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Mon, 15 May 2023 19:05:01 +0100
+Subject: [PATCH 26/35] x86/tsx: Remove opencoded MSR_ARCH_CAPS check
+
+The current cpu_has_tsx_ctrl tristate is serving double purpose; to signal the
+first pass through tsx_init(), and the availability of MSR_TSX_CTRL.
+
+Drop the variable, replacing it with a once boolean, and altering
+cpu_has_tsx_ctrl to come out of the feature information.
+
+No functional change.
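The once-guard idiom the patch adopts looks roughly like this (a simplified sketch; in the real code the first-pass work re-reads CPUID leaves and MSR_ARCH_CAPS):

```c
#include <stdbool.h>

bool cpu_has_tsx_ctrl;   /* plain feature data, no longer a -1/0/1 tristate */

void tsx_init(void)
{
    static bool once;

    if ( !once )
    {
        once = true;
        /* First-pass-only work lives here: feature (re)detection. */
    }

    if ( cpu_has_tsx_ctrl )
    {
        /* Drive MSR_TSX_CTRL as configured. */
    }
}
```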
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 205a9f970378c31ae3e00b52d59103a2e881b9e0) +--- + xen/arch/x86/include/asm/cpufeature.h | 1 + + xen/arch/x86/include/asm/processor.h | 2 +- + xen/arch/x86/tsx.c | 13 ++++++++----- + 3 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h +index 448d5c1e0560..31ab4495b3a6 100644 +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -147,6 +147,7 @@ + + /* MSR_ARCH_CAPS */ + #define cpu_has_if_pschange_mc_no boot_cpu_has(X86_FEATURE_IF_PSCHANGE_MC_NO) ++#define cpu_has_tsx_ctrl boot_cpu_has(X86_FEATURE_TSX_CTRL) + + /* Synthesized. */ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) +diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h +index 8e2816fae9b9..40e4e2b02442 100644 +--- a/xen/arch/x86/include/asm/processor.h ++++ b/xen/arch/x86/include/asm/processor.h +@@ -624,7 +624,7 @@ static inline uint8_t get_cpu_family(uint32_t raw, uint8_t *model, + return fam; + } + +-extern int8_t opt_tsx, cpu_has_tsx_ctrl; ++extern int8_t opt_tsx; + extern bool rtm_disabled; + void tsx_init(void); + +diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c +index 41b6092cfe16..80c6f4cedd6b 100644 +--- a/xen/arch/x86/tsx.c ++++ b/xen/arch/x86/tsx.c +@@ -19,7 +19,6 @@ + * controlling TSX behaviour, and where TSX isn't force-disabled by firmware. + */ + int8_t __read_mostly opt_tsx = -1; +-int8_t __read_mostly cpu_has_tsx_ctrl = -1; + bool __read_mostly rtm_disabled; + + static int __init cf_check parse_tsx(const char *s) +@@ -37,24 +36,28 @@ custom_param("tsx", parse_tsx); + + void tsx_init(void) + { ++ static bool __read_mostly once; ++ + /* + * This function is first called between microcode being loaded, and CPUID + * being scanned generally. Read into boot_cpu_data.x86_capability[] for + * the cpu_has_* bits we care about using here. + */ +- if ( unlikely(cpu_has_tsx_ctrl < 0) ) ++ if ( unlikely(!once) ) + { +- uint64_t caps = 0; + bool has_rtm_always_abort; + ++ once = true; ++ + if ( boot_cpu_data.cpuid_level >= 7 ) + boot_cpu_data.x86_capability[FEATURESET_7d0] + = cpuid_count_edx(7, 0); + + if ( cpu_has_arch_caps ) +- rdmsrl(MSR_ARCH_CAPABILITIES, caps); ++ rdmsr(MSR_ARCH_CAPABILITIES, ++ boot_cpu_data.x86_capability[FEATURESET_m10Al], ++ boot_cpu_data.x86_capability[FEATURESET_m10Ah]); + +- cpu_has_tsx_ctrl = !!(caps & ARCH_CAPS_TSX_CTRL); + has_rtm_always_abort = cpu_has_rtm_always_abort; + + if ( cpu_has_tsx_ctrl && cpu_has_srbds_ctrl ) +-- +2.39.2 + diff --git a/0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch b/0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch new file mode 100644 index 00000000..7a51cb03 --- /dev/null +++ b/0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch @@ -0,0 +1,247 @@ +From 2211ad7df8fa5a9ae3e09c2aa2718fee9aba120f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 15 May 2023 19:15:48 +0100 +Subject: [PATCH 27/35] x86/spec-ctrl: Remove opencoded MSR_ARCH_CAPS check + +MSR_ARCH_CAPS data is now included in featureset information. Replace +opencoded checks with regular feature ones. + +No functional change. 
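The conversion is mechanical: an opencoded read-and-mask of the MSR becomes a predicate over cached feature data. A simplified before/after sketch (names are illustrative, not the real Xen accessors):

```c
#include <stdbool.h>
#include <stdint.h>

#define ARCH_CAPS_RDCL_NO (1u << 0)

uint32_t cached_caps_lo;   /* stand-in for the cached featureset word */

/* Before: each caller carried a raw MSR value read via rdmsrl(). */
bool rdcl_no_before(uint64_t caps)
{
    return caps & ARCH_CAPS_RDCL_NO;
}

/* After: a single predicate over feature data; no MSR read at the use site. */
#define cpu_has_rdcl_no (cached_caps_lo & ARCH_CAPS_RDCL_NO)

bool rdcl_no_after(void)
{
    return cpu_has_rdcl_no;
}
```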
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 511b9f286c3dadd041e0d90beeff7d47c9bf3b7a) +--- + xen/arch/x86/include/asm/cpufeature.h | 7 ++++ + xen/arch/x86/spec_ctrl.c | 56 +++++++++++++-------------- + 2 files changed, 33 insertions(+), 30 deletions(-) + +diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h +index 31ab4495b3a6..2460bc7e12c8 100644 +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -146,8 +146,15 @@ + #define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16) + + /* MSR_ARCH_CAPS */ ++#define cpu_has_rdcl_no boot_cpu_has(X86_FEATURE_RDCL_NO) ++#define cpu_has_eibrs boot_cpu_has(X86_FEATURE_EIBRS) ++#define cpu_has_rsba boot_cpu_has(X86_FEATURE_RSBA) ++#define cpu_has_skip_l1dfl boot_cpu_has(X86_FEATURE_SKIP_L1DFL) ++#define cpu_has_mds_no boot_cpu_has(X86_FEATURE_MDS_NO) + #define cpu_has_if_pschange_mc_no boot_cpu_has(X86_FEATURE_IF_PSCHANGE_MC_NO) + #define cpu_has_tsx_ctrl boot_cpu_has(X86_FEATURE_TSX_CTRL) ++#define cpu_has_taa_no boot_cpu_has(X86_FEATURE_TAA_NO) ++#define cpu_has_fb_clear boot_cpu_has(X86_FEATURE_FB_CLEAR) + + /* Synthesized. */ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index e80e2a5ed1a9..4bba5e8c2992 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -294,12 +294,10 @@ custom_param("spec-ctrl", parse_spec_ctrl); + int8_t __read_mostly opt_xpti_hwdom = -1; + int8_t __read_mostly opt_xpti_domu = -1; + +-static __init void xpti_init_default(uint64_t caps) ++static __init void xpti_init_default(void) + { +- if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) ) +- caps = ARCH_CAPS_RDCL_NO; +- +- if ( caps & ARCH_CAPS_RDCL_NO ) ++ if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) || ++ cpu_has_rdcl_no ) + { + if ( opt_xpti_hwdom < 0 ) + opt_xpti_hwdom = 0; +@@ -402,9 +400,10 @@ static int __init cf_check parse_pv_l1tf(const char *s) + } + custom_param("pv-l1tf", parse_pv_l1tf); + +-static void __init print_details(enum ind_thunk thunk, uint64_t caps) ++static void __init print_details(enum ind_thunk thunk) + { + unsigned int _7d0 = 0, _7d2 = 0, e8b = 0, max = 0, tmp; ++ uint64_t caps = 0; + + /* Collect diagnostics about available mitigations. */ + if ( boot_cpu_data.cpuid_level >= 7 ) +@@ -413,6 +412,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + cpuid_count(7, 2, &tmp, &tmp, &tmp, &_7d2); + if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) + cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); ++ if ( cpu_has_arch_caps ) ++ rdmsrl(MSR_ARCH_CAPABILITIES, caps); + + printk("Speculative mitigation facilities:\n"); + +@@ -590,7 +591,7 @@ static bool __init check_smt_enabled(void) + } + + /* Calculate whether Retpoline is known-safe on this CPU. */ +-static bool __init retpoline_safe(uint64_t caps) ++static bool __init retpoline_safe(void) + { + unsigned int ucode_rev = this_cpu(cpu_sig).rev; + +@@ -608,7 +609,7 @@ static bool __init retpoline_safe(uint64_t caps) + * Processors offering Enhanced IBRS are not guarenteed to be + * repoline-safe. + */ +- if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) ) ++ if ( cpu_has_rsba || cpu_has_eibrs ) + return false; + + switch ( boot_cpu_data.x86_model ) +@@ -857,7 +858,7 @@ static void __init ibpb_calculations(void) + } + + /* Calculate whether this CPU is vulnerable to L1TF. 
*/ +-static __init void l1tf_calculations(uint64_t caps) ++static __init void l1tf_calculations(void) + { + bool hit_default = false; + +@@ -945,7 +946,7 @@ static __init void l1tf_calculations(uint64_t caps) + } + + /* Any processor advertising RDCL_NO should be not vulnerable to L1TF. */ +- if ( caps & ARCH_CAPS_RDCL_NO ) ++ if ( cpu_has_rdcl_no ) + cpu_has_bug_l1tf = false; + + if ( cpu_has_bug_l1tf && hit_default ) +@@ -1004,7 +1005,7 @@ static __init void l1tf_calculations(uint64_t caps) + } + + /* Calculate whether this CPU is vulnerable to MDS. */ +-static __init void mds_calculations(uint64_t caps) ++static __init void mds_calculations(void) + { + /* MDS is only known to affect Intel Family 6 processors at this time. */ + if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || +@@ -1012,7 +1013,7 @@ static __init void mds_calculations(uint64_t caps) + return; + + /* Any processor advertising MDS_NO should be not vulnerable to MDS. */ +- if ( caps & ARCH_CAPS_MDS_NO ) ++ if ( cpu_has_mds_no ) + return; + + switch ( boot_cpu_data.x86_model ) +@@ -1125,10 +1126,6 @@ void __init init_speculation_mitigations(void) + enum ind_thunk thunk = THUNK_DEFAULT; + bool has_spec_ctrl, ibrs = false, hw_smt_enabled; + bool cpu_has_bug_taa; +- uint64_t caps = 0; +- +- if ( cpu_has_arch_caps ) +- rdmsrl(MSR_ARCH_CAPABILITIES, caps); + + hw_smt_enabled = check_smt_enabled(); + +@@ -1175,7 +1172,7 @@ void __init init_speculation_mitigations(void) + * On all hardware, we'd like to use retpoline in preference to + * IBRS, but only if it is safe on this hardware. + */ +- if ( retpoline_safe(caps) ) ++ if ( retpoline_safe() ) + thunk = THUNK_RETPOLINE; + else if ( has_spec_ctrl ) + ibrs = true; +@@ -1404,13 +1401,13 @@ void __init init_speculation_mitigations(void) + * threads. Activate this if SMT is enabled, and Xen is using a non-zero + * MSR_SPEC_CTRL setting. + */ +- if ( boot_cpu_has(X86_FEATURE_IBRSB) && !(caps & ARCH_CAPS_IBRS_ALL) && ++ if ( boot_cpu_has(X86_FEATURE_IBRSB) && !cpu_has_eibrs && + hw_smt_enabled && default_xen_spec_ctrl ) + setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); + +- xpti_init_default(caps); ++ xpti_init_default(); + +- l1tf_calculations(caps); ++ l1tf_calculations(); + + /* + * By default, enable PV domU L1TF mitigations on all L1TF-vulnerable +@@ -1431,7 +1428,7 @@ void __init init_speculation_mitigations(void) + if ( !boot_cpu_has(X86_FEATURE_L1D_FLUSH) ) + opt_l1d_flush = 0; + else if ( opt_l1d_flush == -1 ) +- opt_l1d_flush = cpu_has_bug_l1tf && !(caps & ARCH_CAPS_SKIP_L1DFL); ++ opt_l1d_flush = cpu_has_bug_l1tf && !cpu_has_skip_l1dfl; + + /* We compile lfence's in by default, and nop them out if requested. */ + if ( !opt_branch_harden ) +@@ -1454,7 +1451,7 @@ void __init init_speculation_mitigations(void) + "enabled. Please assess your configuration and choose an\n" + "explicit 'smt=' setting. See XSA-273.\n"); + +- mds_calculations(caps); ++ mds_calculations(); + + /* + * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have +@@ -1466,7 +1463,7 @@ void __init init_speculation_mitigations(void) + * the return-to-guest path. + */ + if ( opt_unpriv_mmio ) +- opt_fb_clear_mmio = caps & ARCH_CAPS_FB_CLEAR; ++ opt_fb_clear_mmio = cpu_has_fb_clear; + + /* + * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. 
+@@ -1496,7 +1493,7 @@ void __init init_speculation_mitigations(void) + */ + if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) + setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); +- opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; ++ opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; + + /* + * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT +@@ -1527,8 +1524,7 @@ void __init init_speculation_mitigations(void) + * we check both to spot TSX in a microcode/cmdline independent way. + */ + cpu_has_bug_taa = +- (cpu_has_rtm || (caps & ARCH_CAPS_TSX_CTRL)) && +- (caps & (ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO; ++ (cpu_has_rtm || cpu_has_tsx_ctrl) && cpu_has_mds_no && !cpu_has_taa_no; + + /* + * On TAA-affected hardware, disabling TSX is the preferred mitigation, vs +@@ -1547,7 +1543,7 @@ void __init init_speculation_mitigations(void) + * plausibly value TSX higher than Hyperthreading...), disable TSX to + * mitigate TAA. + */ +- if ( opt_tsx == -1 && cpu_has_bug_taa && (caps & ARCH_CAPS_TSX_CTRL) && ++ if ( opt_tsx == -1 && cpu_has_bug_taa && cpu_has_tsx_ctrl && + ((hw_smt_enabled && opt_smt) || + !boot_cpu_has(X86_FEATURE_SC_VERW_IDLE)) ) + { +@@ -1572,15 +1568,15 @@ void __init init_speculation_mitigations(void) + if ( cpu_has_srbds_ctrl ) + { + if ( opt_srb_lock == -1 && !opt_unpriv_mmio && +- (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && +- (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) ++ cpu_has_mds_no && !cpu_has_taa_no && ++ (!cpu_has_hle || (cpu_has_tsx_ctrl && rtm_disabled)) ) + opt_srb_lock = 0; + + set_in_mcu_opt_ctrl(MCU_OPT_CTRL_RNGDS_MITG_DIS, + opt_srb_lock ? 0 : MCU_OPT_CTRL_RNGDS_MITG_DIS); + } + +- print_details(thunk, caps); ++ print_details(thunk); + + /* + * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard +-- +2.39.2 + diff --git a/0348-x86-spec-ctrl-Update-hardware-hints.patch b/0348-x86-spec-ctrl-Update-hardware-hints.patch new file mode 100644 index 00000000..7f7c1377 --- /dev/null +++ b/0348-x86-spec-ctrl-Update-hardware-hints.patch @@ -0,0 +1,51 @@ +From 6b659d91fb77b0c9e1bfbf48e70764a508f9e886 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 30 May 2023 16:03:16 +0100 +Subject: [PATCH 28/35] x86/spec-ctrl: Update hardware hints + + * Rename IBRS_ALL to EIBRS. EIBRS is the term that everyone knows, and this + makes ARCH_CAPS_EIBRS match the X86_FEATURE_EIBRS form. + * Print RRSBA too, which is also a hint about behaviour. 
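For reference, the hint line is assembled from one conditional fragment per read-only status bit; a trimmed sketch using the renamed constant (bit positions as enumerated for featureset word 16 above):

```c
#include <stdint.h>
#include <stdio.h>

#define ARCH_CAPS_EIBRS (1ull << 1)
#define ARCH_CAPS_RSBA  (1ull << 2)
#define ARCH_CAPS_RRSBA (1ull << 19)

void print_hints(uint64_t caps)
{
    printf("  Hardware hints:%s%s%s\n",
           (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "",
           (caps & ARCH_CAPS_RSBA)  ? " RSBA"  : "",
           (caps & ARCH_CAPS_RRSBA) ? " RRSBA" : "");
}
```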
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit 94200e1bae07e725cc07238c11569c5cab7befb7)
+---
+ xen/arch/x86/include/asm/msr-index.h | 2 +-
+ xen/arch/x86/spec_ctrl.c             | 5 +++--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
+index 0a8852f3c246..0daa265a5f12 100644
+--- a/xen/arch/x86/include/asm/msr-index.h
++++ b/xen/arch/x86/include/asm/msr-index.h
+@@ -66,7 +66,7 @@
+ 
+ #define MSR_ARCH_CAPABILITIES               0x0000010a
+ #define  ARCH_CAPS_RDCL_NO                  (_AC(1, ULL) <<  0)
+-#define  ARCH_CAPS_IBRS_ALL                 (_AC(1, ULL) <<  1)
++#define  ARCH_CAPS_EIBRS                    (_AC(1, ULL) <<  1)
+ #define  ARCH_CAPS_RSBA                     (_AC(1, ULL) <<  2)
+ #define  ARCH_CAPS_SKIP_L1DFL               (_AC(1, ULL) <<  3)
+ #define  ARCH_CAPS_SSB_NO                   (_AC(1, ULL) <<  4)
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 4bba5e8c2992..2e94eded7f55 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -421,10 +421,11 @@ static void __init print_details(enum ind_thunk thunk)
+      * Hardware read-only information, stating immunity to certain issues, or
+      * suggestions of which mitigation to use.
+      */
+-    printk("  Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++    printk("  Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+            (caps & ARCH_CAPS_RDCL_NO)                        ? " RDCL_NO"        : "",
+-           (caps & ARCH_CAPS_IBRS_ALL)                       ? " IBRS_ALL"       : "",
++           (caps & ARCH_CAPS_EIBRS)                          ? " EIBRS"          : "",
+            (caps & ARCH_CAPS_RSBA)                           ? " RSBA"           : "",
++           (caps & ARCH_CAPS_RRSBA)                          ? " RRSBA"          : "",
+            (caps & ARCH_CAPS_SKIP_L1DFL)                     ? " SKIP_L1DFL"     : "",
+            (e8b  & cpufeat_mask(X86_FEATURE_SSB_NO)) ||
+            (caps & ARCH_CAPS_SSB_NO)                         ? " SSB_NO"         : "",
+-- 
+2.39.2
+
diff --git a/0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch b/0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch
new file mode 100644
index 00000000..87e0ed0a
--- /dev/null
+++ b/0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch
@@ -0,0 +1,76 @@
+From 1a2f6ec3cc77fe71877ac579ea751eae6debb28b Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 10 Mar 2023 16:23:20 +0000
+Subject: [PATCH 29/35] x86/cpu-policy: Rearrange
+ guest_common_default_feature_adjustments()
+
+This is prep work, split out to simplify the diff on the following change.
+
+ * Split the INTEL check out of the IvyBridge RDRAND check, as the former will
+   be reused.
+ * Use asm/intel-family.h to remove a raw 0x3a model number.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper
+Acked-by: Jan Beulich
+(cherry picked from commit 064f572f96f1558faae0a74cad616ba95ec8ff34)
+---
+ xen/arch/x86/cpu-policy.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index d76b544816dd..4ec3c2fb93c7 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -10,6 +10,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -429,21 +430,24 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+ 
+ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+ {
+-    /*
+-     * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS
+-     * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to
+-     * compensate.
+-     *
+-     * Mitigate by hiding RDRAND from guests by default, unless explicitly
+-     * overridden on the Xen command line (cpuid=rdrand). 
Irrespective of the +- * default setting, guests can use RDRAND if explicitly enabled +- * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were +- * previously using RDRAND can migrate in. +- */ +- if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && +- boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x3a && +- cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) +- __clear_bit(X86_FEATURE_RDRAND, fs); ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) ++ { ++ /* ++ * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS ++ * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to ++ * compensate. ++ * ++ * Mitigate by hiding RDRAND from guests by default, unless explicitly ++ * overridden on the Xen command line (cpuid=rdrand). Irrespective of the ++ * default setting, guests can use RDRAND if explicitly enabled ++ * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were ++ * previously using RDRAND can migrate in. ++ */ ++ if ( boot_cpu_data.x86 == 6 && ++ boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE && ++ cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) ++ __clear_bit(X86_FEATURE_RDRAND, fs); ++ } + + /* + * On certain hardware, speculative or errata workarounds can result in +-- +2.39.2 + diff --git a/0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch b/0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch new file mode 100644 index 00000000..48270816 --- /dev/null +++ b/0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch @@ -0,0 +1,56 @@ +From 7f70f13b94818273a10419b2ff2b8af6e8946f82 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 12 Jun 2023 20:24:00 +0100 +Subject: [PATCH 30/35] x86/spec-ctrl: Fix the rendering of FB_CLEAR + +FB_CLEAR is a read-only status bit, not a read-write control. Move it from +"Hardware features" into "Hardware hints". + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 921afcbae843bb3f575a8f4a270b8e6cf471f4ca) +--- + xen/arch/x86/spec_ctrl.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 2e94eded7f55..d5f56d74366e 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -421,7 +421,7 @@ static void __init print_details(enum ind_thunk thunk) + * Hardware read-only information, stating immunity to certain issues, or + * suggestions of which mitigation to use. + */ +- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", +@@ -434,6 +434,7 @@ static void __init print_details(enum ind_thunk thunk) + (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", + (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", + (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "", ++ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", + (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", +@@ -443,7 +444,7 @@ static void __init print_details(enum ind_thunk thunk) + (e8b & cpufeat_mask(X86_FEATURE_IBPB_RET)) ? " IBPB_RET" : ""); + + /* Hardware features which need driving to mitigate issues. 
 */
+- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n",
++ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s\n",
+ (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ||
+ (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS)) ||
+@@ -459,7 +460,6 @@ static void __init print_details(enum ind_thunk thunk)
+ (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_VIRT_SSBD)) ? " VIRT_SSBD" : "",
+ (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "",
+- (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "",
+ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "");
+
+ /* Compiled-in support which pertains to mitigations. */
+--
+2.39.2
+
diff --git a/0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch b/0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch
new file mode 100644
index 00000000..3b7bf05a
--- /dev/null
+++ b/0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch
@@ -0,0 +1,48 @@
+From b58b319f6ff12d96ab13dc8ee322491001e61d7e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Mon, 5 Jun 2023 11:09:11 +0100
+Subject: [PATCH 31/35] x86/spec-ctrl: Use a taint for CET without
+ MSR_SPEC_CTRL
+
+Reword the comment for 'S' to include an incompatible set of features on the
+same core.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit 3f63f4510422c29fda7ba238b880cbb53eca34fe)
+---
+ xen/arch/x86/spec_ctrl.c | 3 +++
+ xen/common/kernel.c | 2 +-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index d5f56d74366e..c75521d3a6e4 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -1144,7 +1144,10 @@ void __init init_speculation_mitigations(void)
+ if ( read_cr4() & X86_CR4_CET )
+ {
+ if ( !has_spec_ctrl )
++ {
+ printk(XENLOG_WARNING "?!? CET active, but no MSR_SPEC_CTRL?\n");
++ add_taint(TAINT_CPU_OUT_OF_SPEC);
++ }
+ else if ( opt_ibrs == -1 )
+ opt_ibrs = ibrs = true;
+
+diff --git a/xen/common/kernel.c b/xen/common/kernel.c
+index f8134d3e7a9d..0e8abe0cf8a8 100644
+--- a/xen/common/kernel.c
++++ b/xen/common/kernel.c
+@@ -339,7 +339,7 @@ unsigned int tainted;
+ * 'H' - HVM forced emulation prefix is permitted.
+ * 'M' - Machine had a machine check experience.
+ * 'U' - Platform is unsecure (usually due to an errata on the platform).
+- * 'S' - Out of spec CPU (One core has a feature incompatible with others).
++ * 'S' - Out of spec CPU (Incompatible features on one or more cores).
+ *
+ * The string is overwritten by the next call to print_taint().
+ */
+--
+2.39.2
+
diff --git a/0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch b/0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch
new file mode 100644
index 00000000..99557d1a
--- /dev/null
+++ b/0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch
@@ -0,0 +1,140 @@
+From 7e2164b44adba2e1fbee316faeaba229920c0474 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 26 May 2023 10:35:47 +0100
+Subject: [PATCH 32/35] x86/spec-ctrl: Rename retpoline_safe() to
+ retpoline_calculations()
+
+This is prep work, split out to simplify the diff on the following change.
+
+ * Rename to retpoline_calculations(), and call unconditionally. It is
+ shortly going to synthesise missing enumerations required for guest safety.
+ * For the model check switch statement, store the result in a variable and
+ break rather than returning directly.
+
+No functional change.
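+
+As a rough illustration only (an editorial sketch of the hunks below, not
+text from the upstream commit), the per-model transform is:
+
+    /* Before: each recognised model returns its verdict immediately. */
+    case 0x3d: /* Broadwell */
+        return ucode_rev >= 0x2a;
+
+    /* After: the verdict is stored, so logic added later in the series
+     * can run after the switch ends. */
+    case 0x3d: /* Broadwell */
+        safe = ucode_rev >= 0x2a;
+        break;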
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 724c0d94ff79b208312d30676392bfdd693403be) +--- + xen/arch/x86/spec_ctrl.c | 41 +++++++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 15 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index c75521d3a6e4..a6fd2fe9f56f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -592,9 +592,10 @@ static bool __init check_smt_enabled(void) + } + + /* Calculate whether Retpoline is known-safe on this CPU. */ +-static bool __init retpoline_safe(void) ++static bool __init retpoline_calculations(void) + { + unsigned int ucode_rev = this_cpu(cpu_sig).rev; ++ bool safe = false; + + if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) ) + return true; +@@ -632,29 +633,31 @@ static bool __init retpoline_safe(void) + case 0x3f: /* Haswell EX/EP */ + case 0x45: /* Haswell D */ + case 0x46: /* Haswell H */ +- return true; ++ safe = true; ++ break; + + /* + * Broadwell processors are retpoline-safe after specific microcode + * versions. + */ + case 0x3d: /* Broadwell */ +- return ucode_rev >= 0x2a; ++ safe = ucode_rev >= 0x2a; break; + case 0x47: /* Broadwell H */ +- return ucode_rev >= 0x1d; ++ safe = ucode_rev >= 0x1d; break; + case 0x4f: /* Broadwell EP/EX */ +- return ucode_rev >= 0xb000021; ++ safe = ucode_rev >= 0xb000021; break; + case 0x56: /* Broadwell D */ + switch ( boot_cpu_data.x86_mask ) + { +- case 2: return ucode_rev >= 0x15; +- case 3: return ucode_rev >= 0x7000012; +- case 4: return ucode_rev >= 0xf000011; +- case 5: return ucode_rev >= 0xe000009; ++ case 2: safe = ucode_rev >= 0x15; break; ++ case 3: safe = ucode_rev >= 0x7000012; break; ++ case 4: safe = ucode_rev >= 0xf000011; break; ++ case 5: safe = ucode_rev >= 0xe000009; break; + default: + printk("Unrecognised CPU stepping %#x - assuming not reptpoline safe\n", + boot_cpu_data.x86_mask); +- return false; ++ safe = false; ++ break; + } + break; + +@@ -668,7 +671,8 @@ static bool __init retpoline_safe(void) + case 0x67: /* Cannonlake? */ + case 0x8e: /* Kabylake M */ + case 0x9e: /* Kabylake D */ +- return false; ++ safe = false; ++ break; + + /* + * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe. +@@ -687,13 +691,17 @@ static bool __init retpoline_safe(void) + case 0x5c: /* Goldmont */ + case 0x5f: /* Denverton */ + case 0x85: /* Knights Mill */ +- return true; ++ safe = true; ++ break; + + default: + printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n", + boot_cpu_data.x86_model); +- return false; ++ safe = false; ++ break; + } ++ ++ return safe; + } + + /* +@@ -1126,7 +1134,7 @@ void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; + bool has_spec_ctrl, ibrs = false, hw_smt_enabled; +- bool cpu_has_bug_taa; ++ bool cpu_has_bug_taa, retpoline_safe; + + hw_smt_enabled = check_smt_enabled(); + +@@ -1155,6 +1163,9 @@ void __init init_speculation_mitigations(void) + thunk = THUNK_JMP; + } + ++ /* Determine if retpoline is safe on this CPU. */ ++ retpoline_safe = retpoline_calculations(); ++ + /* + * Has the user specified any custom BTI mitigations? If so, follow their + * instructions exactly and disable all heuristics. +@@ -1176,7 +1187,7 @@ void __init init_speculation_mitigations(void) + * On all hardware, we'd like to use retpoline in preference to + * IBRS, but only if it is safe on this hardware. 
+ */ +- if ( retpoline_safe() ) ++ if ( retpoline_safe ) + thunk = THUNK_RETPOLINE; + else if ( has_spec_ctrl ) + ibrs = true; +-- +2.39.2 + diff --git a/0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch b/0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch new file mode 100644 index 00000000..f278653e --- /dev/null +++ b/0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch @@ -0,0 +1,172 @@ +From f07a464a832acb9f92a281e5600ec580e06f315a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 25 May 2023 20:31:22 +0100 +Subject: [PATCH 33/35] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as + appropriate + +In order to level a VM safely for migration, the toolstack needs to know the +RSBA/RRSBA properties of the CPU, whether or not they happen to be enumerated. + +See the code comment for details. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 36525a964fb629d0bd26e5a1c42de467af7a42a7) +--- + xen/arch/x86/include/asm/cpufeature.h | 1 + + xen/arch/x86/spec_ctrl.c | 100 ++++++++++++++++++++++++-- + 2 files changed, 96 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h +index 2460bc7e12c8..ec9456e1fdc3 100644 +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -155,6 +155,7 @@ + #define cpu_has_tsx_ctrl boot_cpu_has(X86_FEATURE_TSX_CTRL) + #define cpu_has_taa_no boot_cpu_has(X86_FEATURE_TAA_NO) + #define cpu_has_fb_clear boot_cpu_has(X86_FEATURE_FB_CLEAR) ++#define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) + + /* Synthesized. */ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index a6fd2fe9f56f..8f9500bc64ae 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -591,7 +591,10 @@ static bool __init check_smt_enabled(void) + return false; + } + +-/* Calculate whether Retpoline is known-safe on this CPU. */ ++/* ++ * Calculate whether Retpoline is known-safe on this CPU. Fix up the ++ * RSBA/RRSBA bits as necessary. ++ */ + static bool __init retpoline_calculations(void) + { + unsigned int ucode_rev = this_cpu(cpu_sig).rev; +@@ -605,15 +608,93 @@ static bool __init retpoline_calculations(void) + return false; + + /* +- * RSBA may be set by a hypervisor to indicate that we may move to a +- * processor which isn't retpoline-safe. ++ * The meaning of the RSBA and RRSBA bits have evolved over time. The ++ * agreed upon meaning at the time of writing (May 2023) is thus: ++ * ++ * - RSBA (RSB Alternative) means that an RSB may fall back to an ++ * alternative predictor on underflow. Skylake uarch and later all have ++ * this property. Broadwell too, when running microcode versions prior ++ * to Jan 2018. ++ * ++ * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces ++ * tagging of predictions with the mode in which they were learned. So ++ * when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA). ++ * ++ * - CPUs are not expected to enumerate both RSBA and RRSBA. ++ * ++ * Some parts (Broadwell) are not expected to ever enumerate this ++ * behaviour directly. Other parts have differing enumeration with ++ * microcode version. Fix up Xen's idea, so we can advertise them safely ++ * to guests, and so toolstacks can level a VM safety for migration. 
++ *
++ * The following states exist:
++ *
++ * | | RSBA | EIBRS | RRSBA | Notes | Action (in principle) |
++ * |---+------+-------+-------+--------------------+-----------------------|
++ * | 1 | 0 | 0 | 0 | OK (older parts) | Maybe +RSBA |
++ * | 2 | 0 | 0 | 1 | Broken | (+RSBA, -RRSBA) |
++ * | 3 | 0 | 1 | 0 | OK (pre-Aug ucode) | +RRSBA |
++ * | 4 | 0 | 1 | 1 | OK | |
++ * | 5 | 1 | 0 | 0 | OK | |
++ * | 6 | 1 | 0 | 1 | Broken | (-RRSBA) |
++ * | 7 | 1 | 1 | 0 | Broken | (-RSBA, +RRSBA) |
++ * | 8 | 1 | 1 | 1 | Broken | (-RSBA) |
++ *
++ * However, we don't need perfect adherence to the spec. We only need
++ * RSBA || RRSBA to indicate "alternative predictors potentially in use".
++ * Rows 1 & 3 are fixed up by later logic, as they're known configurations
++ * which exist in the world.
+ *
++ * Complain loudly at the broken cases. They're safe for Xen to use (so we
++ * don't attempt to correct), and may or may not exist in reality, but if
++ * we ever encounter them in practice, something is wrong and needs
++ * further investigation.
++ */
++ if ( cpu_has_eibrs ? cpu_has_rsba /* Rows 7, 8 */
++ : cpu_has_rrsba /* Rows 2, 6 */ )
++ {
++ printk(XENLOG_ERR
++ "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n",
++ boot_cpu_data.x86, boot_cpu_data.x86_model,
++ boot_cpu_data.x86_mask, ucode_rev,
++ cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba);
++ add_taint(TAINT_CPU_OUT_OF_SPEC);
++ }
++
++ /*
+ * Processors offering Enhanced IBRS are not guarenteed to be
+ * repoline-safe.
+ */
+- if ( cpu_has_rsba || cpu_has_eibrs )
++ if ( cpu_has_eibrs )
++ {
++ /*
++ * Prior to the August 2023 microcode, many eIBRS-capable parts did
++ * not enumerate RRSBA.
++ */
++ if ( !cpu_has_rrsba )
++ setup_force_cpu_cap(X86_FEATURE_RRSBA);
++
++ return false;
++ }
++
++ /*
++ * RSBA is explicitly enumerated in some cases, but may also be set by a
++ * hypervisor to indicate that we may move to a processor which isn't
++ * retpoline-safe.
++ */
++ if ( cpu_has_rsba )
+ return false;
+
++ /*
++ * At this point, we've filtered all the legal RSBA || RRSBA cases (or the
++ * known non-ideal cases). If ARCH_CAPS is visible, trust the absence of
++ * RSBA || RRSBA. There's no known microcode which advertises ARCH_CAPS
++ * without RSBA or EIBRS, and if we're virtualised we can't rely on the model
++ * check anyway.
++ */
++ if ( cpu_has_arch_caps )
++ return true;
++
+ switch ( boot_cpu_data.x86_model )
+ {
+ case 0x17: /* Penryn */
+@@ -701,6 +782,15 @@ static bool __init retpoline_calculations(void)
+ break;
+ }
+
++ if ( !safe )
++ {
++ /*
++ * Note: the eIBRS-capable parts are filtered out earlier, so the
++ * remainder here are the ones which suffer RSBA behaviour.
++ */
++ setup_force_cpu_cap(X86_FEATURE_RSBA);
++ }
++
+ return safe;
+ }
+
+@@ -1163,7 +1253,7 @@ void __init init_speculation_mitigations(void)
+ thunk = THUNK_JMP;
+ }
+
+- /* Determine if retpoline is safe on this CPU. */
++ /* Determine if retpoline is safe on this CPU. Fix up RSBA/RRSBA enumerations.
 */
+ retpoline_safe = retpoline_calculations();
+
+ /*
+--
+2.39.2
+
diff --git a/0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch b/0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch
new file mode 100644
index 00000000..be48e0d0
--- /dev/null
+++ b/0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch
@@ -0,0 +1,158 @@
+From 138822c15d870b9cc699ea6a9f98f3998eda6978 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Wed, 24 May 2023 15:41:21 +0100
+Subject: [PATCH 34/35] x86/cpu-policy: Derive RSBA/RRSBA for guest policies
+
+The RSBA bit, "RSB Alternative", means that the RSB may use alternative
+predictors when empty. From a practical point of view, this means "Retpoline
+not safe".
+
+Enhanced IBRS (officially IBRS_ALL in Intel's docs, previously IBRS_ATT) is a
+statement that IBRS is implemented in hardware (as opposed to the form
+retrofitted to existing CPUs in microcode).
+
+The RRSBA bit, "Restricted-RSBA", is a combination of RSBA, and the eIBRS
+property that predictions are tagged with the mode in which they were learnt.
+Therefore, it means "when eIBRS is active, the RSB may fall back to
+alternative predictors but restricted to the current prediction mode". As
+such, it's a stronger statement than RSBA, but still means "Retpoline not safe".
+
+CPUs are not expected to enumerate both RSBA and RRSBA.
+
+Add feature dependencies for EIBRS and RRSBA. While technically they're not
+linked, absolutely nothing good can come of letting the guest see RRSBA
+without EIBRS. Nor a guest seeing EIBRS without IBRSB. Furthermore, we use
+this dependency to simplify the max derivation logic.
+
+The max policies get RSBA and RRSBA unconditionally set (with the EIBRS
+dependency maybe hiding RRSBA). We can run any VM, even if it has been told
+"somewhere you might run, Retpoline isn't safe".
+
+The default policies are more complicated. A guest shouldn't see both bits,
+but it needs to see one if the current host suffers from any form of RSBA, and
+which bit it needs to see depends on whether eIBRS is visible or not.
+Therefore, the calculation must be performed after sanitise_featureset().
+
+Signed-off-by: Andrew Cooper
+Acked-by: Jan Beulich
+(cherry picked from commit e0586a4ff514590eec50185e2440b97f9a31cb7f)
+---
+ xen/arch/x86/cpu-policy.c | 39 +++++++++++++++++++++
+ xen/include/public/arch-x86/cpufeatureset.h | 4 +--
+ xen/tools/gen-cpuid.py | 5 ++-
+ 3 files changed, 45 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 4ec3c2fb93c7..55524e28e168 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -423,8 +423,17 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+ * Retpoline not safe)", so these need to be visible to a guest in all
+ * cases, even when it's only some other server in the pool which
+ * suffers the identified behaviour.
++ *
++ * We can always run any VM which has previously (or will
++ * subsequently) run on hardware where Retpoline is not safe.
++ * Note:
++ * - The dependency logic may hide RRSBA for other reasons.
++ * - The max policy does not constitute a sensible configuration to
++ * run a guest in.
+ */ + __set_bit(X86_FEATURE_ARCH_CAPS, fs); ++ __set_bit(X86_FEATURE_RSBA, fs); ++ __set_bit(X86_FEATURE_RRSBA, fs); + } + } + +@@ -532,6 +541,21 @@ static void __init calculate_pv_def_policy(void) + guest_common_default_feature_adjustments(fs); + + sanitise_featureset(fs); ++ ++ /* ++ * If the host suffers from RSBA of any form, and the guest can see ++ * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest ++ * depending on the visibility of eIBRS. ++ */ ++ if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) && ++ (cpu_has_rsba || cpu_has_rrsba) ) ++ { ++ bool eibrs = test_bit(X86_FEATURE_EIBRS, fs); ++ ++ __set_bit(eibrs ? X86_FEATURE_RRSBA ++ : X86_FEATURE_RSBA, fs); ++ } ++ + x86_cpu_featureset_to_policy(fs, p); + recalculate_xstate(p); + } +@@ -649,6 +673,21 @@ static void __init calculate_hvm_def_policy(void) + __set_bit(X86_FEATURE_VIRT_SSBD, fs); + + sanitise_featureset(fs); ++ ++ /* ++ * If the host suffers from RSBA of any form, and the guest can see ++ * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest ++ * depending on the visibility of eIBRS. ++ */ ++ if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) && ++ (cpu_has_rsba || cpu_has_rrsba) ) ++ { ++ bool eibrs = test_bit(X86_FEATURE_EIBRS, fs); ++ ++ __set_bit(eibrs ? X86_FEATURE_RRSBA ++ : X86_FEATURE_RSBA, fs); ++ } ++ + x86_cpu_featureset_to_policy(fs, p); + recalculate_xstate(p); + } +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 02a80b0c0c35..fe01dc231e5f 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -303,7 +303,7 @@ XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks s + /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */ + XEN_CPUFEATURE(RDCL_NO, 16*32+ 0) /*A No Rogue Data Cache Load (Meltdown) */ + XEN_CPUFEATURE(EIBRS, 16*32+ 1) /*A Enhanced IBRS */ +-XEN_CPUFEATURE(RSBA, 16*32+ 2) /*!A RSB Alternative (Retpoline not safe) */ ++XEN_CPUFEATURE(RSBA, 16*32+ 2) /*! RSB Alternative (Retpoline not safe) */ + XEN_CPUFEATURE(SKIP_L1DFL, 16*32+ 3) /* Don't need to flush L1D on VMEntry */ + XEN_CPUFEATURE(INTEL_SSB_NO, 16*32+ 4) /*A No Speculative Store Bypass */ + XEN_CPUFEATURE(MDS_NO, 16*32+ 5) /*A No Microarchitectural Data Sampling */ +@@ -319,7 +319,7 @@ XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Prop + XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ + XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ + XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ +-XEN_CPUFEATURE(RRSBA, 16*32+19) /*!A Restricted RSB Alternative */ ++XEN_CPUFEATURE(RRSBA, 16*32+19) /*! Restricted RSB Alternative */ + XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ + XEN_CPUFEATURE(XAPIC_STATUS, 16*32+21) /* MSR_XAPIC_DISABLE_STATUS */ + XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 72497b3cb0a1..8a7516ae0f96 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -318,7 +318,7 @@ def crunch_numbers(state): + # IBRSB/IBRS, and we pass this MSR directly to guests. Treating them + # as dependent features simplifies Xen's logic, and prevents the guest + # from seeing implausible configurations. 
+- IBRSB: [STIBP, SSBD, INTEL_PSFD],
++ IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS],
+ IBRS: [AMD_STIBP, AMD_SSBD, PSFD,
+ IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE],
+ AMD_STIBP: [STIBP_ALWAYS],
+@@ -328,6 +328,9 @@ def crunch_numbers(state):
+
+ # The ARCH_CAPS CPUID bit enumerates the availability of the whole register.
+ ARCH_CAPS: list(range(RDCL_NO, RDCL_NO + 64)),
++
++ # The behaviour described by RRSBA depends on eIBRS being active.
++ EIBRS: [RRSBA],
+ }
+
+ deep_features = tuple(sorted(deps.keys()))
+--
+2.39.2
+
diff --git a/0355-def-arch-caps.patch b/0355-def-arch-caps.patch
new file mode 100644
index 00000000..94d1a764
--- /dev/null
+++ b/0355-def-arch-caps.patch
@@ -0,0 +1,50 @@
+From 74b8fcb1279fb4f1267d0ce85146b3108c12ee1c Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Wed, 17 May 2023 10:13:36 +0100
+Subject: [PATCH 35/35] def-arch-caps
+
+---
+ xen/arch/x86/cpu-policy.c | 6 ++++++
+ xen/include/public/arch-x86/cpufeatureset.h | 2 +-
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 55524e28e168..3e8cbe984957 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -27,6 +27,9 @@ struct cpu_policy __ro_after_init hvm_max_cpu_policy;
+ struct cpu_policy __ro_after_init hvm_def_cpu_policy;
+ #endif
+
++static bool opt_def_ac = true;
++boolean_param("def-ac", opt_def_ac);
++
+ const uint32_t known_features[] = INIT_KNOWN_FEATURES;
+
+ static const uint32_t __initconst pv_max_featuremask[] = INIT_PV_MAX_FEATURES;
+@@ -467,6 +470,9 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+ */
+ if ( rtm_disabled )
+ __clear_bit(X86_FEATURE_RTM, fs);
++
++ if ( !opt_def_ac )
++ __clear_bit(X86_FEATURE_ARCH_CAPS, fs);
+ }
+
+ static void __init guest_common_feature_adjustments(uint32_t *fs)
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index fe01dc231e5f..6ae3117687c9 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -270,7 +270,7 @@ XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */
+ XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */
+ XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */
+ XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush.
*/ +-XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /*!a IA32_ARCH_CAPABILITIES MSR */ ++XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /*!A IA32_ARCH_CAPABILITIES MSR */ + XEN_CPUFEATURE(CORE_CAPS, 9*32+30) /* IA32_CORE_CAPABILITIES MSR */ + XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */ + +-- +2.39.2 + diff --git a/1017-Disable-TSX-by-default.patch b/1017-Disable-TSX-by-default.patch index 7d0d8095..c26c3c6a 100644 --- a/1017-Disable-TSX-by-default.patch +++ b/1017-Disable-TSX-by-default.patch @@ -26,9 +26,9 @@ index 41b6092cfe16..996c22b56be7 100644 */ -int8_t __read_mostly opt_tsx = -1; +int8_t __read_mostly opt_tsx = -2; - int8_t __read_mostly cpu_has_tsx_ctrl = -1; bool __read_mostly rtm_disabled; + static int __init cf_check parse_tsx(const char *s) @@ -197,6 +197,13 @@ void tsx_init(void) } } diff --git a/1020-xen-tell-guests-what-speculative-workarounds-are-not.patch b/1020-xen-tell-guests-what-speculative-workarounds-are-not.patch deleted file mode 100644 index 47928b36..00000000 --- a/1020-xen-tell-guests-what-speculative-workarounds-are-not.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 6a3be9a4d752ecc5af6fb61dbfc8f53a948c2556 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - -Date: Thu, 2 Feb 2023 20:42:27 +0100 -Subject: [PATCH] xen: tell guests what speculative workarounds are not - necessary -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Expose arch caps to guests, so the guest kernel can do a better choice -what workarounds are necessary on a given platform. This is -fundamentally incompatible with migration, but since we don't do -migration, we can recover some performance. - -Signed-off-by: Marek Marczykowski-Górecki ---- - xen/arch/x86/msr.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 317b154d244d..140a3c297be0 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -157,7 +157,7 @@ int init_domain_msr_policy(struct domain *d) - * so dom0 can turn off workarounds as appropriate. Temporary, until the - * domain policy logic gains a better understanding of MSRs. 
- */ -- if ( is_hardware_domain(d) && cpu_has_arch_caps ) -+ if ( cpu_has_arch_caps ) - { - uint64_t val; - --- -2.37.3 - diff --git a/xen.spec.in b/xen.spec.in index 753778e3..a075656a 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -105,6 +105,41 @@ Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0310: 0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch Patch0311: 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch +Patch0321: 0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch +Patch0322: 0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch +Patch0323: 0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch +Patch0324: 0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch +Patch0325: 0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch +Patch0326: 0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch +Patch0327: 0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch +Patch0328: 0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch +Patch0329: 0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch +Patch0330: 0330-x86-Drop-struct-old_cpu_policy.patch +Patch0331: 0331-x86-Out-of-inline-the-policy-featureset-convertors.patch +Patch0332: 0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch +Patch0333: 0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch +Patch0334: 0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch +Patch0335: 0335-tools-fuzz-Rework-afl-policy-fuzzer.patch +Patch0336: 0336-libx86-Update-library-API-for-cpu_policy.patch +Patch0337: 0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch +Patch0338: 0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch +Patch0339: 0339-x86-boot-Rework-dom0-feature-configuration.patch +Patch0340: 0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch +Patch0341: 0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch +Patch0342: 0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch +Patch0343: 0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch +Patch0344: 0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch +Patch0345: 0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch +Patch0346: 0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch +Patch0347: 0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch +Patch0348: 0348-x86-spec-ctrl-Update-hardware-hints.patch +Patch0349: 0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch +Patch0350: 0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch +Patch0351: 0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch +Patch0352: 0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch +Patch0353: 0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch +Patch0354: 0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch +Patch0355: 0355-def-arch-caps.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch @@ -174,7 +209,6 @@ Patch1016: 1016-gnttab-disable-grant-tables-v2-by-default.patch Patch1017: 1017-Disable-TSX-by-default.patch Patch1018: 1018-Fix-IGD-passthrough-with-linux-stubdomain.patch Patch1019: 1019-Use-Linux-s-PAT.patch -Patch1020: 1020-xen-tell-guests-what-speculative-workarounds-are-not.patch # Reproducible builds Patch1100: 1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch From 8d8f66a3868ff8645ed044d35a398c37c49c6650 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sun, 2 Jul 2023 03:45:54 +0200 Subject: [PATCH 09/64] version 4.17.1-2 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index d00491fd..0cfbf088 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -1 +2 From 172ff3648b4500a676de836946a6e8edd0e5a704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 25 Jul 2023 02:03:33 +0200 Subject: [PATCH 10/64] Apply XSA-433 fix Microcode update is available only for some models at this moment, so include the workaround patch too to cover remaining models. --- 0502-xsa433-4.17.patch | 138 +++++++++++++++++++++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 139 insertions(+) create mode 100644 0502-xsa433-4.17.patch diff --git a/0502-xsa433-4.17.patch b/0502-xsa433-4.17.patch new file mode 100644 index 00000000..668f556b --- /dev/null +++ b/0502-xsa433-4.17.patch @@ -0,0 +1,138 @@ +From: Andrew Cooper +Subject: x86/amd: Mitigations for Zenbleed + +Zenbleed is a malfunction on AMD Zen2 uarch parts which results in corruption +of the vector registers. An attacker can trigger this bug deliberately in +order to access stale data in the physical vector register file. This can +include data from sibling threads, or a higher-privilege context. + +Microcode is the preferred mitigation but in the case that's not available use +the chickenbit as instructed by AMD. Re-evaluate the mitigation on late +microcode load too. + +This is XSA-433 / CVE-2023-20593. + +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index b6a20d375ad1..8d23a5be0c5f 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include "cpu.h" + +@@ -878,6 +879,72 @@ void __init detect_zen2_null_seg_behaviour(void) + + } + ++void amd_check_zenbleed(void) ++{ ++ const struct cpu_signature *sig = &this_cpu(cpu_sig); ++ unsigned int good_rev, chickenbit = (1 << 9); ++ uint64_t val, old_val; ++ ++ /* ++ * If we're virtualised, we can't do family/model checks safely, and ++ * we likely wouldn't have access to DE_CFG even if we could see a ++ * microcode revision. ++ * ++ * A hypervisor may hide AVX as a stopgap mitigation. We're not in a ++ * position to care either way. An admin doesn't want to be disabling ++ * AVX as a mitigation on any build of Xen with this logic present. ++ */ ++ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17) ++ return; ++ ++ switch (boot_cpu_data.x86_model) { ++ case 0x30 ... 0x3f: good_rev = 0x0830107a; break; ++ case 0x60 ... 0x67: good_rev = 0x0860010b; break; ++ case 0x68 ... 0x6f: good_rev = 0x08608105; break; ++ case 0x70 ... 0x7f: good_rev = 0x08701032; break; ++ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; ++ default: ++ /* ++ * With the Fam17h check above, parts getting here are Zen1. ++ * They're not affected. ++ */ ++ return; ++ } ++ ++ rdmsrl(MSR_AMD64_DE_CFG, val); ++ old_val = val; ++ ++ /* ++ * Microcode is the preferred mitigation, in terms of performance. ++ * However, without microcode, this chickenbit (specific to the Zen2 ++ * uarch) disables Floating Point Mov-Elimination to mitigate the ++ * issue. ++ */ ++ val &= ~chickenbit; ++ if (sig->rev < good_rev) ++ val |= chickenbit; ++ ++ if (val == old_val) ++ /* Nothing to change. */ ++ return; ++ ++ /* ++ * DE_CFG is a Core-scoped MSR, and this write is racy during late ++ * microcode load. 
However, both threads calculate the new value from ++ * state which is shared, and unrelated to the old value, so the ++ * result should be consistent. ++ */ ++ wrmsrl(MSR_AMD64_DE_CFG, val); ++ ++ /* ++ * Inform the admin that we changed something, but don't spam, ++ * especially during a late microcode load. ++ */ ++ if (smp_processor_id() == 0) ++ printk(XENLOG_INFO "Zenbleed mitigation - using %s\n", ++ val & chickenbit ? "chickenbit" : "microcode"); ++} ++ + static void cf_check init_amd(struct cpuinfo_x86 *c) + { + u32 l, h; +@@ -1150,6 +1217,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) + if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC)) + disable_c1_ramping(); + ++ amd_check_zenbleed(); ++ + check_syscfg_dram_mod_en(); + + amd_log_freq(c); +diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c +index ded8fe90e650..c6d13f3fb35f 100644 +--- a/xen/arch/x86/cpu/microcode/amd.c ++++ b/xen/arch/x86/cpu/microcode/amd.c +@@ -262,6 +262,8 @@ static int cf_check apply_microcode(const struct microcode_patch *patch) + "microcode: CPU%u updated from revision %#x to %#x, date = %04x-%02x-%02x\n", + cpu, old_rev, rev, patch->year, patch->month, patch->day); + ++ amd_check_zenbleed(); ++ + return 0; + } + +diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h +index 8e2816fae9b9..66611df6efc1 100644 +--- a/xen/arch/x86/include/asm/processor.h ++++ b/xen/arch/x86/include/asm/processor.h +@@ -637,6 +637,8 @@ enum ap_boot_method { + }; + extern enum ap_boot_method ap_boot_method; + ++void amd_check_zenbleed(void); ++ + #endif /* !__ASSEMBLY__ */ + + #endif /* __ASM_X86_PROCESSOR_H */ + diff --git a/xen.spec.in b/xen.spec.in index a075656a..da1a429b 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -144,6 +144,7 @@ Patch0355: 0355-def-arch-caps.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch Patch0501: 0501-xsa431.patch +Patch0502: 0502-xsa433-4.17.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From 06d5e309fe7bdcfabad8fe8ac30ef8998ede4467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 25 Jul 2023 02:34:25 +0200 Subject: [PATCH 11/64] version 4.17.1-3 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 0cfbf088..00750edc 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -2 +3 From cd1ccd8fc7730b738b992bee12503a58e1fd7f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 31 Jul 2023 19:38:14 +0200 Subject: [PATCH 12/64] Apply fix for the original XSA-433 patch --- 0503-xsa433-bugfix.patch | 29 +++++++++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 30 insertions(+) create mode 100644 0503-xsa433-bugfix.patch diff --git a/0503-xsa433-bugfix.patch b/0503-xsa433-bugfix.patch new file mode 100644 index 00000000..8ad4eda9 --- /dev/null +++ b/0503-xsa433-bugfix.patch @@ -0,0 +1,29 @@ +From: Andrew Cooper +Subject: x86/amd: Fix DE_CFG truncation in amd_check_zenbleed() + +This line: + + val &= ~chickenbit; + +ends up truncating val to 32 bits, and turning off various errata workarounds +in Zen2 systems. 
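+
+As a short, hedged sketch of the failure mode (illustrative values, not
+text from the upstream commit):
+
+    unsigned int chickenbit = (1 << 9);
+    uint64_t val = 0xffffffff00000200ULL;
+
+    /* ~chickenbit is the 32-bit value 0xfffffdff.  It zero-extends to
+     * 0x00000000fffffdff, so the AND clears bits 63:32 of val as well
+     * as bit 9.  Making chickenbit a uint64_t widens the complement. */
+    val &= ~chickenbit;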
+ +Fixes: f91c5ea97067 ("x86/amd: Mitigations for Zenbleed") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 3ed06f670491..df2681b7c455 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -909,8 +909,8 @@ void __init detect_zen2_null_seg_behaviour(void) + void amd_check_zenbleed(void) + { + const struct cpu_signature *sig = &this_cpu(cpu_sig); +- unsigned int good_rev, chickenbit = (1 << 9); +- uint64_t val, old_val; ++ unsigned int good_rev; ++ uint64_t val, old_val, chickenbit = (1 << 9); + + /* + * If we're virtualised, we can't do family/model checks safely, and diff --git a/xen.spec.in b/xen.spec.in index da1a429b..6b5d4cff 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -145,6 +145,7 @@ Patch0355: 0355-def-arch-caps.patch Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch Patch0501: 0501-xsa431.patch Patch0502: 0502-xsa433-4.17.patch +Patch0503: 0503-xsa433-bugfix.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From 82eaa96959cd0caab27a6117da66974d87518d9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 31 Jul 2023 22:30:06 +0200 Subject: [PATCH 13/64] version 4.17.1-4 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 00750edc..b8626c4c 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -3 +4 From fbc7e445fdf5cc0a8895382f861231cf611d7d07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 9 Aug 2023 04:14:30 +0200 Subject: [PATCH 14/64] Rebase patches on top of 4.17.2 And drop those already included upstream. --- ...emory-decoding-on-MMIO-based-PCI-con.patch | 51 - ...uest-assist-gcc13-s-realloc-analyzer.patch | 59 - ...Rework-the-handling-of-dynamic-featu.patch | 118 -- ...fit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch | 127 -- ...t-cpu_policy-to-struct-old_cpuid_pol.patch | 157 -- ...l-sysctl-.cpu_policy.-cpuid-msr-_pol.patch | 173 -- ...ct-cpuid_policy-to-struct-cpu_policy.patch | 1073 ---------- ...ct-msr_policy-into-struct-cpu_policy.patch | 307 --- ...-the-system-cpuid-msr-policy-objects.patch | 612 ------ ...-a-domain-s-cpuid-msr-policy-objects.patch | 423 ---- ...c_cpu_policy-s-cpuid-and-msr-objects.patch | 367 ---- 0330-x86-Drop-struct-old_cpu_policy.patch | 292 --- ...ine-the-policy-featureset-convertors.patch | 293 --- ...R-policy-initialisation-logic-into-c.patch | 261 --- ...PUID-policy-initialisation-logic-int.patch | 1780 ----------------- ...witch-x86_emulate_ctxt-to-cpu_policy.patch | 142 -- ...-tools-fuzz-Rework-afl-policy-fuzzer.patch | 133 -- ...86-Update-library-API-for-cpu_policy.patch | 448 ----- ...-temporary-cpuid-msr-_policy-defines.patch | 334 ---- ...ate-FEATURESET_NR_ENTRIES-more-helpf.patch | 110 - ...ot-Rework-dom0-feature-configuration.patch | 136 -- ...MSR_ARCH_CAPS-handling-for-the-Host-.patch | 39 - ...icy-Infrastructure-for-MSR_ARCH_CAPS.patch | 216 -- ...u-policy-MSR_ARCH_CAPS-feature-names.patch | 102 - ...MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch | 96 - ...MSR_ARCH_CAPS-data-in-guest-max-poli.patch | 138 -- ...Remove-opencoded-MSR_ARCH_CAPS-check.patch | 58 - ...Remove-opencoded-MSR_ARCH_CAPS-check.patch | 95 - ...Remove-opencoded-MSR_ARCH_CAPS-check.patch | 247 --- ...-x86-spec-ctrl-Update-hardware-hints.patch | 51 - ...earrange-guest_common_default_featur.patch | 76 - ...c-ctrl-Fix-the-rendering-of-FB_CLEAR.patch | 56 - ...e-a-taint-for-CET-without-MSR_SPEC_C.patch | 48 - 
...name-retpoline_safe-to-retpoline_cal.patch | 140 -- ...x-up-the-RSBA-RRSBA-bits-as-appropri.patch | 172 -- ...Derive-RSBA-RRSBA-for-guest-policies.patch | 158 -- 0355-def-arch-caps.patch | 50 - ...a-Operand-Invariant-Timing-Mode-by-d.patch | 14 - 0501-xsa431.patch | 96 - 0502-xsa433-4.17.patch | 138 -- 0503-xsa433-bugfix.patch | 29 - archlinux/PKGBUILD.in | 1 - xen.spec.in | 40 - 43 files changed, 9456 deletions(-) delete mode 100644 0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch delete mode 100644 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch delete mode 100644 0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch delete mode 100644 0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch delete mode 100644 0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch delete mode 100644 0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch delete mode 100644 0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch delete mode 100644 0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch delete mode 100644 0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch delete mode 100644 0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch delete mode 100644 0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch delete mode 100644 0330-x86-Drop-struct-old_cpu_policy.patch delete mode 100644 0331-x86-Out-of-inline-the-policy-featureset-convertors.patch delete mode 100644 0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch delete mode 100644 0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch delete mode 100644 0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch delete mode 100644 0335-tools-fuzz-Rework-afl-policy-fuzzer.patch delete mode 100644 0336-libx86-Update-library-API-for-cpu_policy.patch delete mode 100644 0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch delete mode 100644 0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch delete mode 100644 0339-x86-boot-Rework-dom0-feature-configuration.patch delete mode 100644 0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch delete mode 100644 0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch delete mode 100644 0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch delete mode 100644 0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch delete mode 100644 0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch delete mode 100644 0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch delete mode 100644 0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch delete mode 100644 0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch delete mode 100644 0348-x86-spec-ctrl-Update-hardware-hints.patch delete mode 100644 0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch delete mode 100644 0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch delete mode 100644 0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch delete mode 100644 0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch delete mode 100644 0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch delete mode 100644 0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch delete mode 100644 0355-def-arch-caps.patch delete mode 100644 0501-xsa431.patch delete mode 100644 0502-xsa433-4.17.patch delete mode 100644 0503-xsa433-bugfix.patch diff --git a/0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch 
b/0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch deleted file mode 100644 index 104d7cf6..00000000 --- a/0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch +++ /dev/null @@ -1,51 +0,0 @@ -From a16fb78515d54be95f81c0d1c0a3a7b954a54d0a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - -Date: Mon, 8 May 2023 14:15:38 +0200 -Subject: [PATCH] ns16550: enable memory decoding on MMIO-based PCI console - card -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -pci_serial_early_init() enables PCI_COMMAND_IO for IO-based UART -devices, add setting PCI_COMMAND_MEMORY for MMIO-based UART devices too. -Note the MMIO-based devices in practice need a "pci" sub-option, -otherwise a few parameters are not initialized (including bar_idx, -reg_shift, reg_width etc). The "pci" is not supposed to be used with -explicit BDF, so do not key setting PCI_COMMAND_MEMORY on explicit BDF -being set. Contrary to the IO-based UART, pci_serial_early_init() will -not attempt to set BAR0 address, even if user provided io_base manually -- in most cases, those are with an offest and the current cmdline syntax -doesn't allow expressing it. Due to this, enable PCI_COMMAND_MEMORY only -if uart->bar is already populated. In similar spirit, this patch does -not support setting BAR0 of the bridge. - -Signed-off-by: Marek Marczykowski-Górecki -Acked-by: Jan Beulich ---- - xen/drivers/char/ns16550.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c -index 1b21eb93c45f..212a9c49ae8e 100644 ---- a/xen/drivers/char/ns16550.c -+++ b/xen/drivers/char/ns16550.c -@@ -272,6 +272,14 @@ static int cf_check ns16550_getc(struct serial_port *port, char *pc) - static void pci_serial_early_init(struct ns16550 *uart) - { - #ifdef NS16550_PCI -+ if ( uart->bar && uart->io_base >= 0x10000 ) -+ { -+ pci_conf_write16(PCI_SBDF(0, uart->ps_bdf[0], uart->ps_bdf[1], -+ uart->ps_bdf[2]), -+ PCI_COMMAND, PCI_COMMAND_MEMORY); -+ return; -+ } -+ - if ( !uart->ps_bdf_enable || uart->io_base >= 0x10000 ) - return; - --- -2.39.2 - diff --git a/0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch b/0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch deleted file mode 100644 index f8205c6c..00000000 --- a/0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 99a9c3d7141063ae3f357892c6181cfa3be8a280 Mon Sep 17 00:00:00 2001 -From: Olaf Hering -Date: Wed, 3 May 2023 15:06:41 +0200 -Subject: [PATCH] tools/libs/guest: assist gcc13's realloc analyzer - -gcc13 fails to track the allocated memory in backup_ptes: - -xg_offline_page.c: In function 'backup_ptes': -xg_offline_page.c:191:13: error: pointer 'orig' may be used after 'realloc' [-Werror=use-after-free] - 191 | free(orig); - -Assist the analyzer by slightly rearranging the code: -In case realloc succeeds, the previous allocation is either extended -or released internally. In case realloc fails, the previous allocation -is left unchanged. Return an error in this case, the caller will -release the currently allocated memory in its error path. 
- -http://bugzilla.suse.com/show_bug.cgi?id=1210570 - -Signed-off-by: Olaf Hering -Reviewed-by: Juergen Gross -Compile-tested-by: Jason Andryuk -Acked-by: Jan Beulich ---- - tools/libs/guest/xg_offline_page.c | 16 ++++++---------- - 1 file changed, 6 insertions(+), 10 deletions(-) - -diff --git a/tools/libs/guest/xg_offline_page.c b/tools/libs/guest/xg_offline_page.c -index ccd0299f0fc7..8f0a252417a5 100644 ---- a/tools/libs/guest/xg_offline_page.c -+++ b/tools/libs/guest/xg_offline_page.c -@@ -181,18 +181,14 @@ static int backup_ptes(xen_pfn_t table_mfn, int offset, - - if (backup->max == backup->cur) - { -- void *orig = backup->entries; -+ void *entries = realloc(backup->entries, backup->max * 2 * -+ sizeof(struct pte_backup_entry)); - -- backup->entries = realloc( -- orig, backup->max * 2 * sizeof(struct pte_backup_entry)); -- -- if (backup->entries == NULL) -- { -- free(orig); -+ if (entries == NULL) - return -1; -- } -- else -- backup->max *= 2; -+ -+ backup->entries = entries; -+ backup->max *= 2; - } - - backup->entries[backup->cur].table_mfn = table_mfn; --- -2.39.2 - diff --git a/0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch b/0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch deleted file mode 100644 index bd06bb94..00000000 --- a/0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch +++ /dev/null @@ -1,118 +0,0 @@ -From e509e270347f569f2112340155c78eb3ecb54c98 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 10 Mar 2023 19:04:22 +0000 -Subject: [PATCH 01/35] tools/xen-cpuid: Rework the handling of dynamic - featuresets - -struct fsinfo is the vestigial remnant of an older internal design which -didn't survive very long. - -Simplify things by inlining get_featureset() and having a single memory -allocation that gets reused. This in turn changes featuresets[] to be a -simple list of names, so rename it to fs_names[]. - -No functional change. 
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit ec3474e1dd42e6f410601f50b6e74fb7c442cfb9) ---- - tools/misc/xen-cpuid.c | 53 ++++++++++++++++++------------------------ - 1 file changed, 22 insertions(+), 31 deletions(-) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index 45e443f5d9ad..e774a9950ebb 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -244,16 +244,11 @@ static const struct { - - #define COL_ALIGN "18" - --static struct fsinfo { -- const char *name; -- uint32_t len; -- uint32_t *fs; --} featuresets[] = --{ -- [XEN_SYSCTL_cpu_featureset_host] = { "Host", 0, NULL }, -- [XEN_SYSCTL_cpu_featureset_raw] = { "Raw", 0, NULL }, -- [XEN_SYSCTL_cpu_featureset_pv] = { "PV", 0, NULL }, -- [XEN_SYSCTL_cpu_featureset_hvm] = { "HVM", 0, NULL }, -+static const char *const fs_names[] = { -+ [XEN_SYSCTL_cpu_featureset_host] = "Host", -+ [XEN_SYSCTL_cpu_featureset_raw] = "Raw", -+ [XEN_SYSCTL_cpu_featureset_pv] = "PV", -+ [XEN_SYSCTL_cpu_featureset_hvm] = "HVM", - }; - - static void dump_leaf(uint32_t leaf, const char *const *strs) -@@ -300,22 +295,10 @@ static void decode_featureset(const uint32_t *features, - } - } - --static int get_featureset(xc_interface *xch, unsigned int idx) --{ -- struct fsinfo *f = &featuresets[idx]; -- -- f->len = nr_features; -- f->fs = calloc(nr_features, sizeof(*f->fs)); -- -- if ( !f->fs ) -- err(1, "calloc(, featureset)"); -- -- return xc_get_cpu_featureset(xch, idx, &f->len, f->fs); --} -- - static void dump_info(xc_interface *xch, bool detail) - { - unsigned int i; -+ uint32_t *fs; - - printf("nr_features: %u\n", nr_features); - -@@ -346,26 +329,34 @@ static void dump_info(xc_interface *xch, bool detail) - nr_features, "HVM Hap Default", detail); - - printf("\nDynamic sets:\n"); -- for ( i = 0; i < ARRAY_SIZE(featuresets); ++i ) -+ -+ fs = malloc(sizeof(*fs) * nr_features); -+ if ( !fs ) -+ err(1, "malloc(featureset)"); -+ -+ for ( i = 0; i < ARRAY_SIZE(fs_names); ++i ) - { -- if ( get_featureset(xch, i) ) -+ uint32_t len = nr_features; -+ int ret; -+ -+ memset(fs, 0, sizeof(*fs) * nr_features); -+ -+ ret = xc_get_cpu_featureset(xch, i, &len, fs); -+ if ( ret ) - { - if ( errno == EOPNOTSUPP ) - { -- printf("%s featureset not supported by Xen\n", -- featuresets[i].name); -+ printf("%s featureset not supported by Xen\n", fs_names[i]); - continue; - } - - err(1, "xc_get_featureset()"); - } - -- decode_featureset(featuresets[i].fs, featuresets[i].len, -- featuresets[i].name, detail); -+ decode_featureset(fs, len, fs_names[i], detail); - } - -- for ( i = 0; i < ARRAY_SIZE(featuresets); ++i ) -- free(featuresets[i].fs); -+ free(fs); - } - - static void print_policy(const char *name, --- -2.39.2 - diff --git a/0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch b/0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch deleted file mode 100644 index f75d9206..00000000 --- a/0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 81e4ade2e5977fa3e72443ff2894765abb4de5dd Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 10 Mar 2023 19:37:56 +0000 -Subject: [PATCH 02/35] x86/sysctl: Retrofit - XEN_SYSCTL_cpu_featureset_{pv,hvm}_max - -Featuresets are supposed to be disappearing when the CPU policy infrastructure -is complete, but that has taken longer than expected, and isn't going to be -complete imminently either. 
- -In the meantime, Xen does have proper default/max featuresets, and xen-cpuid -can even get them via the XEN_SYSCTL_cpu_policy_* interface, but only knows -now to render them nicely via the featureset interface. - -Differences between default and max are a frequent source of errors, -frequently too in secret leading up to an embargo, so extend the featureset -sysctl to allow xen-cpuid to render them all nicely. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -Acked-by: Christian Lindig -(cherry picked from commit 433d012c6c2737ad5a9aaa994355a4140d601852) ---- - tools/misc/xen-cpuid.c | 10 ++++++---- - tools/ocaml/libs/xc/xenctrl.ml | 8 +++++++- - tools/ocaml/libs/xc/xenctrl.mli | 8 +++++++- - xen/arch/x86/sysctl.c | 4 +++- - xen/include/public/sysctl.h | 2 ++ - 5 files changed, 25 insertions(+), 7 deletions(-) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index e774a9950ebb..859345ae8ab2 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -245,10 +245,12 @@ static const struct { - #define COL_ALIGN "18" - - static const char *const fs_names[] = { -- [XEN_SYSCTL_cpu_featureset_host] = "Host", -- [XEN_SYSCTL_cpu_featureset_raw] = "Raw", -- [XEN_SYSCTL_cpu_featureset_pv] = "PV", -- [XEN_SYSCTL_cpu_featureset_hvm] = "HVM", -+ [XEN_SYSCTL_cpu_featureset_raw] = "Raw", -+ [XEN_SYSCTL_cpu_featureset_host] = "Host", -+ [XEN_SYSCTL_cpu_featureset_pv] = "PV Default", -+ [XEN_SYSCTL_cpu_featureset_hvm] = "HVM Default", -+ [XEN_SYSCTL_cpu_featureset_pv_max] = "PV Max", -+ [XEN_SYSCTL_cpu_featureset_hvm_max] = "HVM Max", - }; - - static void dump_leaf(uint32_t leaf, const char *const *strs) -diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml -index aa650533f718..49aa9102f503 100644 ---- a/tools/ocaml/libs/xc/xenctrl.ml -+++ b/tools/ocaml/libs/xc/xenctrl.ml -@@ -309,7 +309,13 @@ external version_changeset: handle -> string = "stub_xc_version_changeset" - external version_capabilities: handle -> string = - "stub_xc_version_capabilities" - --type featureset_index = Featureset_raw | Featureset_host | Featureset_pv | Featureset_hvm -+type featureset_index = -+ | Featureset_raw -+ | Featureset_host -+ | Featureset_pv -+ | Featureset_hvm -+ | Featureset_pv_max -+ | Featureset_hvm_max - external get_cpu_featureset : handle -> featureset_index -> int64 array = "stub_xc_get_cpu_featureset" - - external watchdog : handle -> int -> int32 -> int -diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli -index 5bf5f5dfea36..b9b92741296d 100644 ---- a/tools/ocaml/libs/xc/xenctrl.mli -+++ b/tools/ocaml/libs/xc/xenctrl.mli -@@ -235,7 +235,13 @@ external version_changeset : handle -> string = "stub_xc_version_changeset" - external version_capabilities : handle -> string - = "stub_xc_version_capabilities" - --type featureset_index = Featureset_raw | Featureset_host | Featureset_pv | Featureset_hvm -+type featureset_index = -+ | Featureset_raw -+ | Featureset_host -+ | Featureset_pv -+ | Featureset_hvm -+ | Featureset_pv_max -+ | Featureset_hvm_max - external get_cpu_featureset : handle -> featureset_index -> int64 array = "stub_xc_get_cpu_featureset" - - external pages_to_kib : int64 -> int64 = "stub_pages_to_kib" -diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c -index f8f8d797557e..d6612a17078d 100644 ---- a/xen/arch/x86/sysctl.c -+++ b/xen/arch/x86/sysctl.c -@@ -327,14 +327,16 @@ long arch_do_sysctl( - - case XEN_SYSCTL_get_cpu_featureset: - { -- static const struct cpuid_policy *const policy_table[4] = { -+ static 
const struct cpuid_policy *const policy_table[6] = { - [XEN_SYSCTL_cpu_featureset_raw] = &raw_cpuid_policy, - [XEN_SYSCTL_cpu_featureset_host] = &host_cpuid_policy, - #ifdef CONFIG_PV - [XEN_SYSCTL_cpu_featureset_pv] = &pv_def_cpuid_policy, -+ [XEN_SYSCTL_cpu_featureset_pv_max] = &pv_max_cpuid_policy, - #endif - #ifdef CONFIG_HVM - [XEN_SYSCTL_cpu_featureset_hvm] = &hvm_def_cpuid_policy, -+ [XEN_SYSCTL_cpu_featureset_hvm_max] = &hvm_max_cpuid_policy, - #endif - }; - const struct cpuid_policy *p = NULL; -diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h -index 001a4de27375..e8dded9fb94a 100644 ---- a/xen/include/public/sysctl.h -+++ b/xen/include/public/sysctl.h -@@ -796,6 +796,8 @@ struct xen_sysctl_cpu_featureset { - #define XEN_SYSCTL_cpu_featureset_host 1 - #define XEN_SYSCTL_cpu_featureset_pv 2 - #define XEN_SYSCTL_cpu_featureset_hvm 3 -+#define XEN_SYSCTL_cpu_featureset_pv_max 4 -+#define XEN_SYSCTL_cpu_featureset_hvm_max 5 - uint32_t index; /* IN: Which featureset to query? */ - uint32_t nr_features; /* IN/OUT: Number of entries in/written to - * 'features', or the maximum number of features if --- -2.39.2 - diff --git a/0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch b/0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch deleted file mode 100644 index e304cbd8..00000000 --- a/0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 7dac2c4267256ffd0e2bcab53d27281e7d26510e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 28 Mar 2023 20:31:33 +0100 -Subject: [PATCH 03/35] x86: Rename struct cpu_policy to struct - old_cpuid_policy - -We want to merge struct cpuid_policy and struct msr_policy together, and the -result wants to be called struct cpu_policy. - -The current struct cpu_policy, being a pair of pointers, isn't terribly -useful. Rename the type to struct old_cpu_policy, but it will disappear -entirely once the merge is complete. - -No functional change. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit c2ec94c370f211d73f336ccfbdb32499f1b05f82) ---- - tools/libs/guest/xg_cpuid_x86.c | 4 ++-- - tools/tests/cpu-policy/test-cpu-policy.c | 4 ++-- - xen/arch/x86/domctl.c | 4 ++-- - xen/arch/x86/include/asm/cpuid.h | 2 +- - xen/arch/x86/sysctl.c | 4 ++-- - xen/include/xen/lib/x86/cpu-policy.h | 6 +++--- - xen/lib/x86/policy.c | 4 ++-- - 7 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index 57f81eb0a082..a22c7bf7923f 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -868,8 +868,8 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host, - xc_cpu_policy_t *guest) - { - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; -- struct cpu_policy h = { &host->cpuid, &host->msr }; -- struct cpu_policy g = { &guest->cpuid, &guest->msr }; -+ struct old_cpu_policy h = { &host->cpuid, &host->msr }; -+ struct old_cpu_policy g = { &guest->cpuid, &guest->msr }; - int rc = x86_cpu_policies_are_compatible(&h, &g, &err); - - if ( !rc ) -diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c -index ed450a099709..8a223fddb3da 100644 ---- a/tools/tests/cpu-policy/test-cpu-policy.c -+++ b/tools/tests/cpu-policy/test-cpu-policy.c -@@ -602,7 +602,7 @@ static void test_is_compatible_success(void) - for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) - { - struct test *t = &tests[i]; -- struct cpu_policy sys = { -+ struct old_cpu_policy sys = { - &t->host_cpuid, - &t->host_msr, - }, new = { -@@ -654,7 +654,7 @@ static void test_is_compatible_failure(void) - for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) - { - struct test *t = &tests[i]; -- struct cpu_policy sys = { -+ struct old_cpu_policy sys = { - &t->host_cpuid, - &t->host_msr, - }, new = { -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index e9bfbc57a794..971d4937a1e6 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -41,8 +41,8 @@ - static int update_domain_cpu_policy(struct domain *d, - xen_domctl_cpu_policy_t *xdpc) - { -- struct cpu_policy new = {}; -- const struct cpu_policy *sys = is_pv_domain(d) -+ struct old_cpu_policy new = {}; -+ const struct old_cpu_policy *sys = is_pv_domain(d) - ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max] - : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max]; - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; -diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h -index 9c3637549a10..49b3128f06f9 100644 ---- a/xen/arch/x86/include/asm/cpuid.h -+++ b/xen/arch/x86/include/asm/cpuid.h -@@ -51,7 +51,7 @@ extern struct cpuid_policy raw_cpuid_policy, host_cpuid_policy, - pv_max_cpuid_policy, pv_def_cpuid_policy, - hvm_max_cpuid_policy, hvm_def_cpuid_policy; - --extern const struct cpu_policy system_policies[]; -+extern const struct old_cpu_policy system_policies[]; - - /* Check that all previously present features are still available. 
*/ - bool recheck_cpu_features(unsigned int cpu); -diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c -index d6612a17078d..4afe73ff4f4f 100644 ---- a/xen/arch/x86/sysctl.c -+++ b/xen/arch/x86/sysctl.c -@@ -33,7 +33,7 @@ - #include - #include - --const struct cpu_policy system_policies[6] = { -+const struct old_cpu_policy system_policies[6] = { - [ XEN_SYSCTL_cpu_policy_raw ] = { - &raw_cpuid_policy, - &raw_msr_policy, -@@ -392,7 +392,7 @@ long arch_do_sysctl( - - case XEN_SYSCTL_get_cpu_policy: - { -- const struct cpu_policy *policy; -+ const struct old_cpu_policy *policy; - - /* Reserved field set, or bad policy index? */ - if ( sysctl->u.cpu_policy._rsvd || -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index 5a2c4c7b2d90..3a5300d1078c 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -5,7 +5,7 @@ - #include - #include - --struct cpu_policy -+struct old_cpu_policy - { - struct cpuid_policy *cpuid; - struct msr_policy *msr; -@@ -33,8 +33,8 @@ struct cpu_policy_errors - * incompatibility is detected, the optional err pointer may identify the - * problematic leaf/subleaf and/or MSR. - */ --int x86_cpu_policies_are_compatible(const struct cpu_policy *host, -- const struct cpu_policy *guest, -+int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, -+ const struct old_cpu_policy *guest, - struct cpu_policy_errors *err); - - #endif /* !XEN_LIB_X86_POLICIES_H */ -diff --git a/xen/lib/x86/policy.c b/xen/lib/x86/policy.c -index f6cea4e2f9bd..2975711d7c6c 100644 ---- a/xen/lib/x86/policy.c -+++ b/xen/lib/x86/policy.c -@@ -2,8 +2,8 @@ - - #include - --int x86_cpu_policies_are_compatible(const struct cpu_policy *host, -- const struct cpu_policy *guest, -+int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, -+ const struct old_cpu_policy *guest, - struct cpu_policy_errors *err) - { - struct cpu_policy_errors e = INIT_CPU_POLICY_ERRORS; --- -2.39.2 - diff --git a/0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch b/0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch deleted file mode 100644 index e4b964a7..00000000 --- a/0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch +++ /dev/null @@ -1,173 +0,0 @@ -From ea2ccf5cdbea3bbe8684ee2f1eb440644c8e385d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 28 Mar 2023 20:48:29 +0100 -Subject: [PATCH 04/35] x86: Rename - {domctl,sysctl}.cpu_policy.{cpuid,msr}_policy fields - -These weren't great names to begin with, and using {leaves,msrs} matches up -better with the existing nr_{leaves,msr} parameters anyway. - -Furthermore, by renaming these fields we can get away with using some #define -trickery to avoid the struct {cpuid,msr}_policy merge needing to happen in a -single changeset. - -No functional change. 
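[Editorial aside -- not part of the patch series. Concretely, after this field rename a toolstack caller populates the hypercall buffer through the new {leaves,msrs} names. The sketch below is condensed from the get_domain_cpu_policy() hunk in this patch; domid, leaves/nr_leaves and msrs/nr_msrs are assumed to have been prepared by the caller:]

    /* Sketch only; condensed from tools/libs/guest/xg_cpuid_x86.c. */
    domctl.cmd = XEN_DOMCTL_get_cpu_policy;
    domctl.domain = domid;
    domctl.u.cpu_policy.nr_leaves = nr_leaves;
    set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves);
    domctl.u.cpu_policy.nr_msrs = nr_msrs;
    set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs);

    ret = do_domctl(xch, &domctl);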
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 21e3ef57e0406b6b9a783f721f29df8f91a00f99) ---- - tools/libs/guest/xg_cpuid_x86.c | 12 ++++++------ - xen/arch/x86/domctl.c | 12 ++++++------ - xen/arch/x86/sysctl.c | 8 ++++---- - xen/include/public/domctl.h | 4 ++-- - xen/include/public/sysctl.h | 4 ++-- - 5 files changed, 20 insertions(+), 20 deletions(-) - -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index a22c7bf7923f..76d9522c3a7c 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -145,9 +145,9 @@ static int get_system_cpu_policy(xc_interface *xch, uint32_t index, - sysctl.cmd = XEN_SYSCTL_get_cpu_policy; - sysctl.u.cpu_policy.index = index; - sysctl.u.cpu_policy.nr_leaves = *nr_leaves; -- set_xen_guest_handle(sysctl.u.cpu_policy.cpuid_policy, leaves); -+ set_xen_guest_handle(sysctl.u.cpu_policy.leaves, leaves); - sysctl.u.cpu_policy.nr_msrs = *nr_msrs; -- set_xen_guest_handle(sysctl.u.cpu_policy.msr_policy, msrs); -+ set_xen_guest_handle(sysctl.u.cpu_policy.msrs, msrs); - - ret = do_sysctl(xch, &sysctl); - -@@ -183,9 +183,9 @@ static int get_domain_cpu_policy(xc_interface *xch, uint32_t domid, - domctl.cmd = XEN_DOMCTL_get_cpu_policy; - domctl.domain = domid; - domctl.u.cpu_policy.nr_leaves = *nr_leaves; -- set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves); -+ set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); - domctl.u.cpu_policy.nr_msrs = *nr_msrs; -- set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs); -+ set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); - - ret = do_domctl(xch, &domctl); - -@@ -232,9 +232,9 @@ int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid, - domctl.cmd = XEN_DOMCTL_set_cpu_policy; - domctl.domain = domid; - domctl.u.cpu_policy.nr_leaves = nr_leaves; -- set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves); -+ set_xen_guest_handle(domctl.u.cpu_policy.leaves, leaves); - domctl.u.cpu_policy.nr_msrs = nr_msrs; -- set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs); -+ set_xen_guest_handle(domctl.u.cpu_policy.msrs, msrs); - domctl.u.cpu_policy.err_leaf = -1; - domctl.u.cpu_policy.err_subleaf = -1; - domctl.u.cpu_policy.err_msr = -1; -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 971d4937a1e6..175d473e412a 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -55,10 +55,10 @@ static int update_domain_cpu_policy(struct domain *d, - - /* Merge the toolstack provided data. */ - if ( (ret = x86_cpuid_copy_from_buffer( -- new.cpuid, xdpc->cpuid_policy, xdpc->nr_leaves, -+ new.cpuid, xdpc->leaves, xdpc->nr_leaves, - &err.leaf, &err.subleaf)) || - (ret = x86_msr_copy_from_buffer( -- new.msr, xdpc->msr_policy, xdpc->nr_msrs, &err.msr)) ) -+ new.msr, xdpc->msrs, xdpc->nr_msrs, &err.msr)) ) - goto out; - - /* Trim any newly-stale out-of-range leaves. */ -@@ -1318,20 +1318,20 @@ long arch_do_domctl( - - case XEN_DOMCTL_get_cpu_policy: - /* Process the CPUID leaves. */ -- if ( guest_handle_is_null(domctl->u.cpu_policy.cpuid_policy) ) -+ if ( guest_handle_is_null(domctl->u.cpu_policy.leaves) ) - domctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; - else if ( (ret = x86_cpuid_copy_to_buffer( - d->arch.cpuid, -- domctl->u.cpu_policy.cpuid_policy, -+ domctl->u.cpu_policy.leaves, - &domctl->u.cpu_policy.nr_leaves)) ) - break; - - /* Process the MSR entries. 
*/ -- if ( guest_handle_is_null(domctl->u.cpu_policy.msr_policy) ) -+ if ( guest_handle_is_null(domctl->u.cpu_policy.msrs) ) - domctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; - else if ( (ret = x86_msr_copy_to_buffer( - d->arch.msr, -- domctl->u.cpu_policy.msr_policy, -+ domctl->u.cpu_policy.msrs, - &domctl->u.cpu_policy.nr_msrs)) ) - break; - -diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c -index 4afe73ff4f4f..838a9947bfe3 100644 ---- a/xen/arch/x86/sysctl.c -+++ b/xen/arch/x86/sysctl.c -@@ -412,11 +412,11 @@ long arch_do_sysctl( - } - - /* Process the CPUID leaves. */ -- if ( guest_handle_is_null(sysctl->u.cpu_policy.cpuid_policy) ) -+ if ( guest_handle_is_null(sysctl->u.cpu_policy.leaves) ) - sysctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; - else if ( (ret = x86_cpuid_copy_to_buffer( - policy->cpuid, -- sysctl->u.cpu_policy.cpuid_policy, -+ sysctl->u.cpu_policy.leaves, - &sysctl->u.cpu_policy.nr_leaves)) ) - break; - -@@ -428,11 +428,11 @@ long arch_do_sysctl( - } - - /* Process the MSR entries. */ -- if ( guest_handle_is_null(sysctl->u.cpu_policy.msr_policy) ) -+ if ( guest_handle_is_null(sysctl->u.cpu_policy.msrs) ) - sysctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; - else if ( (ret = x86_msr_copy_to_buffer( - policy->msr, -- sysctl->u.cpu_policy.msr_policy, -+ sysctl->u.cpu_policy.msrs, - &sysctl->u.cpu_policy.nr_msrs)) ) - break; - -diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h -index 51be28c3de7c..c377e8365225 100644 ---- a/xen/include/public/domctl.h -+++ b/xen/include/public/domctl.h -@@ -683,8 +683,8 @@ struct xen_domctl_cpu_policy { - * 'cpuid_policy'. */ - uint32_t nr_msrs; /* IN/OUT: Number of MSRs in/written to - * 'msr_policy' */ -- XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* IN/OUT */ -- XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy; /* IN/OUT */ -+ XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) leaves; /* IN/OUT */ -+ XEN_GUEST_HANDLE_64(xen_msr_entry_t) msrs; /* IN/OUT */ - - /* - * OUT, set_policy only. Written in some (but not all) error cases to -diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h -index e8dded9fb94a..2b24d6bfd00e 100644 ---- a/xen/include/public/sysctl.h -+++ b/xen/include/public/sysctl.h -@@ -1050,8 +1050,8 @@ struct xen_sysctl_cpu_policy { - * 'msr_policy', or the maximum number of MSRs if - * the guest handle is NULL. */ - uint32_t _rsvd; /* Must be zero. */ -- XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_policy; /* OUT */ -- XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_policy; /* OUT */ -+ XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) leaves; /* OUT */ -+ XEN_GUEST_HANDLE_64(xen_msr_entry_t) msrs; /* OUT */ - }; - typedef struct xen_sysctl_cpu_policy xen_sysctl_cpu_policy_t; - DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_policy_t); --- -2.39.2 - diff --git a/0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch b/0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch deleted file mode 100644 index ad580570..00000000 --- a/0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch +++ /dev/null @@ -1,1073 +0,0 @@ -From 78bef4f330ced9ac39f0a262777cc160d087f252 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 28 Mar 2023 18:55:19 +0100 -Subject: [PATCH 05/35] x86: Rename struct cpuid_policy to struct cpu_policy - -Also merge lib/x86/cpuid.h entirely into lib/x86/cpu-policy.h - -Use a temporary define to make struct cpuid_policy still work. 
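[Editorial aside -- not part of the patch series. The "temporary define" referred to above appears verbatim in the cpu-policy.h hunk further down; it aliases the old spelling to the new struct so that not-yet-converted call sites keep compiling while the rename is spread over several commits:]

    /* Transitional alias from the hunk below; dropped once the rename
     * is complete tree-wide. */
    #define cpuid_policy cpu_policy

    /* Both spellings now denote the same type: */
    struct cpuid_policy *old_name;   /* preprocesses to struct cpu_policy * */
    struct cpu_policy *new_name;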
- -There's one forward declaration of struct cpuid_policy in -tools/tests/x86_emulator/x86-emulate.h that isn't covered by the define, and -it's easier to rename that now than to rearrange the includes. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 743e530380a007774017df9dc2d8cb0659040ee3) ---- - tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 2 +- - tools/tests/x86_emulator/Makefile | 2 +- - tools/tests/x86_emulator/x86-emulate.h | 2 +- - xen/arch/x86/include/asm/cpuid.h | 1 - - xen/arch/x86/x86_emulate/x86_emulate.h | 2 +- - xen/include/xen/lib/x86/cpu-policy.h | 463 ++++++++++++++++++++- - xen/include/xen/lib/x86/cpuid.h | 475 ---------------------- - xen/lib/x86/cpuid.c | 2 +- - 8 files changed, 467 insertions(+), 482 deletions(-) - delete mode 100644 xen/include/xen/lib/x86/cpuid.h - -diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -index 79a849a044c7..7ebe8ee7c325 100644 ---- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -+++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -@@ -9,7 +9,7 @@ - #include - - #include --#include -+#include - #include - #include - -diff --git a/tools/tests/x86_emulator/Makefile b/tools/tests/x86_emulator/Makefile -index 7b07c31bbde4..bd82598f9766 100644 ---- a/tools/tests/x86_emulator/Makefile -+++ b/tools/tests/x86_emulator/Makefile -@@ -286,7 +286,7 @@ HOSTCFLAGS += $(CFLAGS_xeninclude) -I. $(HOSTCFLAGS-$(XEN_COMPILE_ARCH)) - x86.h := $(addprefix $(XEN_ROOT)/tools/include/xen/asm/,\ - x86-vendors.h x86-defns.h msr-index.h) \ - $(addprefix $(XEN_ROOT)/tools/include/xen/lib/x86/, \ -- cpuid.h cpuid-autogen.h) -+ cpu-policy.h cpuid-autogen.h) - x86_emulate.h := x86-emulate.h x86_emulate/x86_emulate.h $(x86.h) - - x86-emulate.o cpuid.o test_x86_emulator.o evex-disp8.o predicates.o wrappers.o: %.o: %.c $(x86_emulate.h) -diff --git a/tools/tests/x86_emulator/x86-emulate.h b/tools/tests/x86_emulator/x86-emulate.h -index 18ae40d01712..19bea9c38d58 100644 ---- a/tools/tests/x86_emulator/x86-emulate.h -+++ b/tools/tests/x86_emulator/x86-emulate.h -@@ -70,7 +70,7 @@ - #define is_canonical_address(x) (((int64_t)(x) >> 47) == ((int64_t)(x) >> 63)) - - extern uint32_t mxcsr_mask; --extern struct cpuid_policy cp; -+extern struct cpu_policy cp; - - #define MMAP_SZ 16384 - bool emul_test_init(void); -diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h -index 49b3128f06f9..d418e8100dde 100644 ---- a/xen/arch/x86/include/asm/cpuid.h -+++ b/xen/arch/x86/include/asm/cpuid.h -@@ -9,7 +9,6 @@ - #include - - #include --#include - - #include - -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h -index 4732855c40ed..c89c53e83bfe 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.h -+++ b/xen/arch/x86/x86_emulate/x86_emulate.h -@@ -23,7 +23,7 @@ - #ifndef __X86_EMULATE_H__ - #define __X86_EMULATE_H__ - --#include -+#include - - #define MAX_INST_LEN 15 - -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index 3a5300d1078c..666505964d00 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -2,9 +2,342 @@ - #ifndef XEN_LIB_X86_POLICIES_H - #define XEN_LIB_X86_POLICIES_H - --#include -+#include - #include - -+#define FEATURESET_1d 0 /* 0x00000001.edx */ -+#define FEATURESET_1c 1 /* 0x00000001.ecx */ -+#define FEATURESET_e1d 2 /* 0x80000001.edx */ -+#define FEATURESET_e1c 3 /* 0x80000001.ecx */ -+#define FEATURESET_Da1 4 /* 
0x0000000d:1.eax */ -+#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ -+#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ -+#define FEATURESET_e7d 7 /* 0x80000007.edx */ -+#define FEATURESET_e8b 8 /* 0x80000008.ebx */ -+#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ -+#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ -+#define FEATURESET_e21a 11 /* 0x80000021.eax */ -+#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ -+#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ -+#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ -+#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ -+ -+struct cpuid_leaf -+{ -+ uint32_t a, b, c, d; -+}; -+ -+/* -+ * Versions of GCC before 5 unconditionally reserve %rBX as the PIC hard -+ * register, and are unable to cope with spilling it. This results in a -+ * rather cryptic error: -+ * error: inconsistent operand constraints in an ‘asm’ -+ * -+ * In affected situations, work around the issue by using a separate register -+ * to hold the the %rBX output, and xchg twice to leave %rBX preserved around -+ * the asm() statement. -+ */ -+#if defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && defined(__i386__) -+# define XCHG_BX "xchg %%ebx, %[bx];" -+# define BX_CON [bx] "=&r" -+#elif defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && \ -+ defined(__x86_64__) && (defined(__code_model_medium__) || \ -+ defined(__code_model_large__)) -+# define XCHG_BX "xchg %%rbx, %q[bx];" -+# define BX_CON [bx] "=&r" -+#else -+# define XCHG_BX "" -+# define BX_CON "=&b" -+#endif -+ -+static inline void cpuid_leaf(uint32_t leaf, struct cpuid_leaf *l) -+{ -+ asm ( XCHG_BX -+ "cpuid;" -+ XCHG_BX -+ : "=a" (l->a), BX_CON (l->b), "=&c" (l->c), "=&d" (l->d) -+ : "a" (leaf) ); -+} -+ -+static inline void cpuid_count_leaf( -+ uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *l) -+{ -+ asm ( XCHG_BX -+ "cpuid;" -+ XCHG_BX -+ : "=a" (l->a), BX_CON (l->b), "=c" (l->c), "=&d" (l->d) -+ : "a" (leaf), "c" (subleaf) ); -+} -+ -+#undef BX_CON -+#undef XCHG -+ -+/** -+ * Given the vendor id from CPUID leaf 0, look up Xen's internal integer -+ * vendor ID. Returns X86_VENDOR_UNKNOWN for any unknown vendor. -+ */ -+unsigned int x86_cpuid_lookup_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx); -+ -+/** -+ * Given Xen's internal vendor ID, return a string suitable for printing. -+ * Returns "Unknown" for any unrecognised ID. -+ */ -+const char *x86_cpuid_vendor_to_str(unsigned int vendor); -+ -+#define CPUID_GUEST_NR_BASIC (0xdu + 1) -+#define CPUID_GUEST_NR_CACHE (5u + 1) -+#define CPUID_GUEST_NR_FEAT (2u + 1) -+#define CPUID_GUEST_NR_TOPO (1u + 1) -+#define CPUID_GUEST_NR_XSTATE (62u + 1) -+#define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1) -+#define CPUID_GUEST_NR_EXTD_AMD (0x21u + 1) -+#define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \ -+ CPUID_GUEST_NR_EXTD_AMD) -+ -+/* -+ * Maximum number of leaves a struct cpu_policy turns into when serialised for -+ * interaction with the toolstack. (Sum of all leaves in each union, less the -+ * entries in basic which sub-unions hang off of.) 
-+ */ -+#define CPUID_MAX_SERIALISED_LEAVES \ -+ (CPUID_GUEST_NR_BASIC + \ -+ CPUID_GUEST_NR_FEAT - !!CPUID_GUEST_NR_FEAT + \ -+ CPUID_GUEST_NR_CACHE - !!CPUID_GUEST_NR_CACHE + \ -+ CPUID_GUEST_NR_TOPO - !!CPUID_GUEST_NR_TOPO + \ -+ CPUID_GUEST_NR_XSTATE - !!CPUID_GUEST_NR_XSTATE + \ -+ CPUID_GUEST_NR_EXTD + 2 /* hv_limit and hv2_limit */ ) -+ -+struct cpu_policy -+{ -+#define DECL_BITFIELD(word) _DECL_BITFIELD(FEATURESET_ ## word) -+#define _DECL_BITFIELD(x) __DECL_BITFIELD(x) -+#define __DECL_BITFIELD(x) CPUID_BITFIELD_ ## x -+ -+ /* Basic leaves: 0x000000xx */ -+ union { -+ struct cpuid_leaf raw[CPUID_GUEST_NR_BASIC]; -+ struct { -+ /* Leaf 0x0 - Max and vendor. */ -+ uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; -+ -+ /* Leaf 0x1 - Family/model/stepping and features. */ -+ uint32_t raw_fms; -+ uint8_t :8, /* Brand ID. */ -+ clflush_size, /* Number of 8-byte blocks per cache line. */ -+ lppp, /* Logical processors per package. */ -+ apic_id; /* Initial APIC ID. */ -+ union { -+ uint32_t _1c; -+ struct { DECL_BITFIELD(1c); }; -+ }; -+ union { -+ uint32_t _1d; -+ struct { DECL_BITFIELD(1d); }; -+ }; -+ -+ /* Leaf 0x2 - TLB/Cache/Prefetch. */ -+ uint8_t l2_nr_queries; /* Documented as fixed to 1. */ -+ uint8_t l2_desc[15]; -+ -+ uint64_t :64, :64; /* Leaf 0x3 - PSN. */ -+ uint64_t :64, :64; /* Leaf 0x4 - Structured Cache. */ -+ uint64_t :64, :64; /* Leaf 0x5 - MONITOR. */ -+ uint64_t :64, :64; /* Leaf 0x6 - Therm/Perf. */ -+ uint64_t :64, :64; /* Leaf 0x7 - Structured Features. */ -+ uint64_t :64, :64; /* Leaf 0x8 - rsvd */ -+ uint64_t :64, :64; /* Leaf 0x9 - DCA */ -+ -+ /* Leaf 0xa - Intel PMU. */ -+ uint8_t pmu_version, _pmu[15]; -+ -+ uint64_t :64, :64; /* Leaf 0xb - Topology. */ -+ uint64_t :64, :64; /* Leaf 0xc - rsvd */ -+ uint64_t :64, :64; /* Leaf 0xd - XSTATE. */ -+ }; -+ } basic; -+ -+ /* Structured cache leaf: 0x00000004[xx] */ -+ union { -+ struct cpuid_leaf raw[CPUID_GUEST_NR_CACHE]; -+ struct cpuid_cache_leaf { -+ uint32_t /* a */ type:5, level:3; -+ bool self_init:1, fully_assoc:1; -+ uint32_t :4, threads_per_cache:12, cores_per_package:6; -+ uint32_t /* b */ line_size:12, partitions:10, ways:10; -+ uint32_t /* c */ sets; -+ bool /* d */ wbinvd:1, inclusive:1, complex:1; -+ } subleaf[CPUID_GUEST_NR_CACHE]; -+ } cache; -+ -+ /* Structured feature leaf: 0x00000007[xx] */ -+ union { -+ struct cpuid_leaf raw[CPUID_GUEST_NR_FEAT]; -+ struct { -+ /* Subleaf 0. */ -+ uint32_t max_subleaf; -+ union { -+ uint32_t _7b0; -+ struct { DECL_BITFIELD(7b0); }; -+ }; -+ union { -+ uint32_t _7c0; -+ struct { DECL_BITFIELD(7c0); }; -+ }; -+ union { -+ uint32_t _7d0; -+ struct { DECL_BITFIELD(7d0); }; -+ }; -+ -+ /* Subleaf 1. */ -+ union { -+ uint32_t _7a1; -+ struct { DECL_BITFIELD(7a1); }; -+ }; -+ union { -+ uint32_t _7b1; -+ struct { DECL_BITFIELD(7b1); }; -+ }; -+ union { -+ uint32_t _7c1; -+ struct { DECL_BITFIELD(7c1); }; -+ }; -+ union { -+ uint32_t _7d1; -+ struct { DECL_BITFIELD(7d1); }; -+ }; -+ -+ /* Subleaf 2. 
*/ -+ uint32_t /* a */:32, /* b */:32, /* c */:32; -+ union { -+ uint32_t _7d2; -+ struct { DECL_BITFIELD(7d2); }; -+ }; -+ }; -+ } feat; -+ -+ /* Extended topology enumeration: 0x0000000B[xx] */ -+ union { -+ struct cpuid_leaf raw[CPUID_GUEST_NR_TOPO]; -+ struct cpuid_topo_leaf { -+ uint32_t id_shift:5, :27; -+ uint16_t nr_logical, :16; -+ uint8_t level, type, :8, :8; -+ uint32_t x2apic_id; -+ } subleaf[CPUID_GUEST_NR_TOPO]; -+ } topo; -+ -+ /* Xstate feature leaf: 0x0000000D[xx] */ -+ union { -+ struct cpuid_leaf raw[CPUID_GUEST_NR_XSTATE]; -+ -+ struct { -+ /* Subleaf 0. */ -+ uint32_t xcr0_low, /* b */:32, max_size, xcr0_high; -+ -+ /* Subleaf 1. */ -+ union { -+ uint32_t Da1; -+ struct { DECL_BITFIELD(Da1); }; -+ }; -+ uint32_t /* b */:32, xss_low, xss_high; -+ }; -+ -+ /* Per-component common state. Valid for i >= 2. */ -+ struct { -+ uint32_t size, offset; -+ bool xss:1, align:1; -+ uint32_t _res_d; -+ } comp[CPUID_GUEST_NR_XSTATE]; -+ } xstate; -+ -+ /* Extended leaves: 0x800000xx */ -+ union { -+ struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD]; -+ struct { -+ /* Leaf 0x80000000 - Max and vendor. */ -+ uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; -+ -+ /* Leaf 0x80000001 - Family/model/stepping and features. */ -+ uint32_t raw_fms, /* b */:32; -+ union { -+ uint32_t e1c; -+ struct { DECL_BITFIELD(e1c); }; -+ }; -+ union { -+ uint32_t e1d; -+ struct { DECL_BITFIELD(e1d); }; -+ }; -+ -+ uint64_t :64, :64; /* Brand string. */ -+ uint64_t :64, :64; /* Brand string. */ -+ uint64_t :64, :64; /* Brand string. */ -+ uint64_t :64, :64; /* L1 cache/TLB. */ -+ uint64_t :64, :64; /* L2/3 cache/TLB. */ -+ -+ /* Leaf 0x80000007 - Advanced Power Management. */ -+ uint32_t /* a */:32, /* b */:32, /* c */:32; -+ union { -+ uint32_t e7d; -+ struct { DECL_BITFIELD(e7d); }; -+ }; -+ -+ /* Leaf 0x80000008 - Misc addr/feature info. */ -+ uint8_t maxphysaddr, maxlinaddr, :8, :8; -+ union { -+ uint32_t e8b; -+ struct { DECL_BITFIELD(e8b); }; -+ }; -+ uint32_t nc:8, :4, apic_id_size:4, :16; -+ uint32_t /* d */:32; -+ -+ uint64_t :64, :64; /* Leaf 0x80000009. */ -+ uint64_t :64, :64; /* Leaf 0x8000000a - SVM rev and features. */ -+ uint64_t :64, :64; /* Leaf 0x8000000b. */ -+ uint64_t :64, :64; /* Leaf 0x8000000c. */ -+ uint64_t :64, :64; /* Leaf 0x8000000d. */ -+ uint64_t :64, :64; /* Leaf 0x8000000e. */ -+ uint64_t :64, :64; /* Leaf 0x8000000f. */ -+ uint64_t :64, :64; /* Leaf 0x80000010. */ -+ uint64_t :64, :64; /* Leaf 0x80000011. */ -+ uint64_t :64, :64; /* Leaf 0x80000012. */ -+ uint64_t :64, :64; /* Leaf 0x80000013. */ -+ uint64_t :64, :64; /* Leaf 0x80000014. */ -+ uint64_t :64, :64; /* Leaf 0x80000015. */ -+ uint64_t :64, :64; /* Leaf 0x80000016. */ -+ uint64_t :64, :64; /* Leaf 0x80000017. */ -+ uint64_t :64, :64; /* Leaf 0x80000018. */ -+ uint64_t :64, :64; /* Leaf 0x80000019 - TLB 1GB Identifiers. */ -+ uint64_t :64, :64; /* Leaf 0x8000001a - Performance related info. */ -+ uint64_t :64, :64; /* Leaf 0x8000001b - IBS feature information. */ -+ uint64_t :64, :64; /* Leaf 0x8000001c. */ -+ uint64_t :64, :64; /* Leaf 0x8000001d - Cache properties. */ -+ uint64_t :64, :64; /* Leaf 0x8000001e - Extd APIC/Core/Node IDs. */ -+ uint64_t :64, :64; /* Leaf 0x8000001f - AMD Secure Encryption. */ -+ uint64_t :64, :64; /* Leaf 0x80000020 - Platform QoS. 
*/ -+ -+ /* Leaf 0x80000021 - Extended Feature 2 */ -+ union { -+ uint32_t e21a; -+ struct { DECL_BITFIELD(e21a); }; -+ }; -+ uint32_t /* b */:32, /* c */:32, /* d */:32; -+ }; -+ } extd; -+ -+#undef __DECL_BITFIELD -+#undef _DECL_BITFIELD -+#undef DECL_BITFIELD -+ -+ /* Toolstack selected Hypervisor max_leaf (if non-zero). */ -+ uint8_t hv_limit, hv2_limit; -+ -+ /* Value calculated from raw data above. */ -+ uint8_t x86_vendor; -+}; -+ -+/* Temporary */ -+#define cpuid_policy cpu_policy -+ - struct old_cpu_policy - { - struct cpuid_policy *cpuid; -@@ -19,6 +352,134 @@ struct cpu_policy_errors - - #define INIT_CPU_POLICY_ERRORS { -1, -1, -1 } - -+/* Fill in a featureset bitmap from a CPUID policy. */ -+static inline void cpuid_policy_to_featureset( -+ const struct cpuid_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) -+{ -+ fs[FEATURESET_1d] = p->basic._1d; -+ fs[FEATURESET_1c] = p->basic._1c; -+ fs[FEATURESET_e1d] = p->extd.e1d; -+ fs[FEATURESET_e1c] = p->extd.e1c; -+ fs[FEATURESET_Da1] = p->xstate.Da1; -+ fs[FEATURESET_7b0] = p->feat._7b0; -+ fs[FEATURESET_7c0] = p->feat._7c0; -+ fs[FEATURESET_e7d] = p->extd.e7d; -+ fs[FEATURESET_e8b] = p->extd.e8b; -+ fs[FEATURESET_7d0] = p->feat._7d0; -+ fs[FEATURESET_7a1] = p->feat._7a1; -+ fs[FEATURESET_e21a] = p->extd.e21a; -+ fs[FEATURESET_7b1] = p->feat._7b1; -+ fs[FEATURESET_7d2] = p->feat._7d2; -+ fs[FEATURESET_7c1] = p->feat._7c1; -+ fs[FEATURESET_7d1] = p->feat._7d1; -+} -+ -+/* Fill in a CPUID policy from a featureset bitmap. */ -+static inline void cpuid_featureset_to_policy( -+ const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpuid_policy *p) -+{ -+ p->basic._1d = fs[FEATURESET_1d]; -+ p->basic._1c = fs[FEATURESET_1c]; -+ p->extd.e1d = fs[FEATURESET_e1d]; -+ p->extd.e1c = fs[FEATURESET_e1c]; -+ p->xstate.Da1 = fs[FEATURESET_Da1]; -+ p->feat._7b0 = fs[FEATURESET_7b0]; -+ p->feat._7c0 = fs[FEATURESET_7c0]; -+ p->extd.e7d = fs[FEATURESET_e7d]; -+ p->extd.e8b = fs[FEATURESET_e8b]; -+ p->feat._7d0 = fs[FEATURESET_7d0]; -+ p->feat._7a1 = fs[FEATURESET_7a1]; -+ p->extd.e21a = fs[FEATURESET_e21a]; -+ p->feat._7b1 = fs[FEATURESET_7b1]; -+ p->feat._7d2 = fs[FEATURESET_7d2]; -+ p->feat._7c1 = fs[FEATURESET_7c1]; -+ p->feat._7d1 = fs[FEATURESET_7d1]; -+} -+ -+static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) -+{ -+ return ((uint64_t)p->xstate.xcr0_high << 32) | p->xstate.xcr0_low; -+} -+ -+static inline uint64_t cpuid_policy_xstates(const struct cpuid_policy *p) -+{ -+ uint64_t val = p->xstate.xcr0_high | p->xstate.xss_high; -+ -+ return (val << 32) | p->xstate.xcr0_low | p->xstate.xss_low; -+} -+ -+const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); -+ -+/** -+ * Recalculate the content in a CPUID policy which is derived from raw data. -+ */ -+void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p); -+ -+/** -+ * Fill a CPUID policy using the native CPUID instruction. -+ * -+ * No sanitisation is performed, but synthesised values are calculated. -+ * Values may be influenced by a hypervisor or from masking/faulting -+ * configuration. -+ */ -+void x86_cpuid_policy_fill_native(struct cpuid_policy *p); -+ -+/** -+ * Clear leaf data beyond the policies max leaf/subleaf settings. -+ * -+ * Policy serialisation purposefully omits out-of-range leaves, because there -+ * are a large number of them due to vendor differences. However, when -+ * constructing new policies (e.g. levelling down), it is possible to end up -+ * with out-of-range leaves with stale content in them. This helper clears -+ * them. 
-+ */ -+void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); -+ -+#ifdef __XEN__ -+#include -+typedef XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_leaf_buffer_t; -+#else -+#include -+typedef xen_cpuid_leaf_t cpuid_leaf_buffer_t[]; -+#endif -+ -+/** -+ * Serialise a cpuid_policy object into an array of cpuid leaves. -+ * -+ * @param policy The cpuid_policy to serialise. -+ * @param leaves The array of leaves to serialise into. -+ * @param nr_entries The number of entries in 'leaves'. -+ * @returns -errno -+ * -+ * Writes at most CPUID_MAX_SERIALISED_LEAVES. May fail with -ENOBUFS if the -+ * leaves array is too short. On success, nr_entries is updated with the -+ * actual number of leaves written. -+ */ -+int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, -+ cpuid_leaf_buffer_t leaves, uint32_t *nr_entries); -+ -+/** -+ * Unserialise a cpuid_policy object from an array of cpuid leaves. -+ * -+ * @param policy The cpuid_policy to unserialise into. -+ * @param leaves The array of leaves to unserialise from. -+ * @param nr_entries The number of entries in 'leaves'. -+ * @param err_leaf Optional hint for error diagnostics. -+ * @param err_subleaf Optional hint for error diagnostics. -+ * @returns -errno -+ * -+ * Reads at most CPUID_MAX_SERIALISED_LEAVES. May return -ERANGE if an -+ * incoming leaf is out of range of cpuid_policy, in which case the optional -+ * err_* pointers will identify the out-of-range indicies. -+ * -+ * No content validation of in-range leaves is performed. Synthesised data is -+ * recalculated. -+ */ -+int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, -+ const cpuid_leaf_buffer_t leaves, -+ uint32_t nr_entries, uint32_t *err_leaf, -+ uint32_t *err_subleaf); -+ - /* - * Calculate whether two policies are compatible. - * -diff --git a/xen/include/xen/lib/x86/cpuid.h b/xen/include/xen/lib/x86/cpuid.h -deleted file mode 100644 -index fa98b371eef4..000000000000 ---- a/xen/include/xen/lib/x86/cpuid.h -+++ /dev/null -@@ -1,475 +0,0 @@ --/* Common data structures and functions consumed by hypervisor and toolstack */ --#ifndef XEN_LIB_X86_CPUID_H --#define XEN_LIB_X86_CPUID_H -- --#include -- --#define FEATURESET_1d 0 /* 0x00000001.edx */ --#define FEATURESET_1c 1 /* 0x00000001.ecx */ --#define FEATURESET_e1d 2 /* 0x80000001.edx */ --#define FEATURESET_e1c 3 /* 0x80000001.ecx */ --#define FEATURESET_Da1 4 /* 0x0000000d:1.eax */ --#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ --#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ --#define FEATURESET_e7d 7 /* 0x80000007.edx */ --#define FEATURESET_e8b 8 /* 0x80000008.ebx */ --#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ --#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ --#define FEATURESET_e21a 11 /* 0x80000021.eax */ --#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ --#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ --#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ --#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ -- --struct cpuid_leaf --{ -- uint32_t a, b, c, d; --}; -- --/* -- * Versions of GCC before 5 unconditionally reserve %rBX as the PIC hard -- * register, and are unable to cope with spilling it. This results in a -- * rather cryptic error: -- * error: inconsistent operand constraints in an ‘asm’ -- * -- * In affected situations, work around the issue by using a separate register -- * to hold the the %rBX output, and xchg twice to leave %rBX preserved around -- * the asm() statement. 
-- */ --#if defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && defined(__i386__) --# define XCHG_BX "xchg %%ebx, %[bx];" --# define BX_CON [bx] "=&r" --#elif defined(__PIC__) && __GNUC__ < 5 && !defined(__clang__) && \ -- defined(__x86_64__) && (defined(__code_model_medium__) || \ -- defined(__code_model_large__)) --# define XCHG_BX "xchg %%rbx, %q[bx];" --# define BX_CON [bx] "=&r" --#else --# define XCHG_BX "" --# define BX_CON "=&b" --#endif -- --static inline void cpuid_leaf(uint32_t leaf, struct cpuid_leaf *l) --{ -- asm ( XCHG_BX -- "cpuid;" -- XCHG_BX -- : "=a" (l->a), BX_CON (l->b), "=&c" (l->c), "=&d" (l->d) -- : "a" (leaf) ); --} -- --static inline void cpuid_count_leaf( -- uint32_t leaf, uint32_t subleaf, struct cpuid_leaf *l) --{ -- asm ( XCHG_BX -- "cpuid;" -- XCHG_BX -- : "=a" (l->a), BX_CON (l->b), "=c" (l->c), "=&d" (l->d) -- : "a" (leaf), "c" (subleaf) ); --} -- --#undef BX_CON --#undef XCHG -- --/** -- * Given the vendor id from CPUID leaf 0, look up Xen's internal integer -- * vendor ID. Returns X86_VENDOR_UNKNOWN for any unknown vendor. -- */ --unsigned int x86_cpuid_lookup_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx); -- --/** -- * Given Xen's internal vendor ID, return a string suitable for printing. -- * Returns "Unknown" for any unrecognised ID. -- */ --const char *x86_cpuid_vendor_to_str(unsigned int vendor); -- --#define CPUID_GUEST_NR_BASIC (0xdu + 1) --#define CPUID_GUEST_NR_CACHE (5u + 1) --#define CPUID_GUEST_NR_FEAT (2u + 1) --#define CPUID_GUEST_NR_TOPO (1u + 1) --#define CPUID_GUEST_NR_XSTATE (62u + 1) --#define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1) --#define CPUID_GUEST_NR_EXTD_AMD (0x21u + 1) --#define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \ -- CPUID_GUEST_NR_EXTD_AMD) -- --/* -- * Maximum number of leaves a struct cpuid_policy turns into when serialised -- * for interaction with the toolstack. (Sum of all leaves in each union, less -- * the entries in basic which sub-unions hang off of.) -- */ --#define CPUID_MAX_SERIALISED_LEAVES \ -- (CPUID_GUEST_NR_BASIC + \ -- CPUID_GUEST_NR_FEAT - !!CPUID_GUEST_NR_FEAT + \ -- CPUID_GUEST_NR_CACHE - !!CPUID_GUEST_NR_CACHE + \ -- CPUID_GUEST_NR_TOPO - !!CPUID_GUEST_NR_TOPO + \ -- CPUID_GUEST_NR_XSTATE - !!CPUID_GUEST_NR_XSTATE + \ -- CPUID_GUEST_NR_EXTD + 2 /* hv_limit and hv2_limit */ ) -- --struct cpuid_policy --{ --#define DECL_BITFIELD(word) _DECL_BITFIELD(FEATURESET_ ## word) --#define _DECL_BITFIELD(x) __DECL_BITFIELD(x) --#define __DECL_BITFIELD(x) CPUID_BITFIELD_ ## x -- -- /* Basic leaves: 0x000000xx */ -- union { -- struct cpuid_leaf raw[CPUID_GUEST_NR_BASIC]; -- struct { -- /* Leaf 0x0 - Max and vendor. */ -- uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; -- -- /* Leaf 0x1 - Family/model/stepping and features. */ -- uint32_t raw_fms; -- uint8_t :8, /* Brand ID. */ -- clflush_size, /* Number of 8-byte blocks per cache line. */ -- lppp, /* Logical processors per package. */ -- apic_id; /* Initial APIC ID. */ -- union { -- uint32_t _1c; -- struct { DECL_BITFIELD(1c); }; -- }; -- union { -- uint32_t _1d; -- struct { DECL_BITFIELD(1d); }; -- }; -- -- /* Leaf 0x2 - TLB/Cache/Prefetch. */ -- uint8_t l2_nr_queries; /* Documented as fixed to 1. */ -- uint8_t l2_desc[15]; -- -- uint64_t :64, :64; /* Leaf 0x3 - PSN. */ -- uint64_t :64, :64; /* Leaf 0x4 - Structured Cache. */ -- uint64_t :64, :64; /* Leaf 0x5 - MONITOR. */ -- uint64_t :64, :64; /* Leaf 0x6 - Therm/Perf. */ -- uint64_t :64, :64; /* Leaf 0x7 - Structured Features. 
*/ -- uint64_t :64, :64; /* Leaf 0x8 - rsvd */ -- uint64_t :64, :64; /* Leaf 0x9 - DCA */ -- -- /* Leaf 0xa - Intel PMU. */ -- uint8_t pmu_version, _pmu[15]; -- -- uint64_t :64, :64; /* Leaf 0xb - Topology. */ -- uint64_t :64, :64; /* Leaf 0xc - rsvd */ -- uint64_t :64, :64; /* Leaf 0xd - XSTATE. */ -- }; -- } basic; -- -- /* Structured cache leaf: 0x00000004[xx] */ -- union { -- struct cpuid_leaf raw[CPUID_GUEST_NR_CACHE]; -- struct cpuid_cache_leaf { -- uint32_t /* a */ type:5, level:3; -- bool self_init:1, fully_assoc:1; -- uint32_t :4, threads_per_cache:12, cores_per_package:6; -- uint32_t /* b */ line_size:12, partitions:10, ways:10; -- uint32_t /* c */ sets; -- bool /* d */ wbinvd:1, inclusive:1, complex:1; -- } subleaf[CPUID_GUEST_NR_CACHE]; -- } cache; -- -- /* Structured feature leaf: 0x00000007[xx] */ -- union { -- struct cpuid_leaf raw[CPUID_GUEST_NR_FEAT]; -- struct { -- /* Subleaf 0. */ -- uint32_t max_subleaf; -- union { -- uint32_t _7b0; -- struct { DECL_BITFIELD(7b0); }; -- }; -- union { -- uint32_t _7c0; -- struct { DECL_BITFIELD(7c0); }; -- }; -- union { -- uint32_t _7d0; -- struct { DECL_BITFIELD(7d0); }; -- }; -- -- /* Subleaf 1. */ -- union { -- uint32_t _7a1; -- struct { DECL_BITFIELD(7a1); }; -- }; -- union { -- uint32_t _7b1; -- struct { DECL_BITFIELD(7b1); }; -- }; -- union { -- uint32_t _7c1; -- struct { DECL_BITFIELD(7c1); }; -- }; -- union { -- uint32_t _7d1; -- struct { DECL_BITFIELD(7d1); }; -- }; -- -- /* Subleaf 2. */ -- uint32_t /* a */:32, /* b */:32, /* c */:32; -- union { -- uint32_t _7d2; -- struct { DECL_BITFIELD(7d2); }; -- }; -- }; -- } feat; -- -- /* Extended topology enumeration: 0x0000000B[xx] */ -- union { -- struct cpuid_leaf raw[CPUID_GUEST_NR_TOPO]; -- struct cpuid_topo_leaf { -- uint32_t id_shift:5, :27; -- uint16_t nr_logical, :16; -- uint8_t level, type, :8, :8; -- uint32_t x2apic_id; -- } subleaf[CPUID_GUEST_NR_TOPO]; -- } topo; -- -- /* Xstate feature leaf: 0x0000000D[xx] */ -- union { -- struct cpuid_leaf raw[CPUID_GUEST_NR_XSTATE]; -- -- struct { -- /* Subleaf 0. */ -- uint32_t xcr0_low, /* b */:32, max_size, xcr0_high; -- -- /* Subleaf 1. */ -- union { -- uint32_t Da1; -- struct { DECL_BITFIELD(Da1); }; -- }; -- uint32_t /* b */:32, xss_low, xss_high; -- }; -- -- /* Per-component common state. Valid for i >= 2. */ -- struct { -- uint32_t size, offset; -- bool xss:1, align:1; -- uint32_t _res_d; -- } comp[CPUID_GUEST_NR_XSTATE]; -- } xstate; -- -- /* Extended leaves: 0x800000xx */ -- union { -- struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD]; -- struct { -- /* Leaf 0x80000000 - Max and vendor. */ -- uint32_t max_leaf, vendor_ebx, vendor_ecx, vendor_edx; -- -- /* Leaf 0x80000001 - Family/model/stepping and features. */ -- uint32_t raw_fms, /* b */:32; -- union { -- uint32_t e1c; -- struct { DECL_BITFIELD(e1c); }; -- }; -- union { -- uint32_t e1d; -- struct { DECL_BITFIELD(e1d); }; -- }; -- -- uint64_t :64, :64; /* Brand string. */ -- uint64_t :64, :64; /* Brand string. */ -- uint64_t :64, :64; /* Brand string. */ -- uint64_t :64, :64; /* L1 cache/TLB. */ -- uint64_t :64, :64; /* L2/3 cache/TLB. */ -- -- /* Leaf 0x80000007 - Advanced Power Management. */ -- uint32_t /* a */:32, /* b */:32, /* c */:32; -- union { -- uint32_t e7d; -- struct { DECL_BITFIELD(e7d); }; -- }; -- -- /* Leaf 0x80000008 - Misc addr/feature info. */ -- uint8_t maxphysaddr, maxlinaddr, :8, :8; -- union { -- uint32_t e8b; -- struct { DECL_BITFIELD(e8b); }; -- }; -- uint32_t nc:8, :4, apic_id_size:4, :16; -- uint32_t /* d */:32; -- -- uint64_t :64, :64; /* Leaf 0x80000009. 
*/ -- uint64_t :64, :64; /* Leaf 0x8000000a - SVM rev and features. */ -- uint64_t :64, :64; /* Leaf 0x8000000b. */ -- uint64_t :64, :64; /* Leaf 0x8000000c. */ -- uint64_t :64, :64; /* Leaf 0x8000000d. */ -- uint64_t :64, :64; /* Leaf 0x8000000e. */ -- uint64_t :64, :64; /* Leaf 0x8000000f. */ -- uint64_t :64, :64; /* Leaf 0x80000010. */ -- uint64_t :64, :64; /* Leaf 0x80000011. */ -- uint64_t :64, :64; /* Leaf 0x80000012. */ -- uint64_t :64, :64; /* Leaf 0x80000013. */ -- uint64_t :64, :64; /* Leaf 0x80000014. */ -- uint64_t :64, :64; /* Leaf 0x80000015. */ -- uint64_t :64, :64; /* Leaf 0x80000016. */ -- uint64_t :64, :64; /* Leaf 0x80000017. */ -- uint64_t :64, :64; /* Leaf 0x80000018. */ -- uint64_t :64, :64; /* Leaf 0x80000019 - TLB 1GB Identifiers. */ -- uint64_t :64, :64; /* Leaf 0x8000001a - Performance related info. */ -- uint64_t :64, :64; /* Leaf 0x8000001b - IBS feature information. */ -- uint64_t :64, :64; /* Leaf 0x8000001c. */ -- uint64_t :64, :64; /* Leaf 0x8000001d - Cache properties. */ -- uint64_t :64, :64; /* Leaf 0x8000001e - Extd APIC/Core/Node IDs. */ -- uint64_t :64, :64; /* Leaf 0x8000001f - AMD Secure Encryption. */ -- uint64_t :64, :64; /* Leaf 0x80000020 - Platform QoS. */ -- -- /* Leaf 0x80000021 - Extended Feature 2 */ -- union { -- uint32_t e21a; -- struct { DECL_BITFIELD(e21a); }; -- }; -- uint32_t /* b */:32, /* c */:32, /* d */:32; -- }; -- } extd; -- --#undef __DECL_BITFIELD --#undef _DECL_BITFIELD --#undef DECL_BITFIELD -- -- /* Toolstack selected Hypervisor max_leaf (if non-zero). */ -- uint8_t hv_limit, hv2_limit; -- -- /* Value calculated from raw data above. */ -- uint8_t x86_vendor; --}; -- --/* Fill in a featureset bitmap from a CPUID policy. */ --static inline void cpuid_policy_to_featureset( -- const struct cpuid_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) --{ -- fs[FEATURESET_1d] = p->basic._1d; -- fs[FEATURESET_1c] = p->basic._1c; -- fs[FEATURESET_e1d] = p->extd.e1d; -- fs[FEATURESET_e1c] = p->extd.e1c; -- fs[FEATURESET_Da1] = p->xstate.Da1; -- fs[FEATURESET_7b0] = p->feat._7b0; -- fs[FEATURESET_7c0] = p->feat._7c0; -- fs[FEATURESET_e7d] = p->extd.e7d; -- fs[FEATURESET_e8b] = p->extd.e8b; -- fs[FEATURESET_7d0] = p->feat._7d0; -- fs[FEATURESET_7a1] = p->feat._7a1; -- fs[FEATURESET_e21a] = p->extd.e21a; -- fs[FEATURESET_7b1] = p->feat._7b1; -- fs[FEATURESET_7d2] = p->feat._7d2; -- fs[FEATURESET_7c1] = p->feat._7c1; -- fs[FEATURESET_7d1] = p->feat._7d1; --} -- --/* Fill in a CPUID policy from a featureset bitmap. 
*/ --static inline void cpuid_featureset_to_policy( -- const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpuid_policy *p) --{ -- p->basic._1d = fs[FEATURESET_1d]; -- p->basic._1c = fs[FEATURESET_1c]; -- p->extd.e1d = fs[FEATURESET_e1d]; -- p->extd.e1c = fs[FEATURESET_e1c]; -- p->xstate.Da1 = fs[FEATURESET_Da1]; -- p->feat._7b0 = fs[FEATURESET_7b0]; -- p->feat._7c0 = fs[FEATURESET_7c0]; -- p->extd.e7d = fs[FEATURESET_e7d]; -- p->extd.e8b = fs[FEATURESET_e8b]; -- p->feat._7d0 = fs[FEATURESET_7d0]; -- p->feat._7a1 = fs[FEATURESET_7a1]; -- p->extd.e21a = fs[FEATURESET_e21a]; -- p->feat._7b1 = fs[FEATURESET_7b1]; -- p->feat._7d2 = fs[FEATURESET_7d2]; -- p->feat._7c1 = fs[FEATURESET_7c1]; -- p->feat._7d1 = fs[FEATURESET_7d1]; --} -- --static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) --{ -- return ((uint64_t)p->xstate.xcr0_high << 32) | p->xstate.xcr0_low; --} -- --static inline uint64_t cpuid_policy_xstates(const struct cpuid_policy *p) --{ -- uint64_t val = p->xstate.xcr0_high | p->xstate.xss_high; -- -- return (val << 32) | p->xstate.xcr0_low | p->xstate.xss_low; --} -- --const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); -- --/** -- * Recalculate the content in a CPUID policy which is derived from raw data. -- */ --void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p); -- --/** -- * Fill a CPUID policy using the native CPUID instruction. -- * -- * No sanitisation is performed, but synthesised values are calculated. -- * Values may be influenced by a hypervisor or from masking/faulting -- * configuration. -- */ --void x86_cpuid_policy_fill_native(struct cpuid_policy *p); -- --/** -- * Clear leaf data beyond the policies max leaf/subleaf settings. -- * -- * Policy serialisation purposefully omits out-of-range leaves, because there -- * are a large number of them due to vendor differences. However, when -- * constructing new policies (e.g. levelling down), it is possible to end up -- * with out-of-range leaves with stale content in them. This helper clears -- * them. -- */ --void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); -- --#ifdef __XEN__ --#include --typedef XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_leaf_buffer_t; --#else --#include --typedef xen_cpuid_leaf_t cpuid_leaf_buffer_t[]; --#endif -- --/** -- * Serialise a cpuid_policy object into an array of cpuid leaves. -- * -- * @param policy The cpuid_policy to serialise. -- * @param leaves The array of leaves to serialise into. -- * @param nr_entries The number of entries in 'leaves'. -- * @returns -errno -- * -- * Writes at most CPUID_MAX_SERIALISED_LEAVES. May fail with -ENOBUFS if the -- * leaves array is too short. On success, nr_entries is updated with the -- * actual number of leaves written. -- */ --int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, -- cpuid_leaf_buffer_t leaves, uint32_t *nr_entries); -- --/** -- * Unserialise a cpuid_policy object from an array of cpuid leaves. -- * -- * @param policy The cpuid_policy to unserialise into. -- * @param leaves The array of leaves to unserialise from. -- * @param nr_entries The number of entries in 'leaves'. -- * @param err_leaf Optional hint for error diagnostics. -- * @param err_subleaf Optional hint for error diagnostics. -- * @returns -errno -- * -- * Reads at most CPUID_MAX_SERIALISED_LEAVES. May return -ERANGE if an -- * incoming leaf is out of range of cpuid_policy, in which case the optional -- * err_* pointers will identify the out-of-range indicies. 
-- * -- * No content validation of in-range leaves is performed. Synthesised data is -- * recalculated. -- */ --int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, -- const cpuid_leaf_buffer_t leaves, -- uint32_t nr_entries, uint32_t *err_leaf, -- uint32_t *err_subleaf); -- --#endif /* !XEN_LIB_X86_CPUID_H */ -- --/* -- * Local variables: -- * mode: C -- * c-file-style: "BSD" -- * c-basic-offset: 4 -- * tab-width: 4 -- * indent-tabs-mode: nil -- * End: -- */ -diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c -index 8eb88314f53c..e81f76c779c0 100644 ---- a/xen/lib/x86/cpuid.c -+++ b/xen/lib/x86/cpuid.c -@@ -1,6 +1,6 @@ - #include "private.h" - --#include -+#include - - static void zero_leaves(struct cpuid_leaf *l, - unsigned int first, unsigned int last) --- -2.39.2 - diff --git a/0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch b/0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch deleted file mode 100644 index 6e631c10..00000000 --- a/0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch +++ /dev/null @@ -1,307 +0,0 @@ -From cf0d5f6d58ea82c9ab18990f850fc4e01887be16 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 28 Mar 2023 21:24:20 +0100 -Subject: [PATCH 06/35] x86: Merge struct msr_policy into struct cpu_policy - -As with the cpuid side, use a temporary define to make struct msr_policy still -work. - -Note, this means that domains now have two separate struct cpu_policy -allocations with disjoint information, and system policies are in a similar -position, as well as xc_cpu_policy objects in libxenguest. All of these -duplications will be addressed in the following patches. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 03812da3754d550dd8cbee7289469069ea6f0073) ---- - tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 1 - - xen/arch/x86/include/asm/msr.h | 3 +- - xen/include/xen/lib/x86/cpu-policy.h | 81 ++++++++++++++++- - xen/include/xen/lib/x86/msr.h | 104 ---------------------- - xen/lib/x86/msr.c | 2 +- - 5 files changed, 83 insertions(+), 108 deletions(-) - delete mode 100644 xen/include/xen/lib/x86/msr.h - -diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -index 7ebe8ee7c325..316eb0efe034 100644 ---- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -+++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -@@ -10,7 +10,6 @@ - - #include - #include --#include - #include - - static bool debug; -diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h -index dd1eee04a637..bb32bf19adc7 100644 ---- a/xen/arch/x86/include/asm/msr.h -+++ b/xen/arch/x86/include/asm/msr.h -@@ -6,8 +6,9 @@ - #include - #include - #include -+#include - --#include -+#include - - #include - #include -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index 666505964d00..53fffca55211 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -3,7 +3,6 @@ - #define XEN_LIB_X86_POLICIES_H - - #include --#include - - #define FEATURESET_1d 0 /* 0x00000001.edx */ - #define FEATURESET_1c 1 /* 0x00000001.ecx */ -@@ -107,6 +106,9 @@ const char *x86_cpuid_vendor_to_str(unsigned int vendor); - CPUID_GUEST_NR_XSTATE - !!CPUID_GUEST_NR_XSTATE + \ - CPUID_GUEST_NR_EXTD + 2 /* hv_limit and hv2_limit */ ) - -+/* Maximum number of MSRs written when serialising a cpu_policy. 
*/ -+#define MSR_MAX_SERIALISED_ENTRIES 2 -+ - struct cpu_policy - { - #define DECL_BITFIELD(word) _DECL_BITFIELD(FEATURESET_ ## word) -@@ -324,6 +326,44 @@ struct cpu_policy - }; - } extd; - -+ /* -+ * 0x000000ce - MSR_INTEL_PLATFORM_INFO -+ * -+ * This MSR is non-architectural, but for simplicy we allow it to be read -+ * unconditionally. CPUID Faulting support can be fully emulated for HVM -+ * guests so can be offered unconditionally, while support for PV guests -+ * is dependent on real hardware support. -+ */ -+ union { -+ uint32_t raw; -+ struct { -+ uint32_t :31; -+ bool cpuid_faulting:1; -+ }; -+ } platform_info; -+ -+ /* -+ * 0x0000010a - MSR_ARCH_CAPABILITIES -+ * -+ * This is an Intel-only MSR, which provides miscellaneous enumeration, -+ * including those which indicate that microarchitectrual sidechannels are -+ * fixed in hardware. -+ */ -+ union { -+ uint32_t raw; -+ struct { -+ bool rdcl_no:1; -+ bool ibrs_all:1; -+ bool rsba:1; -+ bool skip_l1dfl:1; -+ bool ssb_no:1; -+ bool mds_no:1; -+ bool if_pschange_mc_no:1; -+ bool tsx_ctrl:1; -+ bool taa_no:1; -+ }; -+ } arch_caps; -+ - #undef __DECL_BITFIELD - #undef _DECL_BITFIELD - #undef DECL_BITFIELD -@@ -337,6 +377,7 @@ struct cpu_policy - - /* Temporary */ - #define cpuid_policy cpu_policy -+#define msr_policy cpu_policy - - struct old_cpu_policy - { -@@ -438,9 +479,11 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); - #ifdef __XEN__ - #include - typedef XEN_GUEST_HANDLE_64(xen_cpuid_leaf_t) cpuid_leaf_buffer_t; -+typedef XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_entry_buffer_t; - #else - #include - typedef xen_cpuid_leaf_t cpuid_leaf_buffer_t[]; -+typedef xen_msr_entry_t msr_entry_buffer_t[]; - #endif - - /** -@@ -480,6 +523,42 @@ int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, - uint32_t nr_entries, uint32_t *err_leaf, - uint32_t *err_subleaf); - -+/** -+ * Serialise an msr_policy object into an array. -+ * -+ * @param policy The msr_policy to serialise. -+ * @param msrs The array of msrs to serialise into. -+ * @param nr_entries The number of entries in 'msrs'. -+ * @returns -errno -+ * -+ * Writes at most MSR_MAX_SERIALISED_ENTRIES. May fail with -ENOBUFS if the -+ * buffer array is too short. On success, nr_entries is updated with the -+ * actual number of msrs written. -+ */ -+int x86_msr_copy_to_buffer(const struct msr_policy *policy, -+ msr_entry_buffer_t msrs, uint32_t *nr_entries); -+ -+/** -+ * Unserialise an msr_policy object from an array of msrs. -+ * -+ * @param policy The msr_policy object to unserialise into. -+ * @param msrs The array of msrs to unserialise from. -+ * @param nr_entries The number of entries in 'msrs'. -+ * @param err_msr Optional hint for error diagnostics. -+ * @returns -errno -+ * -+ * Reads at most MSR_MAX_SERIALISED_ENTRIES. May fail for a number of reasons -+ * based on the content in an individual 'msrs' entry, including the MSR index -+ * not being valid in the policy, the flags field being nonzero, or if the -+ * value provided would truncate when stored in the policy. In such cases, -+ * the optional err_* pointer will identify the problematic MSR. -+ * -+ * No content validation is performed on the data stored in the policy object. -+ */ -+int x86_msr_copy_from_buffer(struct msr_policy *policy, -+ const msr_entry_buffer_t msrs, uint32_t nr_entries, -+ uint32_t *err_msr); -+ - /* - * Calculate whether two policies are compatible. 
- * -diff --git a/xen/include/xen/lib/x86/msr.h b/xen/include/xen/lib/x86/msr.h -deleted file mode 100644 -index 48ba4a59c036..000000000000 ---- a/xen/include/xen/lib/x86/msr.h -+++ /dev/null -@@ -1,104 +0,0 @@ --/* Common data structures and functions consumed by hypervisor and toolstack */ --#ifndef XEN_LIB_X86_MSR_H --#define XEN_LIB_X86_MSR_H -- --/* Maximum number of MSRs written when serialising msr_policy. */ --#define MSR_MAX_SERIALISED_ENTRIES 2 -- --/* MSR policy object for shared per-domain MSRs */ --struct msr_policy --{ -- /* -- * 0x000000ce - MSR_INTEL_PLATFORM_INFO -- * -- * This MSR is non-architectural, but for simplicy we allow it to be read -- * unconditionally. CPUID Faulting support can be fully emulated for HVM -- * guests so can be offered unconditionally, while support for PV guests -- * is dependent on real hardware support. -- */ -- union { -- uint32_t raw; -- struct { -- uint32_t :31; -- bool cpuid_faulting:1; -- }; -- } platform_info; -- -- /* -- * 0x0000010a - MSR_ARCH_CAPABILITIES -- * -- * This is an Intel-only MSR, which provides miscellaneous enumeration, -- * including those which indicate that microarchitectrual sidechannels are -- * fixed in hardware. -- */ -- union { -- uint32_t raw; -- struct { -- bool rdcl_no:1; -- bool ibrs_all:1; -- bool rsba:1; -- bool skip_l1dfl:1; -- bool ssb_no:1; -- bool mds_no:1; -- bool if_pschange_mc_no:1; -- bool tsx_ctrl:1; -- bool taa_no:1; -- }; -- } arch_caps; --}; -- --#ifdef __XEN__ --#include --typedef XEN_GUEST_HANDLE_64(xen_msr_entry_t) msr_entry_buffer_t; --#else --#include --typedef xen_msr_entry_t msr_entry_buffer_t[]; --#endif -- --/** -- * Serialise an msr_policy object into an array. -- * -- * @param policy The msr_policy to serialise. -- * @param msrs The array of msrs to serialise into. -- * @param nr_entries The number of entries in 'msrs'. -- * @returns -errno -- * -- * Writes at most MSR_MAX_SERIALISED_ENTRIES. May fail with -ENOBUFS if the -- * buffer array is too short. On success, nr_entries is updated with the -- * actual number of msrs written. -- */ --int x86_msr_copy_to_buffer(const struct msr_policy *policy, -- msr_entry_buffer_t msrs, uint32_t *nr_entries); -- --/** -- * Unserialise an msr_policy object from an array of msrs. -- * -- * @param policy The msr_policy object to unserialise into. -- * @param msrs The array of msrs to unserialise from. -- * @param nr_entries The number of entries in 'msrs'. -- * @param err_msr Optional hint for error diagnostics. -- * @returns -errno -- * -- * Reads at most MSR_MAX_SERIALISED_ENTRIES. May fail for a number of reasons -- * based on the content in an individual 'msrs' entry, including the MSR index -- * not being valid in the policy, the flags field being nonzero, or if the -- * value provided would truncate when stored in the policy. In such cases, -- * the optional err_* pointer will identify the problematic MSR. -- * -- * No content validation is performed on the data stored in the policy object. 
-- */ --int x86_msr_copy_from_buffer(struct msr_policy *policy, -- const msr_entry_buffer_t msrs, uint32_t nr_entries, -- uint32_t *err_msr); -- --#endif /* !XEN_LIB_X86_MSR_H */ -- --/* -- * Local variables: -- * mode: C -- * c-file-style: "BSD" -- * c-basic-offset: 4 -- * tab-width: 4 -- * indent-tabs-mode: nil -- * End: -- */ -diff --git a/xen/lib/x86/msr.c b/xen/lib/x86/msr.c -index 7d71e92a380a..c4d885e7b568 100644 ---- a/xen/lib/x86/msr.c -+++ b/xen/lib/x86/msr.c -@@ -1,6 +1,6 @@ - #include "private.h" - --#include -+#include - - /* - * Copy a single MSR into the provided msr_entry_buffer_t buffer, performing a --- -2.39.2 - diff --git a/0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch b/0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch deleted file mode 100644 index 392942ab..00000000 --- a/0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch +++ /dev/null @@ -1,612 +0,0 @@ -From a30d43f4b80d3472ca70ee1fbd2c8d1721c61401 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 29 Mar 2023 07:39:44 +0100 -Subject: [PATCH 07/35] x86: Merge the system {cpuid,msr} policy objects - -Right now, they're the same underlying type, containing disjoint information. - -Introduce a new cpu-policy.{h,c} to be the new location for all policy -handling logic. Place the combined objects in __ro_after_init, which is new -since the original logic was written. - -As we're trying to phase out the use of struct old_cpu_policy entirely, rework -update_domain_cpu_policy() to not pointer-chase through system_policies[]. - -This in turn allows system_policies[] in sysctl.c to become static and reduced -in scope to XEN_SYSCTL_get_cpu_policy. - -No practical change. This undoes the transient doubling of storage space from -earlier patches. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 6bc33366795d14a21a3244d0f3b63f7dccea87ef) ---- - xen/arch/x86/Makefile | 1 + - xen/arch/x86/cpu-policy.c | 18 +++++++ - xen/arch/x86/cpu/common.c | 4 +- - xen/arch/x86/cpuid.c | 66 +++++++++++-------------- - xen/arch/x86/domctl.c | 17 +++++-- - xen/arch/x86/include/asm/cpu-policy.h | 14 ++++++ - xen/arch/x86/include/asm/cpuid.h | 6 --- - xen/arch/x86/include/asm/msr.h | 7 --- - xen/arch/x86/msr.c | 38 ++++++-------- - xen/arch/x86/sysctl.c | 71 ++++++++++----------------- - 10 files changed, 116 insertions(+), 126 deletions(-) - create mode 100644 xen/arch/x86/cpu-policy.c - create mode 100644 xen/arch/x86/include/asm/cpu-policy.h - -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index 5accbe4c6746..f213a6b56a4d 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -18,6 +18,7 @@ obj-y += bitops.o - obj-bin-y += bzimage.init.o - obj-bin-y += clear_page.o - obj-bin-y += copy_page.o -+obj-y += cpu-policy.o - obj-y += cpuid.o - obj-$(CONFIG_PV) += compat.o - obj-$(CONFIG_PV32) += x86_64/compat.o -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -new file mode 100644 -index 000000000000..663e9a084c53 ---- /dev/null -+++ b/xen/arch/x86/cpu-policy.c -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+#include -+#include -+ -+#include -+ -+#include -+ -+struct cpu_policy __ro_after_init raw_cpu_policy; -+struct cpu_policy __ro_after_init host_cpu_policy; -+#ifdef CONFIG_PV -+struct cpu_policy __ro_after_init pv_max_cpu_policy; -+struct cpu_policy __ro_after_init pv_def_cpu_policy; -+#endif -+#ifdef CONFIG_HVM -+struct cpu_policy __ro_after_init hvm_max_cpu_policy; -+struct cpu_policy __ro_after_init 
hvm_def_cpu_policy; -+#endif -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 27f73d3bbe31..665200db382f 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -3,6 +3,8 @@ - #include - #include - #include -+ -+#include - #include - #include - #include -@@ -138,7 +140,7 @@ bool __init probe_cpuid_faulting(void) - return false; - - if ((rc = rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val)) == 0) -- raw_msr_policy.platform_info.cpuid_faulting = -+ raw_cpu_policy.platform_info.cpuid_faulting = - val & MSR_PLATFORM_INFO_CPUID_FAULTING; - - if (rc || -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index acc2f606cea8..1327dba30dd8 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -4,6 +4,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -142,17 +143,6 @@ static void zero_leaves(struct cpuid_leaf *l, - memset(&l[first], 0, sizeof(*l) * (last - first + 1)); - } - --struct cpuid_policy __read_mostly raw_cpuid_policy, -- __read_mostly host_cpuid_policy; --#ifdef CONFIG_PV --struct cpuid_policy __read_mostly pv_max_cpuid_policy; --struct cpuid_policy __read_mostly pv_def_cpuid_policy; --#endif --#ifdef CONFIG_HVM --struct cpuid_policy __read_mostly hvm_max_cpuid_policy; --struct cpuid_policy __read_mostly hvm_def_cpuid_policy; --#endif -- - static void sanitise_featureset(uint32_t *fs) - { - /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ -@@ -344,7 +334,7 @@ static void recalculate_misc(struct cpuid_policy *p) - - static void __init calculate_raw_policy(void) - { -- struct cpuid_policy *p = &raw_cpuid_policy; -+ struct cpuid_policy *p = &raw_cpu_policy; - - x86_cpuid_policy_fill_native(p); - -@@ -354,10 +344,10 @@ static void __init calculate_raw_policy(void) - - static void __init calculate_host_policy(void) - { -- struct cpuid_policy *p = &host_cpuid_policy; -+ struct cpuid_policy *p = &host_cpu_policy; - unsigned int max_extd_leaf; - -- *p = raw_cpuid_policy; -+ *p = raw_cpu_policy; - - p->basic.max_leaf = - min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); -@@ -449,17 +439,17 @@ static void __init guest_common_feature_adjustments(uint32_t *fs) - * of IBRS by using the AMD feature bit. An administrator may wish for - * performance reasons to offer IBPB without IBRS. 
- */ -- if ( host_cpuid_policy.feat.ibrsb ) -+ if ( host_cpu_policy.feat.ibrsb ) - __set_bit(X86_FEATURE_IBPB, fs); - } - - static void __init calculate_pv_max_policy(void) - { -- struct cpuid_policy *p = &pv_max_cpuid_policy; -+ struct cpuid_policy *p = &pv_max_cpu_policy; - uint32_t pv_featureset[FSCAPINTS]; - unsigned int i; - -- *p = host_cpuid_policy; -+ *p = host_cpu_policy; - cpuid_policy_to_featureset(p, pv_featureset); - - for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) -@@ -486,11 +476,11 @@ static void __init calculate_pv_max_policy(void) - - static void __init calculate_pv_def_policy(void) - { -- struct cpuid_policy *p = &pv_def_cpuid_policy; -+ struct cpuid_policy *p = &pv_def_cpu_policy; - uint32_t pv_featureset[FSCAPINTS]; - unsigned int i; - -- *p = pv_max_cpuid_policy; -+ *p = pv_max_cpu_policy; - cpuid_policy_to_featureset(p, pv_featureset); - - for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) -@@ -506,12 +496,12 @@ static void __init calculate_pv_def_policy(void) - - static void __init calculate_hvm_max_policy(void) - { -- struct cpuid_policy *p = &hvm_max_cpuid_policy; -+ struct cpuid_policy *p = &hvm_max_cpu_policy; - uint32_t hvm_featureset[FSCAPINTS]; - unsigned int i; - const uint32_t *hvm_featuremask; - -- *p = host_cpuid_policy; -+ *p = host_cpu_policy; - cpuid_policy_to_featureset(p, hvm_featureset); - - hvm_featuremask = hvm_hap_supported() ? -@@ -539,7 +529,7 @@ static void __init calculate_hvm_max_policy(void) - * HVM guests are able if running in protected mode. - */ - if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) && -- raw_cpuid_policy.basic.sep ) -+ raw_cpu_policy.basic.sep ) - __set_bit(X86_FEATURE_SEP, hvm_featureset); - - /* -@@ -588,12 +578,12 @@ static void __init calculate_hvm_max_policy(void) - - static void __init calculate_hvm_def_policy(void) - { -- struct cpuid_policy *p = &hvm_def_cpuid_policy; -+ struct cpuid_policy *p = &hvm_def_cpu_policy; - uint32_t hvm_featureset[FSCAPINTS]; - unsigned int i; - const uint32_t *hvm_featuremask; - -- *p = hvm_max_cpuid_policy; -+ *p = hvm_max_cpu_policy; - cpuid_policy_to_featureset(p, hvm_featureset); - - hvm_featuremask = hvm_hap_supported() ? -@@ -661,8 +651,8 @@ void recalculate_cpuid_policy(struct domain *d) - { - struct cpuid_policy *p = d->arch.cpuid; - const struct cpuid_policy *max = is_pv_domain(d) -- ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpuid_policy : NULL) -- : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpuid_policy : NULL); -+ ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) -+ : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); - uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; - unsigned int i; - -@@ -737,7 +727,7 @@ void recalculate_cpuid_policy(struct domain *d) - /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ - fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | - cpufeat_mask(X86_FEATURE_NO_FPU_SEL)); -- fs[FEATURESET_7b0] |= (host_cpuid_policy.feat._7b0 & -+ fs[FEATURESET_7b0] |= (host_cpu_policy.feat._7b0 & - (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | - cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); - -@@ -788,8 +778,8 @@ void recalculate_cpuid_policy(struct domain *d) - int init_domain_cpuid_policy(struct domain *d) - { - struct cpuid_policy *p = is_pv_domain(d) -- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpuid_policy : NULL) -- : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpuid_policy : NULL); -+ ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) -+ : (IS_ENABLED(CONFIG_HVM) ? 
&hvm_def_cpu_policy : NULL); - - if ( !p ) - { -@@ -1093,7 +1083,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, - if ( is_pv_domain(d) && is_hardware_domain(d) && - guest_kernel_mode(v, regs) && cpu_has_monitor && - regs->entry_vector == TRAP_gp_fault ) -- *res = raw_cpuid_policy.basic.raw[5]; -+ *res = raw_cpu_policy.basic.raw[5]; - break; - - case 0x7: -@@ -1225,14 +1215,14 @@ static void __init __maybe_unused build_assertions(void) - /* Find some more clever allocation scheme if this trips. */ - BUILD_BUG_ON(sizeof(struct cpuid_policy) > PAGE_SIZE); - -- BUILD_BUG_ON(sizeof(raw_cpuid_policy.basic) != -- sizeof(raw_cpuid_policy.basic.raw)); -- BUILD_BUG_ON(sizeof(raw_cpuid_policy.feat) != -- sizeof(raw_cpuid_policy.feat.raw)); -- BUILD_BUG_ON(sizeof(raw_cpuid_policy.xstate) != -- sizeof(raw_cpuid_policy.xstate.raw)); -- BUILD_BUG_ON(sizeof(raw_cpuid_policy.extd) != -- sizeof(raw_cpuid_policy.extd.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.basic) != -+ sizeof(raw_cpu_policy.basic.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.feat) != -+ sizeof(raw_cpu_policy.feat.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) != -+ sizeof(raw_cpu_policy.xstate.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) != -+ sizeof(raw_cpu_policy.extd.raw)); - } - - /* -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 175d473e412a..2689df813b39 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -36,18 +36,25 @@ - #include - #include - #include --#include -+#include - - static int update_domain_cpu_policy(struct domain *d, - xen_domctl_cpu_policy_t *xdpc) - { - struct old_cpu_policy new = {}; -- const struct old_cpu_policy *sys = is_pv_domain(d) -- ? &system_policies[XEN_SYSCTL_cpu_policy_pv_max] -- : &system_policies[XEN_SYSCTL_cpu_policy_hvm_max]; -+ struct cpu_policy *sys = is_pv_domain(d) -+ ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) -+ : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); -+ struct old_cpu_policy old_sys = { sys, sys }; - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; - int ret = -ENOMEM; - -+ if ( !sys ) -+ { -+ ASSERT_UNREACHABLE(); -+ return -EOPNOTSUPP; -+ } -+ - /* Start by copying the domain's existing policies. */ - if ( !(new.cpuid = xmemdup(d->arch.cpuid)) || - !(new.msr = xmemdup(d->arch.msr)) ) -@@ -65,7 +72,7 @@ static int update_domain_cpu_policy(struct domain *d, - x86_cpuid_policy_clear_out_of_range_leaves(new.cpuid); - - /* Audit the combined dataset. 
*/ -- ret = x86_cpu_policies_are_compatible(sys, &new, &err); -+ ret = x86_cpu_policies_are_compatible(&old_sys, &new, &err); - if ( ret ) - goto out; - -diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h -new file mode 100644 -index 000000000000..eef14bb4267e ---- /dev/null -+++ b/xen/arch/x86/include/asm/cpu-policy.h -@@ -0,0 +1,14 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+#ifndef X86_CPU_POLICY_H -+#define X86_CPU_POLICY_H -+ -+struct cpu_policy; -+ -+extern struct cpu_policy raw_cpu_policy; -+extern struct cpu_policy host_cpu_policy; -+extern struct cpu_policy pv_max_cpu_policy; -+extern struct cpu_policy pv_def_cpu_policy; -+extern struct cpu_policy hvm_max_cpu_policy; -+extern struct cpu_policy hvm_def_cpu_policy; -+ -+#endif /* X86_CPU_POLICY_H */ -diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h -index d418e8100dde..ea0586277331 100644 ---- a/xen/arch/x86/include/asm/cpuid.h -+++ b/xen/arch/x86/include/asm/cpuid.h -@@ -46,12 +46,6 @@ DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks); - /* Default masking MSR values, calculated at boot. */ - extern struct cpuidmasks cpuidmask_defaults; - --extern struct cpuid_policy raw_cpuid_policy, host_cpuid_policy, -- pv_max_cpuid_policy, pv_def_cpuid_policy, -- hvm_max_cpuid_policy, hvm_def_cpuid_policy; -- --extern const struct old_cpu_policy system_policies[]; -- - /* Check that all previously present features are still available. */ - bool recheck_cpu_features(unsigned int cpu); - -diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h -index bb32bf19adc7..8a4da50c500a 100644 ---- a/xen/arch/x86/include/asm/msr.h -+++ b/xen/arch/x86/include/asm/msr.h -@@ -280,13 +280,6 @@ static inline void wrmsr_tsc_aux(uint32_t val) - - uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp); - --extern struct msr_policy raw_msr_policy, -- host_msr_policy, -- pv_max_msr_policy, -- pv_def_msr_policy, -- hvm_max_msr_policy, -- hvm_def_msr_policy; -- - /* Container object for per-vCPU MSRs */ - struct vcpu_msrs - { -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index cf46b18aa64c..01f95603e297 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -25,6 +25,7 @@ - #include - - #include -+#include - #include - #include - #include -@@ -37,20 +38,9 @@ - - DEFINE_PER_CPU(uint32_t, tsc_aux); - --struct msr_policy __read_mostly raw_msr_policy, -- __read_mostly host_msr_policy; --#ifdef CONFIG_PV --struct msr_policy __read_mostly pv_max_msr_policy; --struct msr_policy __read_mostly pv_def_msr_policy; --#endif --#ifdef CONFIG_HVM --struct msr_policy __read_mostly hvm_max_msr_policy; --struct msr_policy __read_mostly hvm_def_msr_policy; --#endif -- - static void __init calculate_raw_policy(void) - { -- struct msr_policy *mp = &raw_msr_policy; -+ struct msr_policy *mp = &raw_cpu_policy; - - /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ - /* Was already added by probe_cpuid_faulting() */ -@@ -61,9 +51,9 @@ static void __init calculate_raw_policy(void) - - static void __init calculate_host_policy(void) - { -- struct msr_policy *mp = &host_msr_policy; -+ struct msr_policy *mp = &host_cpu_policy; - -- *mp = raw_msr_policy; -+ *mp = raw_cpu_policy; - - /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ - /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ -@@ -81,25 +71,25 @@ static void __init calculate_host_policy(void) - - static void __init calculate_pv_max_policy(void) - { -- struct msr_policy *mp = &pv_max_msr_policy; -+ struct 
msr_policy *mp = &pv_max_cpu_policy; - -- *mp = host_msr_policy; -+ *mp = host_cpu_policy; - - mp->arch_caps.raw = 0; /* Not supported yet. */ - } - - static void __init calculate_pv_def_policy(void) - { -- struct msr_policy *mp = &pv_def_msr_policy; -+ struct msr_policy *mp = &pv_def_cpu_policy; - -- *mp = pv_max_msr_policy; -+ *mp = pv_max_cpu_policy; - } - - static void __init calculate_hvm_max_policy(void) - { -- struct msr_policy *mp = &hvm_max_msr_policy; -+ struct msr_policy *mp = &hvm_max_cpu_policy; - -- *mp = host_msr_policy; -+ *mp = host_cpu_policy; - - /* It's always possible to emulate CPUID faulting for HVM guests */ - mp->platform_info.cpuid_faulting = true; -@@ -109,9 +99,9 @@ static void __init calculate_hvm_max_policy(void) - - static void __init calculate_hvm_def_policy(void) - { -- struct msr_policy *mp = &hvm_def_msr_policy; -+ struct msr_policy *mp = &hvm_def_cpu_policy; - -- *mp = hvm_max_msr_policy; -+ *mp = hvm_max_cpu_policy; - } - - void __init init_guest_msr_policy(void) -@@ -135,8 +125,8 @@ void __init init_guest_msr_policy(void) - int init_domain_msr_policy(struct domain *d) - { - struct msr_policy *mp = is_pv_domain(d) -- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_msr_policy : NULL) -- : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_msr_policy : NULL); -+ ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) -+ : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpu_policy : NULL); - - if ( !mp ) - { -diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c -index 838a9947bfe3..c68242e5bcaf 100644 ---- a/xen/arch/x86/sysctl.c -+++ b/xen/arch/x86/sysctl.c -@@ -31,38 +31,7 @@ - #include - #include - #include --#include -- --const struct old_cpu_policy system_policies[6] = { -- [ XEN_SYSCTL_cpu_policy_raw ] = { -- &raw_cpuid_policy, -- &raw_msr_policy, -- }, -- [ XEN_SYSCTL_cpu_policy_host ] = { -- &host_cpuid_policy, -- &host_msr_policy, -- }, --#ifdef CONFIG_PV -- [ XEN_SYSCTL_cpu_policy_pv_max ] = { -- &pv_max_cpuid_policy, -- &pv_max_msr_policy, -- }, -- [ XEN_SYSCTL_cpu_policy_pv_default ] = { -- &pv_def_cpuid_policy, -- &pv_def_msr_policy, -- }, --#endif --#ifdef CONFIG_HVM -- [ XEN_SYSCTL_cpu_policy_hvm_max ] = { -- &hvm_max_cpuid_policy, -- &hvm_max_msr_policy, -- }, -- [ XEN_SYSCTL_cpu_policy_hvm_default ] = { -- &hvm_def_cpuid_policy, -- &hvm_def_msr_policy, -- }, --#endif --}; -+#include - - struct l3_cache_info { - int ret; -@@ -327,19 +296,19 @@ long arch_do_sysctl( - - case XEN_SYSCTL_get_cpu_featureset: - { -- static const struct cpuid_policy *const policy_table[6] = { -- [XEN_SYSCTL_cpu_featureset_raw] = &raw_cpuid_policy, -- [XEN_SYSCTL_cpu_featureset_host] = &host_cpuid_policy, -+ static const struct cpu_policy *const policy_table[6] = { -+ [XEN_SYSCTL_cpu_featureset_raw] = &raw_cpu_policy, -+ [XEN_SYSCTL_cpu_featureset_host] = &host_cpu_policy, - #ifdef CONFIG_PV -- [XEN_SYSCTL_cpu_featureset_pv] = &pv_def_cpuid_policy, -- [XEN_SYSCTL_cpu_featureset_pv_max] = &pv_max_cpuid_policy, -+ [XEN_SYSCTL_cpu_featureset_pv] = &pv_def_cpu_policy, -+ [XEN_SYSCTL_cpu_featureset_pv_max] = &pv_max_cpu_policy, - #endif - #ifdef CONFIG_HVM -- [XEN_SYSCTL_cpu_featureset_hvm] = &hvm_def_cpuid_policy, -- [XEN_SYSCTL_cpu_featureset_hvm_max] = &hvm_max_cpuid_policy, -+ [XEN_SYSCTL_cpu_featureset_hvm] = &hvm_def_cpu_policy, -+ [XEN_SYSCTL_cpu_featureset_hvm_max] = &hvm_max_cpu_policy, - #endif - }; -- const struct cpuid_policy *p = NULL; -+ const struct cpu_policy *p = NULL; - uint32_t featureset[FSCAPINTS]; - unsigned int nr; - -@@ -392,7 +361,19 @@ long arch_do_sysctl( - - case 
XEN_SYSCTL_get_cpu_policy: - { -- const struct old_cpu_policy *policy; -+ static const struct cpu_policy *const system_policies[6] = { -+ [XEN_SYSCTL_cpu_policy_raw] = &raw_cpu_policy, -+ [XEN_SYSCTL_cpu_policy_host] = &host_cpu_policy, -+#ifdef CONFIG_PV -+ [XEN_SYSCTL_cpu_policy_pv_max] = &pv_max_cpu_policy, -+ [XEN_SYSCTL_cpu_policy_pv_default] = &pv_def_cpu_policy, -+#endif -+#ifdef CONFIG_HVM -+ [XEN_SYSCTL_cpu_policy_hvm_max] = &hvm_max_cpu_policy, -+ [XEN_SYSCTL_cpu_policy_hvm_default] = &hvm_def_cpu_policy, -+#endif -+ }; -+ const struct cpu_policy *policy; - - /* Reserved field set, or bad policy index? */ - if ( sysctl->u.cpu_policy._rsvd || -@@ -401,11 +382,11 @@ long arch_do_sysctl( - ret = -EINVAL; - break; - } -- policy = &system_policies[ -+ policy = system_policies[ - array_index_nospec(sysctl->u.cpu_policy.index, - ARRAY_SIZE(system_policies))]; - -- if ( !policy->cpuid || !policy->msr ) -+ if ( !policy ) - { - ret = -EOPNOTSUPP; - break; -@@ -415,7 +396,7 @@ long arch_do_sysctl( - if ( guest_handle_is_null(sysctl->u.cpu_policy.leaves) ) - sysctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; - else if ( (ret = x86_cpuid_copy_to_buffer( -- policy->cpuid, -+ policy, - sysctl->u.cpu_policy.leaves, - &sysctl->u.cpu_policy.nr_leaves)) ) - break; -@@ -431,7 +412,7 @@ long arch_do_sysctl( - if ( guest_handle_is_null(sysctl->u.cpu_policy.msrs) ) - sysctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; - else if ( (ret = x86_msr_copy_to_buffer( -- policy->msr, -+ policy, - sysctl->u.cpu_policy.msrs, - &sysctl->u.cpu_policy.nr_msrs)) ) - break; --- -2.39.2 - diff --git a/0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch b/0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch deleted file mode 100644 index 3da522ce..00000000 --- a/0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch +++ /dev/null @@ -1,423 +0,0 @@ -From 1b87ec7dfde8c0d6d52bb7286e3f280061b361b5 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 29 Mar 2023 11:32:25 +0100 -Subject: [PATCH 08/35] x86: Merge a domain's {cpuid,msr} policy objects - -Right now, they're the same underlying type, containing disjoint information. - -Drop the d->arch.msr pointer, and union d->arch.cpuid to give it a second name -of cpu_policy in the interim. - -Merge init_domain_{cpuid,msr}_policy() into a single init_domain_cpu_policy(), -moving the implementation into cpu-policy.c - -No practical change. This undoes the transient doubling of storage space from -earlier patches. 
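An aside on the "second name" mentioned above: as the domain.h hunk further down shows, it is implemented as an anonymous union of identically-typed pointers, so one allocation can be referred to by whichever alias reads best at a call site. A minimal standalone C sketch of the pattern follows; the struct and field names here are illustrative, not the exact Xen ones:

    #include <stdlib.h>

    struct cpu_policy { int placeholder; };

    struct arch_domain_sketch {
        /* One allocation, several interchangeable names. */
        union {
            struct cpu_policy *cpu_policy; /* canonical name */
            struct cpu_policy *cpuid;      /* alias for CPUID-centric code */
            struct cpu_policy *msr;        /* alias for MSR-centric code */
        };
    };

    int main(void)
    {
        struct arch_domain_sketch d = {
            .cpu_policy = malloc(sizeof(struct cpu_policy)),
        };

        /* All three members share storage, so one free() releases it. */
        free(d.msr);
        return 0;
    }

Because the aliases have identical type, reading through any of them is well-defined; the union exists only so call sites can keep their historical spelling while the merge is in progress.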
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit bd13dae34809e61e37ba1cd5de893c5c10c46256) ---- - xen/arch/x86/cpu-policy.c | 49 +++++++++++++++++++++++++++ - xen/arch/x86/cpuid.c | 23 ------------- - xen/arch/x86/domain.c | 15 +++----- - xen/arch/x86/domctl.c | 35 ++++++++++--------- - xen/arch/x86/include/asm/cpu-policy.h | 4 +++ - xen/arch/x86/include/asm/cpuid.h | 3 -- - xen/arch/x86/include/asm/domain.h | 13 +++++-- - xen/arch/x86/include/asm/msr.h | 1 - - xen/arch/x86/mm/mem_sharing.c | 3 +- - xen/arch/x86/msr.c | 44 ------------------------ - 10 files changed, 86 insertions(+), 104 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 663e9a084c53..e9ac1269c35a 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -1,10 +1,13 @@ - /* SPDX-License-Identifier: GPL-2.0-or-later */ - #include - #include -+#include - - #include - - #include -+#include -+#include - - struct cpu_policy __ro_after_init raw_cpu_policy; - struct cpu_policy __ro_after_init host_cpu_policy; -@@ -16,3 +19,49 @@ struct cpu_policy __ro_after_init pv_def_cpu_policy; - struct cpu_policy __ro_after_init hvm_max_cpu_policy; - struct cpu_policy __ro_after_init hvm_def_cpu_policy; - #endif -+ -+int init_domain_cpu_policy(struct domain *d) -+{ -+ struct cpu_policy *p = is_pv_domain(d) -+ ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) -+ : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpu_policy : NULL); -+ -+ if ( !p ) -+ { -+ ASSERT_UNREACHABLE(); -+ return -EOPNOTSUPP; -+ } -+ -+ p = xmemdup(p); -+ if ( !p ) -+ return -ENOMEM; -+ -+ /* See comment in ctxt_switch_levelling() */ -+ if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) ) -+ p->platform_info.cpuid_faulting = false; -+ -+ /* -+ * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, -+ * so dom0 can turn off workarounds as appropriate. Temporary, until the -+ * domain policy logic gains a better understanding of MSRs. -+ */ -+ if ( is_hardware_domain(d) && cpu_has_arch_caps ) -+ { -+ uint64_t val; -+ -+ rdmsrl(MSR_ARCH_CAPABILITIES, val); -+ -+ p->arch_caps.raw = val & -+ (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -+ ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | -+ ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | -+ ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | -+ ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); -+ } -+ -+ d->arch.cpu_policy = p; -+ -+ recalculate_cpuid_policy(d); -+ -+ return 0; -+} -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index 1327dba30dd8..e074befb721d 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -775,29 +775,6 @@ void recalculate_cpuid_policy(struct domain *d) - p->extd.raw[0x19] = EMPTY_LEAF; - } - --int init_domain_cpuid_policy(struct domain *d) --{ -- struct cpuid_policy *p = is_pv_domain(d) -- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) -- : (IS_ENABLED(CONFIG_HVM) ? 
&hvm_def_cpu_policy : NULL); -- -- if ( !p ) -- { -- ASSERT_UNREACHABLE(); -- return -EOPNOTSUPP; -- } -- -- p = xmemdup(p); -- if ( !p ) -- return -ENOMEM; -- -- d->arch.cpuid = p; -- -- recalculate_cpuid_policy(d); -- -- return 0; --} -- - void __init init_dom0_cpuid_policy(struct domain *d) - { - struct cpuid_policy *p = d->arch.cpuid; -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index e546c9832225..faea542286c0 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -66,6 +66,7 @@ - #ifdef CONFIG_COMPAT - #include - #endif -+#include - #include - #include - #include -@@ -743,8 +744,7 @@ int arch_domain_create(struct domain *d, - - d->arch.ctxt_switch = &idle_csw; - -- d->arch.cpuid = ZERO_BLOCK_PTR; /* Catch stray misuses. */ -- d->arch.msr = ZERO_BLOCK_PTR; -+ d->arch.cpu_policy = ZERO_BLOCK_PTR; /* Catch stray misuses. */ - - return 0; - } -@@ -799,10 +799,7 @@ int arch_domain_create(struct domain *d, - goto fail; - paging_initialised = true; - -- if ( (rc = init_domain_cpuid_policy(d)) ) -- goto fail; -- -- if ( (rc = init_domain_msr_policy(d)) ) -+ if ( (rc = init_domain_cpu_policy(d)) ) - goto fail; - - d->arch.ioport_caps = -@@ -873,8 +870,7 @@ int arch_domain_create(struct domain *d, - iommu_domain_destroy(d); - cleanup_domain_irq_mapping(d); - free_xenheap_page(d->shared_info); -- xfree(d->arch.cpuid); -- xfree(d->arch.msr); -+ XFREE(d->arch.cpu_policy); - if ( paging_initialised ) - paging_final_teardown(d); - free_perdomain_mappings(d); -@@ -888,8 +884,7 @@ void arch_domain_destroy(struct domain *d) - hvm_domain_destroy(d); - - xfree(d->arch.e820); -- xfree(d->arch.cpuid); -- xfree(d->arch.msr); -+ XFREE(d->arch.cpu_policy); - - free_domain_pirqs(d); - if ( !is_idle_domain(d) ) -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 2689df813b39..857d0abe323e 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -41,11 +41,11 @@ - static int update_domain_cpu_policy(struct domain *d, - xen_domctl_cpu_policy_t *xdpc) - { -- struct old_cpu_policy new = {}; -+ struct cpu_policy *new; - struct cpu_policy *sys = is_pv_domain(d) - ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) - : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); -- struct old_cpu_policy old_sys = { sys, sys }; -+ struct old_cpu_policy old_sys = { sys, sys }, old_new; - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; - int ret = -ENOMEM; - -@@ -55,33 +55,33 @@ static int update_domain_cpu_policy(struct domain *d, - return -EOPNOTSUPP; - } - -- /* Start by copying the domain's existing policies. */ -- if ( !(new.cpuid = xmemdup(d->arch.cpuid)) || -- !(new.msr = xmemdup(d->arch.msr)) ) -+ /* Start by copying the domain's existing policy. */ -+ if ( !(new = xmemdup(d->arch.cpu_policy)) ) - goto out; - -+ old_new = (struct old_cpu_policy){ new, new }; -+ - /* Merge the toolstack provided data. */ - if ( (ret = x86_cpuid_copy_from_buffer( -- new.cpuid, xdpc->leaves, xdpc->nr_leaves, -+ new, xdpc->leaves, xdpc->nr_leaves, - &err.leaf, &err.subleaf)) || - (ret = x86_msr_copy_from_buffer( -- new.msr, xdpc->msrs, xdpc->nr_msrs, &err.msr)) ) -+ new, xdpc->msrs, xdpc->nr_msrs, &err.msr)) ) - goto out; - - /* Trim any newly-stale out-of-range leaves. */ -- x86_cpuid_policy_clear_out_of_range_leaves(new.cpuid); -+ x86_cpuid_policy_clear_out_of_range_leaves(new); - - /* Audit the combined dataset. 
*/ -- ret = x86_cpu_policies_are_compatible(&old_sys, &new, &err); -+ ret = x86_cpu_policies_are_compatible(&old_sys, &old_new, &err); - if ( ret ) - goto out; - - /* -- * Audit was successful. Replace existing policies, leaving the old -- * policies to be freed. -+ * Audit was successful. Replace the existing policy, leaving the old one -+ * to be freed. - */ -- SWAP(new.cpuid, d->arch.cpuid); -- SWAP(new.msr, d->arch.msr); -+ SWAP(new, d->arch.cpu_policy); - - /* TODO: Drop when x86_cpu_policies_are_compatible() is completed. */ - recalculate_cpuid_policy(d); -@@ -90,9 +90,8 @@ static int update_domain_cpu_policy(struct domain *d, - domain_cpu_policy_changed(d); - - out: -- /* Free whichever cpuid/msr structs are not installed in struct domain. */ -- xfree(new.cpuid); -- xfree(new.msr); -+ /* Free whichever struct is not installed in struct domain. */ -+ xfree(new); - - if ( ret ) - { -@@ -1328,7 +1327,7 @@ long arch_do_domctl( - if ( guest_handle_is_null(domctl->u.cpu_policy.leaves) ) - domctl->u.cpu_policy.nr_leaves = CPUID_MAX_SERIALISED_LEAVES; - else if ( (ret = x86_cpuid_copy_to_buffer( -- d->arch.cpuid, -+ d->arch.cpu_policy, - domctl->u.cpu_policy.leaves, - &domctl->u.cpu_policy.nr_leaves)) ) - break; -@@ -1337,7 +1336,7 @@ long arch_do_domctl( - if ( guest_handle_is_null(domctl->u.cpu_policy.msrs) ) - domctl->u.cpu_policy.nr_msrs = MSR_MAX_SERIALISED_ENTRIES; - else if ( (ret = x86_msr_copy_to_buffer( -- d->arch.msr, -+ d->arch.cpu_policy, - domctl->u.cpu_policy.msrs, - &domctl->u.cpu_policy.nr_msrs)) ) - break; -diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h -index eef14bb4267e..9ba34bbf5ea1 100644 ---- a/xen/arch/x86/include/asm/cpu-policy.h -+++ b/xen/arch/x86/include/asm/cpu-policy.h -@@ -3,6 +3,7 @@ - #define X86_CPU_POLICY_H - - struct cpu_policy; -+struct domain; - - extern struct cpu_policy raw_cpu_policy; - extern struct cpu_policy host_cpu_policy; -@@ -11,4 +12,7 @@ extern struct cpu_policy pv_def_cpu_policy; - extern struct cpu_policy hvm_max_cpu_policy; - extern struct cpu_policy hvm_def_cpu_policy; - -+/* Allocate and initialise a CPU policy suitable for the domain. */ -+int init_domain_cpu_policy(struct domain *d); -+ - #endif /* X86_CPU_POLICY_H */ -diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h -index ea0586277331..7f81b998ce01 100644 ---- a/xen/arch/x86/include/asm/cpuid.h -+++ b/xen/arch/x86/include/asm/cpuid.h -@@ -49,9 +49,6 @@ extern struct cpuidmasks cpuidmask_defaults; - /* Check that all previously present features are still available. */ - bool recheck_cpu_features(unsigned int cpu); - --/* Allocate and initialise a CPUID policy suitable for the domain. */ --int init_domain_cpuid_policy(struct domain *d); -- - /* Apply dom0-specific tweaks to the CPUID policy. */ - void init_dom0_cpuid_policy(struct domain *d); - -diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h -index 4e59ca8c4e14..5293c0cde405 100644 ---- a/xen/arch/x86/include/asm/domain.h -+++ b/xen/arch/x86/include/asm/domain.h -@@ -384,9 +384,16 @@ struct arch_domain - */ - uint8_t x87_fip_width; - -- /* CPUID and MSR policy objects. */ -- struct cpuid_policy *cpuid; -- struct msr_policy *msr; -+ /* -+ * The domain's CPU Policy. "cpu_policy" is considered the canonical -+ * pointer, but the "cpuid" and "msr" aliases exist so the most -+ * appropriate one can be used for local code clarity. 
-+ */ -+ union { -+ struct cpu_policy *cpu_policy; -+ struct cpu_policy *cpuid; -+ struct cpu_policy *msr; -+ }; - - struct PITState vpit; - -diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h -index 8a4da50c500a..a174bc6e892b 100644 ---- a/xen/arch/x86/include/asm/msr.h -+++ b/xen/arch/x86/include/asm/msr.h -@@ -398,7 +398,6 @@ struct vcpu_msrs - }; - - void init_guest_msr_policy(void); --int init_domain_msr_policy(struct domain *d); - int init_vcpu_msr_policy(struct vcpu *v); - - /* -diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c -index 649d93dc5444..5b3449db7a11 100644 ---- a/xen/arch/x86/mm/mem_sharing.c -+++ b/xen/arch/x86/mm/mem_sharing.c -@@ -1902,8 +1902,7 @@ static int fork(struct domain *cd, struct domain *d) - - domain_pause(d); - cd->max_pages = d->max_pages; -- *cd->arch.cpuid = *d->arch.cpuid; -- *cd->arch.msr = *d->arch.msr; -+ *cd->arch.cpu_policy = *d->arch.cpu_policy; - cd->vmtrace_size = d->vmtrace_size; - cd->parent = d; - } -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 01f95603e297..d3ca861454a7 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -122,50 +122,6 @@ void __init init_guest_msr_policy(void) - } - } - --int init_domain_msr_policy(struct domain *d) --{ -- struct msr_policy *mp = is_pv_domain(d) -- ? (IS_ENABLED(CONFIG_PV) ? &pv_def_cpu_policy : NULL) -- : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_cpu_policy : NULL); -- -- if ( !mp ) -- { -- ASSERT_UNREACHABLE(); -- return -EOPNOTSUPP; -- } -- -- mp = xmemdup(mp); -- if ( !mp ) -- return -ENOMEM; -- -- /* See comment in ctxt_switch_levelling() */ -- if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) ) -- mp->platform_info.cpuid_faulting = false; -- -- /* -- * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, -- * so dom0 can turn off workarounds as appropriate. Temporary, until the -- * domain policy logic gains a better understanding of MSRs. -- */ -- if ( is_hardware_domain(d) && cpu_has_arch_caps ) -- { -- uint64_t val; -- -- rdmsrl(MSR_ARCH_CAPABILITIES, val); -- -- mp->arch_caps.raw = val & -- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -- ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | -- ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | -- ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | -- ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); -- } -- -- d->arch.msr = mp; -- -- return 0; --} -- - int init_vcpu_msr_policy(struct vcpu *v) - { - struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs); --- -2.39.2 - diff --git a/0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch b/0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch deleted file mode 100644 index 9f85071a..00000000 --- a/0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch +++ /dev/null @@ -1,367 +0,0 @@ -From c10387a42b51e6ec8239203d6449aa5f546dc324 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 29 Mar 2023 12:37:33 +0100 -Subject: [PATCH 09/35] x86: Merge xc_cpu_policy's cpuid and msr objects - -Right now, they're the same underlying type, containing disjoint information. - -Use a single object instead. Also take the opportunity to rename 'entries' to -'msrs' which is more descriptive, and more in line with nr_msrs being the -count of MSR entries in the API. - -test-tsx uses xg_private.h to access the internals of xc_cpu_policy, so needs -updating at the same time. 
Take the opportunity to improve the code clarity -by passing a cpu_policy rather than an xc_cpu_policy into some functions. - -No practical change. This undoes the transient doubling of storage space from -earlier patches. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit c9985233ca663fea20fc8807cf509d2e3fef0dca) ---- - tools/libs/guest/xg_cpuid_x86.c | 36 ++++++++--------- - tools/libs/guest/xg_private.h | 5 +-- - tools/tests/tsx/test-tsx.c | 71 +++++++++++++++------------------ - 3 files changed, 53 insertions(+), 59 deletions(-) - -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index 76d9522c3a7c..5133d59d8a1a 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -431,7 +431,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, - xc_dominfo_t di; - unsigned int i, nr_leaves, nr_msrs; - xen_cpuid_leaf_t *leaves = NULL; -- struct cpuid_policy *p = NULL; -+ struct cpu_policy *p = NULL; - uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; - uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {}; - uint32_t len = ARRAY_SIZE(host_featureset); -@@ -692,7 +692,7 @@ static int deserialize_policy(xc_interface *xch, xc_cpu_policy_t *policy, - uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; - int rc; - -- rc = x86_cpuid_copy_from_buffer(&policy->cpuid, policy->leaves, -+ rc = x86_cpuid_copy_from_buffer(&policy->policy, policy->leaves, - nr_leaves, &err_leaf, &err_subleaf); - if ( rc ) - { -@@ -702,7 +702,7 @@ static int deserialize_policy(xc_interface *xch, xc_cpu_policy_t *policy, - return rc; - } - -- rc = x86_msr_copy_from_buffer(&policy->msr, policy->entries, -+ rc = x86_msr_copy_from_buffer(&policy->policy, policy->msrs, - nr_entries, &err_msr); - if ( rc ) - { -@@ -719,18 +719,18 @@ int xc_cpu_policy_get_system(xc_interface *xch, unsigned int policy_idx, - xc_cpu_policy_t *policy) - { - unsigned int nr_leaves = ARRAY_SIZE(policy->leaves); -- unsigned int nr_entries = ARRAY_SIZE(policy->entries); -+ unsigned int nr_msrs = ARRAY_SIZE(policy->msrs); - int rc; - - rc = get_system_cpu_policy(xch, policy_idx, &nr_leaves, policy->leaves, -- &nr_entries, policy->entries); -+ &nr_msrs, policy->msrs); - if ( rc ) - { - PERROR("Failed to obtain %u policy", policy_idx); - return rc; - } - -- rc = deserialize_policy(xch, policy, nr_leaves, nr_entries); -+ rc = deserialize_policy(xch, policy, nr_leaves, nr_msrs); - if ( rc ) - { - errno = -rc; -@@ -744,18 +744,18 @@ int xc_cpu_policy_get_domain(xc_interface *xch, uint32_t domid, - xc_cpu_policy_t *policy) - { - unsigned int nr_leaves = ARRAY_SIZE(policy->leaves); -- unsigned int nr_entries = ARRAY_SIZE(policy->entries); -+ unsigned int nr_msrs = ARRAY_SIZE(policy->msrs); - int rc; - - rc = get_domain_cpu_policy(xch, domid, &nr_leaves, policy->leaves, -- &nr_entries, policy->entries); -+ &nr_msrs, policy->msrs); - if ( rc ) - { - PERROR("Failed to obtain domain %u policy", domid); - return rc; - } - -- rc = deserialize_policy(xch, policy, nr_leaves, nr_entries); -+ rc = deserialize_policy(xch, policy, nr_leaves, nr_msrs); - if ( rc ) - { - errno = -rc; -@@ -770,16 +770,16 @@ int xc_cpu_policy_set_domain(xc_interface *xch, uint32_t domid, - { - uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; - unsigned int nr_leaves = ARRAY_SIZE(policy->leaves); -- unsigned int nr_entries = ARRAY_SIZE(policy->entries); -+ unsigned int nr_msrs = ARRAY_SIZE(policy->msrs); - int rc; - - rc = xc_cpu_policy_serialise(xch, policy, 
policy->leaves, &nr_leaves, -- policy->entries, &nr_entries); -+ policy->msrs, &nr_msrs); - if ( rc ) - return rc; - - rc = xc_set_domain_cpu_policy(xch, domid, nr_leaves, policy->leaves, -- nr_entries, policy->entries, -+ nr_msrs, policy->msrs, - &err_leaf, &err_subleaf, &err_msr); - if ( rc ) - { -@@ -802,7 +802,7 @@ int xc_cpu_policy_serialise(xc_interface *xch, const xc_cpu_policy_t *p, - - if ( leaves ) - { -- rc = x86_cpuid_copy_to_buffer(&p->cpuid, leaves, nr_leaves); -+ rc = x86_cpuid_copy_to_buffer(&p->policy, leaves, nr_leaves); - if ( rc ) - { - ERROR("Failed to serialize CPUID policy"); -@@ -813,7 +813,7 @@ int xc_cpu_policy_serialise(xc_interface *xch, const xc_cpu_policy_t *p, - - if ( msrs ) - { -- rc = x86_msr_copy_to_buffer(&p->msr, msrs, nr_msrs); -+ rc = x86_msr_copy_to_buffer(&p->policy, msrs, nr_msrs); - if ( rc ) - { - ERROR("Failed to serialize MSR policy"); -@@ -831,7 +831,7 @@ int xc_cpu_policy_update_cpuid(xc_interface *xch, xc_cpu_policy_t *policy, - uint32_t nr) - { - unsigned int err_leaf = -1, err_subleaf = -1; -- int rc = x86_cpuid_copy_from_buffer(&policy->cpuid, leaves, nr, -+ int rc = x86_cpuid_copy_from_buffer(&policy->policy, leaves, nr, - &err_leaf, &err_subleaf); - - if ( rc ) -@@ -850,7 +850,7 @@ int xc_cpu_policy_update_msrs(xc_interface *xch, xc_cpu_policy_t *policy, - const xen_msr_entry_t *msrs, uint32_t nr) - { - unsigned int err_msr = -1; -- int rc = x86_msr_copy_from_buffer(&policy->msr, msrs, nr, &err_msr); -+ int rc = x86_msr_copy_from_buffer(&policy->policy, msrs, nr, &err_msr); - - if ( rc ) - { -@@ -868,8 +868,8 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host, - xc_cpu_policy_t *guest) - { - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; -- struct old_cpu_policy h = { &host->cpuid, &host->msr }; -- struct old_cpu_policy g = { &guest->cpuid, &guest->msr }; -+ struct old_cpu_policy h = { &host->policy, &host->policy }; -+ struct old_cpu_policy g = { &guest->policy, &guest->policy }; - int rc = x86_cpu_policies_are_compatible(&h, &g, &err); - - if ( !rc ) -diff --git a/tools/libs/guest/xg_private.h b/tools/libs/guest/xg_private.h -index 09e24f122760..e729a8106c3e 100644 ---- a/tools/libs/guest/xg_private.h -+++ b/tools/libs/guest/xg_private.h -@@ -173,10 +173,9 @@ int pin_table(xc_interface *xch, unsigned int type, unsigned long mfn, - #include - - struct xc_cpu_policy { -- struct cpuid_policy cpuid; -- struct msr_policy msr; -+ struct cpu_policy policy; - xen_cpuid_leaf_t leaves[CPUID_MAX_SERIALISED_LEAVES]; -- xen_msr_entry_t entries[MSR_MAX_SERIALISED_ENTRIES]; -+ xen_msr_entry_t msrs[MSR_MAX_SERIALISED_ENTRIES]; - }; - #endif /* x86 */ - -diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c -index f11e8c54e0de..0f4ea5f9c462 100644 ---- a/tools/tests/tsx/test-tsx.c -+++ b/tools/tests/tsx/test-tsx.c -@@ -151,15 +151,15 @@ static void test_tsx_msrs(void) - { - printf("Testing MSR_TSX_FORCE_ABORT consistency\n"); - test_tsx_msr_consistency( -- MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort); -+ MSR_TSX_FORCE_ABORT, host.policy.feat.tsx_force_abort); - - printf("Testing MSR_TSX_CTRL consistency\n"); - test_tsx_msr_consistency( -- MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl); -+ MSR_TSX_CTRL, host.policy.arch_caps.tsx_ctrl); - - printf("Testing MSR_MCU_OPT_CTRL consistency\n"); - test_tsx_msr_consistency( -- MSR_MCU_OPT_CTRL, host.cpuid.feat.srbds_ctrl); -+ MSR_MCU_OPT_CTRL, host.policy.feat.srbds_ctrl); - } - - /* -@@ -281,7 +281,7 @@ static void test_rtm_behaviour(void) - else - return 
fail(" Got unexpected behaviour %d\n", rtm_behaviour); - -- if ( host.cpuid.feat.rtm ) -+ if ( host.policy.feat.rtm ) - { - if ( rtm_behaviour == RTM_UD ) - fail(" Host reports RTM, but appears unavailable\n"); -@@ -293,57 +293,52 @@ static void test_rtm_behaviour(void) - } - } - --static void dump_tsx_details(const struct xc_cpu_policy *p, const char *pref) -+static void dump_tsx_details(const struct cpu_policy *p, const char *pref) - { - printf(" %s RTM %u, HLE %u, TSX_FORCE_ABORT %u, RTM_ALWAYS_ABORT %u, TSX_CTRL %u\n", - pref, -- p->cpuid.feat.rtm, -- p->cpuid.feat.hle, -- p->cpuid.feat.tsx_force_abort, -- p->cpuid.feat.rtm_always_abort, -- p->msr.arch_caps.tsx_ctrl); -+ p->feat.rtm, -+ p->feat.hle, -+ p->feat.tsx_force_abort, -+ p->feat.rtm_always_abort, -+ p->arch_caps.tsx_ctrl); - } - - /* Sanity test various invariants we expect in the default/max policies. */ --static void test_guest_policies(const struct xc_cpu_policy *max, -- const struct xc_cpu_policy *def) -+static void test_guest_policies(const struct cpu_policy *max, -+ const struct cpu_policy *def) - { -- const struct cpuid_policy *cm = &max->cpuid; -- const struct cpuid_policy *cd = &def->cpuid; -- const struct msr_policy *mm = &max->msr; -- const struct msr_policy *md = &def->msr; -- - dump_tsx_details(max, "Max:"); - dump_tsx_details(def, "Def:"); - -- if ( ((cm->feat.raw[0].d | cd->feat.raw[0].d) & -+ if ( ((max->feat.raw[0].d | def->feat.raw[0].d) & - (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) | - bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT) | - bitmaskof(X86_FEATURE_SRBDS_CTRL))) || -- ((mm->arch_caps.raw | md->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) ) -+ ((max->arch_caps.raw | def->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) ) - fail(" Xen-only TSX controls offered to guest\n"); - - switch ( rtm_behaviour ) - { - case RTM_UD: -- if ( (cm->feat.raw[0].b | cd->feat.raw[0].b) & -+ if ( (max->feat.raw[0].b | def->feat.raw[0].b) & - (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) - fail(" HLE/RTM offered to guests despite not being available\n"); - break; - - case RTM_ABORT: -- if ( cd->feat.raw[0].b & -+ if ( def->feat.raw[0].b & - (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) - fail(" HLE/RTM offered to guests by default despite not being usable\n"); - break; - - case RTM_OK: -- if ( !cm->feat.rtm || !cd->feat.rtm ) -+ if ( !max->feat.rtm || !def->feat.rtm ) - fail(" RTM not offered to guests despite being available\n"); - break; - } - -- if ( cd->feat.hle ) -+ if ( def->feat.hle ) - fail(" Fail: HLE offered in default policy\n"); - } - -@@ -352,13 +347,13 @@ static void test_def_max_policies(void) - if ( xen_has_pv ) - { - printf("Testing PV default/max policies\n"); -- test_guest_policies(&pv_max, &pv_default); -+ test_guest_policies(&pv_max.policy, &pv_default.policy); - } - - if ( xen_has_hvm ) - { - printf("Testing HVM default/max policies\n"); -- test_guest_policies(&hvm_max, &hvm_default); -+ test_guest_policies(&hvm_max.policy, &hvm_default.policy); - } - } - -@@ -382,23 +377,23 @@ static void test_guest(struct xen_domctl_createdomain *c) - goto out; - } - -- dump_tsx_details(&guest_policy, "Cur:"); -+ dump_tsx_details(&guest_policy.policy, "Cur:"); - - /* - * Check defaults given to the guest. 
- */ -- if ( guest_policy.cpuid.feat.rtm != (rtm_behaviour == RTM_OK) ) -+ if ( guest_policy.policy.feat.rtm != (rtm_behaviour == RTM_OK) ) - fail(" RTM %u in guest, despite rtm behaviour\n", -- guest_policy.cpuid.feat.rtm); -+ guest_policy.policy.feat.rtm); - -- if ( guest_policy.cpuid.feat.hle || -- guest_policy.cpuid.feat.tsx_force_abort || -- guest_policy.cpuid.feat.rtm_always_abort || -- guest_policy.cpuid.feat.srbds_ctrl || -- guest_policy.msr.arch_caps.tsx_ctrl ) -+ if ( guest_policy.policy.feat.hle || -+ guest_policy.policy.feat.tsx_force_abort || -+ guest_policy.policy.feat.rtm_always_abort || -+ guest_policy.policy.feat.srbds_ctrl || -+ guest_policy.policy.arch_caps.tsx_ctrl ) - fail(" Unexpected features advertised\n"); - -- if ( host.cpuid.feat.rtm ) -+ if ( host.policy.feat.rtm ) - { - unsigned int _7b0; - -@@ -406,7 +401,7 @@ static void test_guest(struct xen_domctl_createdomain *c) - * If host RTM is available, all combinations of guest flags should be - * possible. Flip both HLE/RTM to check non-default settings. - */ -- _7b0 = (guest_policy.cpuid.feat.raw[0].b ^= -+ _7b0 = (guest_policy.policy.feat.raw[0].b ^= - (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM))); - - /* Set the new policy. */ -@@ -427,12 +422,12 @@ static void test_guest(struct xen_domctl_createdomain *c) - goto out; - } - -- dump_tsx_details(&guest_policy, "Cur:"); -+ dump_tsx_details(&guest_policy.policy, "Cur:"); - -- if ( guest_policy.cpuid.feat.raw[0].b != _7b0 ) -+ if ( guest_policy.policy.feat.raw[0].b != _7b0 ) - { - fail(" Expected CPUID.7[1].b 0x%08x differs from actual 0x%08x\n", -- _7b0, guest_policy.cpuid.feat.raw[0].b); -+ _7b0, guest_policy.policy.feat.raw[0].b); - goto out; - } - } --- -2.39.2 - diff --git a/0330-x86-Drop-struct-old_cpu_policy.patch b/0330-x86-Drop-struct-old_cpu_policy.patch deleted file mode 100644 index f9efb336..00000000 --- a/0330-x86-Drop-struct-old_cpu_policy.patch +++ /dev/null @@ -1,292 +0,0 @@ -From ddae2880f7efbaff9126cf7169d25c1bac1c020a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 29 Mar 2023 12:01:33 +0100 -Subject: [PATCH 10/35] x86: Drop struct old_cpu_policy - -With all the complicated callers of x86_cpu_policies_are_compatible() updated -to use a single cpu_policy object, we can drop the final user of struct -old_cpu_policy. - -Update x86_cpu_policies_are_compatible() to take (new) cpu_policy pointers, -reducing the amount of internal pointer chasing, and update all callers to -pass their cpu_policy objects directly. 
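In caller terms, the simplification described above is visible in the xg_cpuid_x86.c hunk below: the old code wrapped the same pointer twice into a struct old_cpu_policy just to satisfy the signature, while the new code passes the merged objects directly. A self-contained sketch of the new call shape, with stub types and a single illustrative check standing in for the real audit:

    #include <stdio.h>

    /* Stubs for the real Xen types. */
    struct cpu_policy { unsigned int max_leaf; };
    struct cpu_policy_errors { unsigned int leaf, subleaf, msr; };

    /* New-style signature: plain cpu_policy pointers, no wrapper struct. */
    static int x86_cpu_policies_are_compatible(const struct cpu_policy *host,
                                               const struct cpu_policy *guest,
                                               struct cpu_policy_errors *err)
    {
        /* Condensed from the max_leaf audit in xen/lib/x86/policy.c. */
        if ( guest->max_leaf > host->max_leaf )
        {
            err->leaf = 0;
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        struct cpu_policy host = { .max_leaf = 0x20 }, guest = { .max_leaf = 0x10 };
        struct cpu_policy_errors err = { -1, -1, -1 };

        printf("compatible: %s\n",
               x86_cpu_policies_are_compatible(&host, &guest, &err) ? "no" : "yes");
        return 0;
    }

One audit function, one pointer type: the pointer chasing through ->cpuid and ->msr disappears along with struct old_cpu_policy.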
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 66c5c99656314451ff9520f91cff5bb39fee9fed) ---- - tools/libs/guest/xg_cpuid_x86.c | 4 +- - tools/tests/cpu-policy/test-cpu-policy.c | 50 +++++++----------------- - xen/arch/x86/domctl.c | 7 +--- - xen/include/xen/lib/x86/cpu-policy.h | 12 ++---- - xen/lib/x86/policy.c | 12 +++--- - 5 files changed, 27 insertions(+), 58 deletions(-) - -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index 5133d59d8a1a..3be9c5e85587 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -868,9 +868,7 @@ bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host, - xc_cpu_policy_t *guest) - { - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; -- struct old_cpu_policy h = { &host->policy, &host->policy }; -- struct old_cpu_policy g = { &guest->policy, &guest->policy }; -- int rc = x86_cpu_policies_are_compatible(&h, &g, &err); -+ int rc = x86_cpu_policies_are_compatible(&host->policy, &guest->policy, &err); - - if ( !rc ) - return true; -diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c -index 8a223fddb3da..4f3d09f1b780 100644 ---- a/tools/tests/cpu-policy/test-cpu-policy.c -+++ b/tools/tests/cpu-policy/test-cpu-policy.c -@@ -98,7 +98,7 @@ static bool msrs_are_sorted(const xen_msr_entry_t *entries, unsigned int nr) - - static void test_cpuid_current(void) - { -- struct cpuid_policy p; -+ struct cpu_policy p; - xen_cpuid_leaf_t leaves[CPUID_MAX_SERIALISED_LEAVES]; - unsigned int nr = ARRAY_SIZE(leaves); - int rc; -@@ -118,7 +118,7 @@ static void test_cpuid_current(void) - static void test_cpuid_serialise_success(void) - { - static const struct test { -- struct cpuid_policy p; -+ struct cpu_policy p; - const char *name; - unsigned int nr_leaves; - } tests[] = { -@@ -242,7 +242,7 @@ static void test_cpuid_serialise_success(void) - static void test_msr_serialise_success(void) - { - static const struct test { -- struct msr_policy p; -+ struct cpu_policy p; - const char *name; - unsigned int nr_msrs; - } tests[] = { -@@ -430,7 +430,7 @@ static void test_cpuid_out_of_range_clearing(void) - static const struct test { - const char *name; - unsigned int nr_markers; -- struct cpuid_policy p; -+ struct cpu_policy p; - } tests[] = { - { - .name = "basic", -@@ -550,7 +550,7 @@ static void test_cpuid_out_of_range_clearing(void) - for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) - { - const struct test *t = &tests[i]; -- struct cpuid_policy *p = memdup(&t->p); -+ struct cpu_policy *p = memdup(&t->p); - void *ptr; - unsigned int nr_markers; - -@@ -574,23 +574,20 @@ static void test_is_compatible_success(void) - { - static struct test { - const char *name; -- struct cpuid_policy host_cpuid; -- struct cpuid_policy guest_cpuid; -- struct msr_policy host_msr; -- struct msr_policy guest_msr; -+ struct cpu_policy host, guest; - } tests[] = { - { - .name = "Host CPUID faulting, Guest not", -- .host_msr = { -+ .host = { - .platform_info.cpuid_faulting = true, - }, - }, - { - .name = "Host CPUID faulting, Guest wanted", -- .host_msr = { -+ .host = { - .platform_info.cpuid_faulting = true, - }, -- .guest_msr = { -+ .guest = { - .platform_info.cpuid_faulting = true, - }, - }, -@@ -602,15 +599,8 @@ static void test_is_compatible_success(void) - for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) - { - struct test *t = &tests[i]; -- struct old_cpu_policy sys = { -- &t->host_cpuid, -- &t->host_msr, -- }, new = { -- 
&t->guest_cpuid, -- &t->guest_msr, -- }; - struct cpu_policy_errors e; -- int res = x86_cpu_policies_are_compatible(&sys, &new, &e); -+ int res = x86_cpu_policies_are_compatible(&t->host, &t->guest, &e); - - /* Check the expected error output. */ - if ( res != 0 || memcmp(&no_errors, &e, sizeof(no_errors)) ) -@@ -624,25 +614,22 @@ static void test_is_compatible_failure(void) - { - static struct test { - const char *name; -- struct cpuid_policy host_cpuid; -- struct cpuid_policy guest_cpuid; -- struct msr_policy host_msr; -- struct msr_policy guest_msr; -+ struct cpu_policy host, guest; - struct cpu_policy_errors e; - } tests[] = { - { - .name = "Host basic.max_leaf out of range", -- .guest_cpuid.basic.max_leaf = 1, -+ .guest.basic.max_leaf = 1, - .e = { 0, -1, -1 }, - }, - { - .name = "Host extd.max_leaf out of range", -- .guest_cpuid.extd.max_leaf = 1, -+ .guest.extd.max_leaf = 1, - .e = { 0x80000000, -1, -1 }, - }, - { - .name = "Host no CPUID faulting, Guest wanted", -- .guest_msr = { -+ .guest = { - .platform_info.cpuid_faulting = true, - }, - .e = { -1, -1, 0xce }, -@@ -654,15 +641,8 @@ static void test_is_compatible_failure(void) - for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i ) - { - struct test *t = &tests[i]; -- struct old_cpu_policy sys = { -- &t->host_cpuid, -- &t->host_msr, -- }, new = { -- &t->guest_cpuid, -- &t->guest_msr, -- }; - struct cpu_policy_errors e; -- int res = x86_cpu_policies_are_compatible(&sys, &new, &e); -+ int res = x86_cpu_policies_are_compatible(&t->host, &t->guest, &e); - - /* Check the expected error output. */ - if ( res == 0 || memcmp(&t->e, &e, sizeof(t->e)) ) -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 857d0abe323e..6d15d0c29c4e 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -42,10 +42,9 @@ static int update_domain_cpu_policy(struct domain *d, - xen_domctl_cpu_policy_t *xdpc) - { - struct cpu_policy *new; -- struct cpu_policy *sys = is_pv_domain(d) -+ const struct cpu_policy *sys = is_pv_domain(d) - ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) - : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); -- struct old_cpu_policy old_sys = { sys, sys }, old_new; - struct cpu_policy_errors err = INIT_CPU_POLICY_ERRORS; - int ret = -ENOMEM; - -@@ -59,8 +58,6 @@ static int update_domain_cpu_policy(struct domain *d, - if ( !(new = xmemdup(d->arch.cpu_policy)) ) - goto out; - -- old_new = (struct old_cpu_policy){ new, new }; -- - /* Merge the toolstack provided data. */ - if ( (ret = x86_cpuid_copy_from_buffer( - new, xdpc->leaves, xdpc->nr_leaves, -@@ -73,7 +70,7 @@ static int update_domain_cpu_policy(struct domain *d, - x86_cpuid_policy_clear_out_of_range_leaves(new); - - /* Audit the combined dataset. 
*/ -- ret = x86_cpu_policies_are_compatible(&old_sys, &old_new, &err); -+ ret = x86_cpu_policies_are_compatible(sys, new, &err); - if ( ret ) - goto out; - -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index 53fffca55211..8b27a0725b8e 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -379,12 +379,6 @@ struct cpu_policy - #define cpuid_policy cpu_policy - #define msr_policy cpu_policy - --struct old_cpu_policy --{ -- struct cpuid_policy *cpuid; -- struct msr_policy *msr; --}; -- - struct cpu_policy_errors - { - uint32_t leaf, subleaf; -@@ -559,7 +553,7 @@ int x86_msr_copy_from_buffer(struct msr_policy *policy, - const msr_entry_buffer_t msrs, uint32_t nr_entries, - uint32_t *err_msr); - --/* -+/** - * Calculate whether two policies are compatible. - * - * i.e. Can a VM configured with @guest run on a CPU supporting @host. -@@ -573,8 +567,8 @@ int x86_msr_copy_from_buffer(struct msr_policy *policy, - * incompatibility is detected, the optional err pointer may identify the - * problematic leaf/subleaf and/or MSR. - */ --int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, -- const struct old_cpu_policy *guest, -+int x86_cpu_policies_are_compatible(const struct cpu_policy *host, -+ const struct cpu_policy *guest, - struct cpu_policy_errors *err); - - #endif /* !XEN_LIB_X86_POLICIES_H */ -diff --git a/xen/lib/x86/policy.c b/xen/lib/x86/policy.c -index 2975711d7c6c..a9c60000af9d 100644 ---- a/xen/lib/x86/policy.c -+++ b/xen/lib/x86/policy.c -@@ -2,8 +2,8 @@ - - #include - --int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, -- const struct old_cpu_policy *guest, -+int x86_cpu_policies_are_compatible(const struct cpu_policy *host, -+ const struct cpu_policy *guest, - struct cpu_policy_errors *err) - { - struct cpu_policy_errors e = INIT_CPU_POLICY_ERRORS; -@@ -15,18 +15,18 @@ int x86_cpu_policies_are_compatible(const struct old_cpu_policy *host, - #define FAIL_MSR(m) \ - do { e.msr = (m); goto out; } while ( 0 ) - -- if ( guest->cpuid->basic.max_leaf > host->cpuid->basic.max_leaf ) -+ if ( guest->basic.max_leaf > host->basic.max_leaf ) - FAIL_CPUID(0, NA); - -- if ( guest->cpuid->feat.max_subleaf > host->cpuid->feat.max_subleaf ) -+ if ( guest->feat.max_subleaf > host->feat.max_subleaf ) - FAIL_CPUID(7, 0); - -- if ( guest->cpuid->extd.max_leaf > host->cpuid->extd.max_leaf ) -+ if ( guest->extd.max_leaf > host->extd.max_leaf ) - FAIL_CPUID(0x80000000, NA); - - /* TODO: Audit more CPUID data. */ - -- if ( ~host->msr->platform_info.raw & guest->msr->platform_info.raw ) -+ if ( ~host->platform_info.raw & guest->platform_info.raw ) - FAIL_MSR(MSR_INTEL_PLATFORM_INFO); - - #undef FAIL_MSR --- -2.39.2 - diff --git a/0331-x86-Out-of-inline-the-policy-featureset-convertors.patch b/0331-x86-Out-of-inline-the-policy-featureset-convertors.patch deleted file mode 100644 index 3e521ec6..00000000 --- a/0331-x86-Out-of-inline-the-policy-featureset-convertors.patch +++ /dev/null @@ -1,293 +0,0 @@ -From fccc0212b28fc9a16dc469391a65ef9bd585d00b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 30 Mar 2023 18:21:01 +0100 -Subject: [PATCH 11/35] x86: Out-of-inline the policy<->featureset convertors - -These are already getting over-large for being inline functions, and are only -going to grow further over time. 
Out of line them, yielding the following net -delta from bloat-o-meter: - - add/remove: 2/0 grow/shrink: 0/4 up/down: 276/-1877 (-1601) - -Switch to the newer cpu_policy terminology while doing so. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 1027df4c00823f8b448e3a6861cc7b6ce61ba4e4) ---- - tools/libs/guest/xg_cpuid_x86.c | 2 +- - xen/arch/x86/cpuid.c | 28 +++++++-------- - xen/arch/x86/sysctl.c | 2 +- - xen/include/xen/lib/x86/cpu-policy.h | 52 ++++++---------------------- - xen/lib/x86/cpuid.c | 42 ++++++++++++++++++++++ - 5 files changed, 68 insertions(+), 58 deletions(-) - -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index 3be9c5e85587..b38e3a9de350 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -565,7 +565,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, - } - } - -- cpuid_featureset_to_policy(feat, p); -+ x86_cpu_featureset_to_policy(feat, p); - } - else - { -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index e074befb721d..ef96af738af0 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -368,7 +368,7 @@ static void __init calculate_host_policy(void) - p->extd.max_leaf = 0x80000000 | min_t(uint32_t, max_extd_leaf & 0xffff, - ARRAY_SIZE(p->extd.raw) - 1); - -- cpuid_featureset_to_policy(boot_cpu_data.x86_capability, p); -+ x86_cpu_featureset_to_policy(boot_cpu_data.x86_capability, p); - recalculate_xstate(p); - recalculate_misc(p); - -@@ -450,7 +450,7 @@ static void __init calculate_pv_max_policy(void) - unsigned int i; - - *p = host_cpu_policy; -- cpuid_policy_to_featureset(p, pv_featureset); -+ x86_cpu_policy_to_featureset(p, pv_featureset); - - for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) - pv_featureset[i] &= pv_max_featuremask[i]; -@@ -468,7 +468,7 @@ static void __init calculate_pv_max_policy(void) - guest_common_feature_adjustments(pv_featureset); - - sanitise_featureset(pv_featureset); -- cpuid_featureset_to_policy(pv_featureset, p); -+ x86_cpu_featureset_to_policy(pv_featureset, p); - recalculate_xstate(p); - - p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ -@@ -481,7 +481,7 @@ static void __init calculate_pv_def_policy(void) - unsigned int i; - - *p = pv_max_cpu_policy; -- cpuid_policy_to_featureset(p, pv_featureset); -+ x86_cpu_policy_to_featureset(p, pv_featureset); - - for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) - pv_featureset[i] &= pv_def_featuremask[i]; -@@ -490,7 +490,7 @@ static void __init calculate_pv_def_policy(void) - guest_common_default_feature_adjustments(pv_featureset); - - sanitise_featureset(pv_featureset); -- cpuid_featureset_to_policy(pv_featureset, p); -+ x86_cpu_featureset_to_policy(pv_featureset, p); - recalculate_xstate(p); - } - -@@ -502,7 +502,7 @@ static void __init calculate_hvm_max_policy(void) - const uint32_t *hvm_featuremask; - - *p = host_cpu_policy; -- cpuid_policy_to_featureset(p, hvm_featureset); -+ x86_cpu_policy_to_featureset(p, hvm_featureset); - - hvm_featuremask = hvm_hap_supported() ? 
- hvm_hap_max_featuremask : hvm_shadow_max_featuremask; -@@ -572,7 +572,7 @@ static void __init calculate_hvm_max_policy(void) - guest_common_feature_adjustments(hvm_featureset); - - sanitise_featureset(hvm_featureset); -- cpuid_featureset_to_policy(hvm_featureset, p); -+ x86_cpu_featureset_to_policy(hvm_featureset, p); - recalculate_xstate(p); - } - -@@ -584,7 +584,7 @@ static void __init calculate_hvm_def_policy(void) - const uint32_t *hvm_featuremask; - - *p = hvm_max_cpu_policy; -- cpuid_policy_to_featureset(p, hvm_featureset); -+ x86_cpu_policy_to_featureset(p, hvm_featureset); - - hvm_featuremask = hvm_hap_supported() ? - hvm_hap_def_featuremask : hvm_shadow_def_featuremask; -@@ -603,7 +603,7 @@ static void __init calculate_hvm_def_policy(void) - __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); - - sanitise_featureset(hvm_featureset); -- cpuid_featureset_to_policy(hvm_featureset, p); -+ x86_cpu_featureset_to_policy(hvm_featureset, p); - recalculate_xstate(p); - } - -@@ -673,8 +673,8 @@ void recalculate_cpuid_policy(struct domain *d) - ? CPUID_GUEST_NR_EXTD_AMD - : CPUID_GUEST_NR_EXTD_INTEL) - 1); - -- cpuid_policy_to_featureset(p, fs); -- cpuid_policy_to_featureset(max, max_fs); -+ x86_cpu_policy_to_featureset(p, fs); -+ x86_cpu_policy_to_featureset(max, max_fs); - - if ( is_hvm_domain(d) ) - { -@@ -731,7 +731,7 @@ void recalculate_cpuid_policy(struct domain *d) - (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | - cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); - -- cpuid_featureset_to_policy(fs, p); -+ x86_cpu_featureset_to_policy(fs, p); - - /* Pass host cacheline size through to guests. */ - p->basic.clflush_size = max->basic.clflush_size; -@@ -797,7 +797,7 @@ void __init init_dom0_cpuid_policy(struct domain *d) - uint32_t fs[FSCAPINTS]; - unsigned int i; - -- cpuid_policy_to_featureset(p, fs); -+ x86_cpu_policy_to_featureset(p, fs); - - for ( i = 0; i < ARRAY_SIZE(fs); ++i ) - { -@@ -805,7 +805,7 @@ void __init init_dom0_cpuid_policy(struct domain *d) - fs[i] &= ~dom0_disable_feat[i]; - } - -- cpuid_featureset_to_policy(fs, p); -+ x86_cpu_featureset_to_policy(fs, p); - - recalculate_cpuid_policy(d); - } -diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c -index c68242e5bcaf..42dc360ad6e9 100644 ---- a/xen/arch/x86/sysctl.c -+++ b/xen/arch/x86/sysctl.c -@@ -339,7 +339,7 @@ long arch_do_sysctl( - ret = -EINVAL; - - if ( !ret ) -- cpuid_policy_to_featureset(p, featureset); -+ x86_cpu_policy_to_featureset(p, featureset); - - /* Copy the requested featureset into place. */ - if ( !ret && copy_to_guest(sysctl->u.cpu_featureset.features, -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index 8b27a0725b8e..57b4633c861e 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -387,49 +387,17 @@ struct cpu_policy_errors - - #define INIT_CPU_POLICY_ERRORS { -1, -1, -1 } - --/* Fill in a featureset bitmap from a CPUID policy. 
*/ --static inline void cpuid_policy_to_featureset( -- const struct cpuid_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) --{ -- fs[FEATURESET_1d] = p->basic._1d; -- fs[FEATURESET_1c] = p->basic._1c; -- fs[FEATURESET_e1d] = p->extd.e1d; -- fs[FEATURESET_e1c] = p->extd.e1c; -- fs[FEATURESET_Da1] = p->xstate.Da1; -- fs[FEATURESET_7b0] = p->feat._7b0; -- fs[FEATURESET_7c0] = p->feat._7c0; -- fs[FEATURESET_e7d] = p->extd.e7d; -- fs[FEATURESET_e8b] = p->extd.e8b; -- fs[FEATURESET_7d0] = p->feat._7d0; -- fs[FEATURESET_7a1] = p->feat._7a1; -- fs[FEATURESET_e21a] = p->extd.e21a; -- fs[FEATURESET_7b1] = p->feat._7b1; -- fs[FEATURESET_7d2] = p->feat._7d2; -- fs[FEATURESET_7c1] = p->feat._7c1; -- fs[FEATURESET_7d1] = p->feat._7d1; --} -+/** -+ * Copy the featureset words out of a cpu_policy object. -+ */ -+void x86_cpu_policy_to_featureset(const struct cpu_policy *p, -+ uint32_t fs[FEATURESET_NR_ENTRIES]); - --/* Fill in a CPUID policy from a featureset bitmap. */ --static inline void cpuid_featureset_to_policy( -- const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpuid_policy *p) --{ -- p->basic._1d = fs[FEATURESET_1d]; -- p->basic._1c = fs[FEATURESET_1c]; -- p->extd.e1d = fs[FEATURESET_e1d]; -- p->extd.e1c = fs[FEATURESET_e1c]; -- p->xstate.Da1 = fs[FEATURESET_Da1]; -- p->feat._7b0 = fs[FEATURESET_7b0]; -- p->feat._7c0 = fs[FEATURESET_7c0]; -- p->extd.e7d = fs[FEATURESET_e7d]; -- p->extd.e8b = fs[FEATURESET_e8b]; -- p->feat._7d0 = fs[FEATURESET_7d0]; -- p->feat._7a1 = fs[FEATURESET_7a1]; -- p->extd.e21a = fs[FEATURESET_e21a]; -- p->feat._7b1 = fs[FEATURESET_7b1]; -- p->feat._7d2 = fs[FEATURESET_7d2]; -- p->feat._7c1 = fs[FEATURESET_7c1]; -- p->feat._7d1 = fs[FEATURESET_7d1]; --} -+/** -+ * Copy the featureset words back into a cpu_policy object. -+ */ -+void x86_cpu_featureset_to_policy(const uint32_t fs[FEATURESET_NR_ENTRIES], -+ struct cpu_policy *p); - - static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) - { -diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c -index e81f76c779c0..734e90823a63 100644 ---- a/xen/lib/x86/cpuid.c -+++ b/xen/lib/x86/cpuid.c -@@ -60,6 +60,48 @@ const char *x86_cpuid_vendor_to_str(unsigned int vendor) - } - } - -+void x86_cpu_policy_to_featureset( -+ const struct cpu_policy *p, uint32_t fs[FEATURESET_NR_ENTRIES]) -+{ -+ fs[FEATURESET_1d] = p->basic._1d; -+ fs[FEATURESET_1c] = p->basic._1c; -+ fs[FEATURESET_e1d] = p->extd.e1d; -+ fs[FEATURESET_e1c] = p->extd.e1c; -+ fs[FEATURESET_Da1] = p->xstate.Da1; -+ fs[FEATURESET_7b0] = p->feat._7b0; -+ fs[FEATURESET_7c0] = p->feat._7c0; -+ fs[FEATURESET_e7d] = p->extd.e7d; -+ fs[FEATURESET_e8b] = p->extd.e8b; -+ fs[FEATURESET_7d0] = p->feat._7d0; -+ fs[FEATURESET_7a1] = p->feat._7a1; -+ fs[FEATURESET_e21a] = p->extd.e21a; -+ fs[FEATURESET_7b1] = p->feat._7b1; -+ fs[FEATURESET_7d2] = p->feat._7d2; -+ fs[FEATURESET_7c1] = p->feat._7c1; -+ fs[FEATURESET_7d1] = p->feat._7d1; -+} -+ -+void x86_cpu_featureset_to_policy( -+ const uint32_t fs[FEATURESET_NR_ENTRIES], struct cpu_policy *p) -+{ -+ p->basic._1d = fs[FEATURESET_1d]; -+ p->basic._1c = fs[FEATURESET_1c]; -+ p->extd.e1d = fs[FEATURESET_e1d]; -+ p->extd.e1c = fs[FEATURESET_e1c]; -+ p->xstate.Da1 = fs[FEATURESET_Da1]; -+ p->feat._7b0 = fs[FEATURESET_7b0]; -+ p->feat._7c0 = fs[FEATURESET_7c0]; -+ p->extd.e7d = fs[FEATURESET_e7d]; -+ p->extd.e8b = fs[FEATURESET_e8b]; -+ p->feat._7d0 = fs[FEATURESET_7d0]; -+ p->feat._7a1 = fs[FEATURESET_7a1]; -+ p->extd.e21a = fs[FEATURESET_e21a]; -+ p->feat._7b1 = fs[FEATURESET_7b1]; -+ p->feat._7d2 = fs[FEATURESET_7d2]; -+ 
p->feat._7c1 = fs[FEATURESET_7c1]; -+ p->feat._7d1 = fs[FEATURESET_7d1]; -+} -+ - void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p) - { - p->x86_vendor = x86_cpuid_lookup_vendor( --- -2.39.2 - diff --git a/0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch b/0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch deleted file mode 100644 index b56678e8..00000000 --- a/0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch +++ /dev/null @@ -1,261 +0,0 @@ -From ac2df7a2193262972e76149b2bd01ccdce908133 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 3 Apr 2023 17:48:43 +0100 -Subject: [PATCH 12/35] x86/boot: Move MSR policy initialisation logic into - cpu-policy.c - -Switch to the newer cpu_policy nomenclature. - -No practical change. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 4f20f596ce9bd95bde077a1ae0d7e07d20a5f6be) ---- - xen/arch/x86/cpu-policy.c | 84 +++++++++++++++++++++++++++ - xen/arch/x86/include/asm/cpu-policy.h | 3 + - xen/arch/x86/include/asm/msr.h | 1 - - xen/arch/x86/msr.c | 84 --------------------------- - xen/arch/x86/setup.c | 3 +- - 5 files changed, 89 insertions(+), 86 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index e9ac1269c35a..f6a2317ed7bd 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -20,6 +20,90 @@ struct cpu_policy __ro_after_init hvm_max_cpu_policy; - struct cpu_policy __ro_after_init hvm_def_cpu_policy; - #endif - -+static void __init calculate_raw_policy(void) -+{ -+ struct cpu_policy *p = &raw_cpu_policy; -+ -+ /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ -+ /* Was already added by probe_cpuid_faulting() */ -+ -+ if ( cpu_has_arch_caps ) -+ rdmsrl(MSR_ARCH_CAPABILITIES, p->arch_caps.raw); -+} -+ -+static void __init calculate_host_policy(void) -+{ -+ struct cpu_policy *p = &host_cpu_policy; -+ -+ *p = raw_cpu_policy; -+ -+ /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ -+ /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ -+ p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; -+ -+ /* Temporary, until we have known_features[] for feature bits in MSRs. */ -+ p->arch_caps.raw &= -+ (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -+ ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | -+ ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO | -+ ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO | -+ ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO | -+ ARCH_CAPS_PBRSB_NO); -+} -+ -+static void __init calculate_pv_max_policy(void) -+{ -+ struct cpu_policy *p = &pv_max_cpu_policy; -+ -+ *p = host_cpu_policy; -+ -+ p->arch_caps.raw = 0; /* Not supported yet. */ -+} -+ -+static void __init calculate_pv_def_policy(void) -+{ -+ struct cpu_policy *p = &pv_def_cpu_policy; -+ -+ *p = pv_max_cpu_policy; -+} -+ -+static void __init calculate_hvm_max_policy(void) -+{ -+ struct cpu_policy *p = &hvm_max_cpu_policy; -+ -+ *p = host_cpu_policy; -+ -+ /* It's always possible to emulate CPUID faulting for HVM guests */ -+ p->platform_info.cpuid_faulting = true; -+ -+ p->arch_caps.raw = 0; /* Not supported yet. 
*/ -+} -+ -+static void __init calculate_hvm_def_policy(void) -+{ -+ struct cpu_policy *p = &hvm_def_cpu_policy; -+ -+ *p = hvm_max_cpu_policy; -+} -+ -+void __init init_guest_cpu_policies(void) -+{ -+ calculate_raw_policy(); -+ calculate_host_policy(); -+ -+ if ( IS_ENABLED(CONFIG_PV) ) -+ { -+ calculate_pv_max_policy(); -+ calculate_pv_def_policy(); -+ } -+ -+ if ( hvm_enabled ) -+ { -+ calculate_hvm_max_policy(); -+ calculate_hvm_def_policy(); -+ } -+} -+ - int init_domain_cpu_policy(struct domain *d) - { - struct cpu_policy *p = is_pv_domain(d) -diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h -index 9ba34bbf5ea1..13e2a1f86d13 100644 ---- a/xen/arch/x86/include/asm/cpu-policy.h -+++ b/xen/arch/x86/include/asm/cpu-policy.h -@@ -12,6 +12,9 @@ extern struct cpu_policy pv_def_cpu_policy; - extern struct cpu_policy hvm_max_cpu_policy; - extern struct cpu_policy hvm_def_cpu_policy; - -+/* Initialise the guest cpu_policy objects. */ -+void init_guest_cpu_policies(void); -+ - /* Allocate and initialise a CPU policy suitable for the domain. */ - int init_domain_cpu_policy(struct domain *d); - -diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h -index a174bc6e892b..b51d92e27c74 100644 ---- a/xen/arch/x86/include/asm/msr.h -+++ b/xen/arch/x86/include/asm/msr.h -@@ -397,7 +397,6 @@ struct vcpu_msrs - uint32_t dr_mask[4]; - }; - --void init_guest_msr_policy(void); - int init_vcpu_msr_policy(struct vcpu *v); - - /* -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index d3ca861454a7..14bcb8261c47 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -38,90 +38,6 @@ - - DEFINE_PER_CPU(uint32_t, tsc_aux); - --static void __init calculate_raw_policy(void) --{ -- struct msr_policy *mp = &raw_cpu_policy; -- -- /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ -- /* Was already added by probe_cpuid_faulting() */ -- -- if ( cpu_has_arch_caps ) -- rdmsrl(MSR_ARCH_CAPABILITIES, mp->arch_caps.raw); --} -- --static void __init calculate_host_policy(void) --{ -- struct msr_policy *mp = &host_cpu_policy; -- -- *mp = raw_cpu_policy; -- -- /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ -- /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ -- mp->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; -- -- /* Temporary, until we have known_features[] for feature bits in MSRs. */ -- mp->arch_caps.raw &= -- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -- ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | -- ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO | -- ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO | -- ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO | -- ARCH_CAPS_PBRSB_NO); --} -- --static void __init calculate_pv_max_policy(void) --{ -- struct msr_policy *mp = &pv_max_cpu_policy; -- -- *mp = host_cpu_policy; -- -- mp->arch_caps.raw = 0; /* Not supported yet. */ --} -- --static void __init calculate_pv_def_policy(void) --{ -- struct msr_policy *mp = &pv_def_cpu_policy; -- -- *mp = pv_max_cpu_policy; --} -- --static void __init calculate_hvm_max_policy(void) --{ -- struct msr_policy *mp = &hvm_max_cpu_policy; -- -- *mp = host_cpu_policy; -- -- /* It's always possible to emulate CPUID faulting for HVM guests */ -- mp->platform_info.cpuid_faulting = true; -- -- mp->arch_caps.raw = 0; /* Not supported yet. 
*/ --} -- --static void __init calculate_hvm_def_policy(void) --{ -- struct msr_policy *mp = &hvm_def_cpu_policy; -- -- *mp = hvm_max_cpu_policy; --} -- --void __init init_guest_msr_policy(void) --{ -- calculate_raw_policy(); -- calculate_host_policy(); -- -- if ( IS_ENABLED(CONFIG_PV) ) -- { -- calculate_pv_max_policy(); -- calculate_pv_def_policy(); -- } -- -- if ( hvm_enabled ) -- { -- calculate_hvm_max_policy(); -- calculate_hvm_def_policy(); -- } --} -- - int init_vcpu_msr_policy(struct vcpu *v) - { - struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs); -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 09c17b10167c..1d62ea1ad9d9 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1943,7 +1944,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - panic("Could not protect TXT memory regions\n"); - - init_guest_cpuid(); -- init_guest_msr_policy(); -+ init_guest_cpu_policies(); - - if ( xen_cpuidle ) - xen_processor_pmbits |= XEN_PROCESSOR_PM_CX; --- -2.39.2 - diff --git a/0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch b/0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch deleted file mode 100644 index 711fae6d..00000000 --- a/0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch +++ /dev/null @@ -1,1780 +0,0 @@ -From a7e07cd930532e75552fd20b14a4a9c301c723d4 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 3 Apr 2023 19:06:02 +0100 -Subject: [PATCH 13/35] x86/boot: Merge CPUID policy initialisation logic into - cpu-policy.c - -Switch to the newer cpu_policy nomenclature. Do some easy cleanup of -includes. - -No practical change. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 8eb56eb959a50bf9afd0fd590ec394e9145970a4) ---- - xen/arch/x86/cpu-policy.c | 743 +++++++++++++++++++++++ - xen/arch/x86/cpuid.c | 808 +------------------------- - xen/arch/x86/hvm/hvm.c | 1 - - xen/arch/x86/include/asm/cpu-policy.h | 6 + - xen/arch/x86/include/asm/cpuid.h | 11 +- - xen/arch/x86/pv/domain.c | 1 + - xen/arch/x86/setup.c | 2 - - 7 files changed, 755 insertions(+), 817 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index f6a2317ed7bd..9c506b6614f2 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -1,13 +1,19 @@ - /* SPDX-License-Identifier: GPL-2.0-or-later */ - #include - #include -+#include - #include - - #include - -+#include - #include -+#include -+#include - #include -+#include - #include -+#include - - struct cpu_policy __ro_after_init raw_cpu_policy; - struct cpu_policy __ro_after_init host_cpu_policy; -@@ -20,10 +26,332 @@ struct cpu_policy __ro_after_init hvm_max_cpu_policy; - struct cpu_policy __ro_after_init hvm_def_cpu_policy; - #endif - -+const uint32_t known_features[] = INIT_KNOWN_FEATURES; -+ -+static const uint32_t __initconst pv_max_featuremask[] = INIT_PV_MAX_FEATURES; -+static const uint32_t hvm_shadow_max_featuremask[] = INIT_HVM_SHADOW_MAX_FEATURES; -+static const uint32_t __initconst hvm_hap_max_featuremask[] = -+ INIT_HVM_HAP_MAX_FEATURES; -+static const uint32_t __initconst pv_def_featuremask[] = INIT_PV_DEF_FEATURES; -+static const uint32_t __initconst hvm_shadow_def_featuremask[] = -+ INIT_HVM_SHADOW_DEF_FEATURES; -+static const uint32_t __initconst hvm_hap_def_featuremask[] = -+ INIT_HVM_HAP_DEF_FEATURES; -+static const uint32_t deep_features[] = INIT_DEEP_FEATURES; -+ -+static const struct 
feature_name { -+ const char *name; -+ unsigned int bit; -+} feature_names[] __initconstrel = INIT_FEATURE_NAMES; -+ -+/* -+ * Parse a list of cpuid feature names -> bool, calling the callback for any -+ * matches found. -+ * -+ * always_inline, because this is init code only and we really don't want a -+ * function pointer call in the middle of the loop. -+ */ -+static int __init always_inline parse_cpuid( -+ const char *s, void (*callback)(unsigned int feat, bool val)) -+{ -+ const char *ss; -+ int val, rc = 0; -+ -+ do { -+ const struct feature_name *lhs, *rhs, *mid = NULL /* GCC... */; -+ const char *feat; -+ -+ ss = strchr(s, ','); -+ if ( !ss ) -+ ss = strchr(s, '\0'); -+ -+ /* Skip the 'no-' prefix for name comparisons. */ -+ feat = s; -+ if ( strncmp(s, "no-", 3) == 0 ) -+ feat += 3; -+ -+ /* (Re)initalise lhs and rhs for binary search. */ -+ lhs = feature_names; -+ rhs = feature_names + ARRAY_SIZE(feature_names); -+ -+ while ( lhs < rhs ) -+ { -+ int res; -+ -+ mid = lhs + (rhs - lhs) / 2; -+ res = cmdline_strcmp(feat, mid->name); -+ -+ if ( res < 0 ) -+ { -+ rhs = mid; -+ continue; -+ } -+ if ( res > 0 ) -+ { -+ lhs = mid + 1; -+ continue; -+ } -+ -+ if ( (val = parse_boolean(mid->name, s, ss)) >= 0 ) -+ { -+ callback(mid->bit, val); -+ mid = NULL; -+ } -+ -+ break; -+ } -+ -+ /* -+ * Mid being NULL means that the name and boolean were successfully -+ * identified. Everything else is an error. -+ */ -+ if ( mid ) -+ rc = -EINVAL; -+ -+ s = ss + 1; -+ } while ( *ss ); -+ -+ return rc; -+} -+ -+static void __init cf_check _parse_xen_cpuid(unsigned int feat, bool val) -+{ -+ if ( !val ) -+ setup_clear_cpu_cap(feat); -+ else if ( feat == X86_FEATURE_RDRAND && -+ (cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_RDRAND)) ) -+ setup_force_cpu_cap(X86_FEATURE_RDRAND); -+} -+ -+static int __init cf_check parse_xen_cpuid(const char *s) -+{ -+ return parse_cpuid(s, _parse_xen_cpuid); -+} -+custom_param("cpuid", parse_xen_cpuid); -+ -+static bool __initdata dom0_cpuid_cmdline; -+static uint32_t __initdata dom0_enable_feat[FSCAPINTS]; -+static uint32_t __initdata dom0_disable_feat[FSCAPINTS]; -+ -+static void __init cf_check _parse_dom0_cpuid(unsigned int feat, bool val) -+{ -+ __set_bit (feat, val ? dom0_enable_feat : dom0_disable_feat); -+ __clear_bit(feat, val ? dom0_disable_feat : dom0_enable_feat ); -+} -+ -+static int __init cf_check parse_dom0_cpuid(const char *s) -+{ -+ dom0_cpuid_cmdline = true; -+ -+ return parse_cpuid(s, _parse_dom0_cpuid); -+} -+custom_param("dom0-cpuid", parse_dom0_cpuid); -+ -+#define EMPTY_LEAF ((struct cpuid_leaf){}) -+static void zero_leaves(struct cpuid_leaf *l, -+ unsigned int first, unsigned int last) -+{ -+ memset(&l[first], 0, sizeof(*l) * (last - first + 1)); -+} -+ -+static void sanitise_featureset(uint32_t *fs) -+{ -+ /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ -+ uint32_t disabled_features[ -+ ROUNDUP(FSCAPINTS, sizeof(unsigned long)/sizeof(uint32_t))] = {}; -+ unsigned int i; -+ -+ for ( i = 0; i < FSCAPINTS; ++i ) -+ { -+ /* Clamp to known mask. */ -+ fs[i] &= known_features[i]; -+ -+ /* -+ * Identify which features with deep dependencies have been -+ * disabled. -+ */ -+ disabled_features[i] = ~fs[i] & deep_features[i]; -+ } -+ -+ for_each_set_bit(i, (void *)disabled_features, -+ sizeof(disabled_features) * 8) -+ { -+ const uint32_t *dfs = x86_cpuid_lookup_deep_deps(i); -+ unsigned int j; -+ -+ ASSERT(dfs); /* deep_features[] should guarentee this. 
*/ -+ -+ for ( j = 0; j < FSCAPINTS; ++j ) -+ { -+ fs[j] &= ~dfs[j]; -+ disabled_features[j] &= ~dfs[j]; -+ } -+ } -+} -+ -+static void recalculate_xstate(struct cpu_policy *p) -+{ -+ uint64_t xstates = XSTATE_FP_SSE; -+ uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; -+ unsigned int i, Da1 = p->xstate.Da1; -+ -+ /* -+ * The Da1 leaf is the only piece of information preserved in the common -+ * case. Everything else is derived from other feature state. -+ */ -+ memset(&p->xstate, 0, sizeof(p->xstate)); -+ -+ if ( !p->basic.xsave ) -+ return; -+ -+ if ( p->basic.avx ) -+ { -+ xstates |= X86_XCR0_YMM; -+ xstate_size = max(xstate_size, -+ xstate_offsets[X86_XCR0_YMM_POS] + -+ xstate_sizes[X86_XCR0_YMM_POS]); -+ } -+ -+ if ( p->feat.mpx ) -+ { -+ xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR; -+ xstate_size = max(xstate_size, -+ xstate_offsets[X86_XCR0_BNDCSR_POS] + -+ xstate_sizes[X86_XCR0_BNDCSR_POS]); -+ } -+ -+ if ( p->feat.avx512f ) -+ { -+ xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM; -+ xstate_size = max(xstate_size, -+ xstate_offsets[X86_XCR0_HI_ZMM_POS] + -+ xstate_sizes[X86_XCR0_HI_ZMM_POS]); -+ } -+ -+ if ( p->feat.pku ) -+ { -+ xstates |= X86_XCR0_PKRU; -+ xstate_size = max(xstate_size, -+ xstate_offsets[X86_XCR0_PKRU_POS] + -+ xstate_sizes[X86_XCR0_PKRU_POS]); -+ } -+ -+ p->xstate.max_size = xstate_size; -+ p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; -+ p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; -+ -+ p->xstate.Da1 = Da1; -+ if ( p->xstate.xsaves ) -+ { -+ p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; -+ p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; -+ } -+ else -+ xstates &= ~XSTATE_XSAVES_ONLY; -+ -+ for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.comp)); ++i ) -+ { -+ uint64_t curr_xstate = 1ul << i; -+ -+ if ( !(xstates & curr_xstate) ) -+ continue; -+ -+ p->xstate.comp[i].size = xstate_sizes[i]; -+ p->xstate.comp[i].offset = xstate_offsets[i]; -+ p->xstate.comp[i].xss = curr_xstate & XSTATE_XSAVES_ONLY; -+ p->xstate.comp[i].align = curr_xstate & xstate_align; -+ } -+} -+ -+/* -+ * Misc adjustments to the policy. Mostly clobbering reserved fields and -+ * duplicating shared fields. Intentionally hidden fields are annotated. -+ */ -+static void recalculate_misc(struct cpu_policy *p) -+{ -+ p->basic.raw_fms &= 0x0fff0fff; /* Clobber Processor Type on Intel. */ -+ p->basic.apic_id = 0; /* Dynamic. */ -+ -+ p->basic.raw[0x5] = EMPTY_LEAF; /* MONITOR not exposed to guests. */ -+ p->basic.raw[0x6] = EMPTY_LEAF; /* Therm/Power not exposed to guests. */ -+ -+ p->basic.raw[0x8] = EMPTY_LEAF; -+ -+ /* TODO: Rework topology logic. */ -+ memset(p->topo.raw, 0, sizeof(p->topo.raw)); -+ -+ p->basic.raw[0xc] = EMPTY_LEAF; -+ -+ p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; -+ -+ /* Most of Power/RAS hidden from guests. */ -+ p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0; -+ -+ p->extd.raw[0x8].d = 0; -+ -+ switch ( p->x86_vendor ) -+ { -+ case X86_VENDOR_INTEL: -+ p->basic.l2_nr_queries = 1; /* Fixed to 1 query. */ -+ p->basic.raw[0x3] = EMPTY_LEAF; /* PSN - always hidden. */ -+ p->basic.raw[0x9] = EMPTY_LEAF; /* DCA - always hidden. 
*/ -+ -+ p->extd.vendor_ebx = 0; -+ p->extd.vendor_ecx = 0; -+ p->extd.vendor_edx = 0; -+ -+ p->extd.raw[0x1].a = p->extd.raw[0x1].b = 0; -+ -+ p->extd.raw[0x5] = EMPTY_LEAF; -+ p->extd.raw[0x6].a = p->extd.raw[0x6].b = p->extd.raw[0x6].d = 0; -+ -+ p->extd.raw[0x8].a &= 0x0000ffff; -+ p->extd.raw[0x8].c = 0; -+ break; -+ -+ case X86_VENDOR_AMD: -+ case X86_VENDOR_HYGON: -+ zero_leaves(p->basic.raw, 0x2, 0x3); -+ memset(p->cache.raw, 0, sizeof(p->cache.raw)); -+ zero_leaves(p->basic.raw, 0x9, 0xa); -+ -+ p->extd.vendor_ebx = p->basic.vendor_ebx; -+ p->extd.vendor_ecx = p->basic.vendor_ecx; -+ p->extd.vendor_edx = p->basic.vendor_edx; -+ -+ p->extd.raw_fms = p->basic.raw_fms; -+ p->extd.raw[0x1].b &= 0xff00ffff; -+ p->extd.e1d |= p->basic._1d & CPUID_COMMON_1D_FEATURES; -+ -+ p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. */ -+ p->extd.raw[0x8].c &= 0x0003f0ff; -+ -+ p->extd.raw[0x9] = EMPTY_LEAF; -+ -+ zero_leaves(p->extd.raw, 0xb, 0x18); -+ -+ /* 0x19 - TLB details. Pass through. */ -+ /* 0x1a - Perf hints. Pass through. */ -+ -+ p->extd.raw[0x1b] = EMPTY_LEAF; /* IBS - not supported. */ -+ p->extd.raw[0x1c] = EMPTY_LEAF; /* LWP - not supported. */ -+ p->extd.raw[0x1d] = EMPTY_LEAF; /* TopoExt Cache */ -+ p->extd.raw[0x1e] = EMPTY_LEAF; /* TopoExt APIC ID/Core/Node */ -+ p->extd.raw[0x1f] = EMPTY_LEAF; /* SEV */ -+ p->extd.raw[0x20] = EMPTY_LEAF; /* Platform QoS */ -+ break; -+ } -+} -+ - static void __init calculate_raw_policy(void) - { - struct cpu_policy *p = &raw_cpu_policy; - -+ x86_cpuid_policy_fill_native(p); -+ -+ /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ -+ ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); -+ - /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ - /* Was already added by probe_cpuid_faulting() */ - -@@ -34,9 +362,50 @@ static void __init calculate_raw_policy(void) - static void __init calculate_host_policy(void) - { - struct cpu_policy *p = &host_cpu_policy; -+ unsigned int max_extd_leaf; - - *p = raw_cpu_policy; - -+ p->basic.max_leaf = -+ min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); -+ p->feat.max_subleaf = -+ min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1); -+ -+ max_extd_leaf = p->extd.max_leaf; -+ -+ /* -+ * For AMD/Hygon hardware before Zen3, we unilaterally modify LFENCE to be -+ * dispatch serialising for Spectre mitigations. Extend max_extd_leaf -+ * beyond what hardware supports, to include the feature leaf containing -+ * this information. -+ */ -+ if ( cpu_has_lfence_dispatch ) -+ max_extd_leaf = max(max_extd_leaf, 0x80000021); -+ -+ p->extd.max_leaf = 0x80000000 | min_t(uint32_t, max_extd_leaf & 0xffff, -+ ARRAY_SIZE(p->extd.raw) - 1); -+ -+ x86_cpu_featureset_to_policy(boot_cpu_data.x86_capability, p); -+ recalculate_xstate(p); -+ recalculate_misc(p); -+ -+ /* When vPMU is disabled, drop it from the host policy. */ -+ if ( vpmu_mode == XENPMU_MODE_OFF ) -+ p->basic.raw[0xa] = EMPTY_LEAF; -+ -+ if ( p->extd.svm ) -+ { -+ /* Clamp to implemented features which require hardware support. */ -+ p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | -+ (1u << SVM_FEATURE_LBRV) | -+ (1u << SVM_FEATURE_NRIPS) | -+ (1u << SVM_FEATURE_PAUSEFILTER) | -+ (1u << SVM_FEATURE_DECODEASSISTS)); -+ /* Enable features which are always emulated. 
*/ -+ p->extd.raw[0xa].d |= ((1u << SVM_FEATURE_VMCBCLEAN) | -+ (1u << SVM_FEATURE_TSCRATEMSR)); -+ } -+ - /* 0x000000ce MSR_INTEL_PLATFORM_INFO */ - /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */ - p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; -@@ -51,11 +420,88 @@ static void __init calculate_host_policy(void) - ARCH_CAPS_PBRSB_NO); - } - -+static void __init guest_common_default_feature_adjustments(uint32_t *fs) -+{ -+ /* -+ * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS -+ * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to -+ * compensate. -+ * -+ * Mitigate by hiding RDRAND from guests by default, unless explicitly -+ * overridden on the Xen command line (cpuid=rdrand). Irrespective of the -+ * default setting, guests can use RDRAND if explicitly enabled -+ * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were -+ * previously using RDRAND can migrate in. -+ */ -+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && -+ boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x3a && -+ cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) -+ __clear_bit(X86_FEATURE_RDRAND, fs); -+ -+ /* -+ * On certain hardware, speculative or errata workarounds can result in -+ * TSX being placed in "force-abort" mode, where it doesn't actually -+ * function as expected, but is technically compatible with the ISA. -+ * -+ * Do not advertise RTM to guests by default if it won't actually work. -+ */ -+ if ( rtm_disabled ) -+ __clear_bit(X86_FEATURE_RTM, fs); -+} -+ -+static void __init guest_common_feature_adjustments(uint32_t *fs) -+{ -+ /* Unconditionally claim to be able to set the hypervisor bit. */ -+ __set_bit(X86_FEATURE_HYPERVISOR, fs); -+ -+ /* -+ * If IBRS is offered to the guest, unconditionally offer STIBP. It is a -+ * nop on non-HT hardware, and has this behaviour to make heterogeneous -+ * setups easier to manage. -+ */ -+ if ( test_bit(X86_FEATURE_IBRSB, fs) ) -+ __set_bit(X86_FEATURE_STIBP, fs); -+ if ( test_bit(X86_FEATURE_IBRS, fs) ) -+ __set_bit(X86_FEATURE_AMD_STIBP, fs); -+ -+ /* -+ * On hardware which supports IBRS/IBPB, we can offer IBPB independently -+ * of IBRS by using the AMD feature bit. An administrator may wish for -+ * performance reasons to offer IBPB without IBRS. -+ */ -+ if ( host_cpu_policy.feat.ibrsb ) -+ __set_bit(X86_FEATURE_IBPB, fs); -+} -+ - static void __init calculate_pv_max_policy(void) - { - struct cpu_policy *p = &pv_max_cpu_policy; -+ uint32_t fs[FSCAPINTS]; -+ unsigned int i; - - *p = host_cpu_policy; -+ x86_cpu_policy_to_featureset(p, fs); -+ -+ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) -+ fs[i] &= pv_max_featuremask[i]; -+ -+ /* -+ * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional -+ * availability, or admin choice), hide the feature. -+ */ -+ if ( !boot_cpu_has(X86_FEATURE_SC_MSR_PV) ) -+ { -+ __clear_bit(X86_FEATURE_IBRSB, fs); -+ __clear_bit(X86_FEATURE_IBRS, fs); -+ } -+ -+ guest_common_feature_adjustments(fs); -+ -+ sanitise_featureset(fs); -+ x86_cpu_featureset_to_policy(fs, p); -+ recalculate_xstate(p); -+ -+ p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ - - p->arch_caps.raw = 0; /* Not supported yet. 
*/ - } -@@ -63,15 +509,103 @@ static void __init calculate_pv_max_policy(void) - static void __init calculate_pv_def_policy(void) - { - struct cpu_policy *p = &pv_def_cpu_policy; -+ uint32_t fs[FSCAPINTS]; -+ unsigned int i; - - *p = pv_max_cpu_policy; -+ x86_cpu_policy_to_featureset(p, fs); -+ -+ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) -+ fs[i] &= pv_def_featuremask[i]; -+ -+ guest_common_feature_adjustments(fs); -+ guest_common_default_feature_adjustments(fs); -+ -+ sanitise_featureset(fs); -+ x86_cpu_featureset_to_policy(fs, p); -+ recalculate_xstate(p); - } - - static void __init calculate_hvm_max_policy(void) - { - struct cpu_policy *p = &hvm_max_cpu_policy; -+ uint32_t fs[FSCAPINTS]; -+ unsigned int i; -+ const uint32_t *mask; - - *p = host_cpu_policy; -+ x86_cpu_policy_to_featureset(p, fs); -+ -+ mask = hvm_hap_supported() ? -+ hvm_hap_max_featuremask : hvm_shadow_max_featuremask; -+ -+ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) -+ fs[i] &= mask[i]; -+ -+ /* -+ * Xen can provide an (x2)APIC emulation to HVM guests even if the host's -+ * (x2)APIC isn't enabled. -+ */ -+ __set_bit(X86_FEATURE_APIC, fs); -+ __set_bit(X86_FEATURE_X2APIC, fs); -+ -+ /* -+ * We don't support EFER.LMSLE at all. AMD has dropped the feature from -+ * hardware and allocated a CPUID bit to indicate its absence. -+ */ -+ __set_bit(X86_FEATURE_NO_LMSL, fs); -+ -+ /* -+ * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in -+ * long mode (and init_amd() has cleared it out of host capabilities), but -+ * HVM guests are able if running in protected mode. -+ */ -+ if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) && -+ raw_cpu_policy.basic.sep ) -+ __set_bit(X86_FEATURE_SEP, fs); -+ -+ /* -+ * VIRT_SSBD is exposed in the default policy as a result of -+ * amd_virt_spec_ctrl being set, it also needs exposing in the max policy. -+ */ -+ if ( amd_virt_spec_ctrl ) -+ __set_bit(X86_FEATURE_VIRT_SSBD, fs); -+ -+ /* -+ * If Xen isn't virtualising MSR_SPEC_CTRL for HVM guests (functional -+ * availability, or admin choice), hide the feature. -+ */ -+ if ( !boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ) -+ { -+ __clear_bit(X86_FEATURE_IBRSB, fs); -+ __clear_bit(X86_FEATURE_IBRS, fs); -+ } -+ else if ( boot_cpu_has(X86_FEATURE_AMD_SSBD) ) -+ /* -+ * If SPEC_CTRL.SSBD is available VIRT_SPEC_CTRL.SSBD can be exposed -+ * and implemented using the former. Expose in the max policy only as -+ * the preference is for guests to use SPEC_CTRL.SSBD if available. -+ */ -+ __set_bit(X86_FEATURE_VIRT_SSBD, fs); -+ -+ /* -+ * With VT-x, some features are only supported by Xen if dedicated -+ * hardware support is also available. -+ */ -+ if ( cpu_has_vmx ) -+ { -+ if ( !cpu_has_vmx_mpx ) -+ __clear_bit(X86_FEATURE_MPX, fs); -+ -+ if ( !cpu_has_vmx_xsaves ) -+ __clear_bit(X86_FEATURE_XSAVES, fs); -+ } -+ -+ guest_common_feature_adjustments(fs); -+ -+ sanitise_featureset(fs); -+ x86_cpu_featureset_to_policy(fs, p); -+ recalculate_xstate(p); - - /* It's always possible to emulate CPUID faulting for HVM guests */ - p->platform_info.cpuid_faulting = true; -@@ -82,8 +616,32 @@ static void __init calculate_hvm_max_policy(void) - static void __init calculate_hvm_def_policy(void) - { - struct cpu_policy *p = &hvm_def_cpu_policy; -+ uint32_t fs[FSCAPINTS]; -+ unsigned int i; -+ const uint32_t *mask; - - *p = hvm_max_cpu_policy; -+ x86_cpu_policy_to_featureset(p, fs); -+ -+ mask = hvm_hap_supported() ? 
-+ hvm_hap_def_featuremask : hvm_shadow_def_featuremask; -+ -+ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) -+ fs[i] &= mask[i]; -+ -+ guest_common_feature_adjustments(fs); -+ guest_common_default_feature_adjustments(fs); -+ -+ /* -+ * Only expose VIRT_SSBD if AMD_SSBD is not available, and thus -+ * amd_virt_spec_ctrl is set. -+ */ -+ if ( amd_virt_spec_ctrl ) -+ __set_bit(X86_FEATURE_VIRT_SSBD, fs); -+ -+ sanitise_featureset(fs); -+ x86_cpu_featureset_to_policy(fs, p); -+ recalculate_xstate(p); - } - - void __init init_guest_cpu_policies(void) -@@ -149,3 +707,188 @@ int init_domain_cpu_policy(struct domain *d) - - return 0; - } -+ -+void recalculate_cpuid_policy(struct domain *d) -+{ -+ struct cpu_policy *p = d->arch.cpuid; -+ const struct cpu_policy *max = is_pv_domain(d) -+ ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) -+ : (IS_ENABLED(CONFIG_HVM) ? &hvm_max_cpu_policy : NULL); -+ uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; -+ unsigned int i; -+ -+ if ( !max ) -+ { -+ ASSERT_UNREACHABLE(); -+ return; -+ } -+ -+ p->x86_vendor = x86_cpuid_lookup_vendor( -+ p->basic.vendor_ebx, p->basic.vendor_ecx, p->basic.vendor_edx); -+ -+ p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf); -+ p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf); -+ p->extd.max_leaf = 0x80000000 | min(p->extd.max_leaf & 0xffff, -+ ((p->x86_vendor & (X86_VENDOR_AMD | -+ X86_VENDOR_HYGON)) -+ ? CPUID_GUEST_NR_EXTD_AMD -+ : CPUID_GUEST_NR_EXTD_INTEL) - 1); -+ -+ x86_cpu_policy_to_featureset(p, fs); -+ x86_cpu_policy_to_featureset(max, max_fs); -+ -+ if ( is_hvm_domain(d) ) -+ { -+ /* -+ * HVM domains using Shadow paging have further restrictions on their -+ * available paging features. -+ */ -+ if ( !hap_enabled(d) ) -+ { -+ for ( i = 0; i < ARRAY_SIZE(max_fs); i++ ) -+ max_fs[i] &= hvm_shadow_max_featuremask[i]; -+ } -+ -+ /* Hide nested-virt if it hasn't been explicitly configured. */ -+ if ( !nestedhvm_enabled(d) ) -+ { -+ __clear_bit(X86_FEATURE_VMX, max_fs); -+ __clear_bit(X86_FEATURE_SVM, max_fs); -+ } -+ } -+ -+ /* -+ * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits -+ * affect how to interpret topology information in other cpuid leaves. -+ */ -+ __set_bit(X86_FEATURE_HTT, max_fs); -+ __set_bit(X86_FEATURE_X2APIC, max_fs); -+ __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); -+ -+ /* -+ * 32bit PV domains can't use any Long Mode features, and cannot use -+ * SYSCALL on non-AMD hardware. -+ */ -+ if ( is_pv_32bit_domain(d) ) -+ { -+ __clear_bit(X86_FEATURE_LM, max_fs); -+ if ( !(boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) -+ __clear_bit(X86_FEATURE_SYSCALL, max_fs); -+ } -+ -+ /* Clamp the toolstacks choices to reality. */ -+ for ( i = 0; i < ARRAY_SIZE(fs); i++ ) -+ fs[i] &= max_fs[i]; -+ -+ if ( p->basic.max_leaf < XSTATE_CPUID ) -+ __clear_bit(X86_FEATURE_XSAVE, fs); -+ -+ sanitise_featureset(fs); -+ -+ /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ -+ fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | -+ cpufeat_mask(X86_FEATURE_NO_FPU_SEL)); -+ fs[FEATURESET_7b0] |= (host_cpu_policy.feat._7b0 & -+ (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | -+ cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); -+ -+ x86_cpu_featureset_to_policy(fs, p); -+ -+ /* Pass host cacheline size through to guests. 
*/ -+ p->basic.clflush_size = max->basic.clflush_size; -+ -+ p->extd.maxphysaddr = min(p->extd.maxphysaddr, max->extd.maxphysaddr); -+ p->extd.maxphysaddr = min_t(uint8_t, p->extd.maxphysaddr, -+ paging_max_paddr_bits(d)); -+ p->extd.maxphysaddr = max_t(uint8_t, p->extd.maxphysaddr, -+ (p->basic.pae || p->basic.pse36) ? 36 : 32); -+ -+ p->extd.maxlinaddr = p->extd.lm ? 48 : 32; -+ -+ recalculate_xstate(p); -+ recalculate_misc(p); -+ -+ for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) -+ { -+ if ( p->cache.subleaf[i].type >= 1 && -+ p->cache.subleaf[i].type <= 3 ) -+ { -+ /* Subleaf has a valid cache type. Zero reserved fields. */ -+ p->cache.raw[i].a &= 0xffffc3ffu; -+ p->cache.raw[i].d &= 0x00000007u; -+ } -+ else -+ { -+ /* Subleaf is not valid. Zero the rest of the union. */ -+ zero_leaves(p->cache.raw, i, ARRAY_SIZE(p->cache.raw) - 1); -+ break; -+ } -+ } -+ -+ if ( vpmu_mode == XENPMU_MODE_OFF || -+ ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) -+ p->basic.raw[0xa] = EMPTY_LEAF; -+ -+ if ( !p->extd.svm ) -+ p->extd.raw[0xa] = EMPTY_LEAF; -+ -+ if ( !p->extd.page1gb ) -+ p->extd.raw[0x19] = EMPTY_LEAF; -+} -+ -+void __init init_dom0_cpuid_policy(struct domain *d) -+{ -+ struct cpu_policy *p = d->arch.cpuid; -+ -+ /* dom0 can't migrate. Give it ITSC if available. */ -+ if ( cpu_has_itsc ) -+ p->extd.itsc = true; -+ -+ /* -+ * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, -+ * so dom0 can turn off workarounds as appropriate. Temporary, until the -+ * domain policy logic gains a better understanding of MSRs. -+ */ -+ if ( cpu_has_arch_caps ) -+ p->feat.arch_caps = true; -+ -+ /* Apply dom0-cpuid= command line settings, if provided. */ -+ if ( dom0_cpuid_cmdline ) -+ { -+ uint32_t fs[FSCAPINTS]; -+ unsigned int i; -+ -+ x86_cpu_policy_to_featureset(p, fs); -+ -+ for ( i = 0; i < ARRAY_SIZE(fs); ++i ) -+ { -+ fs[i] |= dom0_enable_feat [i]; -+ fs[i] &= ~dom0_disable_feat[i]; -+ } -+ -+ x86_cpu_featureset_to_policy(fs, p); -+ -+ recalculate_cpuid_policy(d); -+ } -+} -+ -+static void __init __maybe_unused build_assertions(void) -+{ -+ BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS); -+ BUILD_BUG_ON(ARRAY_SIZE(pv_max_featuremask) != FSCAPINTS); -+ BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_max_featuremask) != FSCAPINTS); -+ BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_max_featuremask) != FSCAPINTS); -+ BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS); -+ -+ /* Find some more clever allocation scheme if this trips. 
*/ -+ BUILD_BUG_ON(sizeof(struct cpu_policy) > PAGE_SIZE); -+ -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.basic) != -+ sizeof(raw_cpu_policy.basic.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.feat) != -+ sizeof(raw_cpu_policy.feat.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) != -+ sizeof(raw_cpu_policy.xstate.raw)); -+ BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) != -+ sizeof(raw_cpu_policy.extd.raw)); -+} -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index ef96af738af0..3f20c342fde8 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -1,629 +1,14 @@ --#include --#include --#include - #include --#include --#include -+#include -+ -+#include -+ - #include - #include --#include --#include --#include - #include --#include --#include --#include - #include - --const uint32_t known_features[] = INIT_KNOWN_FEATURES; -- --static const uint32_t __initconst pv_max_featuremask[] = INIT_PV_MAX_FEATURES; --static const uint32_t hvm_shadow_max_featuremask[] = INIT_HVM_SHADOW_MAX_FEATURES; --static const uint32_t __initconst hvm_hap_max_featuremask[] = -- INIT_HVM_HAP_MAX_FEATURES; --static const uint32_t __initconst pv_def_featuremask[] = INIT_PV_DEF_FEATURES; --static const uint32_t __initconst hvm_shadow_def_featuremask[] = -- INIT_HVM_SHADOW_DEF_FEATURES; --static const uint32_t __initconst hvm_hap_def_featuremask[] = -- INIT_HVM_HAP_DEF_FEATURES; --static const uint32_t deep_features[] = INIT_DEEP_FEATURES; -- --static const struct feature_name { -- const char *name; -- unsigned int bit; --} feature_names[] __initconstrel = INIT_FEATURE_NAMES; -- --/* -- * Parse a list of cpuid feature names -> bool, calling the callback for any -- * matches found. -- * -- * always_inline, because this is init code only and we really don't want a -- * function pointer call in the middle of the loop. -- */ --static int __init always_inline parse_cpuid( -- const char *s, void (*callback)(unsigned int feat, bool val)) --{ -- const char *ss; -- int val, rc = 0; -- -- do { -- const struct feature_name *lhs, *rhs, *mid = NULL /* GCC... */; -- const char *feat; -- -- ss = strchr(s, ','); -- if ( !ss ) -- ss = strchr(s, '\0'); -- -- /* Skip the 'no-' prefix for name comparisons. */ -- feat = s; -- if ( strncmp(s, "no-", 3) == 0 ) -- feat += 3; -- -- /* (Re)initalise lhs and rhs for binary search. */ -- lhs = feature_names; -- rhs = feature_names + ARRAY_SIZE(feature_names); -- -- while ( lhs < rhs ) -- { -- int res; -- -- mid = lhs + (rhs - lhs) / 2; -- res = cmdline_strcmp(feat, mid->name); -- -- if ( res < 0 ) -- { -- rhs = mid; -- continue; -- } -- if ( res > 0 ) -- { -- lhs = mid + 1; -- continue; -- } -- -- if ( (val = parse_boolean(mid->name, s, ss)) >= 0 ) -- { -- callback(mid->bit, val); -- mid = NULL; -- } -- -- break; -- } -- -- /* -- * Mid being NULL means that the name and boolean were successfully -- * identified. Everything else is an error. 
-- */ -- if ( mid ) -- rc = -EINVAL; -- -- s = ss + 1; -- } while ( *ss ); -- -- return rc; --} -- --static void __init cf_check _parse_xen_cpuid(unsigned int feat, bool val) --{ -- if ( !val ) -- setup_clear_cpu_cap(feat); -- else if ( feat == X86_FEATURE_RDRAND && -- (cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_RDRAND)) ) -- setup_force_cpu_cap(X86_FEATURE_RDRAND); --} -- --static int __init cf_check parse_xen_cpuid(const char *s) --{ -- return parse_cpuid(s, _parse_xen_cpuid); --} --custom_param("cpuid", parse_xen_cpuid); -- --static bool __initdata dom0_cpuid_cmdline; --static uint32_t __initdata dom0_enable_feat[FSCAPINTS]; --static uint32_t __initdata dom0_disable_feat[FSCAPINTS]; -- --static void __init cf_check _parse_dom0_cpuid(unsigned int feat, bool val) --{ -- __set_bit (feat, val ? dom0_enable_feat : dom0_disable_feat); -- __clear_bit(feat, val ? dom0_disable_feat : dom0_enable_feat ); --} -- --static int __init cf_check parse_dom0_cpuid(const char *s) --{ -- dom0_cpuid_cmdline = true; -- -- return parse_cpuid(s, _parse_dom0_cpuid); --} --custom_param("dom0-cpuid", parse_dom0_cpuid); -- - #define EMPTY_LEAF ((struct cpuid_leaf){}) --static void zero_leaves(struct cpuid_leaf *l, -- unsigned int first, unsigned int last) --{ -- memset(&l[first], 0, sizeof(*l) * (last - first + 1)); --} -- --static void sanitise_featureset(uint32_t *fs) --{ -- /* for_each_set_bit() uses unsigned longs. Extend with zeroes. */ -- uint32_t disabled_features[ -- ROUNDUP(FSCAPINTS, sizeof(unsigned long)/sizeof(uint32_t))] = {}; -- unsigned int i; -- -- for ( i = 0; i < FSCAPINTS; ++i ) -- { -- /* Clamp to known mask. */ -- fs[i] &= known_features[i]; -- -- /* -- * Identify which features with deep dependencies have been -- * disabled. -- */ -- disabled_features[i] = ~fs[i] & deep_features[i]; -- } -- -- for_each_set_bit(i, (void *)disabled_features, -- sizeof(disabled_features) * 8) -- { -- const uint32_t *dfs = x86_cpuid_lookup_deep_deps(i); -- unsigned int j; -- -- ASSERT(dfs); /* deep_features[] should guarentee this. */ -- -- for ( j = 0; j < FSCAPINTS; ++j ) -- { -- fs[j] &= ~dfs[j]; -- disabled_features[j] &= ~dfs[j]; -- } -- } --} -- --static void recalculate_xstate(struct cpuid_policy *p) --{ -- uint64_t xstates = XSTATE_FP_SSE; -- uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; -- unsigned int i, Da1 = p->xstate.Da1; -- -- /* -- * The Da1 leaf is the only piece of information preserved in the common -- * case. Everything else is derived from other feature state. 
-- */ -- memset(&p->xstate, 0, sizeof(p->xstate)); -- -- if ( !p->basic.xsave ) -- return; -- -- if ( p->basic.avx ) -- { -- xstates |= X86_XCR0_YMM; -- xstate_size = max(xstate_size, -- xstate_offsets[X86_XCR0_YMM_POS] + -- xstate_sizes[X86_XCR0_YMM_POS]); -- } -- -- if ( p->feat.mpx ) -- { -- xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR; -- xstate_size = max(xstate_size, -- xstate_offsets[X86_XCR0_BNDCSR_POS] + -- xstate_sizes[X86_XCR0_BNDCSR_POS]); -- } -- -- if ( p->feat.avx512f ) -- { -- xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM; -- xstate_size = max(xstate_size, -- xstate_offsets[X86_XCR0_HI_ZMM_POS] + -- xstate_sizes[X86_XCR0_HI_ZMM_POS]); -- } -- -- if ( p->feat.pku ) -- { -- xstates |= X86_XCR0_PKRU; -- xstate_size = max(xstate_size, -- xstate_offsets[X86_XCR0_PKRU_POS] + -- xstate_sizes[X86_XCR0_PKRU_POS]); -- } -- -- p->xstate.max_size = xstate_size; -- p->xstate.xcr0_low = xstates & ~XSTATE_XSAVES_ONLY; -- p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32; -- -- p->xstate.Da1 = Da1; -- if ( p->xstate.xsaves ) -- { -- p->xstate.xss_low = xstates & XSTATE_XSAVES_ONLY; -- p->xstate.xss_high = (xstates & XSTATE_XSAVES_ONLY) >> 32; -- } -- else -- xstates &= ~XSTATE_XSAVES_ONLY; -- -- for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.comp)); ++i ) -- { -- uint64_t curr_xstate = 1ul << i; -- -- if ( !(xstates & curr_xstate) ) -- continue; -- -- p->xstate.comp[i].size = xstate_sizes[i]; -- p->xstate.comp[i].offset = xstate_offsets[i]; -- p->xstate.comp[i].xss = curr_xstate & XSTATE_XSAVES_ONLY; -- p->xstate.comp[i].align = curr_xstate & xstate_align; -- } --} -- --/* -- * Misc adjustments to the policy. Mostly clobbering reserved fields and -- * duplicating shared fields. Intentionally hidden fields are annotated. -- */ --static void recalculate_misc(struct cpuid_policy *p) --{ -- p->basic.raw_fms &= 0x0fff0fff; /* Clobber Processor Type on Intel. */ -- p->basic.apic_id = 0; /* Dynamic. */ -- -- p->basic.raw[0x5] = EMPTY_LEAF; /* MONITOR not exposed to guests. */ -- p->basic.raw[0x6] = EMPTY_LEAF; /* Therm/Power not exposed to guests. */ -- -- p->basic.raw[0x8] = EMPTY_LEAF; -- -- /* TODO: Rework topology logic. */ -- memset(p->topo.raw, 0, sizeof(p->topo.raw)); -- -- p->basic.raw[0xc] = EMPTY_LEAF; -- -- p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; -- -- /* Most of Power/RAS hidden from guests. */ -- p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0; -- -- p->extd.raw[0x8].d = 0; -- -- switch ( p->x86_vendor ) -- { -- case X86_VENDOR_INTEL: -- p->basic.l2_nr_queries = 1; /* Fixed to 1 query. */ -- p->basic.raw[0x3] = EMPTY_LEAF; /* PSN - always hidden. */ -- p->basic.raw[0x9] = EMPTY_LEAF; /* DCA - always hidden. */ -- -- p->extd.vendor_ebx = 0; -- p->extd.vendor_ecx = 0; -- p->extd.vendor_edx = 0; -- -- p->extd.raw[0x1].a = p->extd.raw[0x1].b = 0; -- -- p->extd.raw[0x5] = EMPTY_LEAF; -- p->extd.raw[0x6].a = p->extd.raw[0x6].b = p->extd.raw[0x6].d = 0; -- -- p->extd.raw[0x8].a &= 0x0000ffff; -- p->extd.raw[0x8].c = 0; -- break; -- -- case X86_VENDOR_AMD: -- case X86_VENDOR_HYGON: -- zero_leaves(p->basic.raw, 0x2, 0x3); -- memset(p->cache.raw, 0, sizeof(p->cache.raw)); -- zero_leaves(p->basic.raw, 0x9, 0xa); -- -- p->extd.vendor_ebx = p->basic.vendor_ebx; -- p->extd.vendor_ecx = p->basic.vendor_ecx; -- p->extd.vendor_edx = p->basic.vendor_edx; -- -- p->extd.raw_fms = p->basic.raw_fms; -- p->extd.raw[0x1].b &= 0xff00ffff; -- p->extd.e1d |= p->basic._1d & CPUID_COMMON_1D_FEATURES; -- -- p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. 
*/ -- p->extd.raw[0x8].c &= 0x0003f0ff; -- -- p->extd.raw[0x9] = EMPTY_LEAF; -- -- zero_leaves(p->extd.raw, 0xb, 0x18); -- -- /* 0x19 - TLB details. Pass through. */ -- /* 0x1a - Perf hints. Pass through. */ -- -- p->extd.raw[0x1b] = EMPTY_LEAF; /* IBS - not supported. */ -- p->extd.raw[0x1c] = EMPTY_LEAF; /* LWP - not supported. */ -- p->extd.raw[0x1d] = EMPTY_LEAF; /* TopoExt Cache */ -- p->extd.raw[0x1e] = EMPTY_LEAF; /* TopoExt APIC ID/Core/Node */ -- p->extd.raw[0x1f] = EMPTY_LEAF; /* SEV */ -- p->extd.raw[0x20] = EMPTY_LEAF; /* Platform QoS */ -- break; -- } --} -- --static void __init calculate_raw_policy(void) --{ -- struct cpuid_policy *p = &raw_cpu_policy; -- -- x86_cpuid_policy_fill_native(p); -- -- /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ -- ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); --} -- --static void __init calculate_host_policy(void) --{ -- struct cpuid_policy *p = &host_cpu_policy; -- unsigned int max_extd_leaf; -- -- *p = raw_cpu_policy; -- -- p->basic.max_leaf = -- min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1); -- p->feat.max_subleaf = -- min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1); -- -- max_extd_leaf = p->extd.max_leaf; -- -- /* -- * For AMD/Hygon hardware before Zen3, we unilaterally modify LFENCE to be -- * dispatch serialising for Spectre mitigations. Extend max_extd_leaf -- * beyond what hardware supports, to include the feature leaf containing -- * this information. -- */ -- if ( cpu_has_lfence_dispatch ) -- max_extd_leaf = max(max_extd_leaf, 0x80000021); -- -- p->extd.max_leaf = 0x80000000 | min_t(uint32_t, max_extd_leaf & 0xffff, -- ARRAY_SIZE(p->extd.raw) - 1); -- -- x86_cpu_featureset_to_policy(boot_cpu_data.x86_capability, p); -- recalculate_xstate(p); -- recalculate_misc(p); -- -- /* When vPMU is disabled, drop it from the host policy. */ -- if ( vpmu_mode == XENPMU_MODE_OFF ) -- p->basic.raw[0xa] = EMPTY_LEAF; -- -- if ( p->extd.svm ) -- { -- /* Clamp to implemented features which require hardware support. */ -- p->extd.raw[0xa].d &= ((1u << SVM_FEATURE_NPT) | -- (1u << SVM_FEATURE_LBRV) | -- (1u << SVM_FEATURE_NRIPS) | -- (1u << SVM_FEATURE_PAUSEFILTER) | -- (1u << SVM_FEATURE_DECODEASSISTS)); -- /* Enable features which are always emulated. */ -- p->extd.raw[0xa].d |= ((1u << SVM_FEATURE_VMCBCLEAN) | -- (1u << SVM_FEATURE_TSCRATEMSR)); -- } --} -- --static void __init guest_common_default_feature_adjustments(uint32_t *fs) --{ -- /* -- * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS -- * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to -- * compensate. -- * -- * Mitigate by hiding RDRAND from guests by default, unless explicitly -- * overridden on the Xen command line (cpuid=rdrand). Irrespective of the -- * default setting, guests can use RDRAND if explicitly enabled -- * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were -- * previously using RDRAND can migrate in. -- */ -- if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && -- boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x3a && -- cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) -- __clear_bit(X86_FEATURE_RDRAND, fs); -- -- /* -- * On certain hardware, speculative or errata workarounds can result in -- * TSX being placed in "force-abort" mode, where it doesn't actually -- * function as expected, but is technically compatible with the ISA. -- * -- * Do not advertise RTM to guests by default if it won't actually work. 
-- */ -- if ( rtm_disabled ) -- __clear_bit(X86_FEATURE_RTM, fs); --} -- --static void __init guest_common_feature_adjustments(uint32_t *fs) --{ -- /* Unconditionally claim to be able to set the hypervisor bit. */ -- __set_bit(X86_FEATURE_HYPERVISOR, fs); -- -- /* -- * If IBRS is offered to the guest, unconditionally offer STIBP. It is a -- * nop on non-HT hardware, and has this behaviour to make heterogeneous -- * setups easier to manage. -- */ -- if ( test_bit(X86_FEATURE_IBRSB, fs) ) -- __set_bit(X86_FEATURE_STIBP, fs); -- if ( test_bit(X86_FEATURE_IBRS, fs) ) -- __set_bit(X86_FEATURE_AMD_STIBP, fs); -- -- /* -- * On hardware which supports IBRS/IBPB, we can offer IBPB independently -- * of IBRS by using the AMD feature bit. An administrator may wish for -- * performance reasons to offer IBPB without IBRS. -- */ -- if ( host_cpu_policy.feat.ibrsb ) -- __set_bit(X86_FEATURE_IBPB, fs); --} -- --static void __init calculate_pv_max_policy(void) --{ -- struct cpuid_policy *p = &pv_max_cpu_policy; -- uint32_t pv_featureset[FSCAPINTS]; -- unsigned int i; -- -- *p = host_cpu_policy; -- x86_cpu_policy_to_featureset(p, pv_featureset); -- -- for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) -- pv_featureset[i] &= pv_max_featuremask[i]; -- -- /* -- * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional -- * availability, or admin choice), hide the feature. -- */ -- if ( !boot_cpu_has(X86_FEATURE_SC_MSR_PV) ) -- { -- __clear_bit(X86_FEATURE_IBRSB, pv_featureset); -- __clear_bit(X86_FEATURE_IBRS, pv_featureset); -- } -- -- guest_common_feature_adjustments(pv_featureset); -- -- sanitise_featureset(pv_featureset); -- x86_cpu_featureset_to_policy(pv_featureset, p); -- recalculate_xstate(p); -- -- p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ --} -- --static void __init calculate_pv_def_policy(void) --{ -- struct cpuid_policy *p = &pv_def_cpu_policy; -- uint32_t pv_featureset[FSCAPINTS]; -- unsigned int i; -- -- *p = pv_max_cpu_policy; -- x86_cpu_policy_to_featureset(p, pv_featureset); -- -- for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i ) -- pv_featureset[i] &= pv_def_featuremask[i]; -- -- guest_common_feature_adjustments(pv_featureset); -- guest_common_default_feature_adjustments(pv_featureset); -- -- sanitise_featureset(pv_featureset); -- x86_cpu_featureset_to_policy(pv_featureset, p); -- recalculate_xstate(p); --} -- --static void __init calculate_hvm_max_policy(void) --{ -- struct cpuid_policy *p = &hvm_max_cpu_policy; -- uint32_t hvm_featureset[FSCAPINTS]; -- unsigned int i; -- const uint32_t *hvm_featuremask; -- -- *p = host_cpu_policy; -- x86_cpu_policy_to_featureset(p, hvm_featureset); -- -- hvm_featuremask = hvm_hap_supported() ? -- hvm_hap_max_featuremask : hvm_shadow_max_featuremask; -- -- for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i ) -- hvm_featureset[i] &= hvm_featuremask[i]; -- -- /* -- * Xen can provide an (x2)APIC emulation to HVM guests even if the host's -- * (x2)APIC isn't enabled. -- */ -- __set_bit(X86_FEATURE_APIC, hvm_featureset); -- __set_bit(X86_FEATURE_X2APIC, hvm_featureset); -- -- /* -- * We don't support EFER.LMSLE at all. AMD has dropped the feature from -- * hardware and allocated a CPUID bit to indicate its absence. -- */ -- __set_bit(X86_FEATURE_NO_LMSL, hvm_featureset); -- -- /* -- * On AMD, PV guests are entirely unable to use SYSENTER as Xen runs in -- * long mode (and init_amd() has cleared it out of host capabilities), but -- * HVM guests are able if running in protected mode. 
-- */ -- if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) && -- raw_cpu_policy.basic.sep ) -- __set_bit(X86_FEATURE_SEP, hvm_featureset); -- -- /* -- * VIRT_SSBD is exposed in the default policy as a result of -- * amd_virt_spec_ctrl being set, it also needs exposing in the max policy. -- */ -- if ( amd_virt_spec_ctrl ) -- __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); -- -- /* -- * If Xen isn't virtualising MSR_SPEC_CTRL for HVM guests (functional -- * availability, or admin choice), hide the feature. -- */ -- if ( !boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ) -- { -- __clear_bit(X86_FEATURE_IBRSB, hvm_featureset); -- __clear_bit(X86_FEATURE_IBRS, hvm_featureset); -- } -- else if ( boot_cpu_has(X86_FEATURE_AMD_SSBD) ) -- /* -- * If SPEC_CTRL.SSBD is available VIRT_SPEC_CTRL.SSBD can be exposed -- * and implemented using the former. Expose in the max policy only as -- * the preference is for guests to use SPEC_CTRL.SSBD if available. -- */ -- __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); -- -- /* -- * With VT-x, some features are only supported by Xen if dedicated -- * hardware support is also available. -- */ -- if ( cpu_has_vmx ) -- { -- if ( !cpu_has_vmx_mpx ) -- __clear_bit(X86_FEATURE_MPX, hvm_featureset); -- -- if ( !cpu_has_vmx_xsaves ) -- __clear_bit(X86_FEATURE_XSAVES, hvm_featureset); -- } -- -- guest_common_feature_adjustments(hvm_featureset); -- -- sanitise_featureset(hvm_featureset); -- x86_cpu_featureset_to_policy(hvm_featureset, p); -- recalculate_xstate(p); --} -- --static void __init calculate_hvm_def_policy(void) --{ -- struct cpuid_policy *p = &hvm_def_cpu_policy; -- uint32_t hvm_featureset[FSCAPINTS]; -- unsigned int i; -- const uint32_t *hvm_featuremask; -- -- *p = hvm_max_cpu_policy; -- x86_cpu_policy_to_featureset(p, hvm_featureset); -- -- hvm_featuremask = hvm_hap_supported() ? -- hvm_hap_def_featuremask : hvm_shadow_def_featuremask; -- -- for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i ) -- hvm_featureset[i] &= hvm_featuremask[i]; -- -- guest_common_feature_adjustments(hvm_featureset); -- guest_common_default_feature_adjustments(hvm_featureset); -- -- /* -- * Only expose VIRT_SSBD if AMD_SSBD is not available, and thus -- * amd_virt_spec_ctrl is set. -- */ -- if ( amd_virt_spec_ctrl ) -- __set_bit(X86_FEATURE_VIRT_SSBD, hvm_featureset); -- -- sanitise_featureset(hvm_featureset); -- x86_cpu_featureset_to_policy(hvm_featureset, p); -- recalculate_xstate(p); --} -- --void __init init_guest_cpuid(void) --{ -- calculate_raw_policy(); -- calculate_host_policy(); -- -- if ( IS_ENABLED(CONFIG_PV) ) -- { -- calculate_pv_max_policy(); -- calculate_pv_def_policy(); -- } -- -- if ( hvm_enabled ) -- { -- calculate_hvm_max_policy(); -- calculate_hvm_def_policy(); -- } --} - - bool recheck_cpu_features(unsigned int cpu) - { -@@ -647,170 +32,6 @@ bool recheck_cpu_features(unsigned int cpu) - return okay; - } - --void recalculate_cpuid_policy(struct domain *d) --{ -- struct cpuid_policy *p = d->arch.cpuid; -- const struct cpuid_policy *max = is_pv_domain(d) -- ? (IS_ENABLED(CONFIG_PV) ? &pv_max_cpu_policy : NULL) -- : (IS_ENABLED(CONFIG_HVM) ? 
&hvm_max_cpu_policy : NULL); -- uint32_t fs[FSCAPINTS], max_fs[FSCAPINTS]; -- unsigned int i; -- -- if ( !max ) -- { -- ASSERT_UNREACHABLE(); -- return; -- } -- -- p->x86_vendor = x86_cpuid_lookup_vendor( -- p->basic.vendor_ebx, p->basic.vendor_ecx, p->basic.vendor_edx); -- -- p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf); -- p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf); -- p->extd.max_leaf = 0x80000000 | min(p->extd.max_leaf & 0xffff, -- ((p->x86_vendor & (X86_VENDOR_AMD | -- X86_VENDOR_HYGON)) -- ? CPUID_GUEST_NR_EXTD_AMD -- : CPUID_GUEST_NR_EXTD_INTEL) - 1); -- -- x86_cpu_policy_to_featureset(p, fs); -- x86_cpu_policy_to_featureset(max, max_fs); -- -- if ( is_hvm_domain(d) ) -- { -- /* -- * HVM domains using Shadow paging have further restrictions on their -- * available paging features. -- */ -- if ( !hap_enabled(d) ) -- { -- for ( i = 0; i < ARRAY_SIZE(max_fs); i++ ) -- max_fs[i] &= hvm_shadow_max_featuremask[i]; -- } -- -- /* Hide nested-virt if it hasn't been explicitly configured. */ -- if ( !nestedhvm_enabled(d) ) -- { -- __clear_bit(X86_FEATURE_VMX, max_fs); -- __clear_bit(X86_FEATURE_SVM, max_fs); -- } -- } -- -- /* -- * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits -- * affect how to interpret topology information in other cpuid leaves. -- */ -- __set_bit(X86_FEATURE_HTT, max_fs); -- __set_bit(X86_FEATURE_X2APIC, max_fs); -- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); -- -- /* -- * 32bit PV domains can't use any Long Mode features, and cannot use -- * SYSCALL on non-AMD hardware. -- */ -- if ( is_pv_32bit_domain(d) ) -- { -- __clear_bit(X86_FEATURE_LM, max_fs); -- if ( !(boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) -- __clear_bit(X86_FEATURE_SYSCALL, max_fs); -- } -- -- /* Clamp the toolstacks choices to reality. */ -- for ( i = 0; i < ARRAY_SIZE(fs); i++ ) -- fs[i] &= max_fs[i]; -- -- if ( p->basic.max_leaf < XSTATE_CPUID ) -- __clear_bit(X86_FEATURE_XSAVE, fs); -- -- sanitise_featureset(fs); -- -- /* Fold host's FDP_EXCP_ONLY and NO_FPU_SEL into guest's view. */ -- fs[FEATURESET_7b0] &= ~(cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | -- cpufeat_mask(X86_FEATURE_NO_FPU_SEL)); -- fs[FEATURESET_7b0] |= (host_cpu_policy.feat._7b0 & -- (cpufeat_mask(X86_FEATURE_FDP_EXCP_ONLY) | -- cpufeat_mask(X86_FEATURE_NO_FPU_SEL))); -- -- x86_cpu_featureset_to_policy(fs, p); -- -- /* Pass host cacheline size through to guests. */ -- p->basic.clflush_size = max->basic.clflush_size; -- -- p->extd.maxphysaddr = min(p->extd.maxphysaddr, max->extd.maxphysaddr); -- p->extd.maxphysaddr = min_t(uint8_t, p->extd.maxphysaddr, -- paging_max_paddr_bits(d)); -- p->extd.maxphysaddr = max_t(uint8_t, p->extd.maxphysaddr, -- (p->basic.pae || p->basic.pse36) ? 36 : 32); -- -- p->extd.maxlinaddr = p->extd.lm ? 48 : 32; -- -- recalculate_xstate(p); -- recalculate_misc(p); -- -- for ( i = 0; i < ARRAY_SIZE(p->cache.raw); ++i ) -- { -- if ( p->cache.subleaf[i].type >= 1 && -- p->cache.subleaf[i].type <= 3 ) -- { -- /* Subleaf has a valid cache type. Zero reserved fields. */ -- p->cache.raw[i].a &= 0xffffc3ffu; -- p->cache.raw[i].d &= 0x00000007u; -- } -- else -- { -- /* Subleaf is not valid. Zero the rest of the union. 
*/ -- zero_leaves(p->cache.raw, i, ARRAY_SIZE(p->cache.raw) - 1); -- break; -- } -- } -- -- if ( vpmu_mode == XENPMU_MODE_OFF || -- ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) -- p->basic.raw[0xa] = EMPTY_LEAF; -- -- if ( !p->extd.svm ) -- p->extd.raw[0xa] = EMPTY_LEAF; -- -- if ( !p->extd.page1gb ) -- p->extd.raw[0x19] = EMPTY_LEAF; --} -- --void __init init_dom0_cpuid_policy(struct domain *d) --{ -- struct cpuid_policy *p = d->arch.cpuid; -- -- /* dom0 can't migrate. Give it ITSC if available. */ -- if ( cpu_has_itsc ) -- p->extd.itsc = true; -- -- /* -- * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, -- * so dom0 can turn off workarounds as appropriate. Temporary, until the -- * domain policy logic gains a better understanding of MSRs. -- */ -- if ( cpu_has_arch_caps ) -- p->feat.arch_caps = true; -- -- /* Apply dom0-cpuid= command line settings, if provided. */ -- if ( dom0_cpuid_cmdline ) -- { -- uint32_t fs[FSCAPINTS]; -- unsigned int i; -- -- x86_cpu_policy_to_featureset(p, fs); -- -- for ( i = 0; i < ARRAY_SIZE(fs); ++i ) -- { -- fs[i] |= dom0_enable_feat [i]; -- fs[i] &= ~dom0_disable_feat[i]; -- } -- -- x86_cpu_featureset_to_policy(fs, p); -- -- recalculate_cpuid_policy(d); -- } --} -- - void guest_cpuid(const struct vcpu *v, uint32_t leaf, - uint32_t subleaf, struct cpuid_leaf *res) - { -@@ -1181,27 +402,6 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, - } - } - --static void __init __maybe_unused build_assertions(void) --{ -- BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS); -- BUILD_BUG_ON(ARRAY_SIZE(pv_max_featuremask) != FSCAPINTS); -- BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_max_featuremask) != FSCAPINTS); -- BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_max_featuremask) != FSCAPINTS); -- BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS); -- -- /* Find some more clever allocation scheme if this trips. */ -- BUILD_BUG_ON(sizeof(struct cpuid_policy) > PAGE_SIZE); -- -- BUILD_BUG_ON(sizeof(raw_cpu_policy.basic) != -- sizeof(raw_cpu_policy.basic.raw)); -- BUILD_BUG_ON(sizeof(raw_cpu_policy.feat) != -- sizeof(raw_cpu_policy.feat.raw)); -- BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) != -- sizeof(raw_cpu_policy.xstate.raw)); -- BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) != -- sizeof(raw_cpu_policy.extd.raw)); --} -- - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 2f20df787425..b486c0efe061 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -77,7 +77,6 @@ - #include - #include - #include --#include - - #include - -diff --git a/xen/arch/x86/include/asm/cpu-policy.h b/xen/arch/x86/include/asm/cpu-policy.h -index 13e2a1f86d13..b361537a602b 100644 ---- a/xen/arch/x86/include/asm/cpu-policy.h -+++ b/xen/arch/x86/include/asm/cpu-policy.h -@@ -18,4 +18,10 @@ void init_guest_cpu_policies(void); - /* Allocate and initialise a CPU policy suitable for the domain. */ - int init_domain_cpu_policy(struct domain *d); - -+/* Apply dom0-specific tweaks to the CPUID policy. */ -+void init_dom0_cpuid_policy(struct domain *d); -+ -+/* Clamp the CPUID policy to reality. 
*/ -+void recalculate_cpuid_policy(struct domain *d); -+ - #endif /* X86_CPU_POLICY_H */ -diff --git a/xen/arch/x86/include/asm/cpuid.h b/xen/arch/x86/include/asm/cpuid.h -index 7f81b998ce01..b32ba0bbfe5c 100644 ---- a/xen/arch/x86/include/asm/cpuid.h -+++ b/xen/arch/x86/include/asm/cpuid.h -@@ -8,14 +8,10 @@ - #include - #include - --#include -- - #include - - extern const uint32_t known_features[FSCAPINTS]; - --void init_guest_cpuid(void); -- - /* - * Expected levelling capabilities (given cpuid vendor/family information), - * and levelling capabilities actually available (given MSR probing). -@@ -49,13 +45,8 @@ extern struct cpuidmasks cpuidmask_defaults; - /* Check that all previously present features are still available. */ - bool recheck_cpu_features(unsigned int cpu); - --/* Apply dom0-specific tweaks to the CPUID policy. */ --void init_dom0_cpuid_policy(struct domain *d); -- --/* Clamp the CPUID policy to reality. */ --void recalculate_cpuid_policy(struct domain *d); -- - struct vcpu; -+struct cpuid_leaf; - void guest_cpuid(const struct vcpu *v, uint32_t leaf, - uint32_t subleaf, struct cpuid_leaf *res); - -diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c -index f94f28c8e271..95492715d8ad 100644 ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -10,6 +10,7 @@ - #include - #include - -+#include - #include - #include - #include -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 1d62ea1ad9d9..0c00ea875d6f 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -51,7 +51,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -1943,7 +1942,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) - if ( !tboot_protect_mem_regions() ) - panic("Could not protect TXT memory regions\n"); - -- init_guest_cpuid(); - init_guest_cpu_policies(); - - if ( xen_cpuidle ) --- -2.39.2 - diff --git a/0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch b/0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch deleted file mode 100644 index 0e1071ac..00000000 --- a/0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 42b3d10d5d0e10586a4e3cbb5b428fd4ecac570b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 3 Apr 2023 20:03:57 +0100 -Subject: [PATCH 14/35] x86/emul: Switch x86_emulate_ctxt to cpu_policy - -As with struct domain, retain cpuid as a valid alias for local code clarity. 
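-
-For illustration, the alias boils down to an anonymous union: both names
-occupy the same storage, so converted and unconverted code read the same
-pointer.  A minimal standalone sketch of the pattern (the context structure
-and its contents are invented; only the union shape mirrors the change):
-
-    #include <stdio.h>
-
-    struct cpu_policy { unsigned int max_leaf; };
-
-    struct emul_ctxt {
-        /* Both names alias the same pointer. */
-        union {
-            struct cpu_policy *cpu_policy;
-            struct cpu_policy *cpuid;
-        };
-    };
-
-    int main(void)
-    {
-        struct cpu_policy p = { .max_leaf = 0xd };
-        struct emul_ctxt ctxt = { .cpu_policy = &p };
-
-        /* The old spelling still reaches the same field. */
-        printf("%u\n", ctxt.cpuid->max_leaf);
-        return 0;
-    }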
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 441b1b2a50ea3656954d75e06d42c96d619ea0fc) ---- - tools/fuzz/x86_instruction_emulator/fuzz-emul.c | 2 +- - tools/tests/x86_emulator/test_x86_emulator.c | 2 +- - tools/tests/x86_emulator/x86-emulate.c | 2 +- - xen/arch/x86/hvm/emulate.c | 4 ++-- - xen/arch/x86/mm/shadow/hvm.c | 2 +- - xen/arch/x86/pv/emul-priv-op.c | 2 +- - xen/arch/x86/pv/ro-page-fault.c | 2 +- - xen/arch/x86/x86_emulate/x86_emulate.h | 7 +++++-- - 8 files changed, 13 insertions(+), 10 deletions(-) - -diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c -index 966e46bee199..4885a68210d0 100644 ---- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c -+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c -@@ -893,7 +893,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size) - struct x86_emulate_ctxt ctxt = { - .data = &state, - .regs = &input.regs, -- .cpuid = &cp, -+ .cpu_policy = &cp, - .addr_size = 8 * sizeof(void *), - .sp_size = 8 * sizeof(void *), - }; -diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c -index 31586f805726..7b7fbaaf45ec 100644 ---- a/tools/tests/x86_emulator/test_x86_emulator.c -+++ b/tools/tests/x86_emulator/test_x86_emulator.c -@@ -909,7 +909,7 @@ int main(int argc, char **argv) - - ctxt.regs = ®s; - ctxt.force_writeback = 0; -- ctxt.cpuid = &cp; -+ ctxt.cpu_policy = &cp; - ctxt.lma = sizeof(void *) == 8; - ctxt.addr_size = 8 * sizeof(void *); - ctxt.sp_size = 8 * sizeof(void *); -diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c -index ea286d6ad87b..5ad282b57545 100644 ---- a/tools/tests/x86_emulator/x86-emulate.c -+++ b/tools/tests/x86_emulator/x86-emulate.c -@@ -38,7 +38,7 @@ - #define put_stub(stb) ((stb).addr = 0) - - uint32_t mxcsr_mask = 0x0000ffbf; --struct cpuid_policy cp; -+struct cpu_policy cp; - - static char fpu_save_area[0x4000] __attribute__((__aligned__((64)))); - static bool use_xsave; -diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c -index cb221f70e8f0..275451dd3615 100644 ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -2772,7 +2772,7 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla) - void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr, - unsigned int errcode) - { -- struct hvm_emulate_ctxt ctx = {{ 0 }}; -+ struct hvm_emulate_ctxt ctx = {}; - int rc; - - hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs()); -@@ -2847,7 +2847,7 @@ void hvm_emulate_init_once( - - hvmemul_ctxt->validate = validate; - hvmemul_ctxt->ctxt.regs = regs; -- hvmemul_ctxt->ctxt.cpuid = curr->domain->arch.cpuid; -+ hvmemul_ctxt->ctxt.cpu_policy = curr->domain->arch.cpu_policy; - hvmemul_ctxt->ctxt.force_writeback = true; - } - -diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c -index 39abf4732d91..c00ce550a1da 100644 ---- a/xen/arch/x86/mm/shadow/hvm.c -+++ b/xen/arch/x86/mm/shadow/hvm.c -@@ -319,7 +319,7 @@ const struct x86_emulate_ops *shadow_init_emulation( - memset(sh_ctxt, 0, sizeof(*sh_ctxt)); - - sh_ctxt->ctxt.regs = regs; -- sh_ctxt->ctxt.cpuid = curr->domain->arch.cpuid; -+ sh_ctxt->ctxt.cpu_policy = curr->domain->arch.cpu_policy; - sh_ctxt->ctxt.lma = hvm_long_mode_active(curr); - - /* Segment cache initialisation. Primed with CS. 
*/ -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index 5da00e24e4ff..ab52768271c5 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -1327,7 +1327,7 @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs) - struct domain *currd = curr->domain; - struct priv_op_ctxt ctxt = { - .ctxt.regs = regs, -- .ctxt.cpuid = currd->arch.cpuid, -+ .ctxt.cpu_policy = currd->arch.cpu_policy, - .ctxt.lma = !is_pv_32bit_domain(currd), - }; - int rc; -diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c -index 5963f5ee2d51..0d02c7d2ab10 100644 ---- a/xen/arch/x86/pv/ro-page-fault.c -+++ b/xen/arch/x86/pv/ro-page-fault.c -@@ -356,7 +356,7 @@ int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs) - unsigned int addr_size = is_pv_32bit_domain(currd) ? 32 : BITS_PER_LONG; - struct x86_emulate_ctxt ctxt = { - .regs = regs, -- .cpuid = currd->arch.cpuid, -+ .cpu_policy = currd->arch.cpu_policy, - .addr_size = addr_size, - .sp_size = addr_size, - .lma = addr_size > 32, -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h -index c89c53e83bfe..e1be0435f891 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.h -+++ b/xen/arch/x86/x86_emulate/x86_emulate.h -@@ -565,8 +565,11 @@ struct x86_emulate_ctxt - * Input-only state: - */ - -- /* CPUID Policy for the domain. */ -- const struct cpuid_policy *cpuid; -+ /* CPU policy for the domain. Allow aliases for local code clarity. */ -+ union { -+ struct cpu_policy *cpu_policy; -+ struct cpu_policy *cpuid; -+ }; - - /* Set this if writes may have side effects. */ - bool force_writeback; --- -2.39.2 - diff --git a/0335-tools-fuzz-Rework-afl-policy-fuzzer.patch b/0335-tools-fuzz-Rework-afl-policy-fuzzer.patch deleted file mode 100644 index 08a089c1..00000000 --- a/0335-tools-fuzz-Rework-afl-policy-fuzzer.patch +++ /dev/null @@ -1,133 +0,0 @@ -From eb84c7783e997e4b4bca0e000c7827a556e57d58 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 3 Apr 2023 17:14:14 +0100 -Subject: [PATCH 15/35] tools/fuzz: Rework afl-policy-fuzzer - -With cpuid_policy and msr_policy merged to form cpu_policy, merge the -respective fuzzing logic. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit a16dcd48c2db3f6820a15ea482551d289bd9cdec) ---- - tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 57 ++++++++--------------- - 1 file changed, 20 insertions(+), 37 deletions(-) - -diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -index 316eb0efe034..585324e41774 100644 ---- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -+++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -@@ -16,16 +16,19 @@ static bool debug; - - #define EMPTY_LEAF ((struct cpuid_leaf){}) - --static void check_cpuid(struct cpuid_policy *cp) -+static void check_policy(struct cpu_policy *cp) - { -- struct cpuid_policy new = {}; -+ struct cpu_policy new = {}; - size_t data_end; - xen_cpuid_leaf_t *leaves = malloc(CPUID_MAX_SERIALISED_LEAVES * - sizeof(xen_cpuid_leaf_t)); -- unsigned int nr = CPUID_MAX_SERIALISED_LEAVES; -+ xen_msr_entry_t *msrs = malloc(MSR_MAX_SERIALISED_ENTRIES * -+ sizeof(xen_cpuid_leaf_t)); -+ unsigned int nr_leaves = CPUID_MAX_SERIALISED_LEAVES; -+ unsigned int nr_msrs = MSR_MAX_SERIALISED_ENTRIES; - int rc; - -- if ( !leaves ) -+ if ( !leaves || !msrs ) - return; - - /* -@@ -49,12 +52,19 @@ static void check_cpuid(struct cpuid_policy *cp) - x86_cpuid_policy_recalc_synth(cp); - - /* Serialise... 
*/ -- rc = x86_cpuid_copy_to_buffer(cp, leaves, &nr); -+ rc = x86_cpuid_copy_to_buffer(cp, leaves, &nr_leaves); -+ assert(rc == 0); -+ assert(nr_leaves <= CPUID_MAX_SERIALISED_LEAVES); -+ -+ rc = x86_msr_copy_to_buffer(cp, msrs, &nr_msrs); - assert(rc == 0); -- assert(nr <= CPUID_MAX_SERIALISED_LEAVES); -+ assert(nr_msrs <= MSR_MAX_SERIALISED_ENTRIES); - - /* ... and deserialise. */ -- rc = x86_cpuid_copy_from_buffer(&new, leaves, nr, NULL, NULL); -+ rc = x86_cpuid_copy_from_buffer(&new, leaves, nr_leaves, NULL, NULL); -+ assert(rc == 0); -+ -+ rc = x86_msr_copy_from_buffer(&new, msrs, nr_msrs, NULL); - assert(rc == 0); - - /* The result after serialisation/deserialisaion should be identical... */ -@@ -76,28 +86,6 @@ static void check_cpuid(struct cpuid_policy *cp) - free(leaves); - } - --static void check_msr(struct msr_policy *mp) --{ -- struct msr_policy new = {}; -- xen_msr_entry_t *msrs = malloc(MSR_MAX_SERIALISED_ENTRIES * -- sizeof(xen_msr_entry_t)); -- unsigned int nr = MSR_MAX_SERIALISED_ENTRIES; -- int rc; -- -- if ( !msrs ) -- return; -- -- rc = x86_msr_copy_to_buffer(mp, msrs, &nr); -- assert(rc == 0); -- assert(nr <= MSR_MAX_SERIALISED_ENTRIES); -- -- rc = x86_msr_copy_from_buffer(&new, msrs, nr, NULL); -- assert(rc == 0); -- assert(memcmp(mp, &new, sizeof(*mp)) == 0); -- -- free(msrs); --} -- - int main(int argc, char **argv) - { - FILE *fp = NULL; -@@ -144,8 +132,7 @@ int main(int argc, char **argv) - while ( __AFL_LOOP(1000) ) - #endif - { -- struct cpuid_policy *cp = NULL; -- struct msr_policy *mp = NULL; -+ struct cpu_policy *cp = NULL; - - if ( fp != stdin ) - { -@@ -160,22 +147,18 @@ int main(int argc, char **argv) - } - - cp = calloc(1, sizeof(*cp)); -- mp = calloc(1, sizeof(*mp)); -- if ( !cp || !mp ) -+ if ( !cp ) - goto skip; - - fread(cp, sizeof(*cp), 1, fp); -- fread(mp, sizeof(*mp), 1, fp); - - if ( !feof(fp) ) - goto skip; - -- check_cpuid(cp); -- check_msr(mp); -+ check_policy(cp); - - skip: - free(cp); -- free(mp); - - if ( fp != stdin ) - { --- -2.39.2 - diff --git a/0336-libx86-Update-library-API-for-cpu_policy.patch b/0336-libx86-Update-library-API-for-cpu_policy.patch deleted file mode 100644 index 381e659f..00000000 --- a/0336-libx86-Update-library-API-for-cpu_policy.patch +++ /dev/null @@ -1,448 +0,0 @@ -From 34e02246efaac0038fa5c57cb810c4e5f523a80f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 3 Apr 2023 14:18:43 +0100 -Subject: [PATCH 16/35] libx86: Update library API for cpu_policy -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Adjust the API and comments appropriately. - -x86_cpu_policy_fill_native() will eventually contain MSR reads, but leave a -TODO in the short term. - -No practical change. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 1b67fccf3b02825f6a036bad06cd17963d0972d2) - -tools/libs/guest: Fix build following libx86 changes - -I appear to have lost this hunk somewhere... 
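-
-The lost hunk is the deep-dependency walk in xg_cpuid_x86.c.  In outline
-(a simplified sketch; the word count and masks are invented), clearing a
-feature must also clear the flattened set of features that depend on it:
-
-    #include <stdint.h>
-    #include <stdio.h>
-
-    #define NR_WORDS 2
-
-    /* Pre-flattened dependencies of feature 0, one mask per word. */
-    static const uint32_t deep_deps_of_0[NR_WORDS] = { 1u << 1, 1u << 4 };
-
-    int main(void)
-    {
-        uint32_t fs[NR_WORDS] = { 0x3, 0x13 };
-
-        /* Feature 0 is disabled, so strip its dependents too. */
-        for ( unsigned int i = 0; i < NR_WORDS; ++i )
-            fs[i] &= ~deep_deps_of_0[i];
-
-        printf("%#x %#x\n", fs[0], fs[1]);   /* prints 0x1 0x3 */
-        return 0;
-    }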
- -Fixes: 1b67fccf3b02 ("libx86: Update library API for cpu_policy") -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 48d76e6da92f9ef76c8468e299349a2f698362fa) ---- - tools/fuzz/cpu-policy/afl-policy-fuzzer.c | 4 +- - tools/libs/guest/xg_cpuid_x86.c | 2 +- - tools/tests/cpu-policy/test-cpu-policy.c | 4 +- - tools/tests/x86_emulator/x86-emulate.c | 2 +- - xen/arch/x86/cpu-policy.c | 4 +- - xen/arch/x86/cpu/common.c | 2 +- - xen/arch/x86/domctl.c | 2 +- - xen/arch/x86/xstate.c | 4 +- - xen/include/xen/lib/x86/cpu-policy.h | 49 +++++++++++++---------- - xen/lib/x86/cpuid.c | 26 ++++++------ - xen/lib/x86/msr.c | 4 +- - 11 files changed, 56 insertions(+), 47 deletions(-) - -diff --git a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -index 585324e41774..11df2f780234 100644 ---- a/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -+++ b/tools/fuzz/cpu-policy/afl-policy-fuzzer.c -@@ -48,8 +48,8 @@ static void check_policy(struct cpu_policy *cp) - * Fix up the data in the source policy which isn't expected to survive - * serialisation. - */ -- x86_cpuid_policy_clear_out_of_range_leaves(cp); -- x86_cpuid_policy_recalc_synth(cp); -+ x86_cpu_policy_clear_out_of_range_leaves(cp); -+ x86_cpu_policy_recalc_synth(cp); - - /* Serialise... */ - rc = x86_cpuid_copy_to_buffer(cp, leaves, &nr_leaves); -diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c -index b38e3a9de350..5d658534ef6e 100644 ---- a/tools/libs/guest/xg_cpuid_x86.c -+++ b/tools/libs/guest/xg_cpuid_x86.c -@@ -555,7 +555,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, - const uint32_t *dfs; - - if ( !test_bit(b, disabled_features) || -- !(dfs = x86_cpuid_lookup_deep_deps(b)) ) -+ !(dfs = x86_cpu_policy_lookup_deep_deps(b)) ) - continue; - - for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i ) -diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c -index 4f3d09f1b780..fea0eb8c3549 100644 ---- a/tools/tests/cpu-policy/test-cpu-policy.c -+++ b/tools/tests/cpu-policy/test-cpu-policy.c -@@ -105,7 +105,7 @@ static void test_cpuid_current(void) - - printf("Testing CPUID on current CPU\n"); - -- x86_cpuid_policy_fill_native(&p); -+ x86_cpu_policy_fill_native(&p); - - rc = x86_cpuid_copy_to_buffer(&p, leaves, &nr); - if ( rc != 0 ) -@@ -554,7 +554,7 @@ static void test_cpuid_out_of_range_clearing(void) - void *ptr; - unsigned int nr_markers; - -- x86_cpuid_policy_clear_out_of_range_leaves(p); -+ x86_cpu_policy_clear_out_of_range_leaves(p); - - /* Count the number of 0xc2's still remaining. 
*/ - for ( ptr = p, nr_markers = 0; -diff --git a/tools/tests/x86_emulator/x86-emulate.c b/tools/tests/x86_emulator/x86-emulate.c -index 5ad282b57545..3a092ea3ec7e 100644 ---- a/tools/tests/x86_emulator/x86-emulate.c -+++ b/tools/tests/x86_emulator/x86-emulate.c -@@ -85,7 +85,7 @@ bool emul_test_init(void) - - unsigned long sp; - -- x86_cpuid_policy_fill_native(&cp); -+ x86_cpu_policy_fill_native(&cp); - - /* - * The emulator doesn't use these instructions, so can always emulate -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 9c506b6614f2..19766e87b68f 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -176,7 +176,7 @@ static void sanitise_featureset(uint32_t *fs) - for_each_set_bit(i, (void *)disabled_features, - sizeof(disabled_features) * 8) - { -- const uint32_t *dfs = x86_cpuid_lookup_deep_deps(i); -+ const uint32_t *dfs = x86_cpu_policy_lookup_deep_deps(i); - unsigned int j; - - ASSERT(dfs); /* deep_features[] should guarentee this. */ -@@ -347,7 +347,7 @@ static void __init calculate_raw_policy(void) - { - struct cpu_policy *p = &raw_cpu_policy; - -- x86_cpuid_policy_fill_native(p); -+ x86_cpu_policy_fill_native(p); - - /* Nothing good will come from Xen and libx86 disagreeing on vendor. */ - ASSERT(p->x86_vendor == boot_cpu_data.x86_vendor); -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 665200db382f..ce692328086e 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -72,7 +72,7 @@ void __init setup_clear_cpu_cap(unsigned int cap) - __builtin_return_address(0), cap); - - __clear_bit(cap, boot_cpu_data.x86_capability); -- dfs = x86_cpuid_lookup_deep_deps(cap); -+ dfs = x86_cpu_policy_lookup_deep_deps(cap); - - if (!dfs) - return; -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 6d15d0c29c4e..1ce12b79e674 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -67,7 +67,7 @@ static int update_domain_cpu_policy(struct domain *d, - goto out; - - /* Trim any newly-stale out-of-range leaves. */ -- x86_cpuid_policy_clear_out_of_range_leaves(new); -+ x86_cpu_policy_clear_out_of_range_leaves(new); - - /* Audit the combined dataset. 
*/ - ret = x86_cpu_policies_are_compatible(sys, new, &err); -diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c -index 3b32bdc51cf4..cea3d0b81f0b 100644 ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -685,7 +685,7 @@ void xstate_init(struct cpuinfo_x86 *c) - int validate_xstate(const struct domain *d, uint64_t xcr0, uint64_t xcr0_accum, - const struct xsave_hdr *hdr) - { -- uint64_t xcr0_max = cpuid_policy_xcr0_max(d->arch.cpuid); -+ uint64_t xcr0_max = cpu_policy_xcr0_max(d->arch.cpuid); - unsigned int i; - - if ( (hdr->xstate_bv & ~xcr0_accum) || -@@ -709,7 +709,7 @@ int validate_xstate(const struct domain *d, uint64_t xcr0, uint64_t xcr0_accum, - int handle_xsetbv(u32 index, u64 new_bv) - { - struct vcpu *curr = current; -- uint64_t xcr0_max = cpuid_policy_xcr0_max(curr->domain->arch.cpuid); -+ uint64_t xcr0_max = cpu_policy_xcr0_max(curr->domain->arch.cpuid); - u64 mask; - - if ( index != XCR_XFEATURE_ENABLED_MASK ) -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index 57b4633c861e..cf7de0f29ccd 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -399,33 +399,38 @@ void x86_cpu_policy_to_featureset(const struct cpu_policy *p, - void x86_cpu_featureset_to_policy(const uint32_t fs[FEATURESET_NR_ENTRIES], - struct cpu_policy *p); - --static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p) -+static inline uint64_t cpu_policy_xcr0_max(const struct cpu_policy *p) - { - return ((uint64_t)p->xstate.xcr0_high << 32) | p->xstate.xcr0_low; - } - --static inline uint64_t cpuid_policy_xstates(const struct cpuid_policy *p) -+static inline uint64_t cpu_policy_xstates(const struct cpu_policy *p) - { - uint64_t val = p->xstate.xcr0_high | p->xstate.xss_high; - - return (val << 32) | p->xstate.xcr0_low | p->xstate.xss_low; - } - --const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature); -+/** -+ * For a specific feature, look up the dependent features. Returns NULL if -+ * this feature has no dependencies. Otherwise return a featureset of -+ * dependent features, which has been recursively flattened. -+ */ -+const uint32_t *x86_cpu_policy_lookup_deep_deps(uint32_t feature); - - /** -- * Recalculate the content in a CPUID policy which is derived from raw data. -+ * Recalculate the content in a CPU policy which is derived from raw data. - */ --void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p); -+void x86_cpu_policy_recalc_synth(struct cpu_policy *p); - - /** -- * Fill a CPUID policy using the native CPUID instruction. -+ * Fill CPU policy using the native CPUID/RDMSR instruction. - * - * No sanitisation is performed, but synthesised values are calculated. - * Values may be influenced by a hypervisor or from masking/faulting - * configuration. - */ --void x86_cpuid_policy_fill_native(struct cpuid_policy *p); -+void x86_cpu_policy_fill_native(struct cpu_policy *p); - - /** - * Clear leaf data beyond the policies max leaf/subleaf settings. -@@ -436,7 +441,7 @@ void x86_cpuid_policy_fill_native(struct cpuid_policy *p); - * with out-of-range leaves with stale content in them. This helper clears - * them. - */ --void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p); -+void x86_cpu_policy_clear_out_of_range_leaves(struct cpu_policy *p); - - #ifdef __XEN__ - #include -@@ -449,9 +454,10 @@ typedef xen_msr_entry_t msr_entry_buffer_t[]; - #endif - - /** -- * Serialise a cpuid_policy object into an array of cpuid leaves. 
-+ * Serialise the CPUID leaves of a cpu_policy object into an array of cpuid -+ * leaves. - * -- * @param policy The cpuid_policy to serialise. -+ * @param policy The cpu_policy to serialise. - * @param leaves The array of leaves to serialise into. - * @param nr_entries The number of entries in 'leaves'. - * @returns -errno -@@ -460,13 +466,14 @@ typedef xen_msr_entry_t msr_entry_buffer_t[]; - * leaves array is too short. On success, nr_entries is updated with the - * actual number of leaves written. - */ --int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, -+int x86_cpuid_copy_to_buffer(const struct cpu_policy *policy, - cpuid_leaf_buffer_t leaves, uint32_t *nr_entries); - - /** -- * Unserialise a cpuid_policy object from an array of cpuid leaves. -+ * Unserialise the CPUID leaves of a cpu_policy object into an array of cpuid -+ * leaves. - * -- * @param policy The cpuid_policy to unserialise into. -+ * @param policy The cpu_policy to unserialise into. - * @param leaves The array of leaves to unserialise from. - * @param nr_entries The number of entries in 'leaves'. - * @param err_leaf Optional hint for error diagnostics. -@@ -474,21 +481,21 @@ int x86_cpuid_copy_to_buffer(const struct cpuid_policy *policy, - * @returns -errno - * - * Reads at most CPUID_MAX_SERIALISED_LEAVES. May return -ERANGE if an -- * incoming leaf is out of range of cpuid_policy, in which case the optional -+ * incoming leaf is out of range of cpu_policy, in which case the optional - * err_* pointers will identify the out-of-range indicies. - * - * No content validation of in-range leaves is performed. Synthesised data is - * recalculated. - */ --int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, -+int x86_cpuid_copy_from_buffer(struct cpu_policy *policy, - const cpuid_leaf_buffer_t leaves, - uint32_t nr_entries, uint32_t *err_leaf, - uint32_t *err_subleaf); - - /** -- * Serialise an msr_policy object into an array. -+ * Serialise the MSRs of a cpu_policy object into an array. - * -- * @param policy The msr_policy to serialise. -+ * @param policy The cpu_policy to serialise. - * @param msrs The array of msrs to serialise into. - * @param nr_entries The number of entries in 'msrs'. - * @returns -errno -@@ -497,13 +504,13 @@ int x86_cpuid_copy_from_buffer(struct cpuid_policy *policy, - * buffer array is too short. On success, nr_entries is updated with the - * actual number of msrs written. - */ --int x86_msr_copy_to_buffer(const struct msr_policy *policy, -+int x86_msr_copy_to_buffer(const struct cpu_policy *policy, - msr_entry_buffer_t msrs, uint32_t *nr_entries); - - /** -- * Unserialise an msr_policy object from an array of msrs. -+ * Unserialise the MSRs of a cpu_policy object from an array of msrs. - * -- * @param policy The msr_policy object to unserialise into. -+ * @param policy The cpu_policy object to unserialise into. - * @param msrs The array of msrs to unserialise from. - * @param nr_entries The number of entries in 'msrs'. - * @param err_msr Optional hint for error diagnostics. -@@ -517,7 +524,7 @@ int x86_msr_copy_to_buffer(const struct msr_policy *policy, - * - * No content validation is performed on the data stored in the policy object. 
- */ --int x86_msr_copy_from_buffer(struct msr_policy *policy, -+int x86_msr_copy_from_buffer(struct cpu_policy *policy, - const msr_entry_buffer_t msrs, uint32_t nr_entries, - uint32_t *err_msr); - -diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c -index 734e90823a63..68aafb404927 100644 ---- a/xen/lib/x86/cpuid.c -+++ b/xen/lib/x86/cpuid.c -@@ -102,13 +102,13 @@ void x86_cpu_featureset_to_policy( - p->feat._7d1 = fs[FEATURESET_7d1]; - } - --void x86_cpuid_policy_recalc_synth(struct cpuid_policy *p) -+void x86_cpu_policy_recalc_synth(struct cpu_policy *p) - { - p->x86_vendor = x86_cpuid_lookup_vendor( - p->basic.vendor_ebx, p->basic.vendor_ecx, p->basic.vendor_edx); - } - --void x86_cpuid_policy_fill_native(struct cpuid_policy *p) -+void x86_cpu_policy_fill_native(struct cpu_policy *p) - { - unsigned int i; - -@@ -199,7 +199,7 @@ void x86_cpuid_policy_fill_native(struct cpuid_policy *p) - cpuid_count_leaf(0xd, 0, &p->xstate.raw[0]); - cpuid_count_leaf(0xd, 1, &p->xstate.raw[1]); - -- xstates = cpuid_policy_xstates(p); -+ xstates = cpu_policy_xstates(p); - - /* This logic will probably need adjusting when XCR0[63] gets used. */ - BUILD_BUG_ON(ARRAY_SIZE(p->xstate.raw) > 63); -@@ -222,10 +222,12 @@ void x86_cpuid_policy_fill_native(struct cpuid_policy *p) - p->hv_limit = 0; - p->hv2_limit = 0; - -- x86_cpuid_policy_recalc_synth(p); -+ /* TODO MSRs */ -+ -+ x86_cpu_policy_recalc_synth(p); - } - --void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) -+void x86_cpu_policy_clear_out_of_range_leaves(struct cpu_policy *p) - { - unsigned int i; - -@@ -260,7 +262,7 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) - zero_leaves(p->topo.raw, i, ARRAY_SIZE(p->topo.raw) - 1); - } - -- if ( p->basic.max_leaf < 0xd || !cpuid_policy_xstates(p) ) -+ if ( p->basic.max_leaf < 0xd || !cpu_policy_xstates(p) ) - memset(p->xstate.raw, 0, sizeof(p->xstate.raw)); - else - { -@@ -268,7 +270,7 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) - BUILD_BUG_ON(ARRAY_SIZE(p->xstate.raw) > 63); - - /* First two leaves always valid. Rest depend on xstates. 
*/ -- i = max(2, 64 - __builtin_clzll(cpuid_policy_xstates(p))); -+ i = max(2, 64 - __builtin_clzll(cpu_policy_xstates(p))); - - zero_leaves(p->xstate.raw, i, - ARRAY_SIZE(p->xstate.raw) - 1); -@@ -278,7 +280,7 @@ void x86_cpuid_policy_clear_out_of_range_leaves(struct cpuid_policy *p) - ARRAY_SIZE(p->extd.raw) - 1); - } - --const uint32_t *x86_cpuid_lookup_deep_deps(uint32_t feature) -+const uint32_t *x86_cpu_policy_lookup_deep_deps(uint32_t feature) - { - static const uint32_t deep_features[] = INIT_DEEP_FEATURES; - static const struct { -@@ -333,7 +335,7 @@ static int copy_leaf_to_buffer(uint32_t leaf, uint32_t subleaf, - return 0; - } - --int x86_cpuid_copy_to_buffer(const struct cpuid_policy *p, -+int x86_cpuid_copy_to_buffer(const struct cpu_policy *p, - cpuid_leaf_buffer_t leaves, uint32_t *nr_entries_p) - { - const uint32_t nr_entries = *nr_entries_p; -@@ -383,7 +385,7 @@ int x86_cpuid_copy_to_buffer(const struct cpuid_policy *p, - - case 0xd: - { -- uint64_t xstates = cpuid_policy_xstates(p); -+ uint64_t xstates = cpu_policy_xstates(p); - - COPY_LEAF(leaf, 0, &p->xstate.raw[0]); - COPY_LEAF(leaf, 1, &p->xstate.raw[1]); -@@ -419,7 +421,7 @@ int x86_cpuid_copy_to_buffer(const struct cpuid_policy *p, - return 0; - } - --int x86_cpuid_copy_from_buffer(struct cpuid_policy *p, -+int x86_cpuid_copy_from_buffer(struct cpu_policy *p, - const cpuid_leaf_buffer_t leaves, - uint32_t nr_entries, uint32_t *err_leaf, - uint32_t *err_subleaf) -@@ -522,7 +524,7 @@ int x86_cpuid_copy_from_buffer(struct cpuid_policy *p, - } - } - -- x86_cpuid_policy_recalc_synth(p); -+ x86_cpu_policy_recalc_synth(p); - - return 0; - -diff --git a/xen/lib/x86/msr.c b/xen/lib/x86/msr.c -index c4d885e7b568..e04b9ca01302 100644 ---- a/xen/lib/x86/msr.c -+++ b/xen/lib/x86/msr.c -@@ -23,7 +23,7 @@ static int copy_msr_to_buffer(uint32_t idx, uint64_t val, - return 0; - } - --int x86_msr_copy_to_buffer(const struct msr_policy *p, -+int x86_msr_copy_to_buffer(const struct cpu_policy *p, - msr_entry_buffer_t msrs, uint32_t *nr_entries_p) - { - const uint32_t nr_entries = *nr_entries_p; -@@ -48,7 +48,7 @@ int x86_msr_copy_to_buffer(const struct msr_policy *p, - return 0; - } - --int x86_msr_copy_from_buffer(struct msr_policy *p, -+int x86_msr_copy_from_buffer(struct cpu_policy *p, - const msr_entry_buffer_t msrs, uint32_t nr_entries, - uint32_t *err_msr) - { --- -2.39.2 - diff --git a/0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch b/0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch deleted file mode 100644 index 3db33b0e..00000000 --- a/0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch +++ /dev/null @@ -1,334 +0,0 @@ -From c07eb947082104a75fb4b58b3f85f5076577c728 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 29 Mar 2023 13:07:03 +0100 -Subject: [PATCH 17/35] x86: Remove temporary {cpuid,msr}_policy defines - -With all code areas updated, drop the temporary defines and adjust all -remaining users. - -No practical change. 
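-
-In miniature, the mechanism being retired looked like this (every name
-below is a stand-in): the alias let unconverted spellings compile against
-the merged type, and removing it forces each user onto the real name.
-
-    #include <stdio.h>
-
-    struct cpu_policy { unsigned int x86_vendor; };
-    #define cpuid_policy cpu_policy   /* temporary alias, now gone */
-
-    /* An unconverted user still compiles while the alias exists... */
-    static unsigned int vendor(const struct cpuid_policy *cp)
-    {
-        return cp->x86_vendor;
-    }
-
-    int main(void)
-    {
-        struct cpu_policy cp = { .x86_vendor = 2 };
-
-        /* ...and once every such user is renamed, the #define can go. */
-        printf("%u\n", vendor(&cp));
-        return 0;
-    }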
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 994c1553a158ada9db5ab64c9178a0d23c0a42ce) ---- - xen/arch/x86/cpu/mcheck/mce_intel.c | 2 +- - xen/arch/x86/cpuid.c | 2 +- - xen/arch/x86/domain.c | 2 +- - xen/arch/x86/hvm/hvm.c | 4 ++-- - xen/arch/x86/hvm/svm/svm.c | 2 +- - xen/arch/x86/hvm/vlapic.c | 2 +- - xen/arch/x86/hvm/vmx/vmx.c | 4 ++-- - xen/arch/x86/include/asm/msr.h | 2 +- - xen/arch/x86/msr.c | 20 +++++++++----------- - xen/arch/x86/pv/domain.c | 2 +- - xen/arch/x86/pv/emul-priv-op.c | 4 ++-- - xen/arch/x86/traps.c | 2 +- - xen/arch/x86/x86_emulate/x86_emulate.c | 6 +++--- - xen/include/xen/lib/x86/cpu-policy.h | 4 ---- - 14 files changed, 26 insertions(+), 32 deletions(-) - -diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c -index 28a605a5cbc7..ce7678f242a8 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_intel.c -+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c -@@ -1008,7 +1008,7 @@ int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - - int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - { -- const struct cpuid_policy *cp = v->domain->arch.cpuid; -+ const struct cpu_policy *cp = v->domain->arch.cpu_policy; - unsigned int bank = msr - MSR_IA32_MC0_CTL2; - - switch ( msr ) -diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c -index 3f20c342fde8..f311372cdf1f 100644 ---- a/xen/arch/x86/cpuid.c -+++ b/xen/arch/x86/cpuid.c -@@ -36,7 +36,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, - uint32_t subleaf, struct cpuid_leaf *res) - { - const struct domain *d = v->domain; -- const struct cpuid_policy *p = d->arch.cpuid; -+ const struct cpu_policy *p = d->arch.cpu_policy; - - *res = EMPTY_LEAF; - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index faea542286c0..aca9fa310cc8 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -283,7 +283,7 @@ void update_guest_memory_policy(struct vcpu *v, - - void domain_cpu_policy_changed(struct domain *d) - { -- const struct cpuid_policy *p = d->arch.cpuid; -+ const struct cpu_policy *p = d->arch.cpu_policy; - struct vcpu *v; - - if ( is_pv_domain(d) ) -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index b486c0efe061..d6c6ab889757 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -905,7 +905,7 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value, - signed int cr0_pg) - { - const struct domain *d = v->domain; -- const struct cpuid_policy *p = d->arch.cpuid; -+ const struct cpu_policy *p = d->arch.cpu_policy; - - if ( value & ~EFER_KNOWN_MASK ) - return "Unknown bits set"; -@@ -942,7 +942,7 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value, - /* These bits in CR4 can be set by the guest. */ - unsigned long hvm_cr4_guest_valid_bits(const struct domain *d) - { -- const struct cpuid_policy *p = d->arch.cpuid; -+ const struct cpu_policy *p = d->arch.cpu_policy; - bool mce, vmxe, cet; - - /* Logic broken out simply to aid readability below. 
*/ -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index fa7325720328..5fa945c526ec 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -596,7 +596,7 @@ static void cf_check svm_cpuid_policy_changed(struct vcpu *v) - { - struct svm_vcpu *svm = &v->arch.hvm.svm; - struct vmcb_struct *vmcb = svm->vmcb; -- const struct cpuid_policy *cp = v->domain->arch.cpuid; -+ const struct cpu_policy *cp = v->domain->arch.cpu_policy; - u32 bitmap = vmcb_get_exception_intercepts(vmcb); - - if ( opt_hvm_fep || -diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c -index eb32f12e2d14..5909935e0b3e 100644 ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -1083,7 +1083,7 @@ static void set_x2apic_id(struct vlapic *vlapic) - - int guest_wrmsr_apic_base(struct vcpu *v, uint64_t value) - { -- const struct cpuid_policy *cp = v->domain->arch.cpuid; -+ const struct cpu_policy *cp = v->domain->arch.cpu_policy; - struct vlapic *vlapic = vcpu_vlapic(v); - - if ( !has_vlapic(v->domain) ) -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 64dbd501974b..8da6be33eeec 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -738,7 +738,7 @@ void vmx_update_exception_bitmap(struct vcpu *v) - - static void cf_check vmx_cpuid_policy_changed(struct vcpu *v) - { -- const struct cpuid_policy *cp = v->domain->arch.cpuid; -+ const struct cpu_policy *cp = v->domain->arch.cpu_policy; - int rc = 0; - - if ( opt_hvm_fep || -@@ -3465,7 +3465,7 @@ static int cf_check vmx_msr_write_intercept( - unsigned int msr, uint64_t msr_content) - { - struct vcpu *v = current; -- const struct cpuid_policy *cp = v->domain->arch.cpuid; -+ const struct cpu_policy *cp = v->domain->arch.cpu_policy; - - HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x, msr_value=%#"PRIx64, msr, msr_content); - -diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h -index b51d92e27c74..adda736efc4f 100644 ---- a/xen/arch/x86/include/asm/msr.h -+++ b/xen/arch/x86/include/asm/msr.h -@@ -278,7 +278,7 @@ static inline void wrmsr_tsc_aux(uint32_t val) - } - } - --uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp); -+uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp); - - /* Container object for per-vCPU MSRs */ - struct vcpu_msrs -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 14bcb8261c47..a79021774b5c 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -54,8 +54,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - { - const struct vcpu *curr = current; - const struct domain *d = v->domain; -- const struct cpuid_policy *cp = d->arch.cpuid; -- const struct msr_policy *mp = d->arch.msr; -+ const struct cpu_policy *cp = d->arch.cpu_policy; - const struct vcpu_msrs *msrs = v->arch.msrs; - int ret = X86EMUL_OKAY; - -@@ -139,13 +138,13 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - goto get_reg; - - case MSR_INTEL_PLATFORM_INFO: -- *val = mp->platform_info.raw; -+ *val = cp->platform_info.raw; - break; - - case MSR_ARCH_CAPABILITIES: - if ( !cp->feat.arch_caps ) - goto gp_fault; -- *val = mp->arch_caps.raw; -+ *val = cp->arch_caps.raw; - break; - - case MSR_INTEL_MISC_FEATURES_ENABLES: -@@ -321,7 +320,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - * separate CPUID features for this functionality, but only set will be - * active. 
- */ --uint64_t msr_spec_ctrl_valid_bits(const struct cpuid_policy *cp) -+uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) - { - bool ssbd = cp->feat.ssbd || cp->extd.amd_ssbd; - bool psfd = cp->feat.intel_psfd || cp->extd.psfd; -@@ -340,8 +339,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - { - const struct vcpu *curr = current; - struct domain *d = v->domain; -- const struct cpuid_policy *cp = d->arch.cpuid; -- const struct msr_policy *mp = d->arch.msr; -+ const struct cpu_policy *cp = d->arch.cpu_policy; - struct vcpu_msrs *msrs = v->arch.msrs; - int ret = X86EMUL_OKAY; - -@@ -382,7 +380,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - * for backwards compatiblity, the OS should write 0 to it before - * trying to access the current microcode version. - */ -- if ( d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL || val != 0 ) -+ if ( cp->x86_vendor != X86_VENDOR_INTEL || val != 0 ) - goto gp_fault; - break; - -@@ -392,7 +390,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - * to AMD CPUs as well (at least the architectural/CPUID part does). - */ - if ( is_pv_domain(d) || -- d->arch.cpuid->x86_vendor != X86_VENDOR_AMD ) -+ cp->x86_vendor != X86_VENDOR_AMD ) - goto gp_fault; - break; - -@@ -404,7 +402,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - * by any CPUID bit. - */ - if ( is_pv_domain(d) || -- d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL ) -+ cp->x86_vendor != X86_VENDOR_INTEL ) - goto gp_fault; - break; - -@@ -441,7 +439,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - bool old_cpuid_faulting = msrs->misc_features_enables.cpuid_faulting; - - rsvd = ~0ull; -- if ( mp->platform_info.cpuid_faulting ) -+ if ( cp->platform_info.cpuid_faulting ) - rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING; - - if ( val & rsvd ) -diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c -index 95492715d8ad..5c92812dc67a 100644 ---- a/xen/arch/x86/pv/domain.c -+++ b/xen/arch/x86/pv/domain.c -@@ -146,7 +146,7 @@ static void release_compat_l4(struct vcpu *v) - - unsigned long pv_fixup_guest_cr4(const struct vcpu *v, unsigned long cr4) - { -- const struct cpuid_policy *p = v->domain->arch.cpuid; -+ const struct cpu_policy *p = v->domain->arch.cpu_policy; - - /* Discard attempts to set guest controllable bits outside of the policy. */ - cr4 &= ~((p->basic.tsc ? 
0 : X86_CR4_TSD) | -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index ab52768271c5..04416f197951 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -885,7 +885,7 @@ static int cf_check read_msr( - { - struct vcpu *curr = current; - const struct domain *currd = curr->domain; -- const struct cpuid_policy *cp = currd->arch.cpuid; -+ const struct cpu_policy *cp = currd->arch.cpu_policy; - bool vpmu_msr = false, warn = false; - uint64_t tmp; - int ret; -@@ -1034,7 +1034,7 @@ static int cf_check write_msr( - { - struct vcpu *curr = current; - const struct domain *currd = curr->domain; -- const struct cpuid_policy *cp = currd->arch.cpuid; -+ const struct cpu_policy *cp = currd->arch.cpu_policy; - bool vpmu_msr = false; - int ret; - -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index cade9e12f8fa..d12004b1c6fc 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -1035,7 +1035,7 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf, - uint32_t subleaf, struct cpuid_leaf *res) - { - const struct domain *d = v->domain; -- const struct cpuid_policy *p = d->arch.cpuid; -+ const struct cpu_policy *p = d->arch.cpu_policy; - uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000; - uint32_t idx = leaf - base; - unsigned int limit = is_viridian_domain(d) ? p->hv2_limit : p->hv_limit; -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 94dd72585ab9..7a4d3437dd62 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1923,7 +1923,7 @@ in_protmode( - } - - static bool --_amd_like(const struct cpuid_policy *cp) -+_amd_like(const struct cpu_policy *cp) - { - return cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON); - } -@@ -1931,7 +1931,7 @@ _amd_like(const struct cpuid_policy *cp) - static bool - amd_like(const struct x86_emulate_ctxt *ctxt) - { -- return _amd_like(ctxt->cpuid); -+ return _amd_like(ctxt->cpu_policy); - } - - #define vcpu_has_fpu() (ctxt->cpuid->basic.fpu) -@@ -2078,7 +2078,7 @@ protmode_load_seg( - struct x86_emulate_ctxt *ctxt, - const struct x86_emulate_ops *ops) - { -- const struct cpuid_policy *cp = ctxt->cpuid; -+ const struct cpu_policy *cp = ctxt->cpu_policy; - enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr; - struct { uint32_t a, b; } desc, desc_hi = {}; - uint8_t dpl, rpl; -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index cf7de0f29ccd..bfa425060464 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -375,10 +375,6 @@ struct cpu_policy - uint8_t x86_vendor; - }; - --/* Temporary */ --#define cpuid_policy cpu_policy --#define msr_policy cpu_policy -- - struct cpu_policy_errors - { - uint32_t leaf, subleaf; --- -2.39.2 - diff --git a/0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch b/0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch deleted file mode 100644 index 3b7523b1..00000000 --- a/0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch +++ /dev/null @@ -1,110 +0,0 @@ -From c72185f9463dd021f810c19ed025dc20ee1d0a6f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 10 May 2023 19:58:43 +0100 -Subject: [PATCH 18/35] x86/cpuid: Calculate FEATURESET_NR_ENTRIES more - helpfully - -When adding new featureset words, it is convenient to split the work into -several patches. 
However, GCC 12 spotted that the way we prefer to split the -work results in a real (transient) breakage whereby the policy <-> featureset -helpers perform out-of-bounds accesses on the featureset array. - -Fix this by having gen-cpuid.py calculate FEATURESET_NR_ENTRIES from the -comments describing the word blocks, rather than from the XEN_CPUFEATURE() -with the greatest value. - -For simplicty, require that the word blocks appear in order. This can be -revisted if we find a good reason to have blocks out of order. - -No functional change. - -Reported-by: Jan Beulich -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 56e2c8e5860090a35d5f0cafe168223a2a7c0e62) ---- - xen/tools/gen-cpuid.py | 42 ++++++++++++++++++++++++++++++++++++------ - 1 file changed, 36 insertions(+), 6 deletions(-) - -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 4f7c8d78cce7..83b19c8515cd 100755 ---- a/xen/tools/gen-cpuid.py -+++ b/xen/tools/gen-cpuid.py -@@ -50,13 +50,37 @@ def parse_definitions(state): - "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" - "\s+/\*([\w!]*) .*$") - -+ word_regex = re.compile( -+ r"^/\* .* word (\d*) \*/$") -+ last_word = -1 -+ - this = sys.modules[__name__] - - for l in state.input.readlines(): -- # Short circuit the regex... -- if not l.startswith("XEN_CPUFEATURE("): -+ -+ # Short circuit the regexes... -+ if not (l.startswith("XEN_CPUFEATURE(") or -+ l.startswith("/* ")): - continue - -+ # Handle /* ... word $N */ lines -+ if l.startswith("/* "): -+ -+ res = word_regex.match(l) -+ if res is None: -+ continue # Some other comment -+ -+ word = int(res.groups()[0]) -+ -+ if word != last_word + 1: -+ raise Fail("Featureset word %u out of order (last word %u)" -+ % (word, last_word)) -+ -+ last_word = word -+ state.nr_entries = word + 1 -+ continue -+ -+ # Handle XEN_CPUFEATURE( lines - res = feat_regex.match(l) - - if res is None: -@@ -94,6 +118,15 @@ def parse_definitions(state): - if len(state.names) == 0: - raise Fail("No features found") - -+ if state.nr_entries == 0: -+ raise Fail("No featureset word info found") -+ -+ max_val = max(state.names.keys()) -+ if (max_val >> 5) >= state.nr_entries: -+ max_name = state.names[max_val] -+ raise Fail("Feature %s (%d*32+%d) exceeds FEATURESET_NR_ENTRIES (%d)" -+ % (max_name, max_val >> 5, max_val & 31, state.nr_entries)) -+ - def featureset_to_uint32s(fs, nr): - """ Represent a featureset as a list of C-compatible uint32_t's """ - -@@ -122,9 +155,6 @@ def format_uint32s(state, featureset, indent): - - def crunch_numbers(state): - -- # Size of bitmaps -- state.nr_entries = nr_entries = (max(state.names.keys()) >> 5) + 1 -- - # Features common between 1d and e1d. 
- common_1d = (FPU, VME, DE, PSE, TSC, MSR, PAE, MCE, CX8, APIC, - MTRR, PGE, MCA, CMOV, PAT, PSE36, MMX, FXSR) -@@ -328,7 +358,7 @@ def crunch_numbers(state): - state.nr_deep_deps = len(state.deep_deps.keys()) - - # Calculate the bitfield name declarations -- for word in range(nr_entries): -+ for word in range(state.nr_entries): - - names = [] - for bit in range(32): --- -2.39.2 - diff --git a/0339-x86-boot-Rework-dom0-feature-configuration.patch b/0339-x86-boot-Rework-dom0-feature-configuration.patch deleted file mode 100644 index 007a30cf..00000000 --- a/0339-x86-boot-Rework-dom0-feature-configuration.patch +++ /dev/null @@ -1,136 +0,0 @@ -From e74dbbf0bdf6dc895ae0d6082cc2d764c88d6ec1 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 12 May 2023 13:52:39 +0100 -Subject: [PATCH 19/35] x86/boot: Rework dom0 feature configuration - -Right now, dom0's feature configuration is split between between the common -path and a dom0-specific one. This mostly is by accident, and causes some -very subtle bugs. - -First, start by clearly defining init_dom0_cpuid_policy() to be the domain -that Xen builds automatically. The late hwdom case is still constructed in a -mostly normal way, with the control domain having full discretion over the CPU -policy. - -Identifying this highlights a latent bug - the two halves of the MSR_ARCH_CAPS -bodge are asymmetric with respect to the hardware domain. This means that -shim, or a control-only dom0 sees the MSR_ARCH_CAPS CPUID bit but none of the -MSR content. This in turn declares the hardware to be retpoline-safe by -failing to advertise the {R,}RSBA bits appropriately. Restrict this logic to -the hardware domain, although the special case will cease to exist shortly. - -For the CPUID Faulting adjustment, the comment in ctxt_switch_levelling() -isn't actually relevant. Provide a better explanation. - -Move the recalculate_cpuid_policy() call outside of the dom0-cpuid= case. -This is no change for now, but will become necessary shortly. - -Finally, place the second half of the MSR_ARCH_CAPS bodge after the -recalculate_cpuid_policy() call. This is necessary to avoid transiently -breaking the hardware domain's view while the handling is cleaned up. This -special case will cease to exist shortly. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit ef1987fcb0fdfaa7ee148024037cb5fa335a7b2d) ---- - xen/arch/x86/cpu-policy.c | 57 +++++++++++++++++++++------------------ - 1 file changed, 31 insertions(+), 26 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 19766e87b68f..0f5182386227 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -678,29 +678,6 @@ int init_domain_cpu_policy(struct domain *d) - if ( !p ) - return -ENOMEM; - -- /* See comment in ctxt_switch_levelling() */ -- if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) ) -- p->platform_info.cpuid_faulting = false; -- -- /* -- * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0, -- * so dom0 can turn off workarounds as appropriate. Temporary, until the -- * domain policy logic gains a better understanding of MSRs. 
-- */ -- if ( is_hardware_domain(d) && cpu_has_arch_caps ) -- { -- uint64_t val; -- -- rdmsrl(MSR_ARCH_CAPABILITIES, val); -- -- p->arch_caps.raw = val & -- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -- ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | -- ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | -- ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | -- ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); -- } -- - d->arch.cpu_policy = p; - - recalculate_cpuid_policy(d); -@@ -836,11 +813,15 @@ void recalculate_cpuid_policy(struct domain *d) - p->extd.raw[0x19] = EMPTY_LEAF; - } - -+/* -+ * Adjust the CPU policy for dom0. Really, this is "the domain Xen builds -+ * automatically on boot", and might not have the domid 0 (e.g. pvshim). -+ */ - void __init init_dom0_cpuid_policy(struct domain *d) - { - struct cpu_policy *p = d->arch.cpuid; - -- /* dom0 can't migrate. Give it ITSC if available. */ -+ /* Dom0 doesn't migrate relative to Xen. Give it ITSC if available. */ - if ( cpu_has_itsc ) - p->extd.itsc = true; - -@@ -849,7 +830,7 @@ void __init init_dom0_cpuid_policy(struct domain *d) - * so dom0 can turn off workarounds as appropriate. Temporary, until the - * domain policy logic gains a better understanding of MSRs. - */ -- if ( cpu_has_arch_caps ) -+ if ( is_hardware_domain(d) && cpu_has_arch_caps ) - p->feat.arch_caps = true; - - /* Apply dom0-cpuid= command line settings, if provided. */ -@@ -867,8 +848,32 @@ void __init init_dom0_cpuid_policy(struct domain *d) - } - - x86_cpu_featureset_to_policy(fs, p); -+ } -+ -+ /* -+ * PV Control domains used to require unfiltered CPUID. This was fixed in -+ * Xen 4.13, but there is an cmdline knob to restore the prior behaviour. -+ * -+ * If the domain is getting unfiltered CPUID, don't let the guest kernel -+ * play with CPUID faulting either, as Xen's CPUID path won't cope. -+ */ -+ if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) ) -+ p->platform_info.cpuid_faulting = false; - -- recalculate_cpuid_policy(d); -+ recalculate_cpuid_policy(d); -+ -+ if ( is_hardware_domain(d) && cpu_has_arch_caps ) -+ { -+ uint64_t val; -+ -+ rdmsrl(MSR_ARCH_CAPABILITIES, val); -+ -+ p->arch_caps.raw = val & -+ (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -+ ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | -+ ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | -+ ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | -+ ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); - } - } - --- -2.39.2 - diff --git a/0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch b/0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch deleted file mode 100644 index fce1a78f..00000000 --- a/0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 78543222faaecfcdb23239b7a686f06186e287bf Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 15 May 2023 14:14:53 +0100 -Subject: [PATCH 20/35] x86/boot: Adjust MSR_ARCH_CAPS handling for the Host - policy - -We are about to move MSR_ARCH_CAPS into featureset, but the order of -operations (copy raw policy, then copy x86_capabilitiles[] in) will end up -clobbering the ARCH_CAPS value. - -Some toolstacks use this information to handle TSX compatibility across the -CPUs and microcode versions where support was removed. - -To avoid this transient breakage, read from raw_cpu_policy rather than -modifying it in place. This logic will be removed entirely in due course. 
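-
-A sketch of the ordering hazard (types and masks below are invented; only
-the shape matches): masking the field in place is lost when a later step
-rewrites it wholesale, so the fix derives it from the untouched raw copy.
-
-    #include <stdint.h>
-    #include <stdio.h>
-
-    struct policy { uint64_t arch_caps; };
-
-    static const struct policy raw = { .arch_caps = 0x10f };
-    static struct policy host;
-
-    /* Stand-in for copying x86_capabilities[] into the policy: it
-     * rewrites arch_caps with words that don't yet carry these bits. */
-    static void featureset_to_policy(struct policy *p)
-    {
-        p->arch_caps = 0;
-    }
-
-    int main(void)
-    {
-        host = raw;                    /* copy the raw policy */
-        featureset_to_policy(&host);   /* clobbers host.arch_caps */
-
-        /* Masking host.arch_caps in place before this point would
-         * have been lost; reading the source afterwards is safe. */
-        host.arch_caps = raw.arch_caps & 0xff;
-
-        printf("%#x\n", (unsigned int)host.arch_caps);   /* prints 0xf */
-        return 0;
-    }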
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 43912f8dbb1888ffd7f00adb10724c70e71927c4) ---- - xen/arch/x86/cpu-policy.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 0f5182386227..630c133daf08 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -411,7 +411,7 @@ static void __init calculate_host_policy(void) - p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting; - - /* Temporary, until we have known_features[] for feature bits in MSRs. */ -- p->arch_caps.raw &= -+ p->arch_caps.raw = raw_cpu_policy.arch_caps.raw & - (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | - ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | - ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO | --- -2.39.2 - diff --git a/0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch b/0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch deleted file mode 100644 index a086ddfb..00000000 --- a/0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch +++ /dev/null @@ -1,216 +0,0 @@ -From b088cf5f3a9553bfc6ac1238fdcd32e854d62c3f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 12 May 2023 17:55:21 +0100 -Subject: [PATCH 21/35] x86/cpu-policy: Infrastructure for MSR_ARCH_CAPS - -Bits through 24 are already defined, meaning that we're not far off needing -the second word. Put both in right away. - -As both halves are present now, the arch_caps field is full width. Adjust the -unit test, which notices. - -The bool bitfield names in the arch_caps union are unused, and somewhat out of -date. They'll shortly be automatically generated. - -Add CPUID and MSR prefixes to the ./xen-cpuid verbose output, now that there -are a mix of the two. 
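-
-Mechanically (the union below follows the patch; the rest of the sketch
-is invented), the 64-bit MSR value now travels through the featureset as
-a lo/hi pair of 32-bit words:
-
-    #include <stdint.h>
-    #include <stdio.h>
-
-    /* Little-endian layout, as on x86: lo/hi overlay raw. */
-    union arch_caps {
-        uint64_t raw;
-        struct { uint32_t lo, hi; };
-    };
-
-    int main(void)
-    {
-        union arch_caps a = { .raw = 0x123456789abcdef0ull };
-        uint32_t fs[2];
-        union arch_caps b;
-
-        fs[0] = a.lo;      /* policy -> featureset */
-        fs[1] = a.hi;
-
-        b.lo = fs[0];      /* featureset -> policy */
-        b.hi = fs[1];
-
-        printf("%d\n", a.raw == b.raw);   /* prints 1 */
-        return 0;
-    }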
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit d9fe459ffad8a6eac2f695adb2331aff83c345d1) ---- - tools/misc/xen-cpuid.c | 44 +++++++++++------- - tools/tests/cpu-policy/test-cpu-policy.c | 5 --- - xen/include/public/arch-x86/cpufeatureset.h | 4 ++ - xen/include/xen/lib/x86/cpu-policy.h | 50 ++++++++++----------- - xen/lib/x86/cpuid.c | 4 ++ - 5 files changed, 59 insertions(+), 48 deletions(-) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index 859345ae8ab2..642e62efdf20 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -218,31 +218,41 @@ static const char *const str_7d2[32] = - [ 4] = "bhi-ctrl", [ 5] = "mcdt-no", - }; - -+static const char *const str_m10Al[32] = -+{ -+}; -+ -+static const char *const str_m10Ah[32] = -+{ -+}; -+ - static const struct { - const char *name; - const char *abbr; - const char *const *strs; - } decodes[] = - { -- { "0x00000001.edx", "1d", str_1d }, -- { "0x00000001.ecx", "1c", str_1c }, -- { "0x80000001.edx", "e1d", str_e1d }, -- { "0x80000001.ecx", "e1c", str_e1c }, -- { "0x0000000d:1.eax", "Da1", str_Da1 }, -- { "0x00000007:0.ebx", "7b0", str_7b0 }, -- { "0x00000007:0.ecx", "7c0", str_7c0 }, -- { "0x80000007.edx", "e7d", str_e7d }, -- { "0x80000008.ebx", "e8b", str_e8b }, -- { "0x00000007:0.edx", "7d0", str_7d0 }, -- { "0x00000007:1.eax", "7a1", str_7a1 }, -- { "0x80000021.eax", "e21a", str_e21a }, -- { "0x00000007:1.ebx", "7b1", str_7b1 }, -- { "0x00000007:2.edx", "7d2", str_7d2 }, -- { "0x00000007:1.ecx", "7c1", str_7c1 }, -- { "0x00000007:1.edx", "7d1", str_7d1 }, -+ { "CPUID 0x00000001.edx", "1d", str_1d }, -+ { "CPUID 0x00000001.ecx", "1c", str_1c }, -+ { "CPUID 0x80000001.edx", "e1d", str_e1d }, -+ { "CPUID 0x80000001.ecx", "e1c", str_e1c }, -+ { "CPUID 0x0000000d:1.eax", "Da1", str_Da1 }, -+ { "CPUID 0x00000007:0.ebx", "7b0", str_7b0 }, -+ { "CPUID 0x00000007:0.ecx", "7c0", str_7c0 }, -+ { "CPUID 0x80000007.edx", "e7d", str_e7d }, -+ { "CPUID 0x80000008.ebx", "e8b", str_e8b }, -+ { "CPUID 0x00000007:0.edx", "7d0", str_7d0 }, -+ { "CPUID 0x00000007:1.eax", "7a1", str_7a1 }, -+ { "CPUID 0x80000021.eax", "e21a", str_e21a }, -+ { "CPUID 0x00000007:1.ebx", "7b1", str_7b1 }, -+ { "CPUID 0x00000007:2.edx", "7d2", str_7d2 }, -+ { "CPUID 0x00000007:1.ecx", "7c1", str_7c1 }, -+ { "CPUID 0x00000007:1.edx", "7d1", str_7d1 }, -+ { "MSR_ARCH_CAPS.lo", "m10Al", str_m10Al }, -+ { "MSR_ARCH_CAPS.hi", "m10Ah", str_m10Ah }, - }; - --#define COL_ALIGN "18" -+#define COL_ALIGN "24" - - static const char *const fs_names[] = { - [XEN_SYSCTL_cpu_featureset_raw] = "Raw", -diff --git a/tools/tests/cpu-policy/test-cpu-policy.c b/tools/tests/cpu-policy/test-cpu-policy.c -index fea0eb8c3549..cac28c76256c 100644 ---- a/tools/tests/cpu-policy/test-cpu-policy.c -+++ b/tools/tests/cpu-policy/test-cpu-policy.c -@@ -391,11 +391,6 @@ static void test_msr_deserialise_failure(void) - .msr = { .idx = 0xce, .val = ~0ull }, - .rc = -EOVERFLOW, - }, -- { -- .name = "truncated val", -- .msr = { .idx = 0x10a, .val = ~0ull }, -- .rc = -EOVERFLOW, -- }, - }; - - printf("Testing MSR deserialise failure:\n"); -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 08600cfdc784..52e862b67810 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -300,6 +300,10 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ - /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */ - XEN_CPUFEATURE(CET_SSS, 
15*32+18) /* CET Supervisor Shadow Stacks safe to use */ - -+/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */ -+ -+/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ -+ - #endif /* XEN_CPUFEATURE */ - - /* Clean up from a default include. Close the enum (for C). */ -diff --git a/xen/include/xen/lib/x86/cpu-policy.h b/xen/include/xen/lib/x86/cpu-policy.h -index bfa425060464..6d5e9edd269b 100644 ---- a/xen/include/xen/lib/x86/cpu-policy.h -+++ b/xen/include/xen/lib/x86/cpu-policy.h -@@ -4,22 +4,24 @@ - - #include - --#define FEATURESET_1d 0 /* 0x00000001.edx */ --#define FEATURESET_1c 1 /* 0x00000001.ecx */ --#define FEATURESET_e1d 2 /* 0x80000001.edx */ --#define FEATURESET_e1c 3 /* 0x80000001.ecx */ --#define FEATURESET_Da1 4 /* 0x0000000d:1.eax */ --#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ --#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ --#define FEATURESET_e7d 7 /* 0x80000007.edx */ --#define FEATURESET_e8b 8 /* 0x80000008.ebx */ --#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ --#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ --#define FEATURESET_e21a 11 /* 0x80000021.eax */ --#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ --#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ --#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ --#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ -+#define FEATURESET_1d 0 /* 0x00000001.edx */ -+#define FEATURESET_1c 1 /* 0x00000001.ecx */ -+#define FEATURESET_e1d 2 /* 0x80000001.edx */ -+#define FEATURESET_e1c 3 /* 0x80000001.ecx */ -+#define FEATURESET_Da1 4 /* 0x0000000d:1.eax */ -+#define FEATURESET_7b0 5 /* 0x00000007:0.ebx */ -+#define FEATURESET_7c0 6 /* 0x00000007:0.ecx */ -+#define FEATURESET_e7d 7 /* 0x80000007.edx */ -+#define FEATURESET_e8b 8 /* 0x80000008.ebx */ -+#define FEATURESET_7d0 9 /* 0x00000007:0.edx */ -+#define FEATURESET_7a1 10 /* 0x00000007:1.eax */ -+#define FEATURESET_e21a 11 /* 0x80000021.eax */ -+#define FEATURESET_7b1 12 /* 0x00000007:1.ebx */ -+#define FEATURESET_7d2 13 /* 0x00000007:2.edx */ -+#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */ -+#define FEATURESET_7d1 15 /* 0x00000007:1.edx */ -+#define FEATURESET_m10Al 16 /* 0x0000010a.eax */ -+#define FEATURESET_m10Ah 17 /* 0x0000010a.edx */ - - struct cpuid_leaf - { -@@ -350,17 +352,13 @@ struct cpu_policy - * fixed in hardware. 
- */
- union {
-- uint32_t raw;
-+ uint64_t raw;
-+ struct {
-+ uint32_t lo, hi;
-+ };
- struct {
-- bool rdcl_no:1;
-- bool ibrs_all:1;
-- bool rsba:1;
-- bool skip_l1dfl:1;
-- bool ssb_no:1;
-- bool mds_no:1;
-- bool if_pschange_mc_no:1;
-- bool tsx_ctrl:1;
-- bool taa_no:1;
-+ DECL_BITFIELD(m10Al);
-+ DECL_BITFIELD(m10Ah);
- };
- } arch_caps;
-
-diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c
-index 68aafb404927..e795ce375032 100644
---- a/xen/lib/x86/cpuid.c
-+++ b/xen/lib/x86/cpuid.c
-@@ -79,6 +79,8 @@ void x86_cpu_policy_to_featureset(
- fs[FEATURESET_7d2] = p->feat._7d2;
- fs[FEATURESET_7c1] = p->feat._7c1;
- fs[FEATURESET_7d1] = p->feat._7d1;
-+ fs[FEATURESET_m10Al] = p->arch_caps.lo;
-+ fs[FEATURESET_m10Ah] = p->arch_caps.hi;
- }
-
- void x86_cpu_featureset_to_policy(
-@@ -100,6 +102,8 @@ void x86_cpu_featureset_to_policy(
- p->feat._7d2 = fs[FEATURESET_7d2];
- p->feat._7c1 = fs[FEATURESET_7c1];
- p->feat._7d1 = fs[FEATURESET_7d1];
-+ p->arch_caps.lo = fs[FEATURESET_m10Al];
-+ p->arch_caps.hi = fs[FEATURESET_m10Ah];
- }
-
- void x86_cpu_policy_recalc_synth(struct cpu_policy *p)
---
-2.39.2
-
diff --git a/0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch b/0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch
deleted file mode 100644
index 8a7368ba..00000000
--- a/0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From 2d1e0ef2f13d913e8d4c2959cfb7f97be4116a1f Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Fri, 12 May 2023 18:50:59 +0100
-Subject: [PATCH 22/35] x86/cpu-policy: MSR_ARCH_CAPS feature names
-
-Seed the default visibility from the dom0 special case, which for the most
-part just exposes the *_NO bits. EIBRS is the one non-*_NO bit, which is
-"just" a status bit to the guest indicating a change in implementation of IBRS
-which is already fully supported.
-
-Insert a block dependency from the ARCH_CAPS CPUID bit to the entire content
-of the MSR. This is because MSRs have no structure information similar to
-CPUID, and it is used by x86_cpu_policy_clear_out_of_range_leaves() in order
-to bulk-clear inaccessible words.
-
-The overall CPUID bit is still max-only, so all of MSR_ARCH_CAPS is hidden in
-the default policies.
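The bulk-clear that this dependency enables amounts to roughly the following sketch. The word indices match the series; the helper name is invented for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    #define FS_M10AL 16                  /* MSR_ARCH_CAPS.lo */
    #define FS_M10AH 17                  /* MSR_ARCH_CAPS.hi */

    /* If the gating ARCH_CAPS CPUID bit is clear, drop the whole MSR. */
    static void clear_arch_caps_words(uint32_t fs[static 18], bool has_arch_caps)
    {
        if ( !has_arch_caps )
            fs[FS_M10AL] = fs[FS_M10AH] = 0;
    }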
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit ce8c930851a5ca21c4e70f83be7e8b290ce1b519) ---- - tools/misc/xen-cpuid.c | 13 ++++++++++++ - xen/include/public/arch-x86/cpufeatureset.h | 23 +++++++++++++++++++++ - xen/tools/gen-cpuid.py | 3 +++ - 3 files changed, 39 insertions(+) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index 642e62efdf20..9eaa6c920976 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -220,6 +220,19 @@ static const char *const str_7d2[32] = - - static const char *const str_m10Al[32] = - { -+ [ 0] = "rdcl-no", [ 1] = "eibrs", -+ [ 2] = "rsba", [ 3] = "skip-l1dfl", -+ [ 4] = "intel-ssb-no", [ 5] = "mds-no", -+ [ 6] = "if-pschange-mc-no", [ 7] = "tsx-ctrl", -+ [ 8] = "taa-no", [ 9] = "mcu-ctrl", -+ [10] = "misc-pkg-ctrl", [11] = "energy-ctrl", -+ [12] = "doitm", [13] = "sbdr-ssdp-no", -+ [14] = "fbsdp-no", [15] = "psdp-no", -+ /* 16 */ [17] = "fb-clear", -+ [18] = "fb-clear-ctrl", [19] = "rrsba", -+ [20] = "bhi-no", [21] = "xapic-status", -+ /* 22 */ [23] = "ovrclk-status", -+ [24] = "pbrsb-no", - }; - - static const char *const str_m10Ah[32] = -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 52e862b67810..23b72094c64f 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -301,6 +301,29 @@ XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ - XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */ - - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */ -+XEN_CPUFEATURE(RDCL_NO, 16*32+ 0) /*A No Rogue Data Cache Load (Meltdown) */ -+XEN_CPUFEATURE(EIBRS, 16*32+ 1) /*A Enhanced IBRS */ -+XEN_CPUFEATURE(RSBA, 16*32+ 2) /*!A RSB Alternative (Retpoline not safe) */ -+XEN_CPUFEATURE(SKIP_L1DFL, 16*32+ 3) /* Don't need to flush L1D on VMEntry */ -+XEN_CPUFEATURE(INTEL_SSB_NO, 16*32+ 4) /*A No Speculative Store Bypass */ -+XEN_CPUFEATURE(MDS_NO, 16*32+ 5) /*A No Microarchitectural Data Sampling */ -+XEN_CPUFEATURE(IF_PSCHANGE_MC_NO, 16*32+ 6) /*A No Instruction fetch #MC */ -+XEN_CPUFEATURE(TSX_CTRL, 16*32+ 7) /* MSR_TSX_CTRL */ -+XEN_CPUFEATURE(TAA_NO, 16*32+ 8) /*A No TSX Async Abort */ -+XEN_CPUFEATURE(MCU_CTRL, 16*32+ 9) /* MSR_MCU_CTRL */ -+XEN_CPUFEATURE(MISC_PKG_CTRL, 16*32+10) /* MSR_MISC_PKG_CTRL */ -+XEN_CPUFEATURE(ENERGY_FILTERING, 16*32+11) /* MSR_MISC_PKG_CTRL.ENERGY_FILTERING */ -+XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing Mode */ -+XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ -+XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ -+XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ -+XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ -+XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ -+XEN_CPUFEATURE(RRSBA, 16*32+19) /*!A Restricted RSB Alternative */ -+XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ -+XEN_CPUFEATURE(XAPIC_STATUS, 16*32+21) /* MSR_XAPIC_DISABLE_STATUS */ -+XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ -+XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ - - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ - -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 83b19c8515cd..72497b3cb0a1 100755 ---- a/xen/tools/gen-cpuid.py -+++ 
b/xen/tools/gen-cpuid.py
-@@ -325,6 +325,9 @@ def crunch_numbers(state):
-
- # In principle the TSXLDTRK insns could also be considered independent.
- RTM: [TSXLDTRK],
-+
-+ # The ARCH_CAPS CPUID bit enumerates the availability of the whole register.
-+ ARCH_CAPS: list(range(RDCL_NO, RDCL_NO + 64)),
- }
-
- deep_features = tuple(sorted(deps.keys()))
---
-2.39.2
-
diff --git a/0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch b/0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch
deleted file mode 100644
index 035b5af0..00000000
--- a/0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-From fe363a4e142f8849d2a8e7e84a95e148bc494930 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Fri, 12 May 2023 15:37:02 +0100
-Subject: [PATCH 23/35] x86/boot: Record MSR_ARCH_CAPS for the Raw and Host CPU
- policy
-
-Extend x86_cpu_policy_fill_native() with a read of ARCH_CAPS based on the
-CPUID information just read, removing the special handling in
-calculate_raw_policy().
-
-Right now, the only use of x86_cpu_policy_fill_native() outside of Xen is the
-unit tests. Getting MSR data in this context is left to whoever first
-encounters a genuine need to have it.
-
-Extend generic_identify() to read ARCH_CAPS into x86_capability[], which is
-fed into the Host Policy. This in turn means there's no need to special case
-arch_caps in calculate_host_policy().
-
-No practical change.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-(cherry picked from commit 70553000d6b44dd7c271a35932b0b3e1f22c5532)
----
- xen/arch/x86/cpu-policy.c | 12 ------------
- xen/arch/x86/cpu/common.c | 5 +++++
- xen/lib/x86/cpuid.c | 7 ++++++-
- 3 files changed, 11 insertions(+), 13 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index 630c133daf08..db04ffb8992d 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -354,9 +354,6 @@ static void __init calculate_raw_policy(void)
-
- /* 0x000000ce MSR_INTEL_PLATFORM_INFO */
- /* Was already added by probe_cpuid_faulting() */
--
-- if ( cpu_has_arch_caps )
-- rdmsrl(MSR_ARCH_CAPABILITIES, p->arch_caps.raw);
- }
-
- static void __init calculate_host_policy(void)
-@@ -409,15 +406,6 @@ static void __init calculate_host_policy(void)
- /* 0x000000ce MSR_INTEL_PLATFORM_INFO */
- /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */
- p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
--
-- /* Temporary, until we have known_features[] for feature bits in MSRs. */
-- p->arch_caps.raw = raw_cpu_policy.arch_caps.raw &
-- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
-- ARCH_CAPS_SKIP_L1DFL | ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO |
-- ARCH_CAPS_IF_PSCHANGE_MC_NO | ARCH_CAPS_TSX_CTRL | ARCH_CAPS_TAA_NO |
-- ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | ARCH_CAPS_PSDP_NO |
-- ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | ARCH_CAPS_BHI_NO |
-- ARCH_CAPS_PBRSB_NO);
- }
-
- static void __init guest_common_default_feature_adjustments(uint32_t *fs)
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index ce692328086e..ffa609930766 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -471,6 +471,11 @@ static void generic_identify(struct cpuinfo_x86 *c)
- cpuid_count(0xd, 1,
- &c->x86_capability[FEATURESET_Da1],
- &tmp, &tmp, &tmp);
-+
-+ if (test_bit(X86_FEATURE_ARCH_CAPS, c->x86_capability))
-+ rdmsr(MSR_ARCH_CAPABILITIES,
-+ c->x86_capability[FEATURESET_m10Al],
-+ c->x86_capability[FEATURESET_m10Ah]);
- }
-
- /*
-diff --git a/xen/lib/x86/cpuid.c b/xen/lib/x86/cpuid.c
-index e795ce375032..07e550191448 100644
---- a/xen/lib/x86/cpuid.c
-+++ b/xen/lib/x86/cpuid.c
-@@ -226,7 +226,12 @@ void x86_cpu_policy_fill_native(struct cpu_policy *p)
- p->hv_limit = 0;
- p->hv2_limit = 0;
-
-- /* TODO MSRs */
-+#ifdef __XEN__
-+ /* TODO MSR_PLATFORM_INFO */
-+
-+ if ( p->feat.arch_caps )
-+ rdmsrl(MSR_ARCH_CAPABILITIES, p->arch_caps.raw);
-+#endif
-
- x86_cpu_policy_recalc_synth(p);
- }
---
-2.39.2
-
diff --git a/0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch b/0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch
deleted file mode 100644
index b3de21e4..00000000
--- a/0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch
+++ /dev/null
@@ -1,138 +0,0 @@
-From fb06e624f268c5fc4b8bd7c4e5ed7b1b6c9032f9 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Fri, 12 May 2023 15:53:35 +0100
-Subject: [PATCH 24/35] x86/boot: Expose MSR_ARCH_CAPS data in guest max
- policies
-
-We already have common and default feature adjustment helpers. Introduce one
-for max featuresets too.
-
-Offer MSR_ARCH_CAPS unconditionally in the max policy, and stop clobbering the
-data inherited from the Host policy. This will be necessary to level a VM
-safely for migration. Annotate the ARCH_CAPS CPUID bit as special. Note:
-ARCH_CAPS is still max-only for now, so will not be inherited by the default
-policies.
-
-With this done, the special case for dom0 can be shrunk to just resampling the
-Host policy (as ARCH_CAPS isn't visible by default yet).
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-(cherry picked from commit bbb289f3d5bdd3358af748d7c567343532ac45b5)
----
- xen/arch/x86/cpu-policy.c | 42 ++++++++++++---------
- xen/include/public/arch-x86/cpufeatureset.h | 2 +-
- 2 files changed, 25 insertions(+), 19 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index db04ffb8992d..d76b544816dd 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -408,6 +408,25 @@ static void __init calculate_host_policy(void)
- p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
- }
-
-+static void __init guest_common_max_feature_adjustments(uint32_t *fs)
-+{
-+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
-+ {
-+ /*
-+ * MSR_ARCH_CAPS is just feature data, and we can offer it to guests
-+ * unconditionally, although limit it to Intel systems as it is highly
-+ * uarch-specific. 
-+ * -+ * In particular, the RSBA and RRSBA bits mean "you might migrate to a -+ * system where RSB underflow uses alternative predictors (a.k.a -+ * Retpoline not safe)", so these need to be visible to a guest in all -+ * cases, even when it's only some other server in the pool which -+ * suffers the identified behaviour. -+ */ -+ __set_bit(X86_FEATURE_ARCH_CAPS, fs); -+ } -+} -+ - static void __init guest_common_default_feature_adjustments(uint32_t *fs) - { - /* -@@ -483,6 +502,7 @@ static void __init calculate_pv_max_policy(void) - __clear_bit(X86_FEATURE_IBRS, fs); - } - -+ guest_common_max_feature_adjustments(fs); - guest_common_feature_adjustments(fs); - - sanitise_featureset(fs); -@@ -490,8 +510,6 @@ static void __init calculate_pv_max_policy(void) - recalculate_xstate(p); - - p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ -- -- p->arch_caps.raw = 0; /* Not supported yet. */ - } - - static void __init calculate_pv_def_policy(void) -@@ -589,6 +607,7 @@ static void __init calculate_hvm_max_policy(void) - __clear_bit(X86_FEATURE_XSAVES, fs); - } - -+ guest_common_max_feature_adjustments(fs); - guest_common_feature_adjustments(fs); - - sanitise_featureset(fs); -@@ -597,8 +616,6 @@ static void __init calculate_hvm_max_policy(void) - - /* It's always possible to emulate CPUID faulting for HVM guests */ - p->platform_info.cpuid_faulting = true; -- -- p->arch_caps.raw = 0; /* Not supported yet. */ - } - - static void __init calculate_hvm_def_policy(void) -@@ -819,7 +836,10 @@ void __init init_dom0_cpuid_policy(struct domain *d) - * domain policy logic gains a better understanding of MSRs. - */ - if ( is_hardware_domain(d) && cpu_has_arch_caps ) -+ { - p->feat.arch_caps = true; -+ p->arch_caps.raw = host_cpu_policy.arch_caps.raw; -+ } - - /* Apply dom0-cpuid= command line settings, if provided. */ - if ( dom0_cpuid_cmdline ) -@@ -849,20 +869,6 @@ void __init init_dom0_cpuid_policy(struct domain *d) - p->platform_info.cpuid_faulting = false; - - recalculate_cpuid_policy(d); -- -- if ( is_hardware_domain(d) && cpu_has_arch_caps ) -- { -- uint64_t val; -- -- rdmsrl(MSR_ARCH_CAPABILITIES, val); -- -- p->arch_caps.raw = val & -- (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA | -- ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO | -- ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO | -- ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA | -- ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO); -- } - } - - static void __init __maybe_unused build_assertions(void) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 23b72094c64f..02a80b0c0c35 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -270,7 +270,7 @@ XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */ - XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ - XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ - XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. 
*/
--XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /*a IA32_ARCH_CAPABILITIES MSR */
-+XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /*!a IA32_ARCH_CAPABILITIES MSR */
- XEN_CPUFEATURE(CORE_CAPS, 9*32+30) /* IA32_CORE_CAPABILITIES MSR */
- XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */
-
---
-2.39.2
-
diff --git a/0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch b/0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
deleted file mode 100644
index 01a501bb..00000000
--- a/0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From f2c655a3dc286d287b7aa8974e15a712db9bf713 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Mon, 15 May 2023 16:59:25 +0100
-Subject: [PATCH 25/35] x86/vtx: Remove opencoded MSR_ARCH_CAPS check
-
-MSR_ARCH_CAPS data is now included in featureset information.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-(cherry picked from commit 8f6bc7f9b72eb7cf0c8c5ae5d80498a58ba0b7c3)
----
- xen/arch/x86/hvm/vmx/vmx.c | 8 ++------
- xen/arch/x86/include/asm/cpufeature.h | 3 +++
- 2 files changed, 5 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 8da6be33eeec..f256dc2635f5 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -2808,8 +2808,6 @@ static void __init ler_to_fixup_check(void);
- */
- static bool __init has_if_pschange_mc(void)
- {
-- uint64_t caps = 0;
--
- /*
- * If we are virtualised, there is nothing we can do. Our EPT tables are
- * shadowed by our hypervisor, and not walked by hardware.
-@@ -2817,10 +2815,8 @@
- if ( cpu_has_hypervisor )
- return false;
-
-- if ( cpu_has_arch_caps )
-- rdmsrl(MSR_ARCH_CAPABILITIES, caps);
--
-- if ( caps & ARCH_CAPS_IF_PSCHANGE_MC_NO )
-+ /* Hardware reports itself as fixed. */
-+ if ( cpu_has_if_pschange_mc_no )
- return false;
-
- /*
-diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
-index a3ad9ebee4e9..448d5c1e0560 100644
---- a/xen/arch/x86/include/asm/cpufeature.h
-+++ b/xen/arch/x86/include/asm/cpufeature.h
-@@ -145,6 +145,9 @@
- #define cpu_has_avx_vnni boot_cpu_has(X86_FEATURE_AVX_VNNI)
- #define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16)
-
-+/* MSR_ARCH_CAPS */
-+#define cpu_has_if_pschange_mc_no boot_cpu_has(X86_FEATURE_IF_PSCHANGE_MC_NO)
-+
- /* Synthesized. */
- #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
- #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
---
-2.39.2
-
diff --git a/0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch b/0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
deleted file mode 100644
index 8a9439f9..00000000
--- a/0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From d5b1cad4fa54bc67d5743ff21cd3d1eadafc6e72 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Mon, 15 May 2023 19:05:01 +0100
-Subject: [PATCH 26/35] x86/tsx: Remove opencoded MSR_ARCH_CAPS check
-
-The current cpu_has_tsx_ctrl tristate is serving a double purpose: to signal
-the first pass through tsx_init(), and the availability of MSR_TSX_CTRL.
-
-Drop the variable, replacing it with a once boolean, and altering
-cpu_has_tsx_ctrl to come out of the feature information.
-
-No functional change.
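The once-boolean idiom that replaces the tristate is, stripped down to the bare pattern (not the literal Xen code):

    #include <stdbool.h>

    static void tsx_init_like(void)
    {
        static bool once;

        if ( !once )
        {
            once = true;
            /* one-time probing of CPUID/MSR state happens here */
        }

        /* steady-state logic runs on every call */
    }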
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 205a9f970378c31ae3e00b52d59103a2e881b9e0) ---- - xen/arch/x86/include/asm/cpufeature.h | 1 + - xen/arch/x86/include/asm/processor.h | 2 +- - xen/arch/x86/tsx.c | 13 ++++++++----- - 3 files changed, 10 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 448d5c1e0560..31ab4495b3a6 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -147,6 +147,7 @@ - - /* MSR_ARCH_CAPS */ - #define cpu_has_if_pschange_mc_no boot_cpu_has(X86_FEATURE_IF_PSCHANGE_MC_NO) -+#define cpu_has_tsx_ctrl boot_cpu_has(X86_FEATURE_TSX_CTRL) - - /* Synthesized. */ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h -index 8e2816fae9b9..40e4e2b02442 100644 ---- a/xen/arch/x86/include/asm/processor.h -+++ b/xen/arch/x86/include/asm/processor.h -@@ -624,7 +624,7 @@ static inline uint8_t get_cpu_family(uint32_t raw, uint8_t *model, - return fam; - } - --extern int8_t opt_tsx, cpu_has_tsx_ctrl; -+extern int8_t opt_tsx; - extern bool rtm_disabled; - void tsx_init(void); - -diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c -index 41b6092cfe16..80c6f4cedd6b 100644 ---- a/xen/arch/x86/tsx.c -+++ b/xen/arch/x86/tsx.c -@@ -19,7 +19,6 @@ - * controlling TSX behaviour, and where TSX isn't force-disabled by firmware. - */ - int8_t __read_mostly opt_tsx = -1; --int8_t __read_mostly cpu_has_tsx_ctrl = -1; - bool __read_mostly rtm_disabled; - - static int __init cf_check parse_tsx(const char *s) -@@ -37,24 +36,28 @@ custom_param("tsx", parse_tsx); - - void tsx_init(void) - { -+ static bool __read_mostly once; -+ - /* - * This function is first called between microcode being loaded, and CPUID - * being scanned generally. Read into boot_cpu_data.x86_capability[] for - * the cpu_has_* bits we care about using here. - */ -- if ( unlikely(cpu_has_tsx_ctrl < 0) ) -+ if ( unlikely(!once) ) - { -- uint64_t caps = 0; - bool has_rtm_always_abort; - -+ once = true; -+ - if ( boot_cpu_data.cpuid_level >= 7 ) - boot_cpu_data.x86_capability[FEATURESET_7d0] - = cpuid_count_edx(7, 0); - - if ( cpu_has_arch_caps ) -- rdmsrl(MSR_ARCH_CAPABILITIES, caps); -+ rdmsr(MSR_ARCH_CAPABILITIES, -+ boot_cpu_data.x86_capability[FEATURESET_m10Al], -+ boot_cpu_data.x86_capability[FEATURESET_m10Ah]); - -- cpu_has_tsx_ctrl = !!(caps & ARCH_CAPS_TSX_CTRL); - has_rtm_always_abort = cpu_has_rtm_always_abort; - - if ( cpu_has_tsx_ctrl && cpu_has_srbds_ctrl ) --- -2.39.2 - diff --git a/0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch b/0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch deleted file mode 100644 index 7a51cb03..00000000 --- a/0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch +++ /dev/null @@ -1,247 +0,0 @@ -From 2211ad7df8fa5a9ae3e09c2aa2718fee9aba120f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 15 May 2023 19:15:48 +0100 -Subject: [PATCH 27/35] x86/spec-ctrl: Remove opencoded MSR_ARCH_CAPS check - -MSR_ARCH_CAPS data is now included in featureset information. Replace -opencoded checks with regular feature ones. - -No functional change. 
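The shape of the conversion, sketched with a stand-in cache and predicate rather than Xen's real boot_cpu_has() plumbing:

    #include <stdbool.h>
    #include <stdint.h>

    #define ARCH_CAPS_RDCL_NO_BIT (UINT64_C(1) << 0)

    static uint64_t arch_caps_cache;    /* filled once during boot */

    /*
     * Before: each user re-read MSR_ARCH_CAPS and masked bits itself.
     * After: users call a cheap predicate over centrally-cached data.
     */
    static bool has_rdcl_no(void)
    {
        return arch_caps_cache & ARCH_CAPS_RDCL_NO_BIT;
    }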
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 511b9f286c3dadd041e0d90beeff7d47c9bf3b7a) ---- - xen/arch/x86/include/asm/cpufeature.h | 7 ++++ - xen/arch/x86/spec_ctrl.c | 56 +++++++++++++-------------- - 2 files changed, 33 insertions(+), 30 deletions(-) - -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 31ab4495b3a6..2460bc7e12c8 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -146,8 +146,15 @@ - #define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16) - - /* MSR_ARCH_CAPS */ -+#define cpu_has_rdcl_no boot_cpu_has(X86_FEATURE_RDCL_NO) -+#define cpu_has_eibrs boot_cpu_has(X86_FEATURE_EIBRS) -+#define cpu_has_rsba boot_cpu_has(X86_FEATURE_RSBA) -+#define cpu_has_skip_l1dfl boot_cpu_has(X86_FEATURE_SKIP_L1DFL) -+#define cpu_has_mds_no boot_cpu_has(X86_FEATURE_MDS_NO) - #define cpu_has_if_pschange_mc_no boot_cpu_has(X86_FEATURE_IF_PSCHANGE_MC_NO) - #define cpu_has_tsx_ctrl boot_cpu_has(X86_FEATURE_TSX_CTRL) -+#define cpu_has_taa_no boot_cpu_has(X86_FEATURE_TAA_NO) -+#define cpu_has_fb_clear boot_cpu_has(X86_FEATURE_FB_CLEAR) - - /* Synthesized. */ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index e80e2a5ed1a9..4bba5e8c2992 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -294,12 +294,10 @@ custom_param("spec-ctrl", parse_spec_ctrl); - int8_t __read_mostly opt_xpti_hwdom = -1; - int8_t __read_mostly opt_xpti_domu = -1; - --static __init void xpti_init_default(uint64_t caps) -+static __init void xpti_init_default(void) - { -- if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) ) -- caps = ARCH_CAPS_RDCL_NO; -- -- if ( caps & ARCH_CAPS_RDCL_NO ) -+ if ( (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) || -+ cpu_has_rdcl_no ) - { - if ( opt_xpti_hwdom < 0 ) - opt_xpti_hwdom = 0; -@@ -402,9 +400,10 @@ static int __init cf_check parse_pv_l1tf(const char *s) - } - custom_param("pv-l1tf", parse_pv_l1tf); - --static void __init print_details(enum ind_thunk thunk, uint64_t caps) -+static void __init print_details(enum ind_thunk thunk) - { - unsigned int _7d0 = 0, _7d2 = 0, e8b = 0, max = 0, tmp; -+ uint64_t caps = 0; - - /* Collect diagnostics about available mitigations. */ - if ( boot_cpu_data.cpuid_level >= 7 ) -@@ -413,6 +412,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - cpuid_count(7, 2, &tmp, &tmp, &tmp, &_7d2); - if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 ) - cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp); -+ if ( cpu_has_arch_caps ) -+ rdmsrl(MSR_ARCH_CAPABILITIES, caps); - - printk("Speculative mitigation facilities:\n"); - -@@ -590,7 +591,7 @@ static bool __init check_smt_enabled(void) - } - - /* Calculate whether Retpoline is known-safe on this CPU. */ --static bool __init retpoline_safe(uint64_t caps) -+static bool __init retpoline_safe(void) - { - unsigned int ucode_rev = this_cpu(cpu_sig).rev; - -@@ -608,7 +609,7 @@ static bool __init retpoline_safe(uint64_t caps) - * Processors offering Enhanced IBRS are not guarenteed to be - * repoline-safe. - */ -- if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) ) -+ if ( cpu_has_rsba || cpu_has_eibrs ) - return false; - - switch ( boot_cpu_data.x86_model ) -@@ -857,7 +858,7 @@ static void __init ibpb_calculations(void) - } - - /* Calculate whether this CPU is vulnerable to L1TF. 
*/ --static __init void l1tf_calculations(uint64_t caps) -+static __init void l1tf_calculations(void) - { - bool hit_default = false; - -@@ -945,7 +946,7 @@ static __init void l1tf_calculations(uint64_t caps) - } - - /* Any processor advertising RDCL_NO should be not vulnerable to L1TF. */ -- if ( caps & ARCH_CAPS_RDCL_NO ) -+ if ( cpu_has_rdcl_no ) - cpu_has_bug_l1tf = false; - - if ( cpu_has_bug_l1tf && hit_default ) -@@ -1004,7 +1005,7 @@ static __init void l1tf_calculations(uint64_t caps) - } - - /* Calculate whether this CPU is vulnerable to MDS. */ --static __init void mds_calculations(uint64_t caps) -+static __init void mds_calculations(void) - { - /* MDS is only known to affect Intel Family 6 processors at this time. */ - if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || -@@ -1012,7 +1013,7 @@ static __init void mds_calculations(uint64_t caps) - return; - - /* Any processor advertising MDS_NO should be not vulnerable to MDS. */ -- if ( caps & ARCH_CAPS_MDS_NO ) -+ if ( cpu_has_mds_no ) - return; - - switch ( boot_cpu_data.x86_model ) -@@ -1125,10 +1126,6 @@ void __init init_speculation_mitigations(void) - enum ind_thunk thunk = THUNK_DEFAULT; - bool has_spec_ctrl, ibrs = false, hw_smt_enabled; - bool cpu_has_bug_taa; -- uint64_t caps = 0; -- -- if ( cpu_has_arch_caps ) -- rdmsrl(MSR_ARCH_CAPABILITIES, caps); - - hw_smt_enabled = check_smt_enabled(); - -@@ -1175,7 +1172,7 @@ void __init init_speculation_mitigations(void) - * On all hardware, we'd like to use retpoline in preference to - * IBRS, but only if it is safe on this hardware. - */ -- if ( retpoline_safe(caps) ) -+ if ( retpoline_safe() ) - thunk = THUNK_RETPOLINE; - else if ( has_spec_ctrl ) - ibrs = true; -@@ -1404,13 +1401,13 @@ void __init init_speculation_mitigations(void) - * threads. Activate this if SMT is enabled, and Xen is using a non-zero - * MSR_SPEC_CTRL setting. - */ -- if ( boot_cpu_has(X86_FEATURE_IBRSB) && !(caps & ARCH_CAPS_IBRS_ALL) && -+ if ( boot_cpu_has(X86_FEATURE_IBRSB) && !cpu_has_eibrs && - hw_smt_enabled && default_xen_spec_ctrl ) - setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); - -- xpti_init_default(caps); -+ xpti_init_default(); - -- l1tf_calculations(caps); -+ l1tf_calculations(); - - /* - * By default, enable PV domU L1TF mitigations on all L1TF-vulnerable -@@ -1431,7 +1428,7 @@ void __init init_speculation_mitigations(void) - if ( !boot_cpu_has(X86_FEATURE_L1D_FLUSH) ) - opt_l1d_flush = 0; - else if ( opt_l1d_flush == -1 ) -- opt_l1d_flush = cpu_has_bug_l1tf && !(caps & ARCH_CAPS_SKIP_L1DFL); -+ opt_l1d_flush = cpu_has_bug_l1tf && !cpu_has_skip_l1dfl; - - /* We compile lfence's in by default, and nop them out if requested. */ - if ( !opt_branch_harden ) -@@ -1454,7 +1451,7 @@ void __init init_speculation_mitigations(void) - "enabled. Please assess your configuration and choose an\n" - "explicit 'smt=' setting. See XSA-273.\n"); - -- mds_calculations(caps); -+ mds_calculations(); - - /* - * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -@@ -1466,7 +1463,7 @@ void __init init_speculation_mitigations(void) - * the return-to-guest path. - */ - if ( opt_unpriv_mmio ) -- opt_fb_clear_mmio = caps & ARCH_CAPS_FB_CLEAR; -+ opt_fb_clear_mmio = cpu_has_fb_clear; - - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. 
-@@ -1496,7 +1493,7 @@ void __init init_speculation_mitigations(void) - */ - if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; -+ opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT -@@ -1527,8 +1524,7 @@ void __init init_speculation_mitigations(void) - * we check both to spot TSX in a microcode/cmdline independent way. - */ - cpu_has_bug_taa = -- (cpu_has_rtm || (caps & ARCH_CAPS_TSX_CTRL)) && -- (caps & (ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO; -+ (cpu_has_rtm || cpu_has_tsx_ctrl) && cpu_has_mds_no && !cpu_has_taa_no; - - /* - * On TAA-affected hardware, disabling TSX is the preferred mitigation, vs -@@ -1547,7 +1543,7 @@ void __init init_speculation_mitigations(void) - * plausibly value TSX higher than Hyperthreading...), disable TSX to - * mitigate TAA. - */ -- if ( opt_tsx == -1 && cpu_has_bug_taa && (caps & ARCH_CAPS_TSX_CTRL) && -+ if ( opt_tsx == -1 && cpu_has_bug_taa && cpu_has_tsx_ctrl && - ((hw_smt_enabled && opt_smt) || - !boot_cpu_has(X86_FEATURE_SC_VERW_IDLE)) ) - { -@@ -1572,15 +1568,15 @@ void __init init_speculation_mitigations(void) - if ( cpu_has_srbds_ctrl ) - { - if ( opt_srb_lock == -1 && !opt_unpriv_mmio && -- (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && -- (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) -+ cpu_has_mds_no && !cpu_has_taa_no && -+ (!cpu_has_hle || (cpu_has_tsx_ctrl && rtm_disabled)) ) - opt_srb_lock = 0; - - set_in_mcu_opt_ctrl(MCU_OPT_CTRL_RNGDS_MITG_DIS, - opt_srb_lock ? 0 : MCU_OPT_CTRL_RNGDS_MITG_DIS); - } - -- print_details(thunk, caps); -+ print_details(thunk); - - /* - * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard --- -2.39.2 - diff --git a/0348-x86-spec-ctrl-Update-hardware-hints.patch b/0348-x86-spec-ctrl-Update-hardware-hints.patch deleted file mode 100644 index 7f7c1377..00000000 --- a/0348-x86-spec-ctrl-Update-hardware-hints.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 6b659d91fb77b0c9e1bfbf48e70764a508f9e886 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 30 May 2023 16:03:16 +0100 -Subject: [PATCH 28/35] x86/spec-ctrl: Update hardware hints - - * Rename IBRS_ALL to EIBRS. EIBRS is the term that everyone knows, and this - makes ARCH_CAPS_EIBRS match the X86_FEATURE_EIBRS form. - * Print RRSBA too, which is also a hint about behaviour. 
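The rendering in print_details() is plain conditional string concatenation; reduced to the two bits touched here, with printf standing in for printk:

    #include <inttypes.h>
    #include <stdio.h>

    #define CAP_EIBRS (UINT64_C(1) << 1)     /* previously named IBRS_ALL */
    #define CAP_RRSBA (UINT64_C(1) << 19)

    static void print_hints(uint64_t caps)
    {
        printf(" Hardware hints:%s%s\n",
               (caps & CAP_EIBRS) ? " EIBRS" : "",
               (caps & CAP_RRSBA) ? " RRSBA" : "");
    }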
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-(cherry picked from commit 94200e1bae07e725cc07238c11569c5cab7befb7)
----
- xen/arch/x86/include/asm/msr-index.h | 2 +-
- xen/arch/x86/spec_ctrl.c | 5 +++--
- 2 files changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
-index 0a8852f3c246..0daa265a5f12 100644
---- a/xen/arch/x86/include/asm/msr-index.h
-+++ b/xen/arch/x86/include/asm/msr-index.h
-@@ -66,7 +66,7 @@
-
- #define MSR_ARCH_CAPABILITIES 0x0000010a
- #define ARCH_CAPS_RDCL_NO (_AC(1, ULL) << 0)
--#define ARCH_CAPS_IBRS_ALL (_AC(1, ULL) << 1)
-+#define ARCH_CAPS_EIBRS (_AC(1, ULL) << 1)
- #define ARCH_CAPS_RSBA (_AC(1, ULL) << 2)
- #define ARCH_CAPS_SKIP_L1DFL (_AC(1, ULL) << 3)
- #define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4)
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 4bba5e8c2992..2e94eded7f55 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -421,10 +421,11 @@ static void __init print_details(enum ind_thunk thunk)
- * Hardware read-only information, stating immunity to certain issues, or
- * suggestions of which mitigation to use.
- */
-- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
-- (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "",
-+ (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "",
- (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
-+ (caps & ARCH_CAPS_RRSBA) ? " RRSBA" : "",
- (caps & ARCH_CAPS_SKIP_L1DFL) ? " SKIP_L1DFL" : "",
- (e8b & cpufeat_mask(X86_FEATURE_SSB_NO)) ||
- (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "",
---
-2.39.2
-
diff --git a/0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch b/0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch
deleted file mode 100644
index 87e0ed0a..00000000
--- a/0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-From 1a2f6ec3cc77fe71877ac579ea751eae6debb28b Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Fri, 10 Mar 2023 16:23:20 +0000
-Subject: [PATCH 29/35] x86/cpu-policy: Rearrange
- guest_common_default_feature_adjustments()
-
-This is prep work, split out to simplify the diff on the following change.
-
- * Split the INTEL check out of the IvyBridge RDRAND check, as the former will
- be reused.
- * Use asm/intel-family.h to remove a raw 0x3a model number.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper
-Acked-by: Jan Beulich
-(cherry picked from commit 064f572f96f1558faae0a74cad616ba95ec8ff34)
----
- xen/arch/x86/cpu-policy.c | 34 +++++++++++++++++++---------------
- 1 file changed, 19 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index d76b544816dd..4ec3c2fb93c7 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -10,6 +10,7 @@
- #include
- #include
- #include
-+#include <asm/intel-family.h>
- #include
- #include
- #include
-@@ -429,21 +430,24 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
-
- static void __init guest_common_default_feature_adjustments(uint32_t *fs)
- {
-- /*
-- * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS
-- * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to
-- * compensate.
-- *
-- * Mitigate by hiding RDRAND from guests by default, unless explicitly
-- * overridden on the Xen command line (cpuid=rdrand). 
Irrespective of the -- * default setting, guests can use RDRAND if explicitly enabled -- * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were -- * previously using RDRAND can migrate in. -- */ -- if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && -- boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x3a && -- cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) -- __clear_bit(X86_FEATURE_RDRAND, fs); -+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) -+ { -+ /* -+ * IvyBridge client parts suffer from leakage of RDRAND data due to SRBDS -+ * (XSA-320 / CVE-2020-0543), and won't be receiving microcode to -+ * compensate. -+ * -+ * Mitigate by hiding RDRAND from guests by default, unless explicitly -+ * overridden on the Xen command line (cpuid=rdrand). Irrespective of the -+ * default setting, guests can use RDRAND if explicitly enabled -+ * (cpuid="host,rdrand=1") in the VM's config file, and VMs which were -+ * previously using RDRAND can migrate in. -+ */ -+ if ( boot_cpu_data.x86 == 6 && -+ boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE && -+ cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) -+ __clear_bit(X86_FEATURE_RDRAND, fs); -+ } - - /* - * On certain hardware, speculative or errata workarounds can result in --- -2.39.2 - diff --git a/0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch b/0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch deleted file mode 100644 index 48270816..00000000 --- a/0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 7f70f13b94818273a10419b2ff2b8af6e8946f82 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 12 Jun 2023 20:24:00 +0100 -Subject: [PATCH 30/35] x86/spec-ctrl: Fix the rendering of FB_CLEAR - -FB_CLEAR is a read-only status bit, not a read-write control. Move it from -"Hardware features" into "Hardware hints". - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 921afcbae843bb3f575a8f4a270b8e6cf471f4ca) ---- - xen/arch/x86/spec_ctrl.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 2e94eded7f55..d5f56d74366e 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -421,7 +421,7 @@ static void __init print_details(enum ind_thunk thunk) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -434,6 +434,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", - (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", - (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "", -+ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", - (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", -@@ -443,7 +444,7 @@ static void __init print_details(enum ind_thunk thunk) - (e8b & cpufeat_mask(X86_FEATURE_IBPB_RET)) ? " IBPB_RET" : ""); - - /* Hardware features which need driving to mitigate issues. 
*/
-- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s\n",
- (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ||
- (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS)) ||
-@@ -459,7 +460,6 @@
- (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "",
- (e8b & cpufeat_mask(X86_FEATURE_VIRT_SSBD)) ? " VIRT_SSBD" : "",
- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "",
-- (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "",
- (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "");
-
- /* Compiled-in support which pertains to mitigations. */
---
-2.39.2
-
diff --git a/0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch b/0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch
deleted file mode 100644
index 3b7bf05a..00000000
--- a/0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From b58b319f6ff12d96ab13dc8ee322491001e61d7e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Mon, 5 Jun 2023 11:09:11 +0100
-Subject: [PATCH 31/35] x86/spec-ctrl: Use a taint for CET without
- MSR_SPEC_CTRL
-
-Reword the comment for 'S' to include an incompatible set of features on the
-same core.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-(cherry picked from commit 3f63f4510422c29fda7ba238b880cbb53eca34fe)
----
- xen/arch/x86/spec_ctrl.c | 3 +++
- xen/common/kernel.c | 2 +-
- 2 files changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index d5f56d74366e..c75521d3a6e4 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -1144,7 +1144,10 @@ void __init init_speculation_mitigations(void)
- if ( read_cr4() & X86_CR4_CET )
- {
- if ( !has_spec_ctrl )
-+ {
- printk(XENLOG_WARNING "?!? CET active, but no MSR_SPEC_CTRL?\n");
-+ add_taint(TAINT_CPU_OUT_OF_SPEC);
-+ }
- else if ( opt_ibrs == -1 )
- opt_ibrs = ibrs = true;
-
-diff --git a/xen/common/kernel.c b/xen/common/kernel.c
-index f8134d3e7a9d..0e8abe0cf8a8 100644
---- a/xen/common/kernel.c
-+++ b/xen/common/kernel.c
-@@ -339,7 +339,7 @@ unsigned int tainted;
- * 'H' - HVM forced emulation prefix is permitted.
- * 'M' - Machine had a machine check experience.
- * 'U' - Platform is unsecure (usually due to an errata on the platform).
--* 'S' - Out of spec CPU (One core has a feature incompatible with others).
-+* 'S' - Out of spec CPU (Incompatible features on one or more cores).
- *
- * The string is overwritten by the next call to print_taint().
- */
---
-2.39.2
-
diff --git a/0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch b/0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch
deleted file mode 100644
index 99557d1a..00000000
--- a/0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch
+++ /dev/null
@@ -1,140 +0,0 @@
-From 7e2164b44adba2e1fbee316faeaba229920c0474 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Fri, 26 May 2023 10:35:47 +0100
-Subject: [PATCH 32/35] x86/spec-ctrl: Rename retpoline_safe() to
- retpoline_calculations()
-
-This is prep work, split out to simplify the diff on the following change.
-
- * Rename to retpoline_calculations(), and call unconditionally. It is
- shortly going to synthesise missing enumerations required for guest safety.
- * For the model check switch statement, store the result in a variable and
- break rather than returning directly.
-
-No functional change.
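The control-flow change, reduced to a sketch (the model number is invented):

    #include <stdbool.h>

    static bool calculations_like(unsigned int model)
    {
        bool safe = false;

        switch ( model )
        {
        case 0x3c:          /* a known-safe example */
            safe = true;
            break;

        default:
            safe = false;
            break;
        }

        /* Fix-up logic can now be inserted here, ahead of the single return. */
        return safe;
    }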
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 724c0d94ff79b208312d30676392bfdd693403be) ---- - xen/arch/x86/spec_ctrl.c | 41 +++++++++++++++++++++++++--------------- - 1 file changed, 26 insertions(+), 15 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c75521d3a6e4..a6fd2fe9f56f 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -592,9 +592,10 @@ static bool __init check_smt_enabled(void) - } - - /* Calculate whether Retpoline is known-safe on this CPU. */ --static bool __init retpoline_safe(void) -+static bool __init retpoline_calculations(void) - { - unsigned int ucode_rev = this_cpu(cpu_sig).rev; -+ bool safe = false; - - if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) ) - return true; -@@ -632,29 +633,31 @@ static bool __init retpoline_safe(void) - case 0x3f: /* Haswell EX/EP */ - case 0x45: /* Haswell D */ - case 0x46: /* Haswell H */ -- return true; -+ safe = true; -+ break; - - /* - * Broadwell processors are retpoline-safe after specific microcode - * versions. - */ - case 0x3d: /* Broadwell */ -- return ucode_rev >= 0x2a; -+ safe = ucode_rev >= 0x2a; break; - case 0x47: /* Broadwell H */ -- return ucode_rev >= 0x1d; -+ safe = ucode_rev >= 0x1d; break; - case 0x4f: /* Broadwell EP/EX */ -- return ucode_rev >= 0xb000021; -+ safe = ucode_rev >= 0xb000021; break; - case 0x56: /* Broadwell D */ - switch ( boot_cpu_data.x86_mask ) - { -- case 2: return ucode_rev >= 0x15; -- case 3: return ucode_rev >= 0x7000012; -- case 4: return ucode_rev >= 0xf000011; -- case 5: return ucode_rev >= 0xe000009; -+ case 2: safe = ucode_rev >= 0x15; break; -+ case 3: safe = ucode_rev >= 0x7000012; break; -+ case 4: safe = ucode_rev >= 0xf000011; break; -+ case 5: safe = ucode_rev >= 0xe000009; break; - default: - printk("Unrecognised CPU stepping %#x - assuming not reptpoline safe\n", - boot_cpu_data.x86_mask); -- return false; -+ safe = false; -+ break; - } - break; - -@@ -668,7 +671,8 @@ static bool __init retpoline_safe(void) - case 0x67: /* Cannonlake? */ - case 0x8e: /* Kabylake M */ - case 0x9e: /* Kabylake D */ -- return false; -+ safe = false; -+ break; - - /* - * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe. -@@ -687,13 +691,17 @@ static bool __init retpoline_safe(void) - case 0x5c: /* Goldmont */ - case 0x5f: /* Denverton */ - case 0x85: /* Knights Mill */ -- return true; -+ safe = true; -+ break; - - default: - printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n", - boot_cpu_data.x86_model); -- return false; -+ safe = false; -+ break; - } -+ -+ return safe; - } - - /* -@@ -1126,7 +1134,7 @@ void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; - bool has_spec_ctrl, ibrs = false, hw_smt_enabled; -- bool cpu_has_bug_taa; -+ bool cpu_has_bug_taa, retpoline_safe; - - hw_smt_enabled = check_smt_enabled(); - -@@ -1155,6 +1163,9 @@ void __init init_speculation_mitigations(void) - thunk = THUNK_JMP; - } - -+ /* Determine if retpoline is safe on this CPU. */ -+ retpoline_safe = retpoline_calculations(); -+ - /* - * Has the user specified any custom BTI mitigations? If so, follow their - * instructions exactly and disable all heuristics. -@@ -1176,7 +1187,7 @@ void __init init_speculation_mitigations(void) - * On all hardware, we'd like to use retpoline in preference to - * IBRS, but only if it is safe on this hardware. 
- */ -- if ( retpoline_safe() ) -+ if ( retpoline_safe ) - thunk = THUNK_RETPOLINE; - else if ( has_spec_ctrl ) - ibrs = true; --- -2.39.2 - diff --git a/0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch b/0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch deleted file mode 100644 index f278653e..00000000 --- a/0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch +++ /dev/null @@ -1,172 +0,0 @@ -From f07a464a832acb9f92a281e5600ec580e06f315a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 25 May 2023 20:31:22 +0100 -Subject: [PATCH 33/35] x86/spec-ctrl: Fix up the RSBA/RRSBA bits as - appropriate - -In order to level a VM safely for migration, the toolstack needs to know the -RSBA/RRSBA properties of the CPU, whether or not they happen to be enumerated. - -See the code comment for details. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 36525a964fb629d0bd26e5a1c42de467af7a42a7) ---- - xen/arch/x86/include/asm/cpufeature.h | 1 + - xen/arch/x86/spec_ctrl.c | 100 ++++++++++++++++++++++++-- - 2 files changed, 96 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 2460bc7e12c8..ec9456e1fdc3 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -155,6 +155,7 @@ - #define cpu_has_tsx_ctrl boot_cpu_has(X86_FEATURE_TSX_CTRL) - #define cpu_has_taa_no boot_cpu_has(X86_FEATURE_TAA_NO) - #define cpu_has_fb_clear boot_cpu_has(X86_FEATURE_FB_CLEAR) -+#define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) - - /* Synthesized. */ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a6fd2fe9f56f..8f9500bc64ae 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -591,7 +591,10 @@ static bool __init check_smt_enabled(void) - return false; - } - --/* Calculate whether Retpoline is known-safe on this CPU. */ -+/* -+ * Calculate whether Retpoline is known-safe on this CPU. Fix up the -+ * RSBA/RRSBA bits as necessary. -+ */ - static bool __init retpoline_calculations(void) - { - unsigned int ucode_rev = this_cpu(cpu_sig).rev; -@@ -605,15 +608,93 @@ static bool __init retpoline_calculations(void) - return false; - - /* -- * RSBA may be set by a hypervisor to indicate that we may move to a -- * processor which isn't retpoline-safe. -+ * The meaning of the RSBA and RRSBA bits have evolved over time. The -+ * agreed upon meaning at the time of writing (May 2023) is thus: -+ * -+ * - RSBA (RSB Alternative) means that an RSB may fall back to an -+ * alternative predictor on underflow. Skylake uarch and later all have -+ * this property. Broadwell too, when running microcode versions prior -+ * to Jan 2018. -+ * -+ * - All eIBRS-capable processors suffer RSBA, but eIBRS also introduces -+ * tagging of predictions with the mode in which they were learned. So -+ * when eIBRS is active, RSBA becomes RRSBA (Restricted RSBA). -+ * -+ * - CPUs are not expected to enumerate both RSBA and RRSBA. -+ * -+ * Some parts (Broadwell) are not expected to ever enumerate this -+ * behaviour directly. Other parts have differing enumeration with -+ * microcode version. Fix up Xen's idea, so we can advertise them safely -+ * to guests, and so toolstacks can level a VM safety for migration. 
-+ * -+ * The following states exist: -+ * -+ * | | RSBA | EIBRS | RRSBA | Notes | Action (in principle) | -+ * |---+------+-------+-------+--------------------+-----------------------| -+ * | 1 | 0 | 0 | 0 | OK (older parts) | Maybe +RSBA | -+ * | 2 | 0 | 0 | 1 | Broken | (+RSBA, -RRSBA) | -+ * | 3 | 0 | 1 | 0 | OK (pre-Aug ucode) | +RRSBA | -+ * | 4 | 0 | 1 | 1 | OK | | -+ * | 5 | 1 | 0 | 0 | OK | | -+ * | 6 | 1 | 0 | 1 | Broken | (-RRSBA) | -+ * | 7 | 1 | 1 | 0 | Broken | (-RSBA, +RRSBA) | -+ * | 8 | 1 | 1 | 1 | Broken | (-RSBA) | -+ * -+ * However, we don't need perfect adherence to the spec. We only need -+ * RSBA || RRSBA to indicate "alternative predictors potentially in use". -+ * Rows 1 & 3 are fixed up by later logic, as they're known configurations -+ * which exist in the world. - * -+ * Complain loudly at the broken cases. They're safe for Xen to use (so we -+ * don't attempt to correct), and may or may not exist in reality, but if -+ * we ever encounter them in practice, something is wrong and needs -+ * further investigation. -+ */ -+ if ( cpu_has_eibrs ? cpu_has_rsba /* Rows 7, 8 */ -+ : cpu_has_rrsba /* Rows 2, 6 */ ) -+ { -+ printk(XENLOG_ERR -+ "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RSBA %u, EIBRS %u, RRSBA %u\n", -+ boot_cpu_data.x86, boot_cpu_data.x86_model, -+ boot_cpu_data.x86_mask, ucode_rev, -+ cpu_has_rsba, cpu_has_eibrs, cpu_has_rrsba); -+ add_taint(TAINT_CPU_OUT_OF_SPEC); -+ } -+ -+ /* - * Processors offering Enhanced IBRS are not guarenteed to be - * repoline-safe. - */ -- if ( cpu_has_rsba || cpu_has_eibrs ) -+ if ( cpu_has_eibrs ) -+ { -+ /* -+ * Prior to the August 2023 microcode, many eIBRS-capable parts did -+ * not enumerate RRSBA. -+ */ -+ if ( !cpu_has_rrsba ) -+ setup_force_cpu_cap(X86_FEATURE_RRSBA); -+ -+ return false; -+ } -+ -+ /* -+ * RSBA is explicitly enumerated in some cases, but may also be set by a -+ * hypervisor to indicate that we may move to a processor which isn't -+ * retpoline-safe. -+ */ -+ if ( cpu_has_rsba ) - return false; - -+ /* -+ * At this point, we've filtered all the legal RSBA || RRSBA cases (or the -+ * known non-ideal cases). If ARCH_CAPS is visible, trust the absence of -+ * RSBA || RRSBA. There's no known microcode which advertises ARCH_CAPS -+ * without RSBA or EIBRS, and if we're virtualised we can't rely the model -+ * check anyway. -+ */ -+ if ( cpu_has_arch_caps ) -+ return true; -+ - switch ( boot_cpu_data.x86_model ) - { - case 0x17: /* Penryn */ -@@ -701,6 +782,15 @@ static bool __init retpoline_calculations(void) - break; - } - -+ if ( !safe ) -+ { -+ /* -+ * Note: the eIBRS-capable parts are filtered out earlier, so the -+ * remainder here are the ones which suffer RSBA behaviour. -+ */ -+ setup_force_cpu_cap(X86_FEATURE_RSBA); -+ } -+ - return safe; - } - -@@ -1163,7 +1253,7 @@ void __init init_speculation_mitigations(void) - thunk = THUNK_JMP; - } - -- /* Determine if retpoline is safe on this CPU. */ -+ /* Determine if retpoline is safe on this CPU. Fix up RSBA/RRSBA enumerations. 
*/ - retpoline_safe = retpoline_calculations(); - - /* --- -2.39.2 - diff --git a/0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch b/0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch deleted file mode 100644 index be48e0d0..00000000 --- a/0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 138822c15d870b9cc699ea6a9f98f3998eda6978 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 24 May 2023 15:41:21 +0100 -Subject: [PATCH 34/35] x86/cpu-policy: Derive RSBA/RRSBA for guest policies - -The RSBA bit, "RSB Alternative", means that the RSB may use alternative -predictors when empty. From a practical point of view, this mean "Retpoline -not safe". - -Enhanced IBRS (officially IBRS_ALL in Intel's docs, previously IBRS_ATT) is a -statement that IBRS is implemented in hardware (as opposed to the form -retrofitted to existing CPUs in microcode). - -The RRSBA bit, "Restricted-RSBA", is a combination of RSBA, and the eIBRS -property that predictions are tagged with the mode in which they were learnt. -Therefore, it means "when eIBRS is active, the RSB may fall back to -alternative predictors but restricted to the current prediction mode". As -such, it's stronger statement than RSBA, but still means "Retpoline not safe". - -CPUs are not expected to enumerate both RSBA and RRSBA. - -Add feature dependencies for EIBRS and RRSBA. While technically they're not -linked, absolutely nothing good can come of letting the guest see RRSBA -without EIBRS. Nor a guest seeing EIBRS without IBRSB. Furthermore, we use -this dependency to simplify the max derivation logic. - -The max policies gets RSBA and RRSBA unconditionally set (with the EIBRS -dependency maybe hiding RRSBA). We can run any VM, even if it has been told -"somewhere you might run, Retpoline isn't safe". - -The default policies are more complicated. A guest shouldn't see both bits, -but it needs to see one if the current host suffers from any form of RSBA, and -which bit it needs to see depends on whether eIBRS is visible or not. -Therefore, the calculation must be performed after sanitise_featureset(). - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit e0586a4ff514590eec50185e2440b97f9a31cb7f) ---- - xen/arch/x86/cpu-policy.c | 39 +++++++++++++++++++++ - xen/include/public/arch-x86/cpufeatureset.h | 4 +-- - xen/tools/gen-cpuid.py | 5 ++- - 3 files changed, 45 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 4ec3c2fb93c7..55524e28e168 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -423,8 +423,17 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - * Retpoline not safe)", so these need to be visible to a guest in all - * cases, even when it's only some other server in the pool which - * suffers the identified behaviour. -+ * -+ * We can always run any VM which has previously (or will -+ * subsequently) run on hardware where Retpoline is not safe. -+ * Note: -+ * - The dependency logic may hide RRSBA for other reasons. -+ * - The max policy does not constitute a sensible configuration to -+ * run a guest in. 
- */ - __set_bit(X86_FEATURE_ARCH_CAPS, fs); -+ __set_bit(X86_FEATURE_RSBA, fs); -+ __set_bit(X86_FEATURE_RRSBA, fs); - } - } - -@@ -532,6 +541,21 @@ static void __init calculate_pv_def_policy(void) - guest_common_default_feature_adjustments(fs); - - sanitise_featureset(fs); -+ -+ /* -+ * If the host suffers from RSBA of any form, and the guest can see -+ * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest -+ * depending on the visibility of eIBRS. -+ */ -+ if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) && -+ (cpu_has_rsba || cpu_has_rrsba) ) -+ { -+ bool eibrs = test_bit(X86_FEATURE_EIBRS, fs); -+ -+ __set_bit(eibrs ? X86_FEATURE_RRSBA -+ : X86_FEATURE_RSBA, fs); -+ } -+ - x86_cpu_featureset_to_policy(fs, p); - recalculate_xstate(p); - } -@@ -649,6 +673,21 @@ static void __init calculate_hvm_def_policy(void) - __set_bit(X86_FEATURE_VIRT_SSBD, fs); - - sanitise_featureset(fs); -+ -+ /* -+ * If the host suffers from RSBA of any form, and the guest can see -+ * MSR_ARCH_CAPS, reflect the appropriate RSBA/RRSBA property to the guest -+ * depending on the visibility of eIBRS. -+ */ -+ if ( test_bit(X86_FEATURE_ARCH_CAPS, fs) && -+ (cpu_has_rsba || cpu_has_rrsba) ) -+ { -+ bool eibrs = test_bit(X86_FEATURE_EIBRS, fs); -+ -+ __set_bit(eibrs ? X86_FEATURE_RRSBA -+ : X86_FEATURE_RSBA, fs); -+ } -+ - x86_cpu_featureset_to_policy(fs, p); - recalculate_xstate(p); - } -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 02a80b0c0c35..fe01dc231e5f 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -303,7 +303,7 @@ XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks s - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */ - XEN_CPUFEATURE(RDCL_NO, 16*32+ 0) /*A No Rogue Data Cache Load (Meltdown) */ - XEN_CPUFEATURE(EIBRS, 16*32+ 1) /*A Enhanced IBRS */ --XEN_CPUFEATURE(RSBA, 16*32+ 2) /*!A RSB Alternative (Retpoline not safe) */ -+XEN_CPUFEATURE(RSBA, 16*32+ 2) /*! RSB Alternative (Retpoline not safe) */ - XEN_CPUFEATURE(SKIP_L1DFL, 16*32+ 3) /* Don't need to flush L1D on VMEntry */ - XEN_CPUFEATURE(INTEL_SSB_NO, 16*32+ 4) /*A No Speculative Store Bypass */ - XEN_CPUFEATURE(MDS_NO, 16*32+ 5) /*A No Microarchitectural Data Sampling */ -@@ -319,7 +319,7 @@ XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Prop - XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ - XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ - XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ --XEN_CPUFEATURE(RRSBA, 16*32+19) /*!A Restricted RSB Alternative */ -+XEN_CPUFEATURE(RRSBA, 16*32+19) /*! Restricted RSB Alternative */ - XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ - XEN_CPUFEATURE(XAPIC_STATUS, 16*32+21) /* MSR_XAPIC_DISABLE_STATUS */ - XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 72497b3cb0a1..8a7516ae0f96 100755 ---- a/xen/tools/gen-cpuid.py -+++ b/xen/tools/gen-cpuid.py -@@ -318,7 +318,7 @@ def crunch_numbers(state): - # IBRSB/IBRS, and we pass this MSR directly to guests. Treating them - # as dependent features simplifies Xen's logic, and prevents the guest - # from seeing implausible configurations. 
-- IBRSB: [STIBP, SSBD, INTEL_PSFD],
-+ IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS],
- IBRS: [AMD_STIBP, AMD_SSBD, PSFD,
- IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE],
- AMD_STIBP: [STIBP_ALWAYS],
-@@ -328,6 +328,9 @@
- 
- # The ARCH_CAPS CPUID bit enumerates the availability of the whole register.
- ARCH_CAPS: list(range(RDCL_NO, RDCL_NO + 64)),
-+
-+ # The behaviour described by RRSBA depends on eIBRS being active.
-+ EIBRS: [RRSBA],
- }
- 
- deep_features = tuple(sorted(deps.keys()))
---
-2.39.2
-
diff --git a/0355-def-arch-caps.patch b/0355-def-arch-caps.patch
deleted file mode 100644
index 94d1a764..00000000
--- a/0355-def-arch-caps.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 74b8fcb1279fb4f1267d0ce85146b3108c12ee1c Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Wed, 17 May 2023 10:13:36 +0100
-Subject: [PATCH 35/35] def-arch-caps
-
----
- xen/arch/x86/cpu-policy.c | 6 ++++++
- xen/include/public/arch-x86/cpufeatureset.h | 2 +-
- 2 files changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index 55524e28e168..3e8cbe984957 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -27,6 +27,9 @@ struct cpu_policy __ro_after_init hvm_max_cpu_policy;
- struct cpu_policy __ro_after_init hvm_def_cpu_policy;
- #endif
- 
-+static bool opt_def_ac = true;
-+boolean_param("def-ac", opt_def_ac);
-+
- const uint32_t known_features[] = INIT_KNOWN_FEATURES;
- 
- static const uint32_t __initconst pv_max_featuremask[] = INIT_PV_MAX_FEATURES;
-@@ -467,6 +470,9 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
- */
- if ( rtm_disabled )
- __clear_bit(X86_FEATURE_RTM, fs);
-+
-+ if ( !opt_def_ac )
-+ __clear_bit(X86_FEATURE_ARCH_CAPS, fs);
- }
- 
- static void __init guest_common_feature_adjustments(uint32_t *fs)
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index fe01dc231e5f..6ae3117687c9 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -270,7 +270,7 @@ XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */
- XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */
- XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */
- XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. 
*/
--XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /*!a IA32_ARCH_CAPABILITIES MSR */
-+XEN_CPUFEATURE(ARCH_CAPS, 9*32+29) /*!A IA32_ARCH_CAPABILITIES MSR */
- XEN_CPUFEATURE(CORE_CAPS, 9*32+30) /* IA32_CORE_CAPABILITIES MSR */
- XEN_CPUFEATURE(SSBD, 9*32+31) /*A MSR_SPEC_CTRL.SSBD available */
- 
---
-2.39.2
-
diff --git a/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch b/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch
index f5ccdb20..3553e86f 100644
--- a/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch
+++ b/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch
@@ -73,20 +73,6 @@ index 0412dbc915e51a518c4541e50c9690b6afcb79bb..9e35ff506646c474f78971f7abc911b3
 
 #ifdef NOISY_CAPS
 	printk(KERN_DEBUG "CPU: After all inits, caps:");
-diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
-index 0a8852f3c2461145e2a7ac44bf680709a60c0bb4..6908e45e0b91df1c6e0e3a802622689a617b611c 100644
---- a/xen/arch/x86/include/asm/msr-index.h
-+++ b/xen/arch/x86/include/asm/msr-index.h
-@@ -104,6 +104,9 @@
- #define MCU_OPT_CTRL_RTM_LOCKED (_AC(1, ULL) << 2)
- #define MCU_OPT_CTRL_FB_CLEAR_DIS (_AC(1, ULL) << 3)
- 
-+#define MSR_UARCH_MISC_CTRL 0x00001b01
-+#define UARCH_CTRL_DOITM (_AC(1, ULL) << 0)
-+
- #define MSR_RTIT_OUTPUT_BASE 0x00000560
- #define MSR_RTIT_OUTPUT_MASK 0x00000561
- #define MSR_RTIT_CTL 0x00000570
 --
 Sincerely,
 Demi Marie Obenour (she/her/hers)
diff --git a/0501-xsa431.patch b/0501-xsa431.patch
deleted file mode 100644
index 07c39716..00000000
--- a/0501-xsa431.patch
+++ /dev/null
@@ -1,96 +0,0 @@
-From 66c930ceac3989b6dc6031bfc30e1e894fc6aebe Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
-Date: Tue, 16 May 2023 17:22:35 +0200
-Subject: [PATCH] x86/amd: fix legacy setting of SSBD on AMD Family 17h
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current logic to set SSBD on AMD Family 17h and Hygon Family 18h
-processors requires that the setting of SSBD is coordinated at a core
-level, as the setting is shared between threads. Logic was introduced
-to keep track of how many threads require SSBD active in order to
-coordinate it; such logic relies on using a per-core counter of
-threads that have SSBD active.
-
-Given the current logic, it's possible for a guest to under- or
-overflow the thread counter, because each write to VIRT_SPEC_CTRL.SSBD
-by the guest gets propagated to the helper that does the per-core
-active accounting. Overflowing the counter is not so much of an
-issue, as this would just make SSBD sticky.
-
-Underflowing, however, is more problematic: on non-debug Xen builds a
-guest can perform empty writes to VIRT_SPEC_CTRL that would cause the
-counter to underflow and thus the value gets saturated to the max
-value of unsigned int. At which point attempts from any thread to
-set VIRT_SPEC_CTRL.SSBD won't get propagated to the hardware anymore,
-because the logic will see that the counter is greater than 1 and
-assume that SSBD is already active, effectively losing the setting
-of SSBD and the protection it provides.
-
-Fix this by introducing a per-CPU variable that keeps track of whether
-the current thread has legacy SSBD active or not, and thus only
-attempt to propagate the value to the hardware once the thread's
-selected value changes.
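
The failure mode is easy to see in a standalone sketch. The following is
illustrative only -- not the Xen code -- and the names (core_ssbd_count,
core_set_ssbd, set_ssbd, thread_ssbd) are invented for the example:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned int core_ssbd_count; /* shared by sibling threads */
    static bool thread_ssbd;             /* per-thread shadow: the fix */

    /*
     * Naive core-level helper: it trusts its caller, so two redundant
     * "disable" requests decrement the counter twice and wrap it.
     */
    static void core_set_ssbd(bool enable)
    {
        if ( enable )
            core_ssbd_count++;
        else
            core_ssbd_count--;
    }

    /*
     * Fixed entry point: only propagate when this thread's selection
     * actually changes, making repeated guest writes idempotent.
     */
    static void set_ssbd(bool enable)
    {
        if ( thread_ssbd == enable )
            return;
        core_set_ssbd(enable);
        thread_ssbd = enable;
    }

    int main(void)
    {
        set_ssbd(false);                        /* redundant guest write... */
        set_ssbd(false);                        /* ...is now a no-op */
        printf("count=%u\n", core_ssbd_count);  /* 0, not wrapped around */
        return 0;
    }

Without the thread_ssbd guard, the two calls above would wrap the counter
below zero and SSBD could never be engaged again -- exactly the loss the
commit message describes.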
- -This is XSA-431 / CVE-2022-42336 - -Fixes: b2030e6730a2 ('amd/virt_ssbd: set SSBD at vCPU context switch') -Reported-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: eda98ea870803ea204a1928519b3f21ec6a679b6 -master date: 2023-05-16 17:17:24 +0200 ---- - xen/arch/x86/cpu/amd.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 1ddb55cbe5..b6a20d375a 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -783,12 +783,23 @@ bool __init amd_setup_legacy_ssbd(void) - return true; - } - -+/* -+ * legacy_ssbd is always initialized to false because when SSBD is set -+ * from the command line guest attempts to change it are a no-op (see -+ * amd_set_legacy_ssbd()), whereas when SSBD is inactive hardware will -+ * be forced into that mode (see amd_init_ssbd()). -+ */ -+static DEFINE_PER_CPU(bool, legacy_ssbd); -+ -+/* Must be called only when the SSBD setting needs toggling. */ - static void core_set_legacy_ssbd(bool enable) - { - const struct cpuinfo_x86 *c = ¤t_cpu_data; - struct ssbd_ls_cfg *status; - unsigned long flags; - -+ BUG_ON(this_cpu(legacy_ssbd) == enable); -+ - if ((c->x86 != 0x17 && c->x86 != 0x18) || c->x86_num_siblings <= 1) { - BUG_ON(!set_legacy_ssbd(c, enable)); - return; -@@ -816,12 +827,17 @@ void amd_set_legacy_ssbd(bool enable) - */ - return; - -+ if (this_cpu(legacy_ssbd) == enable) -+ return; -+ - if (cpu_has_virt_ssbd) - wrmsr(MSR_VIRT_SPEC_CTRL, enable ? SPEC_CTRL_SSBD : 0, 0); - else if (amd_legacy_ssbd) - core_set_legacy_ssbd(enable); - else - ASSERT_UNREACHABLE(); -+ -+ this_cpu(legacy_ssbd) = enable; - } - - /* --- -2.40.1 - diff --git a/0502-xsa433-4.17.patch b/0502-xsa433-4.17.patch deleted file mode 100644 index 668f556b..00000000 --- a/0502-xsa433-4.17.patch +++ /dev/null @@ -1,138 +0,0 @@ -From: Andrew Cooper -Subject: x86/amd: Mitigations for Zenbleed - -Zenbleed is a malfunction on AMD Zen2 uarch parts which results in corruption -of the vector registers. An attacker can trigger this bug deliberately in -order to access stale data in the physical vector register file. This can -include data from sibling threads, or a higher-privilege context. - -Microcode is the preferred mitigation but in the case that's not available use -the chickenbit as instructed by AMD. Re-evaluate the mitigation on late -microcode load too. - -This is XSA-433 / CVE-2023-20593. - -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index b6a20d375ad1..8d23a5be0c5f 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - - #include "cpu.h" - -@@ -878,6 +879,72 @@ void __init detect_zen2_null_seg_behaviour(void) - - } - -+void amd_check_zenbleed(void) -+{ -+ const struct cpu_signature *sig = &this_cpu(cpu_sig); -+ unsigned int good_rev, chickenbit = (1 << 9); -+ uint64_t val, old_val; -+ -+ /* -+ * If we're virtualised, we can't do family/model checks safely, and -+ * we likely wouldn't have access to DE_CFG even if we could see a -+ * microcode revision. -+ * -+ * A hypervisor may hide AVX as a stopgap mitigation. We're not in a -+ * position to care either way. An admin doesn't want to be disabling -+ * AVX as a mitigation on any build of Xen with this logic present. -+ */ -+ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17) -+ return; -+ -+ switch (boot_cpu_data.x86_model) { -+ case 0x30 ... 
0x3f: good_rev = 0x0830107a; break; -+ case 0x60 ... 0x67: good_rev = 0x0860010b; break; -+ case 0x68 ... 0x6f: good_rev = 0x08608105; break; -+ case 0x70 ... 0x7f: good_rev = 0x08701032; break; -+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; -+ default: -+ /* -+ * With the Fam17h check above, parts getting here are Zen1. -+ * They're not affected. -+ */ -+ return; -+ } -+ -+ rdmsrl(MSR_AMD64_DE_CFG, val); -+ old_val = val; -+ -+ /* -+ * Microcode is the preferred mitigation, in terms of performance. -+ * However, without microcode, this chickenbit (specific to the Zen2 -+ * uarch) disables Floating Point Mov-Elimination to mitigate the -+ * issue. -+ */ -+ val &= ~chickenbit; -+ if (sig->rev < good_rev) -+ val |= chickenbit; -+ -+ if (val == old_val) -+ /* Nothing to change. */ -+ return; -+ -+ /* -+ * DE_CFG is a Core-scoped MSR, and this write is racy during late -+ * microcode load. However, both threads calculate the new value from -+ * state which is shared, and unrelated to the old value, so the -+ * result should be consistent. -+ */ -+ wrmsrl(MSR_AMD64_DE_CFG, val); -+ -+ /* -+ * Inform the admin that we changed something, but don't spam, -+ * especially during a late microcode load. -+ */ -+ if (smp_processor_id() == 0) -+ printk(XENLOG_INFO "Zenbleed mitigation - using %s\n", -+ val & chickenbit ? "chickenbit" : "microcode"); -+} -+ - static void cf_check init_amd(struct cpuinfo_x86 *c) - { - u32 l, h; -@@ -1150,6 +1217,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) - if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC)) - disable_c1_ramping(); - -+ amd_check_zenbleed(); -+ - check_syscfg_dram_mod_en(); - - amd_log_freq(c); -diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c -index ded8fe90e650..c6d13f3fb35f 100644 ---- a/xen/arch/x86/cpu/microcode/amd.c -+++ b/xen/arch/x86/cpu/microcode/amd.c -@@ -262,6 +262,8 @@ static int cf_check apply_microcode(const struct microcode_patch *patch) - "microcode: CPU%u updated from revision %#x to %#x, date = %04x-%02x-%02x\n", - cpu, old_rev, rev, patch->year, patch->month, patch->day); - -+ amd_check_zenbleed(); -+ - return 0; - } - -diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h -index 8e2816fae9b9..66611df6efc1 100644 ---- a/xen/arch/x86/include/asm/processor.h -+++ b/xen/arch/x86/include/asm/processor.h -@@ -637,6 +637,8 @@ enum ap_boot_method { - }; - extern enum ap_boot_method ap_boot_method; - -+void amd_check_zenbleed(void); -+ - #endif /* !__ASSEMBLY__ */ - - #endif /* __ASM_X86_PROCESSOR_H */ - diff --git a/0503-xsa433-bugfix.patch b/0503-xsa433-bugfix.patch deleted file mode 100644 index 8ad4eda9..00000000 --- a/0503-xsa433-bugfix.patch +++ /dev/null @@ -1,29 +0,0 @@ -From: Andrew Cooper -Subject: x86/amd: Fix DE_CFG truncation in amd_check_zenbleed() - -This line: - - val &= ~chickenbit; - -ends up truncating val to 32 bits, and turning off various errata workarounds -in Zen2 systems. 
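
The truncation is reproducible in a few lines of standalone C (a sketch,
not the Xen code; the constant mirrors the bit-9 chickenbit):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t val = UINT64_C(0xdead00000000) | (1u << 9) | 1u;

        unsigned int bad = 1 << 9;        /* ~bad is the 32-bit 0xfffffdff */
        uint64_t good = UINT64_C(1) << 9; /* ~good spans all 64 bits */

        /*
         * ~bad zero-extends to 0x00000000fffffdff, so the AND clears
         * the whole upper half of the 64-bit MSR image, not just bit 9.
         */
        printf("buggy: %#" PRIx64 "\n", val & ~bad);  /* prints 0x1 */
        printf("fixed: %#" PRIx64 "\n", val & ~good); /* prints 0xdead00000001 */
        return 0;
    }

Hence the fix below: give the constant a 64-bit type so its complement is
computed at the full width of the register.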
- -Fixes: f91c5ea97067 ("x86/amd: Mitigations for Zenbleed") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 3ed06f670491..df2681b7c455 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -909,8 +909,8 @@ void __init detect_zen2_null_seg_behaviour(void) - void amd_check_zenbleed(void) - { - const struct cpu_signature *sig = &this_cpu(cpu_sig); -- unsigned int good_rev, chickenbit = (1 << 9); -- uint64_t val, old_val; -+ unsigned int good_rev; -+ uint64_t val, old_val, chickenbit = (1 << 9); - - /* - * If we're virtualised, we can't do family/model checks safely, and diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index 593d4cc3..6743362f 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -14,7 +14,6 @@ makedepends=(wget make gcc patch git bin86 dev86 iasl yajl pkg-config openssl pi provides=('xen-qubes-vm-essentials') _patches=( - 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch 1000-Do-not-access-network-during-the-build.patch 1001-hotplug-store-block-params-for-cleanup.patch 1020-xen-tools-qubes-vm.patch diff --git a/xen.spec.in b/xen.spec.in index 6b5d4cff..94db4de6 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -103,49 +103,9 @@ Patch0306: 0306-x86-Replace-PAT_-with-X86_MT_.patch Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch -Patch0310: 0310-ns16550-enable-memory-decoding-on-MMIO-based-PCI-con.patch -Patch0311: 0311-tools-libs-guest-assist-gcc13-s-realloc-analyzer.patch -Patch0321: 0321-tools-xen-cpuid-Rework-the-handling-of-dynamic-featu.patch -Patch0322: 0322-x86-sysctl-Retrofit-XEN_SYSCTL_cpu_featureset_-pv-hv.patch -Patch0323: 0323-x86-Rename-struct-cpu_policy-to-struct-old_cpuid_pol.patch -Patch0324: 0324-x86-Rename-domctl-sysctl-.cpu_policy.-cpuid-msr-_pol.patch -Patch0325: 0325-x86-Rename-struct-cpuid_policy-to-struct-cpu_policy.patch -Patch0326: 0326-x86-Merge-struct-msr_policy-into-struct-cpu_policy.patch -Patch0327: 0327-x86-Merge-the-system-cpuid-msr-policy-objects.patch -Patch0328: 0328-x86-Merge-a-domain-s-cpuid-msr-policy-objects.patch -Patch0329: 0329-x86-Merge-xc_cpu_policy-s-cpuid-and-msr-objects.patch -Patch0330: 0330-x86-Drop-struct-old_cpu_policy.patch -Patch0331: 0331-x86-Out-of-inline-the-policy-featureset-convertors.patch -Patch0332: 0332-x86-boot-Move-MSR-policy-initialisation-logic-into-c.patch -Patch0333: 0333-x86-boot-Merge-CPUID-policy-initialisation-logic-int.patch -Patch0334: 0334-x86-emul-Switch-x86_emulate_ctxt-to-cpu_policy.patch -Patch0335: 0335-tools-fuzz-Rework-afl-policy-fuzzer.patch -Patch0336: 0336-libx86-Update-library-API-for-cpu_policy.patch -Patch0337: 0337-x86-Remove-temporary-cpuid-msr-_policy-defines.patch -Patch0338: 0338-x86-cpuid-Calculate-FEATURESET_NR_ENTRIES-more-helpf.patch -Patch0339: 0339-x86-boot-Rework-dom0-feature-configuration.patch -Patch0340: 0340-x86-boot-Adjust-MSR_ARCH_CAPS-handling-for-the-Host-.patch -Patch0341: 0341-x86-cpu-policy-Infrastructure-for-MSR_ARCH_CAPS.patch -Patch0342: 0342-x86-cpu-policy-MSR_ARCH_CAPS-feature-names.patch -Patch0343: 0343-x86-boot-Record-MSR_ARCH_CAPS-for-the-Raw-and-Host-C.patch -Patch0344: 0344-x86-boot-Expose-MSR_ARCH_CAPS-data-in-guest-max-poli.patch -Patch0345: 0345-x86-vtx-Remove-opencoded-MSR_ARCH_CAPS-check.patch -Patch0346: 0346-x86-tsx-Remove-opencoded-MSR_ARCH_CAPS-check.patch 
-Patch0347: 0347-x86-spec-ctrl-Remove-opencoded-MSR_ARCH_CAPS-check.patch -Patch0348: 0348-x86-spec-ctrl-Update-hardware-hints.patch -Patch0349: 0349-x86-cpu-policy-Rearrange-guest_common_default_featur.patch -Patch0350: 0350-x86-spec-ctrl-Fix-the-rendering-of-FB_CLEAR.patch -Patch0351: 0351-x86-spec-ctrl-Use-a-taint-for-CET-without-MSR_SPEC_C.patch -Patch0352: 0352-x86-spec-ctrl-Rename-retpoline_safe-to-retpoline_cal.patch -Patch0353: 0353-x86-spec-ctrl-Fix-up-the-RSBA-RRSBA-bits-as-appropri.patch -Patch0354: 0354-x86-cpu-policy-Derive-RSBA-RRSBA-for-guest-policies.patch -Patch0355: 0355-def-arch-caps.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch -Patch0501: 0501-xsa431.patch -Patch0502: 0502-xsa433-4.17.patch -Patch0503: 0503-xsa433-bugfix.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From 0cb44dddf66f8fa353f47733ad6d9ecd1b49ce0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 9 Aug 2023 04:15:08 +0200 Subject: [PATCH 15/64] Update to 4.17.2 Temporarily use a git snapshot, until upstream provides proper tarballs. --- .qubesbuilder | 10 ++++++---- Makefile | 13 ++++++++++--- archlinux/PKGBUILD.in | 2 +- rel | 2 +- version | 2 +- xen-4.17.2-git.tar.gz.sha512 | 1 + xen.spec.in | 3 ++- 7 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 xen-4.17.2-git.tar.gz.sha512 diff --git a/.qubesbuilder b/.qubesbuilder index 8700b019..19fc45e0 100644 --- a/.qubesbuilder +++ b/.qubesbuilder @@ -8,7 +8,9 @@ vm: - archlinux source: files: - - url: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz - signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig - pubkeys: - - xen.org-key.asc +# - url: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz +# signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig +# pubkeys: +# - xen.org-key.asc + - url: https://ftp.qubes-os.org/distfiles/xen-@VERSION@-git.tar.gz + sha512: xen-@VERSION@-git.tar.gz.sha512 diff --git a/Makefile b/Makefile index 44e7e009..adb5e2cb 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,11 @@ UNTRUSTED_SUFF := .UNTRUSTED URLS := \ https://downloads.xenproject.org/release/xen/$(VERSION)/xen-$(VERSION).tar.gz.sig +# temporarily use git snapshot +URLS := \ + https://ftp.qubes-os.org/distfiles/xen-$(VERSION)-git.tar.gz + + ALL_FILES := $(notdir $(URLS:%.sig=%)) $(notdir $(filter %.sig, $(URLS))) ALL_URLS := $(URLS:%.sig=%) $(filter %.sig, $(URLS)) @@ -19,6 +24,8 @@ ifneq ($(DISTFILES_MIRROR),) ALL_URLS := $(addprefix $(DISTFILES_MIRROR),$(ALL_FILES)) endif +SHELL := bash + get-sources: $(ALL_FILES) git submodule update --init --recursive @@ -47,10 +54,10 @@ $(filter %.sig, $(ALL_FILES)): %: { echo "Wrong signature on $@$(UNTRUSTED_SUFF)!"; exit 1; } @mv $@$(UNTRUSTED_SUFF) $@ -%: %.sha1sum +%: %.sha512 @$(FETCH_CMD) $@$(UNTRUSTED_SUFF) $(filter %$@,$(ALL_URLS)) - @sha1sum --status -c $< <$@$(UNTRUSTED_SUFF) || \ - { echo "Wrong SHA1 checksum on $@$(UNTRUSTED_SUFF)!"; exit 1; } + @sha512sum --status -c <(printf "$$(cat $<) -\n") <$@$(UNTRUSTED_SUFF) || \ + { echo "Wrong SHA512 checksum on $@$(UNTRUSTED_SUFF)!"; exit 1; } @mv $@$(UNTRUSTED_SUFF) $@ .PHONY: clean-sources diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index 6743362f..efac0519 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -22,7 +22,7 @@ _patches=( 
1102-docs-xen-headers-use-alphabetical-sorting-for-incont.patch 1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch ) -source=(xen-$_upstream_pkgver.tar.gz "${_patches[@]}") +source=(xen-$_upstream_pkgver-git.tar.gz "${_patches[@]}") md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) prepare() { diff --git a/rel b/rel index b8626c4c..d00491fd 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -4 +1 diff --git a/version b/version index 1b0a87fd..1c8fd19f 100644 --- a/version +++ b/version @@ -1 +1 @@ -4.17.1 +4.17.2 diff --git a/xen-4.17.2-git.tar.gz.sha512 b/xen-4.17.2-git.tar.gz.sha512 new file mode 100644 index 00000000..d82f09c4 --- /dev/null +++ b/xen-4.17.2-git.tar.gz.sha512 @@ -0,0 +1 @@ +71496616372e7cf3bf024234980420a26cfd715df4a71c3704b4ef9d618b0236f409f6dc51ea2f7753e62a932bcaa788cd87bd99c4a1f5dab16f3a2db00d38a8 diff --git a/xen.spec.in b/xen.spec.in index 94db4de6..b477e5c8 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -69,7 +69,8 @@ Release: %{?rctag}@REL@%{?dist} Epoch: 2001 License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ -Source0: https://downloads.xenproject.org/release/xen/%{upstream_version}/xen-%{upstream_version}.tar.gz +#Source0: https://downloads.xenproject.org/release/xen/%%{upstream_version}/xen-%%{upstream_version}.tar.gz +Source0: xen-%{upstream_version}-git.tar.gz Source2: %{name}.logrotate # .config file for xen hypervisor Source3: config From d46117635980e73fc4edc164a136e3c1f6260f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 21 Sep 2023 02:18:09 +0200 Subject: [PATCH 16/64] Apply XSA-439 fix --- ...enbleed-check-to-models-good-ucode-i.patch | 48 ++++ ...x-confusion-between-SPEC_CTRL_EXIT_T.patch | 74 ++++++ ...ld-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch | 85 +++++++ ...rn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch | 83 +++++++ ...prove-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch | 106 ++++++++ ...-restore_all_xen-to-hold-stack_end-i.patch | 74 ++++++ ...the-IST-ness-of-an-entry-for-the-exi.patch | 109 +++++++++ ...rl-Issue-VERW-during-IST-exit-to-Xen.patch | 89 +++++++ ...troduce-is_zen-1-2-_uarch-predicates.patch | 91 +++++++ ...c-ctrl-Mitigate-the-Zen1-DIV-leakage.patch | 228 ++++++++++++++++++ xen.spec.in | 10 + 11 files changed, 997 insertions(+) create mode 100644 0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch create mode 100644 0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch create mode 100644 0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch create mode 100644 0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch create mode 100644 0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch create mode 100644 0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch create mode 100644 0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch create mode 100644 0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch create mode 100644 0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch create mode 100644 0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch diff --git a/0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch new file mode 100644 index 00000000..2b2df9db --- /dev/null +++ b/0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch @@ -0,0 +1,48 @@ +From d2d2dcae879c6cc05227c9620f0a772f35fe6886 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Wed, 23 Aug 2023 09:26:36 +0200 +Subject: [PATCH 501/510] x86/AMD: extend Zenbleed check to 
models "good" ucode + isn't known for + +Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and +0x91, (quoting the respective Linux commit) is similarly affected. Put +another instance of our Zen1 vs Zen2 distinction checks in +amd_check_zenbleed(), forcing use of the chickenbit irrespective of +ucode version (building upon real hardware never surfacing a version of +0xffffffff). + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302) +--- + xen/arch/x86/cpu/amd.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 3ea214fc2e84..1bb3044be15b 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -909,10 +909,17 @@ void amd_check_zenbleed(void) + case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + default: + /* +- * With the Fam17h check above, parts getting here are Zen1. +- * They're not affected. ++ * With the Fam17h check above, most parts getting here are ++ * Zen1. They're not affected. Assume Zen2 ones making it ++ * here are affected regardless of microcode version. ++ * ++ * Zen1 vs Zen2 isn't a simple model number comparison, so use ++ * STIBP as a heuristic to distinguish. + */ +- return; ++ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ return; ++ good_rev = ~0U; ++ break; + } + + rdmsrl(MSR_AMD64_DE_CFG, val); +-- +2.41.0 + diff --git a/0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch new file mode 100644 index 00000000..f0898558 --- /dev/null +++ b/0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch @@ -0,0 +1,74 @@ +From dc28aba565f226f9bec24cfde993e78478acfb4e Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 12 Sep 2023 15:06:49 +0100 +Subject: [PATCH 502/510] x86/spec-ctrl: Fix confusion between + SPEC_CTRL_EXIT_TO_XEN{,_IST} + +c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths") +dropped the only user, leaving behind the (incorrect) implication that Xen had +split exit paths. + +Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST +to SPEC_CTRL_EXIT_TO_XEN for consistency. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 ++-------- + xen/arch/x86/x86_64/entry.S | 2 +- + 2 files changed, 3 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index f23bb105c51e..e8fd01243ce7 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -79,7 +79,6 @@ + * - SPEC_CTRL_ENTRY_FROM_PV + * - SPEC_CTRL_ENTRY_FROM_INTR + * - SPEC_CTRL_ENTRY_FROM_INTR_IST +- * - SPEC_CTRL_EXIT_TO_XEN_IST + * - SPEC_CTRL_EXIT_TO_XEN + * - SPEC_CTRL_EXIT_TO_PV + * +@@ -268,11 +267,6 @@ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ + X86_FEATURE_SC_MSR_PV + +-/* Use when exiting to Xen context. */ +-#define SPEC_CTRL_EXIT_TO_XEN \ +- ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV +- + /* Use when exiting to PV guest context. */ + #define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE "", \ +@@ -339,8 +333,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + UNLIKELY_END(\@_serialise) + .endm + +-/* Use when exiting to Xen in IST context. 
*/ +-.macro SPEC_CTRL_EXIT_TO_XEN_IST ++/* Use when exiting to Xen context. */ ++.macro SPEC_CTRL_EXIT_TO_XEN + /* + * Requires %rbx=stack_end + * Clobbers %rax, %rcx, %rdx +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 7675a59ff057..b45a09823a03 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -673,7 +673,7 @@ UNLIKELY_START(ne, exit_cr3) + UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ + + RESTORE_ALL adj=8 + iretq +-- +2.41.0 + diff --git a/0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch new file mode 100644 index 00000000..96b6e4c4 --- /dev/null +++ b/0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch @@ -0,0 +1,85 @@ +From 84690fb82c4f4aecb72a6789d8994efa74841e09 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 12 Sep 2023 17:03:16 +0100 +Subject: [PATCH 503/510] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into + it's single user + +With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that +there's only a single EXIT_TO_XEN path. Fold DO_SPEC_CTRL_EXIT_TO_XEN into +SPEC_CTRL_EXIT_TO_XEN to simplify further fixes. + +When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own +is going to be too generic shortly. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 40 ++++++++++-------------- + 1 file changed, 16 insertions(+), 24 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index e8fd01243ce7..d5f65d80eafb 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -211,27 +211,6 @@ + wrmsr + .endm + +-.macro DO_SPEC_CTRL_EXIT_TO_XEN +-/* +- * Requires %rbx=stack_end +- * Clobbers %rax, %rcx, %rdx +- * +- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is +- * in effect, and reload the shadow value. This covers race conditions which +- * exist with an NMI/MCE/etc hitting late in the return-to-guest path. +- */ +- xor %edx, %edx +- +- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) +- jz .L\@_skip +- +- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax +- mov $MSR_SPEC_CTRL, %ecx +- wrmsr +- +-.L\@_skip: +-.endm +- + .macro DO_SPEC_CTRL_EXIT_TO_GUEST + /* + * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo +@@ -340,11 +319,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + * Clobbers %rax, %rcx, %rdx + */ + testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) +- jz .L\@_skip ++ jz .L\@_skip_sc_msr + +- DO_SPEC_CTRL_EXIT_TO_XEN ++ /* ++ * When returning to Xen context, look to see whether SPEC_CTRL shadowing ++ * is in effect, and reload the shadow value. This covers race conditions ++ * which exist with an NMI/MCE/etc hitting late in the return-to-guest ++ * path. 
++ */ ++ xor %edx, %edx + +-.L\@_skip: ++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ jz .L\@_skip_sc_msr ++ ++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax ++ mov $MSR_SPEC_CTRL, %ecx ++ wrmsr ++ ++.L\@_skip_sc_msr: + .endm + + #endif /* __ASSEMBLY__ */ +-- +2.41.0 + diff --git a/0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch new file mode 100644 index 00000000..93c96563 --- /dev/null +++ b/0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch @@ -0,0 +1,83 @@ +From 3952c73bdbd05f0e666986fce633a591237b3c88 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 1 Sep 2023 11:38:44 +0100 +Subject: [PATCH 504/510] x86/spec-ctrl: Turn the remaining + SPEC_CTRL_{ENTRY,EXIT}_* into asm macros + +These have grown more complex over time, with some already having been +converted. + +Provide full Requires/Clobbers comments, otherwise missing at this level of +indirection. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 37 ++++++++++++++++++------ + 1 file changed, 28 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index d5f65d80eafb..c6d5f2ad0142 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -231,26 +231,45 @@ + .endm + + /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ +-#define SPEC_CTRL_ENTRY_FROM_PV \ ++.macro SPEC_CTRL_ENTRY_FROM_PV ++/* ++ * Requires %rsp=regs/cpuinfo, %rdx=0 ++ * Clobbers %rax, %rcx, %rdx ++ */ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ +- X86_FEATURE_IBPB_ENTRY_PV; \ +- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ ++ X86_FEATURE_IBPB_ENTRY_PV ++ ++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV ++ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ + X86_FEATURE_SC_MSR_PV ++.endm + + /* Use in interrupt/exception context. May interrupt Xen or PV context. */ +-#define SPEC_CTRL_ENTRY_FROM_INTR \ ++.macro SPEC_CTRL_ENTRY_FROM_INTR ++/* ++ * Requires %rsp=regs, %r14=stack_end, %rdx=0 ++ * Clobbers %rax, %rcx, %rdx ++ */ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ +- X86_FEATURE_IBPB_ENTRY_PV; \ +- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ ++ X86_FEATURE_IBPB_ENTRY_PV ++ ++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV ++ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ + X86_FEATURE_SC_MSR_PV ++.endm + + /* Use when exiting to PV guest context. */ +-#define SPEC_CTRL_EXIT_TO_PV \ +- ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ ++.macro SPEC_CTRL_EXIT_TO_PV ++/* ++ * Requires %rax=spec_ctrl, %rsp=regs/info ++ * Clobbers %rcx, %rdx ++ */ ++ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV ++ + DO_SPEC_CTRL_COND_VERW ++.endm + + /* + * Use in IST interrupt/exception context. May interrupt Xen or PV context. 
+-- +2.41.0 + diff --git a/0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch new file mode 100644 index 00000000..4a0b5a00 --- /dev/null +++ b/0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch @@ -0,0 +1,106 @@ +From ba023e93d0b1e60b80251bf080bab694efb9f8e3 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 30 Aug 2023 20:11:50 +0100 +Subject: [PATCH 505/510] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_* + comments + +... to better explain how they're used. + +Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the +corner case when e.g. an NMI hits late in an exit-to-guest path. + +Leave a TODO, which will be addressed in subsequent patches which arrange for +VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++++++---- + 1 file changed, 31 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index c6d5f2ad0142..97c4db31cde9 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -230,7 +230,10 @@ + wrmsr + .endm + +-/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ ++/* ++ * Used after an entry from PV context: SYSCALL, SYSENTER, INT, ++ * etc. There is always a guest speculation state in context. ++ */ + .macro SPEC_CTRL_ENTRY_FROM_PV + /* + * Requires %rsp=regs/cpuinfo, %rdx=0 +@@ -245,7 +248,11 @@ + X86_FEATURE_SC_MSR_PV + .endm + +-/* Use in interrupt/exception context. May interrupt Xen or PV context. */ ++/* ++ * Used after an exception or maskable interrupt, hitting Xen or PV context. ++ * There will either be a guest speculation context, or (barring fatal ++ * exceptions) a well-formed Xen speculation context. ++ */ + .macro SPEC_CTRL_ENTRY_FROM_INTR + /* + * Requires %rsp=regs, %r14=stack_end, %rdx=0 +@@ -260,7 +267,10 @@ + X86_FEATURE_SC_MSR_PV + .endm + +-/* Use when exiting to PV guest context. */ ++/* ++ * Used when exiting from any entry context, back to PV context. This ++ * includes from an IST entry which moved onto the primary stack. ++ */ + .macro SPEC_CTRL_EXIT_TO_PV + /* + * Requires %rax=spec_ctrl, %rsp=regs/info +@@ -272,7 +282,13 @@ + .endm + + /* +- * Use in IST interrupt/exception context. May interrupt Xen or PV context. ++ * Used after an IST entry hitting Xen or PV context. Special care is needed, ++ * because when hitting Xen context, there may not be a well-formed ++ * speculation context. (i.e. it can hit in the middle of ++ * SPEC_CTRL_{ENTRY,EXIT}_* regions.) ++ * ++ * An IST entry which hits PV context moves onto the primary stack and leaves ++ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN. + */ + .macro SPEC_CTRL_ENTRY_FROM_INTR_IST + /* +@@ -331,7 +347,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + UNLIKELY_END(\@_serialise) + .endm + +-/* Use when exiting to Xen context. */ ++/* ++ * Use when exiting from any entry context, back to Xen context. This ++ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an ++ * incomplete speculation context. ++ * ++ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we ++ * need to treat this as if it were an EXIT_TO_$GUEST case too. 
++ */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* + * Requires %rbx=stack_end +@@ -356,6 +379,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + wrmsr + + .L\@_skip_sc_msr: ++ ++ /* TODO VERW */ ++ + .endm + + #endif /* __ASSEMBLY__ */ +-- +2.41.0 + diff --git a/0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch new file mode 100644 index 00000000..2b46cec5 --- /dev/null +++ b/0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch @@ -0,0 +1,74 @@ +From 5f7efd47c8273fde972637d0360851802f76eca9 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 13 Sep 2023 13:48:16 +0100 +Subject: [PATCH 506/510] x86/entry: Adjust restore_all_xen to hold stack_end + in %r14 + +All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it +for consistency. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 8 ++++---- + xen/arch/x86/x86_64/entry.S | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 97c4db31cde9..66c706496f94 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -357,10 +357,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* +- * Requires %rbx=stack_end ++ * Requires %r14=stack_end + * Clobbers %rax, %rcx, %rdx + */ +- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) + jz .L\@_skip_sc_msr + + /* +@@ -371,10 +371,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + xor %edx, %edx + +- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) + jz .L\@_skip_sc_msr + +- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax ++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax + mov $MSR_SPEC_CTRL, %ecx + wrmsr + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index b45a09823a03..92279a225dd6 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -665,15 +665,15 @@ restore_all_xen: + * Check whether we need to switch to the per-CPU page tables, in + * case we return to late PV exit code (from an NMI or #MC). + */ +- GET_STACK_END(bx) +- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) ++ GET_STACK_END(14) ++ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) + UNLIKELY_START(ne, exit_cr3) +- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax ++ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax + mov %rax, %cr3 + UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ +- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ + + RESTORE_ALL adj=8 + iretq +-- +2.41.0 + diff --git a/0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch new file mode 100644 index 00000000..3de9cd4f --- /dev/null +++ b/0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch @@ -0,0 +1,109 @@ +From e4a71bc0da0baf7464bb0d8e33053f330e5ea366 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 13 Sep 2023 12:20:12 +0100 +Subject: [PATCH 507/510] x86/entry: Track the IST-ness of an entry for the + exit paths + +Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the +entry/exit asm, so it only needs setting in the IST path. + +As this is subtle and fragile, add check_ist_exit() to be used in debugging +builds to cross-check that the ist_exit boolean matches the entry vector. + +Write check_ist_exit() it in C, because it's debug only and the logic more +complicated than I care to maintain in asm. + +For now, we only need to use this signal in the exit-to-Xen path, but some +exit-to-guest paths happen in IST context too. Check the correctness in all +exit paths to avoid the logic bit-rotting. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c) + +x86/entry: Partially revert IST-exit checks + +The patch adding check_ist_exit() didn't account for the fact that +reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in +%r12 doesn't survive into the next context, and is a stale value C. + +This shows up in Gitlab CI for the Clang build: + + https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827 + +and in OSSTest for GCC 8: + + http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log + +There's no straightforward way to reconstruct the IST-exit-ness on the +exit-to-guest path after a context switch. For now, we only need IST-exit on +the return-to-Xen path. + +Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9) +--- + xen/arch/x86/traps.c | 13 +++++++++++++ + xen/arch/x86/x86_64/entry.S | 13 ++++++++++++- + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index d12004b1c6fc..e65cc6004148 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -2315,6 +2315,19 @@ void asm_domain_crash_synchronous(unsigned long addr) + do_softirq(); + } + ++#ifdef CONFIG_DEBUG ++void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit) ++{ ++ const unsigned int ist_mask = ++ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) | ++ (1U << X86_EXC_DF) | (1U << X86_EXC_MC); ++ uint8_t ev = regs->entry_vector; ++ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask); ++ ++ ASSERT(is_ist == ist_exit); ++} ++#endif ++ + /* + * Local variables: + * mode: C +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 92279a225dd6..4cebc4fbe33f 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -659,8 +659,15 @@ ENTRY(early_page_fault) + .section .text.entry, "ax", @progbits + + ALIGN +-/* No special register assumptions. 
*/ ++/* %r12=ist_exit */ + restore_all_xen: ++ ++#ifdef CONFIG_DEBUG ++ mov %rsp, %rdi ++ mov %r12, %rsi ++ call check_ist_exit ++#endif ++ + /* + * Check whether we need to switch to the per-CPU page tables, in + * case we return to late PV exit code (from an NMI or #MC). +@@ -1091,6 +1098,10 @@ handle_ist_exception: + .L_ist_dispatch_done: + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) ++ ++ /* This is an IST exit */ ++ mov $1, %r12d ++ + cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) + jne ret_from_intr + +-- +2.41.0 + diff --git a/0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch new file mode 100644 index 00000000..c21173a0 --- /dev/null +++ b/0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch @@ -0,0 +1,89 @@ +From 2e2c3efcfc9f183674a8de6ed954ffbe7188b70d Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 13 Sep 2023 13:53:33 +0100 +Subject: [PATCH 508/510] x86/spec-ctrl: Issue VERW during IST exit to Xen + +There is a corner case where e.g. an NMI hitting an exit-to-guest path after +SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW +flush to scrub potentially sensitive data from uarch buffers. + +In order to compensate, issue VERW when exiting to Xen from an IST entry. + +SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack, +and we're about to add a third. Load the field into %ebx, and list the +register as clobbered. + +%r12 has been arranged to be the ist_exit signal, so add this as an input +dependency and use it to identify when to issue a VERW. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 20 +++++++++++++++----- + xen/arch/x86/x86_64/entry.S | 2 +- + 2 files changed, 16 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 66c706496f94..28a75796e652 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -357,10 +357,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* +- * Requires %r14=stack_end +- * Clobbers %rax, %rcx, %rdx ++ * Requires %r12=ist_exit, %r14=stack_end ++ * Clobbers %rax, %rbx, %rcx, %rdx + */ +- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx ++ ++ testb $SCF_ist_sc_msr, %bl + jz .L\@_skip_sc_msr + + /* +@@ -371,7 +373,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + xor %edx, %edx + +- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ testb $SCF_use_shadow, %bl + jz .L\@_skip_sc_msr + + mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax +@@ -380,8 +382,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + + .L\@_skip_sc_msr: + +- /* TODO VERW */ ++ test %r12, %r12 ++ jz .L\@_skip_ist_exit ++ ++ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ ++ testb $SCF_verw, %bl ++ jz .L\@_skip_verw ++ verw STACK_CPUINFO_FIELD(verw_sel)(%r14) ++.L\@_skip_verw: + ++.L\@_skip_ist_exit: + .endm + + #endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 4cebc4fbe33f..c12e011b4d2a 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -680,7 +680,7 @@ UNLIKELY_START(ne, exit_cr3) + UNLIKELY_END(exit_cr3) + + 
/* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ + + RESTORE_ALL adj=8 + iretq +-- +2.41.0 + diff --git a/0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch new file mode 100644 index 00000000..3bdff7f5 --- /dev/null +++ b/0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch @@ -0,0 +1,91 @@ +From 19ee1e1faa32b79274b3484cb1170a5970f1e602 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 15 Sep 2023 12:13:51 +0100 +Subject: [PATCH 509/510] x86/amd: Introduce is_zen{1,2}_uarch() predicates + +We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to +introduce a 4th. Wrap the heuristic into a pair of predicates rather than +opencoding it, and the explanation of the heuristic, at each usage site. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705) +--- + xen/arch/x86/cpu/amd.c | 18 ++++-------------- + xen/arch/x86/include/asm/amd.h | 11 +++++++++++ + 2 files changed, 15 insertions(+), 14 deletions(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 1bb3044be15b..e94ba5a0e006 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -855,15 +855,13 @@ void amd_set_legacy_ssbd(bool enable) + * non-branch instructions to be ignored. It is to be set unilaterally in + * newer microcode. + * +- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a +- * simple model number comparison, so use STIBP as a heuristic to separate the +- * two uarches in Fam17h(AMD)/18h(Hygon). ++ * This chickenbit is something unrelated on Zen1. + */ + void amd_init_spectral_chicken(void) + { + uint64_t val, chickenbit = 1 << 1; + +- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ if (cpu_has_hypervisor || !is_zen2_uarch()) + return; + + if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) +@@ -912,11 +910,8 @@ void amd_check_zenbleed(void) + * With the Fam17h check above, most parts getting here are + * Zen1. They're not affected. Assume Zen2 ones making it + * here are affected regardless of microcode version. +- * +- * Zen1 vs Zen2 isn't a simple model number comparison, so use +- * STIBP as a heuristic to distinguish. + */ +- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ if (is_zen1_uarch()) + return; + good_rev = ~0U; + break; +@@ -1277,12 +1272,7 @@ static int __init cf_check zen2_c6_errata_check(void) + */ + s_time_t delta; + +- /* +- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as +- * a heuristic to separate the two uarches in Fam17h. +- */ +- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || +- !boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) + return 0; + + /* +diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h +index a975d3de2688..82324110abdf 100644 +--- a/xen/arch/x86/include/asm/amd.h ++++ b/xen/arch/x86/include/asm/amd.h +@@ -140,6 +140,17 @@ + AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \ + AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf)) + ++/* ++ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and ++ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP ++ * as a heuristic that distinguishes the two. 
++ *
++ * The caller is required to perform the appropriate vendor/family checks
++ * first.
++ */
++#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP)
++
+ struct cpuinfo_x86;
+ int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
+ 
+-- 
+2.41.0
+
diff --git a/0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
new file mode 100644
index 00000000..3655b5ac
--- /dev/null
+++ b/0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
@@ -0,0 +1,228 @@
+From 9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Wed, 30 Aug 2023 20:24:25 +0100
+Subject: [PATCH 510/510] x86/spec-ctrl: Mitigate the Zen1 DIV leakage
+
+In the Zen1 microarchitecture, there is one divider in the pipeline which
+services uops from both threads. In the case of #DE, the latched result from
+the previous DIV to execute will be forwarded speculatively.
+
+This is an interesting covert channel that allows two threads to communicate
+without any system calls. It also allows userspace to obtain the result of
+the most recent DIV instruction executed (even speculatively) in the core,
+which can be from a higher privilege context.
+
+Scrub the result from the divider by executing a non-faulting divide. This
+needs performing on the exit-to-guest paths, and ist_exit-to-Xen.
+
+Alternative patching in IST context is believed safe now that it's done in
+NMI context.
+
+This is XSA-439 / CVE-2023-20588.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315)
+---
+ docs/misc/xen-command-line.pandoc | 6 ++-
+ xen/arch/x86/hvm/svm/entry.S | 1 +
+ xen/arch/x86/include/asm/cpufeatures.h | 2 +-
+ xen/arch/x86/include/asm/spec_ctrl_asm.h | 17 +++++++++
+ xen/arch/x86/spec_ctrl.c | 48 +++++++++++++++++++++++-
+ 5 files changed, 71 insertions(+), 3 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index d9dae740ccba..b92c8f969c33 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2315,7 +2315,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ > {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+-> unpriv-mmio,gds-mit}=<bool> ]`
++> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
+ 
+ Controls for speculative execution sidechannel mitigations. By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2437,6 +2437,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate
+ GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use
+ of the AVX2 Gather instructions.
+ 
++On all hardware, the `div-scrub=` option can be used to force or prevent Xen
++from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
++DIV-leakage on hardware believed to be vulnerable.
++
+ ### sync_console
+ > `= <boolean>`
+ 
+diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
+index 981cd82e7c0b..934f12cf5cdd 100644
+--- a/xen/arch/x86/hvm/svm/entry.S
++++ b/xen/arch/x86/hvm/svm/entry.S
+@@ -74,6 +74,7 @@ __UNLIKELY_END(nsvm_hap)
+ 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. 
*/ + .endm + ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM ++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV + + pop %r15 + pop %r14 +diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h +index da0593de8542..c3aad21c3b43 100644 +--- a/xen/arch/x86/include/asm/cpufeatures.h ++++ b/xen/arch/x86/include/asm/cpufeatures.h +@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM + XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ + XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ + XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ +-/* Bits 23 unused. */ ++XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */ + XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */ + XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ + XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 28a75796e652..f4b8b9d9561c 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -177,6 +177,19 @@ + .L\@_verw_skip: + .endm + ++.macro DO_SPEC_CTRL_DIV ++/* ++ * Requires nothing ++ * Clobbers %rax ++ * ++ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any ++ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber ++ * %rdx. ++ */ ++ mov $1, %eax ++ div %al ++.endm ++ + .macro DO_SPEC_CTRL_ENTRY maybexen:req + /* + * Requires %rsp=regs (also cpuinfo if !maybexen) +@@ -279,6 +292,8 @@ + ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV + + DO_SPEC_CTRL_COND_VERW ++ ++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV + .endm + + /* +@@ -391,6 +406,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + verw STACK_CPUINFO_FIELD(verw_sel)(%r14) + .L\@_skip_verw: + ++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV ++ + .L\@_skip_ist_exit: + .endm + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 79b98f0fe7ba..0ff3c895ac72 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -79,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1; + static bool __initdata opt_unpriv_mmio; + static bool __ro_after_init opt_fb_clear_mmio; + static int8_t __initdata opt_gds_mit = -1; ++static int8_t __initdata opt_div_scrub = -1; + + static int __init cf_check parse_spec_ctrl(const char *s) + { +@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_srb_lock = 0; + opt_unpriv_mmio = false; + opt_gds_mit = 0; ++ opt_div_scrub = 0; + } + else if ( val > 0 ) + rc = -EINVAL; +@@ -285,6 +287,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_unpriv_mmio = val; + else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 ) + opt_gds_mit = val; ++ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 ) ++ opt_div_scrub = val; + else + rc = -EINVAL; + +@@ -485,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk) + "\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n", ++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? 
"LFENCE" : +@@ -510,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk) + opt_l1d_flush ? " L1D_FLUSH" : "", + opt_md_clear_pv || opt_md_clear_hvm || + opt_fb_clear_mmio ? " VERW" : "", ++ opt_div_scrub ? " DIV" : "", + opt_branch_harden ? " BRANCH_HARDEN" : ""); + + /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ +@@ -967,6 +972,45 @@ static void __init srso_calculations(bool hw_smt_enabled) + setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + } + ++/* ++ * The Div leakage issue is specific to the AMD Zen1 microarchitecure. ++ * ++ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no ++ * hope of spotting the case where we might move to vulnerable hardware. We ++ * also can't make any useful conclusion about SMT-ness. ++ * ++ * Don't check the hypervisor bit, so at least we do the safe thing when ++ * booting on something that looks like a Zen1 CPU. ++ */ ++static bool __init has_div_vuln(void) ++{ ++ if ( !(boot_cpu_data.x86_vendor & ++ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) ++ return false; ++ ++ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 ) ++ return false; ++ ++ return is_zen1_uarch(); ++} ++ ++static void __init div_calculations(bool hw_smt_enabled) ++{ ++ bool cpu_bug_div = has_div_vuln(); ++ ++ if ( opt_div_scrub == -1 ) ++ opt_div_scrub = cpu_bug_div; ++ ++ if ( opt_div_scrub ) ++ setup_force_cpu_cap(X86_FEATURE_SC_DIV); ++ ++ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled ) ++ warning_add( ++ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n" ++ "enabled. Please assess your configuration and choose an\n" ++ "explicit 'smt=' setting. See XSA-439.\n"); ++} ++ + static void __init ibpb_calculations(void) + { + bool def_ibpb_entry = false; +@@ -1726,6 +1770,8 @@ void __init init_speculation_mitigations(void) + + ibpb_calculations(); + ++ div_calculations(hw_smt_enabled); ++ + /* Check whether Eager FPU should be enabled by default. 
*/ + if ( opt_eager_fpu == -1 ) + opt_eager_fpu = should_use_eager_fpu(); +-- +2.41.0 + diff --git a/xen.spec.in b/xen.spec.in index b477e5c8..0786bf79 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -107,6 +107,16 @@ Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch +Patch0501: 0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch +Patch0502: 0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch +Patch0503: 0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch +Patch0504: 0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch +Patch0505: 0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch +Patch0506: 0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch +Patch0507: 0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch +Patch0508: 0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch +Patch0509: 0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch +Patch0510: 0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From a480b9e211bdb383c340b98877369015b7a13c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 21 Sep 2023 02:19:34 +0200 Subject: [PATCH 17/64] Backport fix for OpenBSD 7.3 booting Fixes QubesOS/qubes-issues#8502 --- ...not-expose-HWCR.TscFreqSel-to-guests.patch | 59 +++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 60 insertions(+) create mode 100644 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch diff --git a/0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch b/0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch new file mode 100644 index 00000000..480aba7c --- /dev/null +++ b/0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch @@ -0,0 +1,59 @@ +From e4ca4e261da3fdddd541c3a9842b1e9e2ad00525 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Mon, 18 Sep 2023 15:07:49 +0200 +Subject: [PATCH] x86/amd: do not expose HWCR.TscFreqSel to guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +OpenBSD 7.3 will unconditionally access HWCR if the TSC is reported as +Invariant, and it will then attempt to also unconditionally access PSTATE0 if +HWCR.TscFreqSel is set (currently the case on Xen). + +The motivation for exposing HWCR.TscFreqSel was to avoid warning messages from +Linux. It has been agreed that Linux should be changed instead to not +complain about missing HWCR.TscFreqSel when running virtualized. + +The relation between HWCR.TscFreqSel and PSTATE0 is not clearly written down in +the PPR, but it's natural for OSes to attempt to fetch the P0 frequency if the +TSC is stated to increment at the P0 frequency. + +Exposing PSTATEn (PSTATE0 at least) with all zeroes is not a suitable solution +because the PstateEn bit is read-write, and OSes could legitimately attempt to +set PstateEn=1 which Xen couldn't handle. + +Furthermore, the TscFreqSel bit is model specific and was never safe to expose +like this in the first place. At a minimum it should have had a toolstack +adjustment to know not to migrate such a VM. + +Therefore, simply remove the bit. Note the HWCR itself is an architectural +register, and does need to be accessible by the guest.
Since HWCR contains +both architectural and non-architectural bits, going forward care must be taken +to assert the exposed value is correct on newer CPU families. + +Reported-by: Solène Rapenne +Link: https://github.com/QubesOS/qubes-issues/issues/8502 +Fixes: 14b95b3b8546 ('x86/AMD: expose HWCR.TscFreqSel to guests') +Signed-off-by: Roger Pau Monné +Reviewed-by: Andrew Cooper +--- + xen/arch/x86/msr.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 3f0450259cdf..c33dc78cd8f6 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -240,8 +240,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) + case MSR_K8_HWCR: + if ( !(cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) + goto gp_fault; +- *val = get_cpu_family(cp->basic.raw_fms, NULL, NULL) >= 0x10 +- ? K8_HWCR_TSC_FREQ_SEL : 0; ++ *val = 0; + break; + + case MSR_VIRT_SPEC_CTRL: +-- +2.41.0 + diff --git a/xen.spec.in b/xen.spec.in index 0786bf79..a64b2b9b 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -104,6 +104,7 @@ Patch0306: 0306-x86-Replace-PAT_-with-X86_MT_.patch Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch +Patch0310: 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch From 57e8e741df44f2171be458812b1f990514b7f51b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 21 Sep 2023 02:40:27 +0200 Subject: [PATCH 18/64] Switch back to official tarballs --- .qubesbuilder | 10 ++++------ Makefile | 5 ----- archlinux/PKGBUILD.in | 2 +- xen-4.17.2-git.tar.gz.sha512 | 1 - xen.spec.in | 3 +-- 5 files changed, 6 insertions(+), 15 deletions(-) delete mode 100644 xen-4.17.2-git.tar.gz.sha512 diff --git a/.qubesbuilder b/.qubesbuilder index 19fc45e0..8700b019 100644 --- a/.qubesbuilder +++ b/.qubesbuilder @@ -8,9 +8,7 @@ vm: - archlinux source: files: -# - url: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz -# signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig -# pubkeys: -# - xen.org-key.asc - - url: https://ftp.qubes-os.org/distfiles/xen-@VERSION@-git.tar.gz - sha512: xen-@VERSION@-git.tar.gz.sha512 + - url: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz + signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig + pubkeys: + - xen.org-key.asc diff --git a/Makefile b/Makefile index adb5e2cb..22a2139c 100644 --- a/Makefile +++ b/Makefile @@ -12,11 +12,6 @@ UNTRUSTED_SUFF := .UNTRUSTED URLS := \ https://downloads.xenproject.org/release/xen/$(VERSION)/xen-$(VERSION).tar.gz.sig -# temporarily use git snapshot -URLS := \ - https://ftp.qubes-os.org/distfiles/xen-$(VERSION)-git.tar.gz - - ALL_FILES := $(notdir $(URLS:%.sig=%)) $(notdir $(filter %.sig, $(URLS))) ALL_URLS := $(URLS:%.sig=%) $(filter %.sig, $(URLS)) diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index efac0519..6743362f 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -22,7 +22,7 @@ _patches=( 1102-docs-xen-headers-use-alphabetical-sorting-for-incont.patch 1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch ) -source=(xen-$_upstream_pkgver-git.tar.gz "${_patches[@]}") +source=(xen-$_upstream_pkgver.tar.gz "${_patches[@]}") 
md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) prepare() { diff --git a/xen-4.17.2-git.tar.gz.sha512 b/xen-4.17.2-git.tar.gz.sha512 deleted file mode 100644 index d82f09c4..00000000 --- a/xen-4.17.2-git.tar.gz.sha512 +++ /dev/null @@ -1 +0,0 @@ -71496616372e7cf3bf024234980420a26cfd715df4a71c3704b4ef9d618b0236f409f6dc51ea2f7753e62a932bcaa788cd87bd99c4a1f5dab16f3a2db00d38a8 diff --git a/xen.spec.in b/xen.spec.in index a64b2b9b..d9148ece 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -69,8 +69,7 @@ Release: %{?rctag}@REL@%{?dist} Epoch: 2001 License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ -#Source0: https://downloads.xenproject.org/release/xen/%%{upstream_version}/xen-%%{upstream_version}.tar.gz -Source0: xen-%{upstream_version}-git.tar.gz +Source0: https://downloads.xenproject.org/release/xen/%{upstream_version}/xen-%{upstream_version}.tar.gz Source2: %{name}.logrotate # .config file for xen hypervisor Source3: config From 5cd1d9157e8c11ba2f774919c2b21fc81603fbd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 21 Sep 2023 03:18:28 +0200 Subject: [PATCH 19/64] version 4.17.2-2 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index d00491fd..0cfbf088 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -1 +2 From 9596786c2540b175903a894a20c904c63afd8922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sat, 7 Oct 2023 00:18:05 +0200 Subject: [PATCH 20/64] Compress old console logs They may grow quite big in some cases, and are very compressible. --- xen.logrotate | 1 + 1 file changed, 1 insertion(+) diff --git a/xen.logrotate b/xen.logrotate index b6114cbd..1e4aedb9 100644 --- a/xen.logrotate +++ b/xen.logrotate @@ -3,6 +3,7 @@ /var/log/xen/console/*.log{ notifempty missingok + compress copytruncate su root qubes } From 5b6bf0628d1d1ca1fee6f52c89383e2d831b541e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 14 Nov 2022 16:25:55 +0100 Subject: [PATCH 21/64] Apply patches for MSI-X support with stubdomain Fixes QubesOS/qubes-issues#4799 Fixes QubesOS/qubes-issues#7057 Fixes QubesOS/qubes-issues#7052 --- ...ugh-all-MSI-X-vector-ctrl-writes-to-.patch | 224 +++++++++ ...ites-to-registers-on-the-same-page-a.patch | 440 ++++++++++++++++++ xen.spec.in | 3 + 3 files changed, 667 insertions(+) create mode 100644 0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch create mode 100644 0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch diff --git a/0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch b/0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch new file mode 100644 index 00000000..ef29fdac --- /dev/null +++ b/0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch @@ -0,0 +1,224 @@ +From 829e622f3a84b62ec9593568e06122023bffe9f2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + +Date: Mon, 14 Nov 2022 13:56:56 +0100 +Subject: [PATCH 1/2] x86/msi: passthrough all MSI-X vector ctrl writes to + device model +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +QEMU needs to know whether clearing maskbit of a vector is really +clearing, or was already cleared before. Currently Xen sends only +writes clearing that bit to the device model, not those setting it, so QEMU +cannot detect it. Because of that, QEMU is working around this by +checking via /dev/mem, but that isn't the proper approach.
+ +Give all necessary information to QEMU by passing all ctrl writes, +including masking a vector. Advertise the new behavior via +XENVER_get_features, so QEMU can know it doesn't need to access /dev/mem +anymore. + +While this commit doesn't move the whole maskbit handling to QEMU (as +discussed on xen-devel as one of the possibilities), it is a necessary +first step anyway, including telling QEMU it will get all the required +information to do so. The actual implementation would need to include: + - a hypercall for QEMU to control just maskbit (without (re)binding the + interrupt again) + - a method for QEMU to tell Xen it will actually do the work +Those are not part of this series. + +Signed-off-by: Marek Marczykowski-Górecki +--- +I did not add any control to enable/disable this new behavior (as +Roger has suggested for possible non-QEMU ioreqs). And especially not +as part of XEN_DMOP_get_ioreq_server_info, as the behavior isn't really +per-ioreq-server but at best per-device (or system-wide, as it's +currently done). I don't see how the new behavior could be problematic +for some existing ioreq server (they already received writes to those +addresses, just not all of them), but if that's really necessary, I can +probably add a command line option to restore previous behavior +system-wide. + +Changes in v4: +- ignore unaligned writes with X86EMUL_OKAY +- restructure the code to forward all writes in _msixtbl_write() instead + of manipulating return value of msixtbl_write() - this makes + WRITE_LEN4_COMPLETION special case unnecessary +- advertise the changed behavior via XENVER_get_features instead of DMOP +v3: + - advertise changed behavior in XEN_DMOP_get_ioreq_server_info - make + "flags" parameter IN/OUT + - move len check back to msixtbl_write() - will be needed there anyway + in a later patch +v2: + - passthrough quad writes to emulator too (Jan) + - (ab)use len==0 for write len=4 completion (Jan), but add descriptive + #define for this magic value +--- + xen/arch/x86/hvm/vmsi.c | 27 ++++++++++++++++++++++----- + xen/common/ioreq.c | 9 +++++++-- + xen/common/kernel.c | 1 + + xen/include/public/features.h | 8 ++++++++ + xen/include/public/hvm/dm_op.h | 12 ++++++++---- + 5 files changed, 46 insertions(+), 11 deletions(-) + +diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c +index 3cd4923060c8..ebc052b8ea84 100644 +--- a/xen/arch/x86/hvm/vmsi.c ++++ b/xen/arch/x86/hvm/vmsi.c +@@ -272,6 +272,14 @@ out: + return r; + } + ++/* ++ * This function returns X86EMUL_UNHANDLEABLE even if write is properly ++ * handled, to propagate it to the device model (so it can keep its internal ++ * state in sync). ++ * len==0 means really len==4, but as a write completion that will return ++ * X86EMUL_OKAY on successful processing. Use WRITE_LEN4_COMPLETION to make it ++ * less confusing.
++ */ + static int msixtbl_write(struct vcpu *v, unsigned long address, + unsigned int len, unsigned long val) + { +@@ -283,8 +291,8 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, + unsigned long flags; + struct irq_desc *desc; + +- if ( (len != 4 && len != 8) || (address & (len - 1)) ) +- return r; ++ if ( !IS_ALIGNED(address, len) ) ++ return X86EMUL_OKAY; + + rcu_read_lock(&msixtbl_rcu_lock); + +@@ -345,8 +353,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, + + unlock: + spin_unlock_irqrestore(&desc->lock, flags); +- if ( len == 4 ) +- r = X86EMUL_OKAY; ++ r = X86EMUL_OKAY; + + out: + rcu_read_unlock(&msixtbl_rcu_lock); +@@ -357,7 +364,17 @@ static int cf_check _msixtbl_write( + const struct hvm_io_handler *handler, uint64_t address, uint32_t len, + uint64_t val) + { +- return msixtbl_write(current, address, len, val); ++ /* ignore invalid length or unaligned writes */ ++ if ( len != 4 && len != 8 || !IS_ALIGNED(address, len) ) ++ return X86EMUL_OKAY; ++ ++ /* ++ * This function returns X86EMUL_UNHANDLEABLE even if write is properly ++ * handled, to propagate it to the device model (so it can keep its ++ * internal state in sync). ++ */ ++ msixtbl_write(current, address, len, val); ++ return X86EMUL_UNHANDLEABLE; + } + + static bool cf_check msixtbl_range( +diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c +index ecb8f545e1c4..bd6f074c1e85 100644 +--- a/xen/common/ioreq.c ++++ b/xen/common/ioreq.c +@@ -743,7 +743,8 @@ static int ioreq_server_destroy(struct domain *d, ioservid_t id) + static int ioreq_server_get_info(struct domain *d, ioservid_t id, + unsigned long *ioreq_gfn, + unsigned long *bufioreq_gfn, +- evtchn_port_t *bufioreq_port) ++ evtchn_port_t *bufioreq_port, ++ uint16_t *flags) + { + struct ioreq_server *s; + int rc; +@@ -779,6 +780,9 @@ static int ioreq_server_get_info(struct domain *d, ioservid_t id, + *bufioreq_port = s->bufioreq_evtchn; + } + ++ /* Advertise supported features/behaviors. */ ++ *flags = XEN_DMOP_all_msix_writes; ++ + rc = 0; + + out: +@@ -1374,7 +1378,8 @@ int ioreq_server_dm_op(struct xen_dm_op *op, struct domain *d, bool *const_op) + NULL : (unsigned long *)&data->ioreq_gfn, + (data->flags & XEN_DMOP_no_gfns) ? + NULL : (unsigned long *)&data->bufioreq_gfn, +- &data->bufioreq_port); ++ &data->bufioreq_port, &data->flags); ++ + break; + } + +diff --git a/xen/common/kernel.c b/xen/common/kernel.c +index 0e8abe0cf8a8..37386f74797b 100644 +--- a/xen/common/kernel.c ++++ b/xen/common/kernel.c +@@ -578,6 +578,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) + fi.submap |= (1U << XENFEAT_direct_mapped); + else + fi.submap |= (1U << XENFEAT_not_direct_mapped); ++ fi.submap |= (1U << XENFEAT_dm_msix_all_writes); + break; + default: + return -EINVAL; +diff --git a/xen/include/public/features.h b/xen/include/public/features.h +index d2a9175aae67..7b936655aadb 100644 +--- a/xen/include/public/features.h ++++ b/xen/include/public/features.h +@@ -111,6 +111,14 @@ + #define XENFEAT_not_direct_mapped 16 + #define XENFEAT_direct_mapped 17 + ++/* ++ * If set, Xen will pass through all MSI-X vector ctrl writes to the device ++ * model, not only those unmasking an entry. This allows the device model to ++ * properly keep track of the MSI-X table without having to read it from the ++ * device behind Xen's back. This information is relevant only for device models.
++ */ ++#define XENFEAT_dm_msix_all_writes 18 ++ + #define XENFEAT_NR_SUBMAPS 1 + + #endif /* __XEN_PUBLIC_FEATURES_H__ */ +diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h +index acdf91693d0b..490b151c5dd7 100644 +--- a/xen/include/public/hvm/dm_op.h ++++ b/xen/include/public/hvm/dm_op.h +@@ -70,7 +70,9 @@ typedef struct xen_dm_op_create_ioreq_server xen_dm_op_create_ioreq_server_t; + * not contain XEN_DMOP_no_gfns then these pages will be made available and + * the frame numbers passed back in gfns and + * respectively. (If the IOREQ Server is not handling buffered emulation +- * only will be valid). ++ * only will be valid). When Xen returns the XEN_DMOP_all_msix_writes ++ * flag set, it will notify the IOREQ server about all writes to the MSI-X table ++ * (if it's handled by this IOREQ server), not only those clearing a mask bit. + * + * NOTE: To access the synchronous ioreq structures and buffered ioreq + * ring, it is preferable to use the XENMEM_acquire_resource memory +@@ -81,11 +83,13 @@ typedef struct xen_dm_op_create_ioreq_server xen_dm_op_create_ioreq_server_t; + struct xen_dm_op_get_ioreq_server_info { + /* IN - server id */ + ioservid_t id; +- /* IN - flags */ ++ /* IN/OUT - flags */ + uint16_t flags; + +-#define _XEN_DMOP_no_gfns 0 +-#define XEN_DMOP_no_gfns (1u << _XEN_DMOP_no_gfns) ++#define _XEN_DMOP_no_gfns 0 /* IN */ ++#define _XEN_DMOP_all_msix_writes 1 /* OUT */ ++#define XEN_DMOP_no_gfns (1u << _XEN_DMOP_no_gfns) ++#define XEN_DMOP_all_msix_writes (1u << _XEN_DMOP_all_msix_writes) + + /* OUT - buffered ioreq port */ + evtchn_port_t bufioreq_port; +-- +2.41.0 + diff --git a/0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch b/0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch new file mode 100644 index 00000000..2cd3a7ff --- /dev/null +++ b/0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch @@ -0,0 +1,440 @@ +From 308f548ee8f7171d8a34238f3f435dcf861ee499 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + +Date: Mon, 14 Nov 2022 15:50:46 +0100 +Subject: [PATCH 2/2] x86/hvm: Allow writes to registers on the same page as + MSI-X table +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some devices (notably the Intel Wifi 6 AX210 card) keep auxiliary registers +on the same page as the MSI-X table. The device model (especially one in a +stubdomain) cannot really handle those, as direct writes to that page are +refused (the page is on the mmio_ro_ranges list). Instead, extend +msixtbl_mmio_ops to handle such accesses too. + +Doing this requires correlating the write location with the guest view +of the MSI-X table address. Since QEMU doesn't map the MSI-X table to the guest, +it requires msixtbl_entry->gtable, which is HVM-only. A similar feature +for PV would need to be done separately. + +This will also be used to read the Pending Bit Array, if it lives on the same +page, so QEMU doesn't need /dev/mem access at all (especially helpful +with lockdown enabled in dom0). If the PBA lives on another page, QEMU will +map it to the guest directly. +If the PBA lives on the same page, discard writes and log a message. +Technically, writes outside of the PBA could be allowed, but at this moment +the precise location of the PBA isn't saved, and also no known device abuses +the spec in this way (at least yet). + +To access those registers, msixtbl_mmio_ops need the relevant page
MSI handling already has infrastructure for that, using fixmap, +so try to map first/last page of the MSI-X table (if necessary) and save +their fixmap indexes. Note that msix_get_fixmap() does reference +counting and reuses existing mapping, so just call it directly, even if +the page was mapped before. Also, it uses a specific range of fixmap +indexes which doesn't include 0, so use 0 as default ("not mapped") +value - which simplifies code a bit. + +GCC gets confused about 'desc' variable: + + arch/x86/hvm/vmsi.c: In function ‘msixtbl_range’: + arch/x86/hvm/vmsi.c:553:8: error: ‘desc’ may be used uninitialized [-Werror=maybe-uninitialized] + 553 | if ( desc ) + | ^ + arch/x86/hvm/vmsi.c:537:28: note: ‘desc’ was declared here + 537 | const struct msi_desc *desc; + | ^~~~ + +It's conditional initialization is actually correct (in the case where +it isn't initialized, function returns early), but to avoid +build failure initialize it explicitly to NULL anyway. + +Signed-off-by: Marek Marczykowski-Górecki +--- +Changes in v4: +- drop same_page parameter of msixtbl_find_entry(), distinguish two + cases in relevant callers +- rename adj_access_table_idx to adj_access_idx +- code style fixes +- drop alignment check in adjacent_{read,write}() - all callers already + have it earlier +- delay mapping first/last MSI-X pages until preparing device for a + passthrough +v3: + - merge handling into msixtbl_mmio_ops + - extend commit message +v2: + - adjust commit message + - pass struct domain to msixtbl_page_handler_get_hwaddr() + - reduce local variables used only once + - log a warning if write is forbidden if MSI-X and PBA lives on the same + page + - do not passthrough unaligned accesses + - handle accesses both before and after MSI-X table +--- + xen/arch/x86/hvm/vmsi.c | 191 +++++++++++++++++++++++++++++++-- + xen/arch/x86/include/asm/msi.h | 5 + + xen/arch/x86/msi.c | 40 +++++++ + 3 files changed, 225 insertions(+), 11 deletions(-) + +diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c +index ebc052b8ea84..e21c09f0f5cb 100644 +--- a/xen/arch/x86/hvm/vmsi.c ++++ b/xen/arch/x86/hvm/vmsi.c +@@ -180,6 +180,10 @@ static bool msixtbl_initialised(const struct domain *d) + return d->arch.hvm.msixtbl_list.next; + } + ++/* ++ * Lookup an msixtbl_entry on the same page as given addr. It's up to the ++ * caller to check if address is strictly part of the table - if relevant. 
++ */ + static struct msixtbl_entry *msixtbl_find_entry( + struct vcpu *v, unsigned long addr) + { +@@ -187,8 +191,8 @@ static struct msixtbl_entry *msixtbl_find_entry( + struct domain *d = v->domain; + + list_for_each_entry( entry, &d->arch.hvm.msixtbl_list, list ) +- if ( addr >= entry->gtable && +- addr < entry->gtable + entry->table_len ) ++ if ( PFN_DOWN(addr) >= PFN_DOWN(entry->gtable) && ++ PFN_DOWN(addr) <= PFN_DOWN(entry->gtable + entry->table_len - 1) ) + return entry; + + return NULL; +@@ -213,6 +217,131 @@ static struct msi_desc *msixtbl_addr_to_desc( + return NULL; + } + ++/* ++ * Returns: ++ * - UINT_MAX if no handling should be done ++ * - UINT_MAX-1 if write should be discarded ++ * - a fixmap idx to use for handling ++ */ ++#define ADJACENT_DONT_HANDLE UINT_MAX ++#define ADJACENT_DISCARD_WRITE (UINT_MAX - 1) ++static unsigned int adjacent_handle( ++ const struct msixtbl_entry *entry, unsigned long addr, bool write) ++{ ++ unsigned int adj_type; ++ const struct arch_msix *msix; ++ ++ if ( !entry || !entry->pdev ) ++ return ADJACENT_DONT_HANDLE; ++ ++ if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable) && addr < entry->gtable ) ++ adj_type = ADJ_IDX_FIRST; ++ else if ( PFN_DOWN(addr) == PFN_DOWN(entry->gtable + entry->table_len - 1) && ++ addr >= entry->gtable + entry->table_len ) ++ adj_type = ADJ_IDX_LAST; ++ else ++ return ADJACENT_DONT_HANDLE; ++ ++ msix = entry->pdev->msix; ++ ASSERT(msix); ++ ++ if ( !msix->adj_access_idx[adj_type] ) ++ { ++ gprintk(XENLOG_WARNING, ++ "Page for adjacent(%d) MSI-X table access not initialized for %pp (addr %#lx, gtable %#lx)\n", ++ adj_type, &entry->pdev->sbdf, addr, entry->gtable); ++ ++ return ADJACENT_DONT_HANDLE; ++ } ++ ++ /* If PBA lives on the same page too, discard writes. */ ++ if ( write && ++ ((adj_type == ADJ_IDX_LAST && ++ msix->table.last == msix->pba.first) || ++ (adj_type == ADJ_IDX_FIRST && ++ msix->table.first == msix->pba.last)) ) ++ { ++ gprintk(XENLOG_WARNING, ++ "MSI-X table and PBA of %pp live on the same page, " ++ "writing to other registers there is not implemented\n", ++ &entry->pdev->sbdf); ++ return ADJACENT_DISCARD_WRITE; ++ } ++ ++ return msix->adj_access_idx[adj_type]; ++} ++ ++static int adjacent_read( ++ unsigned int fixmap_idx, ++ paddr_t address, unsigned int len, uint64_t *pval) ++{ ++ const void __iomem *hwaddr; ++ ++ *pval = ~0UL; ++ ++ ASSERT(fixmap_idx != ADJACENT_DISCARD_WRITE); ++ ++ hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address); ++ ++ switch ( len ) ++ { ++ case 1: ++ *pval = readb(hwaddr); ++ break; ++ ++ case 2: ++ *pval = readw(hwaddr); ++ break; ++ ++ case 4: ++ *pval = readl(hwaddr); ++ break; ++ ++ case 8: ++ *pval = readq(hwaddr); ++ break; ++ ++ default: ++ ASSERT_UNREACHABLE(); ++ } ++ return X86EMUL_OKAY; ++} ++ ++static int adjacent_write( ++ unsigned int fixmap_idx, ++ uint64_t address, uint32_t len, uint64_t val) ++{ ++ void __iomem *hwaddr; ++ ++ if ( fixmap_idx == ADJACENT_DISCARD_WRITE ) ++ return X86EMUL_OKAY; ++ ++ hwaddr = fix_to_virt(fixmap_idx) + PAGE_OFFSET(address); ++ ++ switch ( len ) ++ { ++ case 1: ++ writeb(val, hwaddr); ++ break; ++ ++ case 2: ++ writew(val, hwaddr); ++ break; ++ ++ case 4: ++ writel(val, hwaddr); ++ break; ++ ++ case 8: ++ writeq(val, hwaddr); ++ break; ++ ++ default: ++ ASSERT_UNREACHABLE(); ++ } ++ return X86EMUL_OKAY; ++} ++ + static int cf_check msixtbl_read( + const struct hvm_io_handler *handler, uint64_t address, uint32_t len, + uint64_t *pval) +@@ -220,16 +349,31 @@ static int cf_check msixtbl_read( + unsigned long offset; + struct 
msixtbl_entry *entry; + unsigned int nr_entry, index; ++ unsigned int adjacent_fixmap; + int r = X86EMUL_UNHANDLEABLE; + +- if ( (len != 4 && len != 8) || (address & (len - 1)) ) ++ if ( !IS_ALIGNED(address, len) ) + return r; + + rcu_read_lock(&msixtbl_rcu_lock); +- + entry = msixtbl_find_entry(current, address); + if ( !entry ) + goto out; ++ ++ adjacent_fixmap = adjacent_handle(entry, address, false); ++ if ( adjacent_fixmap != ADJACENT_DONT_HANDLE ) ++ { ++ r = adjacent_read(adjacent_fixmap, address, len, pval); ++ goto out; ++ } ++ ++ if ( address < entry->gtable || ++ address >= entry->gtable + entry->table_len ) ++ goto out; ++ ++ if ( len != 4 && len != 8 ) ++ goto out; ++ + offset = address & (PCI_MSIX_ENTRY_SIZE - 1); + + if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) +@@ -290,6 +434,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, + int r = X86EMUL_UNHANDLEABLE; + unsigned long flags; + struct irq_desc *desc; ++ unsigned int adjacent_fixmap; + + if ( !IS_ALIGNED(address, len) ) + return X86EMUL_OKAY; +@@ -299,6 +444,19 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, + entry = msixtbl_find_entry(v, address); + if ( !entry ) + goto out; ++ ++ adjacent_fixmap = adjacent_handle(entry, address, true); ++ if ( adjacent_fixmap != ADJACENT_DONT_HANDLE ) ++ { ++ r = adjacent_write(adjacent_fixmap, address, len, val); ++ goto out; ++ } ++ if ( address < entry->gtable || ++ address >= entry->gtable + entry->table_len ) ++ goto out; ++ if ( len != 4 && len != 8 ) ++ goto out; ++ + nr_entry = array_index_nospec(((address - entry->gtable) / + PCI_MSIX_ENTRY_SIZE), + MAX_MSIX_TABLE_ENTRIES); +@@ -364,8 +522,8 @@ static int cf_check _msixtbl_write( + const struct hvm_io_handler *handler, uint64_t address, uint32_t len, + uint64_t val) + { +- /* ignore invalid length or unaligned writes */ +- if ( len != 4 && len != 8 || !IS_ALIGNED(address, len) ) ++ /* ignore unaligned writes */ ++ if ( !IS_ALIGNED(address, len) ) + return X86EMUL_OKAY; + + /* +@@ -382,14 +540,22 @@ static bool cf_check msixtbl_range( + { + struct vcpu *curr = current; + unsigned long addr = r->addr; +- const struct msi_desc *desc; ++ const struct msixtbl_entry *entry; ++ const struct msi_desc *desc = NULL; ++ unsigned int adjacent_fixmap; + + ASSERT(r->type == IOREQ_TYPE_COPY); + + rcu_read_lock(&msixtbl_rcu_lock); +- desc = msixtbl_addr_to_desc(msixtbl_find_entry(curr, addr), addr); ++ entry = msixtbl_find_entry(curr, addr); ++ adjacent_fixmap = adjacent_handle(entry, addr, false); ++ if ( adjacent_fixmap == ADJACENT_DONT_HANDLE ) ++ desc = msixtbl_addr_to_desc(entry, addr); + rcu_read_unlock(&msixtbl_rcu_lock); + ++ if ( adjacent_fixmap != ADJACENT_DONT_HANDLE ) ++ return 1; ++ + if ( desc ) + return 1; + +@@ -630,12 +796,15 @@ void msix_write_completion(struct vcpu *v) + v->arch.hvm.hvm_io.msix_snoop_gpa ) + { + unsigned int token = hvmemul_cache_disable(v); +- const struct msi_desc *desc; ++ const struct msi_desc *desc = NULL; ++ const struct msixtbl_entry *entry; + uint32_t data; + + rcu_read_lock(&msixtbl_rcu_lock); +- desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, snoop_addr), +- snoop_addr); ++ entry = msixtbl_find_entry(v, snoop_addr); ++ if ( entry && snoop_addr >= entry->gtable && ++ snoop_addr < entry->gtable + entry->table_len ) ++ desc = msixtbl_addr_to_desc(entry, snoop_addr); + rcu_read_unlock(&msixtbl_rcu_lock); + + if ( desc && +diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h +index fe670895eed2..86acae3adc6a 100644 +--- 
a/xen/arch/x86/include/asm/msi.h ++++ b/xen/arch/x86/include/asm/msi.h +@@ -229,6 +229,10 @@ struct __packed msg_address { + PCI_MSIX_ENTRY_SIZE + \ + (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1))) + ++/* indexes in adj_access_idx[] below */ ++#define ADJ_IDX_FIRST 0 ++#define ADJ_IDX_LAST 1 ++ + struct arch_msix { + unsigned int nr_entries, used_entries; + struct { +@@ -236,6 +240,7 @@ struct arch_msix { + } table, pba; + int table_refcnt[MAX_MSIX_TABLE_PAGES]; + int table_idx[MAX_MSIX_TABLE_PAGES]; ++ unsigned int adj_access_idx[2]; + spinlock_t table_lock; + bool host_maskall, guest_maskall; + domid_t warned; +diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c +index d0bf63df1def..7673c1cffe43 100644 +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -928,6 +928,36 @@ static int msix_capability_init(struct pci_dev *dev, + list_add_tail(&entry->list, &dev->msi_list); + *desc = entry; + } ++ else ++ { ++ /* ++ * If the MSI-X table doesn't start at the page boundary, map the first page for ++ * passthrough accesses. ++ */ ++ if ( PAGE_OFFSET(table_paddr) ) ++ { ++ int idx = msix_get_fixmap(msix, table_paddr, table_paddr); ++ ++ if ( idx > 0 ) ++ msix->adj_access_idx[ADJ_IDX_FIRST] = idx; ++ else ++ gprintk(XENLOG_ERR, "Failed to map first MSI-X table page: %d\n", idx); ++ } ++ /* ++ * If the MSI-X table doesn't end on the page boundary, map the last page ++ * for passthrough accesses. ++ */ ++ if ( PAGE_OFFSET(table_paddr + msix->nr_entries * PCI_MSIX_ENTRY_SIZE) ) ++ { ++ uint64_t entry_paddr = table_paddr + msix->nr_entries * PCI_MSIX_ENTRY_SIZE; ++ int idx = msix_get_fixmap(msix, table_paddr, entry_paddr); ++ ++ if ( idx > 0 ) ++ msix->adj_access_idx[ADJ_IDX_LAST] = idx; ++ else ++ gprintk(XENLOG_ERR, "Failed to map last MSI-X table page: %d\n", idx); ++ } ++ } + + if ( !msix->used_entries ) + { +@@ -1090,6 +1120,16 @@ static void _pci_cleanup_msix(struct arch_msix *msix) + WARN(); + msix->table.first = 0; + msix->table.last = 0; ++ if ( msix->adj_access_idx[ADJ_IDX_FIRST] ) ++ { ++ msix_put_fixmap(msix, msix->adj_access_idx[ADJ_IDX_FIRST]); ++ msix->adj_access_idx[ADJ_IDX_FIRST] = 0; ++ } ++ if ( msix->adj_access_idx[ADJ_IDX_LAST] ) ++ { ++ msix_put_fixmap(msix, msix->adj_access_idx[ADJ_IDX_LAST]); ++ msix->adj_access_idx[ADJ_IDX_LAST] = 0; ++ } + + if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first, + msix->pba.last) ) +-- +2.41.0 + diff --git a/xen.spec.in b/xen.spec.in index d9148ece..520531cf 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -161,6 +161,9 @@ Patch0641: 0641-xenpm-Factor-out-a-non-fatal-cpuid_parse-variant.patch Patch0642: 0642-xenpm-Add-set-cpufreq-hwp-subcommand.patch Patch0643: 0643-cpufreq-enable-HWP-by-default.patch +Patch0651: 0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch +Patch0652: 0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch + # Qubes specific patches Patch1000: 1000-Do-not-access-network-during-the-build.patch Patch1001: 1001-hotplug-store-block-params-for-cleanup.patch From 641f36bee0080aa992ba8a9bf7bfebe09ebee0dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 10 Oct 2023 14:07:26 +0200 Subject: [PATCH 22/64] Apply XSA-442 patch --- 0511-xsa442-4.17.patch | 185 +++++++++++++++++++++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 186 insertions(+) create mode 100644 0511-xsa442-4.17.patch diff --git a/0511-xsa442-4.17.patch b/0511-xsa442-4.17.patch new file mode 100644 index 00000000..a78bfdd2 --- /dev/null +++ b/0511-xsa442-4.17.patch @@ -0,0 +1,185 @@ +From 
5b2ccb60ff22fbff44dd66214c2956a434ee6271 Mon Sep 17 00:00:00 2001 +From: Roger Pau Monne +Date: Tue, 13 Jun 2023 15:01:05 +0200 +Subject: [PATCH] iommu/amd-vi: flush IOMMU TLB when flushing the DTE +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The caching invalidation guidelines from the AMD-Vi specification (48882—Rev +3.07-PUB—Oct 2022) seem to be misleading on some hardware, as devices will +malfunction (see stale DMA mappings) if some fields of the DTE are updated but +the IOMMU TLB is not flushed. This has been observed in practice on AMD +systems. Due to the lack of guidance from the currently published +specification this patch aims to increase the flushing done in order to prevent +device malfunction. + +In order to fix, issue an INVALIDATE_IOMMU_PAGES command from +amd_iommu_flush_device(), flushing all the address space. Note this requires +callers to be adjusted in order to pass the DomID on the DTE previous to the +modification. + +Some call sites don't provide a valid DomID to amd_iommu_flush_device() in +order to avoid the flush. That's because the device had address translations +disabled and hence the previous DomID on the DTE is not valid. Note the +current logic relies on the entity disabling address translations to also flush +the TLB of the in use DomID. + +Device I/O TLB flushing when ATS are enabled is not covered by the current +change, as ATS usage is not security supported. + +This is XSA-442 / CVE-2023-34326 + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +--- + xen/drivers/passthrough/amd/iommu.h | 3 ++- + xen/drivers/passthrough/amd/iommu_cmd.c | 10 +++++++++- + xen/drivers/passthrough/amd/iommu_guest.c | 5 +++-- + xen/drivers/passthrough/amd/iommu_init.c | 6 +++++- + xen/drivers/passthrough/amd/pci_amd_iommu.c | 14 ++++++++++---- + 5 files changed, 29 insertions(+), 9 deletions(-) + +diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h +index 5429ada58ef5..a58be28bf96d 100644 +--- a/xen/drivers/passthrough/amd/iommu.h ++++ b/xen/drivers/passthrough/amd/iommu.h +@@ -283,7 +283,8 @@ void amd_iommu_flush_pages(struct domain *d, unsigned long dfn, + unsigned int order); + void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev, + uint64_t gaddr, unsigned int order); +-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf); ++void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf, ++ domid_t domid); + void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf); + void amd_iommu_flush_all_caches(struct amd_iommu *iommu); + +diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c +index 40ddf366bb4d..cb28b36abc38 100644 +--- a/xen/drivers/passthrough/amd/iommu_cmd.c ++++ b/xen/drivers/passthrough/amd/iommu_cmd.c +@@ -363,10 +363,18 @@ void amd_iommu_flush_pages(struct domain *d, + _amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order); + } + +-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf) ++void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf, ++ domid_t domid) + { + invalidate_dev_table_entry(iommu, bdf); + flush_command_buffer(iommu, 0); ++ ++ /* Also invalidate IOMMU TLB entries when flushing the DTE. 
*/ ++ if ( domid != DOMID_INVALID ) ++ { ++ invalidate_iommu_pages(iommu, INV_IOMMU_ALL_PAGES_ADDRESS, domid, 0); ++ flush_command_buffer(iommu, 0); ++ } + } + + void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf) +diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c +index 80a331f546ed..be86bce6fb03 100644 +--- a/xen/drivers/passthrough/amd/iommu_guest.c ++++ b/xen/drivers/passthrough/amd/iommu_guest.c +@@ -385,7 +385,7 @@ static int do_completion_wait(struct domain *d, cmd_entry_t *cmd) + + static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) + { +- uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id; ++ uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id, prev_domid; + struct amd_iommu_dte *gdte, *mdte, *dte_base; + struct amd_iommu *iommu = NULL; + struct guest_iommu *g_iommu; +@@ -445,13 +445,14 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) + req_id = get_dma_requestor_id(iommu->seg, mbdf); + dte_base = iommu->dev_table.buffer; + mdte = &dte_base[req_id]; ++ prev_domid = mdte->domain_id; + + spin_lock_irqsave(&iommu->lock, flags); + dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx); + + spin_unlock_irqrestore(&iommu->lock, flags); + +- amd_iommu_flush_device(iommu, req_id); ++ amd_iommu_flush_device(iommu, req_id, prev_domid); + + return 0; + } +diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c +index 166570648d26..101a60ce1794 100644 +--- a/xen/drivers/passthrough/amd/iommu_init.c ++++ b/xen/drivers/passthrough/amd/iommu_init.c +@@ -1547,7 +1547,11 @@ static int cf_check _invalidate_all_devices( + req_id = ivrs_mappings[bdf].dte_requestor_id; + if ( iommu ) + { +- amd_iommu_flush_device(iommu, req_id); ++ /* ++ * IOMMU TLB flush performed separately (see ++ * invalidate_all_domain_pages()). ++ */ ++ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID); + amd_iommu_flush_intremap(iommu, req_id); + } + } +diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c +index 94e37755064b..8641b84712a0 100644 +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -192,10 +192,13 @@ static int __must_check amd_iommu_setup_domain_device( + + spin_unlock_irqrestore(&iommu->lock, flags); + +- amd_iommu_flush_device(iommu, req_id); ++ /* DTE didn't have DMA translations enabled, do not flush the TLB. */ ++ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID); + } + else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) ) + { ++ domid_t prev_domid = dte->domain_id; ++ + /* + * Strictly speaking if the device is the only one with this requestor + * ID, it could be allowed to be re-assigned regardless of unity map +@@ -252,7 +255,7 @@ static int __must_check amd_iommu_setup_domain_device( + + spin_unlock_irqrestore(&iommu->lock, flags); + +- amd_iommu_flush_device(iommu, req_id); ++ amd_iommu_flush_device(iommu, req_id, prev_domid); + } + else + spin_unlock_irqrestore(&iommu->lock, flags); +@@ -421,6 +424,8 @@ static void amd_iommu_disable_domain_device(const struct domain *domain, + spin_lock_irqsave(&iommu->lock, flags); + if ( dte->tv || dte->v ) + { ++ domid_t prev_domid = dte->domain_id; ++ + /* See the comment in amd_iommu_setup_device_table(). 
*/ + dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED; + smp_wmb(); +@@ -439,7 +444,7 @@ static void amd_iommu_disable_domain_device(const struct domain *domain, + + spin_unlock_irqrestore(&iommu->lock, flags); + +- amd_iommu_flush_device(iommu, req_id); ++ amd_iommu_flush_device(iommu, req_id, prev_domid); + + AMD_IOMMU_DEBUG("Disable: device id = %#x, " + "domain = %d, paging mode = %d\n", +@@ -610,7 +615,8 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) + + spin_unlock_irqrestore(&iommu->lock, flags); + +- amd_iommu_flush_device(iommu, bdf); ++ /* DTE didn't have DMA translations enabled, do not flush the TLB. */ ++ amd_iommu_flush_device(iommu, bdf, DOMID_INVALID); + } + + if ( amd_iommu_reserve_domain_unity_map( +-- +2.42.0 + diff --git a/xen.spec.in b/xen.spec.in index d9148ece..a65751f8 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -117,6 +117,7 @@ Patch0507: 0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch Patch0508: 0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch Patch0509: 0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch Patch0510: 0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch +Patch0511: 0511-xsa442-4.17.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From 50d46d93518c0ed31fe7eb6d03e2c7a8c172d79a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 10 Oct 2023 14:07:37 +0200 Subject: [PATCH 23/64] version 4.17.2-3 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 0cfbf088..00750edc 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -2 +3 From 7c46662952853357d2f9f815a7e84767eaf0d7de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 11 Oct 2023 02:54:40 +0200 Subject: [PATCH 24/64] rpm: remove duplicated ldconfig call And avoid sending a comment as an input for ldconfig. --- xen.spec.in | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xen.spec.in b/xen.spec.in index a65751f8..e511e28d 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -722,12 +722,10 @@ if [ ! 
-e /usr/lib/xen ]; then ln -s /usr/libexec/xen /usr/lib/xen fi -%ldconfig_scriptlets libs - -# BEGIN QUBES SPECIFIC PART +# QUBES SPECIFIC PART: next 2 lines (do not put comment before next section) %post libs -p /sbin/ldconfig %postun libs -p /sbin/ldconfig -# END QUBES SPECIFIC PART + %if %build_hyp %post hypervisor %if %build_efi From 719daa6f656ffeb6d519a0b6af9e0f7e4f885736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 11 Oct 2023 13:24:04 +0200 Subject: [PATCH 25/64] version 4.17.2-4 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 00750edc..b8626c4c 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -3 +4 From d8d557c978aac335e5da3277273893e7ed3ed7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 14 Nov 2023 13:08:11 +0100 Subject: [PATCH 26/64] Apply XSA-446 patch --- 0512-xsa446.patch | 115 ++++++++++++++++++++++++++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 116 insertions(+) create mode 100644 0512-xsa446.patch diff --git a/0512-xsa446.patch b/0512-xsa446.patch new file mode 100644 index 00000000..acf1d0f7 --- /dev/null +++ b/0512-xsa446.patch @@ -0,0 +1,115 @@ +From 80d5aada598c3a800a350003d5d582931545e13c Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 26 Oct 2023 14:37:38 +0100 +Subject: [PATCH] x86/spec-ctrl: Remove conditional IRQs-on-ness for INT + $0x80/0x82 paths +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Before speculation defences, some paths in Xen could genuinely get away with +being IRQs-on at entry. But XPTI invalidated this property on most paths, and +attempting to maintain it on the remaining paths was a mistake. + +Fast forward, and DO_SPEC_CTRL_COND_IBPB (protection for AMD BTC/SRSO) is not +IRQ-safe, running with IRQs enabled in some cases. The other actions taken on +these paths happen to be IRQ-safe. + +Make entry_int82() and int80_direct_trap() unconditionally Interrupt Gates +rather than Trap Gates. Remove the conditional re-adjustment of +int80_direct_trap() in smp_prepare_cpus(), and have entry_int82() explicitly +enable interrupts when safe to do so. + +In smp_prepare_cpus(), with the conditional re-adjustment removed, the +clearing of pv_cr3 is the only remaining action gated on XPTI, and it is out +of place anyway, repeating work already done by smp_prepare_boot_cpu(). Drop +the entire if() condition to avoid leaving an incorrect vestigial remnant. + +Also drop comments which make incorrect statements about when its safe to +enable interrupts. + +This is XSA-446 / CVE-2023-46836 + +Signed-off-by: Andrew Cooper +Reviewed-by: Roger Pau Monné +--- + xen/arch/x86/pv/traps.c | 4 ++-- + xen/arch/x86/smpboot.c | 14 -------------- + xen/arch/x86/x86_64/compat/entry.S | 2 ++ + xen/arch/x86/x86_64/entry.S | 1 - + 4 files changed, 4 insertions(+), 17 deletions(-) + +diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c +index 74f333da7e1c..240d1a2db7a3 100644 +--- a/xen/arch/x86/pv/traps.c ++++ b/xen/arch/x86/pv/traps.c +@@ -139,11 +139,11 @@ void __init pv_trap_init(void) + #ifdef CONFIG_PV32 + /* The 32-on-64 hypercall vector is only accessible from ring 1. */ + _set_gate(idt_table + HYPERCALL_VECTOR, +- SYS_DESC_trap_gate, 1, entry_int82); ++ SYS_DESC_irq_gate, 1, entry_int82); + #endif + + /* Fast trap for int80 (faster than taking the #GP-fixup path). 
*/ +- _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_trap_gate, 3, ++ _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3, + &int80_direct_trap); + + open_softirq(NMI_SOFTIRQ, nmi_softirq); +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index 3a1a659082c6..4c54ecbc91d7 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -1158,20 +1158,6 @@ void __init smp_prepare_cpus(void) + + stack_base[0] = (void *)((unsigned long)stack_start & ~(STACK_SIZE - 1)); + +- if ( opt_xpti_hwdom || opt_xpti_domu ) +- { +- get_cpu_info()->pv_cr3 = 0; +- +-#ifdef CONFIG_PV +- /* +- * All entry points which may need to switch page tables have to start +- * with interrupts off. Re-write what pv_trap_init() has put there. +- */ +- _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3, +- &int80_direct_trap); +-#endif +- } +- + set_nr_sockets(); + + socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index bd5abd8040bd..fcc3a721f147 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -21,6 +21,8 @@ ENTRY(entry_int82) + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + ++ sti ++ + CR4_PV32_RESTORE + + GET_CURRENT(bx) +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 5ca74f5f62b2..9a7b129aa7e4 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -327,7 +327,6 @@ ENTRY(sysenter_entry) + #ifdef CONFIG_XEN_SHSTK + ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK + #endif +- /* sti could live here when we don't switch page tables below. */ + pushq $FLAT_USER_SS + pushq $0 + pushfq + +base-commit: 7befef87cc9b1bb8ca15d866ce1ecd9165ccb58c +prerequisite-patch-id: 142a87c707411d49e136c3fb76f1b14963ec6dc8 +-- +2.30.2 + diff --git a/xen.spec.in b/xen.spec.in index 9e7e0377..f2066ee3 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -118,6 +118,7 @@ Patch0508: 0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch Patch0509: 0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch Patch0510: 0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch Patch0511: 0511-xsa442-4.17.patch +Patch0512: 0512-xsa446.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From bd8302945c876f9c53590f69cf89622af74d5748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 14 Nov 2023 13:09:13 +0100 Subject: [PATCH 27/64] version 4.17.2-5 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index b8626c4c..7ed6ff82 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -4 +5 From 822645c9d898140d8243107b89938a4fe4e16929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 29 Nov 2023 01:02:09 +0100 Subject: [PATCH 28/64] Disable annobin for the hypervisor build The .annobin.notes section gets placed at the start of xen.efi, which (for unclear reasons) breaks booting under OVMF with "Out of resources" error message. 
The section looks like this: Idx Name Size VMA LMA File off Algn 0 .annobin.notes 0001286a ffff82d100000000 ffff82d100000000 00000480 2**2 CONTENTS, READONLY --- xen.spec.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xen.spec.in b/xen.spec.in index 9e7e0377..d164735a 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -462,6 +462,8 @@ export LDFLAGS="$LDFLAGS_SAVE" export CFLAGS="$CFLAGS_SAVE -Wno-error=address" %if %build_hyp +# QUBES SPECIFIC LINE +export CFLAGS=`echo $CFLAGS | sed -e 's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g'` %if %build_crosshyp export CFLAGS=`echo $CFLAGS | sed -e 's/-m32//g' -e 's/-march=i686//g' 's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g'` XEN_TARGET_ARCH=x86_64 %make_build %{?efi_flags} prefix=/usr xen CC="/usr/bin/x86_64-linux-gnu-gcc" From 7d397a83e71b31e3dff0d1fc8ae38f8ccc6554f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sun, 3 Dec 2023 04:21:36 +0100 Subject: [PATCH 29/64] backport: x86/x2apic: introduce a mixed physical/cluster mode Fixes QubesOS/qubes-issues#8111 --- ...move-usage-of-ACPI_FADT_APIC_CLUSTER.patch | 43 +++ ...roduce-a-mixed-physical-cluster-mode.patch | 319 ++++++++++++++++++ config | 2 + xen.spec.in | 2 + 4 files changed, 366 insertions(+) create mode 100644 0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch create mode 100644 0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch diff --git a/0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch b/0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch new file mode 100644 index 00000000..559c5021 --- /dev/null +++ b/0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch @@ -0,0 +1,43 @@ +From d10db37b785ae61e2e2c0326b12823e66afb40fc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 14 Nov 2023 14:01:07 +0100 +Subject: [PATCH] x86/x2apic: remove usage of ACPI_FADT_APIC_CLUSTER +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The ACPI FADT APIC_CLUSTER flag mandates that when the interrupt delivery is +Logical mode APIC must be configured for Cluster destination model. However in +apic_x2apic_probe() such flag is incorrectly used to gate whether Physical mode +can be used. + +Since Xen when in x2APIC mode only uses Logical mode together with Cluster +model completely remove checking for ACPI_FADT_APIC_CLUSTER, as Xen always +fulfills the requirement signaled by the flag. 
+ +Fixes: eb40ae41b658 ('x86/Kconfig: add option for default x2APIC destination mode') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: 26a449ce32cef33f2cb50602be19fcc0c4223ba9 +master date: 2023-11-02 10:50:26 +0100 +--- + xen/arch/x86/genapic/x2apic.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c +index d512c50fc5ad..15a62f874b0d 100644 +--- a/xen/arch/x86/genapic/x2apic.c ++++ b/xen/arch/x86/genapic/x2apic.c +@@ -242,8 +242,7 @@ const struct genapic *__init apic_x2apic_probe(void) + */ + x2apic_phys = iommu_intremap != iommu_intremap_full || + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) || +- (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) && +- !(acpi_gbl_FADT.flags & ACPI_FADT_APIC_CLUSTER)); ++ IS_ENABLED(CONFIG_X2APIC_PHYSICAL); + } + else if ( !x2apic_phys ) + switch ( iommu_intremap ) +-- +2.41.0 + diff --git a/0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch b/0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch new file mode 100644 index 00000000..378578c8 --- /dev/null +++ b/0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch @@ -0,0 +1,319 @@ +From e3c409d59ac87ccdf97b8c7708c81efa8069cb31 Mon Sep 17 00:00:00 2001 +From: Roger Pau Monne +Date: Mon, 6 Nov 2023 15:27:39 +0100 +Subject: [PATCH] x86/x2apic: introduce a mixed physical/cluster mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current implementation of x2APIC requires to either use Cluster Logical or +Physical mode for all interrupts. However the selection of Physical vs Logical +is not done at APIC setup, an APIC can be addressed both in Physical or Logical +destination modes concurrently. + +Introduce a new x2APIC mode called Mixed, which uses Logical Cluster mode for +IPIs, and Physical mode for external interrupts, thus attempting to use the +best method for each interrupt type. + +Using Physical mode for external interrupts allows more vectors to be used, and +interrupt balancing to be more accurate. + +Using Logical Cluster mode for IPIs allows fewer accesses to the ICR register +when sending those, as multiple CPUs can be targeted with a single ICR register +write. + +A simple test calling flush_tlb_all() 10000 times on a tight loop on AMD EPYC +9754 with 512 CPUs gives the following figures in nano seconds: + +x mixed ++ phys +* cluster + N Min Max Median Avg Stddev +x 25 3.5131328e+08 3.5716441e+08 3.5410987e+08 3.5432659e+08 1566737.4 ++ 12 1.231082e+09 1.238824e+09 1.2370528e+09 1.2357981e+09 2853892.9 +Difference at 95.0% confidence + 8.81472e+08 +/- 1.46849e+06 + 248.774% +/- 0.96566% + (Student's t, pooled s = 2.05985e+06) +* 11 3.5099276e+08 3.5561459e+08 3.5461234e+08 3.5415668e+08 1415071.9 +No difference proven at 95.0% confidence + +So Mixed has no difference when compared to Cluster mode, and Physical mode is +248% slower when compared to either Mixed or Cluster modes with a 95% +confidence. + +Note that Xen uses Cluster mode by default, and hence is already using the +fastest way for IPI delivery at the cost of reducing the amount of vectors +available system-wide. + +Make the newly introduced mode the default one. + +Note the printing of the APIC addressing mode done in connect_bsp_APIC() has +been removed, as with the newly introduced mixed mode this would require more +fine grained printing, or else would be incorrect. 
The addressing mode can +already be derived from the APIC driver in use, which is printed by different +helpers. + +Suggested-by: Andrew Cooper +Signed-off-by: Roger Pau Monné +Reviewed-by: Andrew Cooper +Reviewed-by: Jan Beulich +Acked-by: Henry Wang +--- + CHANGELOG.md | 3 + + docs/misc/xen-command-line.pandoc | 12 ++++ + xen/arch/x86/Kconfig | 35 +++++++++-- + xen/arch/x86/apic.c | 6 +- + xen/arch/x86/genapic/x2apic.c | 98 +++++++++++++++++++++++-------- + 5 files changed, 119 insertions(+), 35 deletions(-) + +diff --git a/CHANGELOG.md b/CHANGELOG.md +index b184dde8b15f..c341c9d0bf5d 100644 +--- a/CHANGELOG.md ++++ b/CHANGELOG.md +@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) + - The x86 MCE command line option info is now updated. + + ### Added / support upgraded ++ - On x86: ++ - Introduce a new x2APIC driver that uses Cluster Logical addressing mode ++ for IPIs and Physical addressing mode for external interrupts. + - Out-of-tree builds for the hypervisor now supported. + - __ro_after_init support, for marking data as immutable after boot. + - The project has officially adopted 4 directives and 24 rules of MISRA-C, +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 9a19a04157cb..8e65f8bd18bf 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2804,6 +2804,15 @@ the watchdog. + + Permit use of x2apic setup for SMP environments. + ++### x2apic-mode (x86) ++> `= physical | cluster | mixed` ++ ++> Default: `physical` if **FADT** mandates physical mode, otherwise set at ++> build time by CONFIG_X2APIC_{PHYSICAL,LOGICAL,MIXED}. ++ ++In the case that x2apic is in use, this option switches between modes to ++address APICs in the system as interrupt destinations. ++ + ### x2apic_phys (x86) + > `= ` + +@@ -2814,6 +2823,9 @@ In the case that x2apic is in use, this option switches between physical and + clustered mode. The default, given no hint from the **FADT**, is cluster + mode. + ++**WARNING: `x2apic_phys` is deprecated and superseded by `x2apic-mode`. ++The latter takes precedence if both are set.** ++ + ### xenheap_megabytes (arm32) + > `= ` + +diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig +index eac77573bd75..1acdffc51c22 100644 +--- a/xen/arch/x86/Kconfig ++++ b/xen/arch/x86/Kconfig +@@ -228,11 +228,18 @@ config XEN_ALIGN_2M + + endchoice + +-config X2APIC_PHYSICAL +- bool "x2APIC Physical Destination mode" ++choice ++ prompt "x2APIC Driver default" ++ default X2APIC_MIXED + help +- Use x2APIC Physical Destination mode by default when available. ++ Select APIC addressing when x2APIC is enabled. ++ ++ The default mode is mixed which should provide the best aspects ++ of both physical and cluster modes. + ++config X2APIC_PHYSICAL ++ bool "Physical Destination mode" ++ help + When using this mode APICs are addressed using the Physical + Destination mode, which allows using all dynamic vectors on each + CPU independently. +@@ -242,9 +249,27 @@ config X2APIC_PHYSICAL + destination inter processor interrupts (IPIs) slightly slower than + Logical Destination mode. + +- The mode when this option is not selected is Logical Destination. ++config X2APIC_CLUSTER ++ bool "Cluster Destination mode" ++ help ++ When using this mode APICs are addressed using the Cluster Logical ++ Destination mode. ++ ++ Cluster Destination has the benefit of sending IPIs faster since ++ multiple APICs can be targeted as destinations of a single IPI. 
++ However the vector space is shared between all CPUs on the cluster, ++ and hence using this mode reduces the number of available vectors ++ when compared to Physical mode. + +- If unsure, say N. ++config X2APIC_MIXED ++ bool "Mixed Destination mode" ++ help ++ When using this mode APICs are addressed using the Cluster Logical ++ Destination mode for IPIs and Physical mode for external interrupts. ++ ++ Should provide the best of both modes. ++ ++endchoice + + config GUEST + bool +diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c +index f1264ce7ed1e..6acdd0ec1468 100644 +--- a/xen/arch/x86/apic.c ++++ b/xen/arch/x86/apic.c +@@ -229,11 +229,7 @@ void __init connect_bsp_APIC(void) + outb(0x01, 0x23); + } + +- printk("Enabling APIC mode: %s. Using %d I/O APICs\n", +- !INT_DEST_MODE ? "Physical" +- : init_apic_ldr == init_apic_ldr_flat ? "Flat" +- : "Clustered", +- nr_ioapics); ++ printk("Enabling APIC mode. Using %d I/O APICs\n", nr_ioapics); + enable_apic_mode(); + } + +diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c +index 707deef98c27..b88c7a96fe3e 100644 +--- a/xen/arch/x86/genapic/x2apic.c ++++ b/xen/arch/x86/genapic/x2apic.c +@@ -180,6 +180,36 @@ static const struct genapic __initconstrel apic_x2apic_cluster = { + .send_IPI_self = send_IPI_self_x2apic + }; + ++/* ++ * Mixed x2APIC mode: use physical for external (device) interrupts, and ++ * cluster for inter processor interrupts. Such mode has the benefits of not ++ * sharing the vector space with all CPUs on the cluster, while still allowing ++ * IPIs to be more efficiently delivered by not having to perform an ICR write ++ * for each target CPU. ++ */ ++static const struct genapic __initconstrel apic_x2apic_mixed = { ++ APIC_INIT("x2apic_mixed", NULL), ++ ++ /* ++ * The following fields are exclusively used by external interrupts and ++ * hence are set to use Physical destination mode handlers. ++ */ ++ .int_delivery_mode = dest_Fixed, ++ .int_dest_mode = 0 /* physical delivery */, ++ .vector_allocation_cpumask = vector_allocation_cpumask_phys, ++ .cpu_mask_to_apicid = cpu_mask_to_apicid_phys, ++ ++ /* ++ * The following fields are exclusively used by IPIs and hence are set to ++ * use Cluster Logical destination mode handlers. Note that init_apic_ldr ++ * is not used by IPIs, but the per-CPU fields it initializes are only used ++ * by the IPI hooks. ++ */ ++ .init_apic_ldr = init_apic_ldr_x2apic_cluster, ++ .send_IPI_mask = send_IPI_mask_x2apic_cluster, ++ .send_IPI_self = send_IPI_self_x2apic, ++}; ++ + static int cf_check update_clusterinfo( + struct notifier_block *nfb, unsigned long action, void *hcpu) + { +@@ -220,38 +250,56 @@ static struct notifier_block x2apic_cpu_nfb = { + static int8_t __initdata x2apic_phys = -1; + boolean_param("x2apic_phys", x2apic_phys); + ++enum { ++ unset, physical, cluster, mixed ++} static __initdata x2apic_mode = unset; ++ ++static int __init cf_check parse_x2apic_mode(const char *s) ++{ ++ if ( !cmdline_strcmp(s, "physical") ) ++ x2apic_mode = physical; ++ else if ( !cmdline_strcmp(s, "cluster") ) ++ x2apic_mode = cluster; ++ else if ( !cmdline_strcmp(s, "mixed") ) ++ x2apic_mode = mixed; ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++custom_param("x2apic-mode", parse_x2apic_mode); ++ + const struct genapic *__init apic_x2apic_probe(void) + { +- if ( x2apic_phys < 0 ) ++ /* Honour the legacy cmdline setting if it's the only one provided. */ ++ if ( x2apic_mode == unset && x2apic_phys >= 0 ) ++ x2apic_mode = x2apic_phys ? 
physical : cluster; ++ ++ if ( x2apic_mode == unset ) + { +- /* +- * Force physical mode if there's no (full) interrupt remapping support: +- * The ID in clustered mode requires a 32 bit destination field due to +- * the usage of the high 16 bits to hold the cluster ID. +- */ +- x2apic_phys = iommu_intremap != iommu_intremap_full || +- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) || +- IS_ENABLED(CONFIG_X2APIC_PHYSICAL); +- } +- else if ( !x2apic_phys ) +- switch ( iommu_intremap ) ++ if ( acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL ) + { +- case iommu_intremap_off: +- case iommu_intremap_restricted: +- printk("WARNING: x2APIC cluster mode is not supported %s interrupt remapping -" +- " forcing phys mode\n", +- iommu_intremap == iommu_intremap_off ? "without" +- : "with restricted"); +- x2apic_phys = true; +- break; +- +- case iommu_intremap_full: +- break; ++ printk(XENLOG_INFO "ACPI FADT forcing x2APIC physical mode\n"); ++ x2apic_mode = physical; + } ++ else ++ x2apic_mode = IS_ENABLED(CONFIG_X2APIC_MIXED) ? mixed ++ : (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) ? physical ++ : cluster); ++ } + +- if ( x2apic_phys ) ++ if ( x2apic_mode == physical ) + return &apic_x2apic_phys; + ++ if ( x2apic_mode == cluster && iommu_intremap != iommu_intremap_full ) ++ { ++ printk("WARNING: x2APIC cluster mode is not supported %s interrupt remapping -" ++ " forcing mixed mode\n", ++ iommu_intremap == iommu_intremap_off ? "without" ++ : "with restricted"); ++ x2apic_mode = mixed; ++ } ++ + if ( !this_cpu(cluster_cpus) ) + { + update_clusterinfo(NULL, CPU_UP_PREPARE, +@@ -260,7 +308,7 @@ const struct genapic *__init apic_x2apic_probe(void) + register_cpu_notifier(&x2apic_cpu_nfb); + } + +- return &apic_x2apic_cluster; ++ return x2apic_mode == cluster ? &apic_x2apic_cluster : &apic_x2apic_mixed; + } + + void __init check_x2apic_preenabled(void) +-- +2.41.0 diff --git a/config b/config index 4ffc4bae..1d49e89d 100644 --- a/config +++ b/config @@ -32,6 +32,8 @@ CONFIG_TBOOT=y CONFIG_XEN_ALIGN_DEFAULT=y # CONFIG_XEN_ALIGN_2M is not set # CONFIG_X2APIC_PHYSICAL is not set +# CONFIG_X2APIC_CLUSTER is not set +CONFIG_X2APIC_MIXED=y # CONFIG_XEN_GUEST is not set # CONFIG_HYPERV_GUEST is not set # CONFIG_MEM_PAGING is not set diff --git a/xen.spec.in b/xen.spec.in index f2066ee3..b5968231 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -104,6 +104,8 @@ Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0310: 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch +Patch0311: 0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch +Patch0312: 0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch From 973d80cc04cbb077436aa4d07a4ee7d43b6f6bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sun, 3 Dec 2023 04:23:13 +0100 Subject: [PATCH 30/64] Backport cpupool fix Related to QubesOS/qubes-issues#8737 --- 0313-xen-sched-fix-sched_move_domain.patch | 79 ++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 80 insertions(+) create mode 100644 0313-xen-sched-fix-sched_move_domain.patch diff --git a/0313-xen-sched-fix-sched_move_domain.patch b/0313-xen-sched-fix-sched_move_domain.patch new file mode 100644 index 00000000..d2f72713 --- /dev/null +++ b/0313-xen-sched-fix-sched_move_domain.patch @@ -0,0 +1,79 @@ +From 
e1f9cb16e2efbb202f2f8a9aa7c5ff1d392ece33 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 23 Nov 2023 12:24:12 +0100 +Subject: [PATCH] xen/sched: fix sched_move_domain() + +When moving a domain out of a cpupool running with the credit2 +scheduler and having multiple run-queues, the following ASSERT() can +be observed: + +(XEN) Xen call trace: +(XEN) [] R credit2.c#csched2_unit_remove+0xe3/0xe7 +(XEN) [] S sched_move_domain+0x2f3/0x5b1 +(XEN) [] S cpupool.c#cpupool_move_domain_locked+0x1d/0x3b +(XEN) [] S cpupool_move_domain+0x24/0x35 +(XEN) [] S domain_kill+0xa5/0x116 +(XEN) [] S do_domctl+0xe5f/0x1951 +(XEN) [] S timer.c#timer_lock+0x69/0x143 +(XEN) [] S pv_hypercall+0x44e/0x4a9 +(XEN) [] S lstar_enter+0x137/0x140 +(XEN) +(XEN) +(XEN) **************************************** +(XEN) Panic on CPU 1: +(XEN) Assertion 'svc->rqd == c2rqd(sched_unit_master(unit))' failed at common/sched/credit2.c:1159 +(XEN) **************************************** + +This is happening as sched_move_domain() is setting a different cpu +for a scheduling unit without telling the scheduler. When this unit is +removed from the scheduler, the ASSERT() will trigger. + +In non-debug builds the result is usually a clobbered pointer, leading +to another crash a short time later. + +Fix that by swapping the two involved actions (setting another cpu and +removing the unit from the scheduler). + +Link: https://github.com/Dasharo/dasharo-issues/issues/488 +Fixes: 70fadc41635b ("xen/cpupool: support moving domain between cpupools with different granularity") +Signed-off-by: Juergen Gross +Reviewed-by: George Dunlap +master commit: 4709ec82917668c2df958ef91b4f21c049c76bee +master date: 2023-11-20 10:49:29 +0100 +--- + xen/common/sched/core.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index 43132ff6e030..4bba16d315b9 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -732,18 +732,20 @@ int sched_move_domain(struct domain *d, struct cpupool *c) + old_domdata = d->sched_priv; + + /* +- * Temporarily move all units to same processor to make locking +- * easier when moving the new units to the new processors. ++ * Remove all units from the old scheduler, and temporarily move them to ++ * the same processor to make locking easier when moving the new units to ++ * new processors. 
+ */ + new_p = cpumask_first(d->cpupool->cpu_valid); + for_each_sched_unit ( d, unit ) + { +- spinlock_t *lock = unit_schedule_lock_irq(unit); ++ spinlock_t *lock; ++ ++ sched_remove_unit(old_ops, unit); + ++ lock = unit_schedule_lock_irq(unit); + sched_set_res(unit, get_sched_res(new_p)); + spin_unlock_irq(lock); +- +- sched_remove_unit(old_ops, unit); + } + + old_units = d->sched_unit_list; +-- +2.41.0 + diff --git a/xen.spec.in b/xen.spec.in index b5968231..32e4767e 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -106,6 +106,7 @@ Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0310: 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch Patch0311: 0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch Patch0312: 0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch +Patch0313: 0313-xen-sched-fix-sched_move_domain.patch # Security fixes Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch From 1a02560699a0327684e638382be7a3f2252c5713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 4 Dec 2023 02:53:53 +0100 Subject: [PATCH 31/64] version 4.17.2-6 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 7ed6ff82..1e8b3149 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -5 +6 From 20434253f189f60012eb8faddce2a1bb41bb2087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sun, 10 Dec 2023 17:15:15 +0100 Subject: [PATCH 32/64] version 4.17.2-7 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 1e8b3149..7f8f011e 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -6 +7 From 631ca94b39037ca6920d218c5a5abcd8ac6a19cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Wed, 27 Dec 2023 03:09:58 +0100 Subject: [PATCH 33/64] Backport XHCI console support for AMD QubesOS/qubes-issues#6834 --- ...s-char-support-up-to-1M-BAR0-of-xhci.patch | 66 +++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 67 insertions(+) create mode 100644 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch diff --git a/0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch b/0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch new file mode 100644 index 00000000..8a628f80 --- /dev/null +++ b/0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch @@ -0,0 +1,66 @@ +From 549b042943a57b748ce80070d1174e4ff5b8ef0b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + +Date: Wed, 14 Dec 2022 12:04:26 +0100 +Subject: [PATCH] drivers/char: support up to 1M BAR0 of xhci +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +AMD's XHCI has BAR0 of 1M (compared to 64K on Intel). Map it as a whole +(reserving more space in the fixmap). Make fixmap slot conditional on +CONFIG_XHCI. 
+
+Signed-off-by: Marek Marczykowski-Górecki
+Reviewed-by: Jan Beulich
+---
+ xen/arch/x86/include/asm/fixmap.h | 4 +++-
+ xen/drivers/char/xhci-dbc.c | 6 ++++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h
+index bc39ffe896b1..516ec3fa6c95 100644
+--- a/xen/arch/x86/include/asm/fixmap.h
++++ b/xen/arch/x86/include/asm/fixmap.h
+@@ -25,7 +25,7 @@
+ #include
+ #include
+
+-#define MAX_XHCI_PAGES 16
++#define MAX_XHCI_PAGES 256
+
+ /*
+ * Here we define all the compile-time 'special' virtual
+@@ -45,8 +45,10 @@ enum fixed_addresses {
+ FIX_COM_BEGIN,
+ FIX_COM_END,
+ FIX_EHCI_DBGP,
++#ifdef CONFIG_XHCI
+ FIX_XHCI_BEGIN,
+ FIX_XHCI_END = FIX_XHCI_BEGIN + MAX_XHCI_PAGES - 1,
++#endif
+ #ifdef CONFIG_XEN_GUEST
+ FIX_PV_CONSOLE,
+ FIX_XEN_SHARED_INFO,
+diff --git a/xen/drivers/char/xhci-dbc.c b/xen/drivers/char/xhci-dbc.c
+index 86f6df6bef67..60b781f87202 100644
+--- a/xen/drivers/char/xhci-dbc.c
++++ b/xen/drivers/char/xhci-dbc.c
+@@ -268,10 +268,12 @@ static void *dbc_sys_map_xhc(uint64_t phys, size_t size)
+ {
+ size_t i;
+
+- if ( size != MAX_XHCI_PAGES * PAGE_SIZE )
++ if ( size > MAX_XHCI_PAGES * PAGE_SIZE )
+ return NULL;
+
+- for ( i = FIX_XHCI_END; i >= FIX_XHCI_BEGIN; i-- )
++ size >>= PAGE_SHIFT;
++
++ for ( i = FIX_XHCI_END; i > FIX_XHCI_END - size; i-- )
+ {
+ set_fixmap_nocache(i, phys);
+ phys += PAGE_SIZE;
+--
+2.41.0
+
diff --git a/xen.spec.in b/xen.spec.in
index a332828f..626b3253 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -107,6 +107,7 @@ Patch0310: 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch
 Patch0311: 0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch
 Patch0312: 0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch
 Patch0313: 0313-xen-sched-fix-sched_move_domain.patch
+Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch

 # Security fixes
 Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch

From 38be433d0a176ad843022d496cfdfe64c61e1ded Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 27 Dec 2023 03:37:32 +0100
Subject: [PATCH 34/64] Fix guest memory corruption caused by hvmloader

Running qemu upstream in a stubdomain missed one hvmloader setting;
fix it now.

QubesOS/qubes-issues#4321
---
 ...locating-memory-for-qemu-xen-in-stub.patch | 46 +++++++++++++++++++
 xen.spec.in | 1 +
 2 files changed, 47 insertions(+)
 create mode 100644 0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch

diff --git a/0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch b/0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
new file mode 100644
index 00000000..e4486b9b
--- /dev/null
+++ b/0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
@@ -0,0 +1,46 @@
+From 02ef8192418af1b2b9d13658a5dd46a2e938d721 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+
+Date: Wed, 27 Dec 2023 03:24:00 +0100
+Subject: [PATCH] libxl: Disable relocating memory for qemu-xen in stubdomain
+ too
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+According to comments (and experiments) qemu-xen cannot handle memory
+relocation done by hvmloader. The code was already disabled when running
+qemu-xen in dom0 (see libxl__spawn_local_dm()), but it was missed when
+adding qemu-xen support to stubdomain. Adjust libxl__spawn_stub_dm() to
+be consistent in this regard.
+
+Reported-by: Neowutran
+Signed-off-by: Marek Marczykowski-Górecki
+---
+ tools/libs/light/libxl_dm.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index 14b593110f7c..ed620a9d8e14 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss)
+ "%s",
+ libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios));
+ }
++ /* Disable relocating memory to make the MMIO hole larger
++ * unless we're running qemu-traditional and vNUMA is not
++ * configured. */
++ libxl__xs_printf(gc, XBT_NULL,
++ libxl__sprintf(gc, "%s/hvmloader/allow-memory-relocate",
++ libxl__xs_get_dompath(gc, guest_domid)),
++ "%d",
++ guest_config->b_info.device_model_version
++ == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL &&
++ !libxl__vnuma_configured(&guest_config->b_info));
+ ret = xc_domain_set_target(ctx->xch, dm_domid, guest_domid);
+ if (ret<0) {
+ LOGED(ERROR, guest_domid, "setting target domain %d -> %d",
+--
+2.41.0
+
diff --git a/xen.spec.in b/xen.spec.in
index 626b3253..5033873d 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -169,6 +169,7 @@ Patch0643: 0643-cpufreq-enable-HWP-by-default.patch
 
 PAtch0651: 0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch
 PAtch0652: 0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch
+Patch0653: 0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch

 # Qubes specific patches
 Patch1000: 1000-Do-not-access-network-during-the-build.patch

From b567bd91d0cda0b65c6dd7f28468f97cbb85f0c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Wed, 3 Jan 2024 22:54:59 +0100
Subject: [PATCH 35/64] version 4.17.2-8

---
 rel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rel b/rel
index 7f8f011e..45a4fb75 100644
--- a/rel
+++ b/rel
@@ -1 +1 @@
-7
+8

From c09047e94ef08928be689e5769582a160bf3f802 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Sun, 14 Jan 2024 05:20:44 +0100
Subject: [PATCH 36/64] Update to Xen 4.17.3

Drop patches already included upstream, and refresh the
SOURCE_DATE_EPOCH patch to resolve a conflict.
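
The refreshed patch keeps Xen's embedded build timestamps derived from
SOURCE_DATE_EPOCH rather than the wall clock, conceptually along these
lines (an illustrative sketch assuming GNU date; not the literal patch
content):

    #!/bin/sh
    # Prefer SOURCE_DATE_EPOCH so that rebuilding the same tree yields
    # the same embedded date; fall back to the current time otherwise.
    epoch=${SOURCE_DATE_EPOCH:-$(date +%s)}
    XEN_BUILD_DATE=$(date -u -d "@$epoch" '+%a %b %e %T UTC %Y')
    export XEN_BUILD_DATE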
--- ...not-expose-HWCR.TscFreqSel-to-guests.patch | 59 ---- ...move-usage-of-ACPI_FADT_APIC_CLUSTER.patch | 43 --- ...roduce-a-mixed-physical-cluster-mode.patch | 319 ------------------ 0313-xen-sched-fix-sched_move_domain.patch | 79 ----- ...a-Operand-Invariant-Timing-Mode-by-d.patch | 80 ----- ...enbleed-check-to-models-good-ucode-i.patch | 48 --- ...x-confusion-between-SPEC_CTRL_EXIT_T.patch | 74 ---- ...ld-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch | 85 ----- ...rn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch | 83 ----- ...prove-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch | 106 ------ ...-restore_all_xen-to-hold-stack_end-i.patch | 74 ---- ...the-IST-ness-of-an-entry-for-the-exi.patch | 109 ------ ...rl-Issue-VERW-during-IST-exit-to-Xen.patch | 89 ----- ...troduce-is_zen-1-2-_uarch-predicates.patch | 91 ----- ...c-ctrl-Mitigate-the-Zen1-DIV-leakage.patch | 228 ------------- 0511-xsa442-4.17.patch | 185 ---------- 0512-xsa446.patch | 115 ------- ...ates-time-based-on-SOURCE_DATE_EPOCH.patch | 37 +- ...-directories-in-tools-xen-and-xen-ar.patch | 16 +- rel | 2 +- version | 2 +- xen.spec.in | 17 - 22 files changed, 29 insertions(+), 1912 deletions(-) delete mode 100644 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch delete mode 100644 0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch delete mode 100644 0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch delete mode 100644 0313-xen-sched-fix-sched_move_domain.patch delete mode 100644 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch delete mode 100644 0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch delete mode 100644 0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch delete mode 100644 0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch delete mode 100644 0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch delete mode 100644 0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch delete mode 100644 0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch delete mode 100644 0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch delete mode 100644 0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch delete mode 100644 0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch delete mode 100644 0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch delete mode 100644 0511-xsa442-4.17.patch delete mode 100644 0512-xsa446.patch diff --git a/0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch b/0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch deleted file mode 100644 index 480aba7c..00000000 --- a/0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch +++ /dev/null @@ -1,59 +0,0 @@ -From e4ca4e261da3fdddd541c3a9842b1e9e2ad00525 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Mon, 18 Sep 2023 15:07:49 +0200 -Subject: [PATCH] x86/amd: do not expose HWCR.TscFreqSel to guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -OpenBSD 7.3 will unconditionally access HWCR if the TSC is reported as -Invariant, and it will then attempt to also unconditionally access PSTATE0 if -HWCR.TscFreqSel is set (currently the case on Xen). - -The motivation for exposing HWCR.TscFreqSel was to avoid warning messages from -Linux. It has been agreed that Linux should be changed instead to not -complaint about missing HWCR.TscFreqSel when running virtualized. 
- -The relation between HWCR.TscFreqSel and PSTATE0 is not clearly written down in -the PPR, but it's natural for OSes to attempt to fetch the P0 frequency if the -TSC is stated to increment at the P0 frequency. - -Exposing PSTATEn (PSTATE0 at least) with all zeroes is not a suitable solution -because the PstateEn bit is read-write, and OSes could legitimately attempt to -set PstateEn=1 which Xen couldn't handle. - -Furthermore, the TscFreqSel bit is model specific and was never safe to expose -like this in the first place. At a minimum it should have had a toolstack -adjustment to know not to migrate such a VM. - -Therefore, simply remove the bit. Note the HWCR itself is an architectural -register, and does need to be accessible by the guest. Since HWCR contains -both architectural and non-architectural bits, going forward care must be taken -to assert the exposed value is correct on newer CPU families. - -Reported-by: Solène Rapenne -Link: https://github.com/QubesOS/qubes-issues/issues/8502 -Fixes: 14b95b3b8546 ('x86/AMD: expose HWCR.TscFreqSel to guests') -Signed-off-by: Roger Pau Monné -Reviewed-by: Andrew Cooper ---- - xen/arch/x86/msr.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 3f0450259cdf..c33dc78cd8f6 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -240,8 +240,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - case MSR_K8_HWCR: - if ( !(cp->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) - goto gp_fault; -- *val = get_cpu_family(cp->basic.raw_fms, NULL, NULL) >= 0x10 -- ? K8_HWCR_TSC_FREQ_SEL : 0; -+ *val = 0; - break; - - case MSR_VIRT_SPEC_CTRL: --- -2.41.0 - diff --git a/0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch b/0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch deleted file mode 100644 index 559c5021..00000000 --- a/0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch +++ /dev/null @@ -1,43 +0,0 @@ -From d10db37b785ae61e2e2c0326b12823e66afb40fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 14 Nov 2023 14:01:07 +0100 -Subject: [PATCH] x86/x2apic: remove usage of ACPI_FADT_APIC_CLUSTER -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The ACPI FADT APIC_CLUSTER flag mandates that when the interrupt delivery is -Logical mode APIC must be configured for Cluster destination model. However in -apic_x2apic_probe() such flag is incorrectly used to gate whether Physical mode -can be used. - -Since Xen when in x2APIC mode only uses Logical mode together with Cluster -model completely remove checking for ACPI_FADT_APIC_CLUSTER, as Xen always -fulfills the requirement signaled by the flag. 
- -Fixes: eb40ae41b658 ('x86/Kconfig: add option for default x2APIC destination mode') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: 26a449ce32cef33f2cb50602be19fcc0c4223ba9 -master date: 2023-11-02 10:50:26 +0100 ---- - xen/arch/x86/genapic/x2apic.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c -index d512c50fc5ad..15a62f874b0d 100644 ---- a/xen/arch/x86/genapic/x2apic.c -+++ b/xen/arch/x86/genapic/x2apic.c -@@ -242,8 +242,7 @@ const struct genapic *__init apic_x2apic_probe(void) - */ - x2apic_phys = iommu_intremap != iommu_intremap_full || - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) || -- (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) && -- !(acpi_gbl_FADT.flags & ACPI_FADT_APIC_CLUSTER)); -+ IS_ENABLED(CONFIG_X2APIC_PHYSICAL); - } - else if ( !x2apic_phys ) - switch ( iommu_intremap ) --- -2.41.0 - diff --git a/0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch b/0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch deleted file mode 100644 index 378578c8..00000000 --- a/0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch +++ /dev/null @@ -1,319 +0,0 @@ -From e3c409d59ac87ccdf97b8c7708c81efa8069cb31 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Mon, 6 Nov 2023 15:27:39 +0100 -Subject: [PATCH] x86/x2apic: introduce a mixed physical/cluster mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current implementation of x2APIC requires to either use Cluster Logical or -Physical mode for all interrupts. However the selection of Physical vs Logical -is not done at APIC setup, an APIC can be addressed both in Physical or Logical -destination modes concurrently. - -Introduce a new x2APIC mode called Mixed, which uses Logical Cluster mode for -IPIs, and Physical mode for external interrupts, thus attempting to use the -best method for each interrupt type. - -Using Physical mode for external interrupts allows more vectors to be used, and -interrupt balancing to be more accurate. - -Using Logical Cluster mode for IPIs allows fewer accesses to the ICR register -when sending those, as multiple CPUs can be targeted with a single ICR register -write. - -A simple test calling flush_tlb_all() 10000 times on a tight loop on AMD EPYC -9754 with 512 CPUs gives the following figures in nano seconds: - -x mixed -+ phys -* cluster - N Min Max Median Avg Stddev -x 25 3.5131328e+08 3.5716441e+08 3.5410987e+08 3.5432659e+08 1566737.4 -+ 12 1.231082e+09 1.238824e+09 1.2370528e+09 1.2357981e+09 2853892.9 -Difference at 95.0% confidence - 8.81472e+08 +/- 1.46849e+06 - 248.774% +/- 0.96566% - (Student's t, pooled s = 2.05985e+06) -* 11 3.5099276e+08 3.5561459e+08 3.5461234e+08 3.5415668e+08 1415071.9 -No difference proven at 95.0% confidence - -So Mixed has no difference when compared to Cluster mode, and Physical mode is -248% slower when compared to either Mixed or Cluster modes with a 95% -confidence. - -Note that Xen uses Cluster mode by default, and hence is already using the -fastest way for IPI delivery at the cost of reducing the amount of vectors -available system-wide. - -Make the newly introduced mode the default one. - -Note the printing of the APIC addressing mode done in connect_bsp_APIC() has -been removed, as with the newly introduced mixed mode this would require more -fine grained printing, or else would be incorrect. 
The addressing mode can -already be derived from the APIC driver in use, which is printed by different -helpers. - -Suggested-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Reviewed-by: Andrew Cooper -Reviewed-by: Jan Beulich -Acked-by: Henry Wang ---- - CHANGELOG.md | 3 + - docs/misc/xen-command-line.pandoc | 12 ++++ - xen/arch/x86/Kconfig | 35 +++++++++-- - xen/arch/x86/apic.c | 6 +- - xen/arch/x86/genapic/x2apic.c | 98 +++++++++++++++++++++++-------- - 5 files changed, 119 insertions(+), 35 deletions(-) - -diff --git a/CHANGELOG.md b/CHANGELOG.md -index b184dde8b15f..c341c9d0bf5d 100644 ---- a/CHANGELOG.md -+++ b/CHANGELOG.md -@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - - The x86 MCE command line option info is now updated. - - ### Added / support upgraded -+ - On x86: -+ - Introduce a new x2APIC driver that uses Cluster Logical addressing mode -+ for IPIs and Physical addressing mode for external interrupts. - - Out-of-tree builds for the hypervisor now supported. - - __ro_after_init support, for marking data as immutable after boot. - - The project has officially adopted 4 directives and 24 rules of MISRA-C, -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 9a19a04157cb..8e65f8bd18bf 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2804,6 +2804,15 @@ the watchdog. - - Permit use of x2apic setup for SMP environments. - -+### x2apic-mode (x86) -+> `= physical | cluster | mixed` -+ -+> Default: `physical` if **FADT** mandates physical mode, otherwise set at -+> build time by CONFIG_X2APIC_{PHYSICAL,LOGICAL,MIXED}. -+ -+In the case that x2apic is in use, this option switches between modes to -+address APICs in the system as interrupt destinations. -+ - ### x2apic_phys (x86) - > `= ` - -@@ -2814,6 +2823,9 @@ In the case that x2apic is in use, this option switches between physical and - clustered mode. The default, given no hint from the **FADT**, is cluster - mode. - -+**WARNING: `x2apic_phys` is deprecated and superseded by `x2apic-mode`. -+The latter takes precedence if both are set.** -+ - ### xenheap_megabytes (arm32) - > `= ` - -diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig -index eac77573bd75..1acdffc51c22 100644 ---- a/xen/arch/x86/Kconfig -+++ b/xen/arch/x86/Kconfig -@@ -228,11 +228,18 @@ config XEN_ALIGN_2M - - endchoice - --config X2APIC_PHYSICAL -- bool "x2APIC Physical Destination mode" -+choice -+ prompt "x2APIC Driver default" -+ default X2APIC_MIXED - help -- Use x2APIC Physical Destination mode by default when available. -+ Select APIC addressing when x2APIC is enabled. -+ -+ The default mode is mixed which should provide the best aspects -+ of both physical and cluster modes. - -+config X2APIC_PHYSICAL -+ bool "Physical Destination mode" -+ help - When using this mode APICs are addressed using the Physical - Destination mode, which allows using all dynamic vectors on each - CPU independently. -@@ -242,9 +249,27 @@ config X2APIC_PHYSICAL - destination inter processor interrupts (IPIs) slightly slower than - Logical Destination mode. - -- The mode when this option is not selected is Logical Destination. -+config X2APIC_CLUSTER -+ bool "Cluster Destination mode" -+ help -+ When using this mode APICs are addressed using the Cluster Logical -+ Destination mode. -+ -+ Cluster Destination has the benefit of sending IPIs faster since -+ multiple APICs can be targeted as destinations of a single IPI. 
-+ However the vector space is shared between all CPUs on the cluster, -+ and hence using this mode reduces the number of available vectors -+ when compared to Physical mode. - -- If unsure, say N. -+config X2APIC_MIXED -+ bool "Mixed Destination mode" -+ help -+ When using this mode APICs are addressed using the Cluster Logical -+ Destination mode for IPIs and Physical mode for external interrupts. -+ -+ Should provide the best of both modes. -+ -+endchoice - - config GUEST - bool -diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c -index f1264ce7ed1e..6acdd0ec1468 100644 ---- a/xen/arch/x86/apic.c -+++ b/xen/arch/x86/apic.c -@@ -229,11 +229,7 @@ void __init connect_bsp_APIC(void) - outb(0x01, 0x23); - } - -- printk("Enabling APIC mode: %s. Using %d I/O APICs\n", -- !INT_DEST_MODE ? "Physical" -- : init_apic_ldr == init_apic_ldr_flat ? "Flat" -- : "Clustered", -- nr_ioapics); -+ printk("Enabling APIC mode. Using %d I/O APICs\n", nr_ioapics); - enable_apic_mode(); - } - -diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c -index 707deef98c27..b88c7a96fe3e 100644 ---- a/xen/arch/x86/genapic/x2apic.c -+++ b/xen/arch/x86/genapic/x2apic.c -@@ -180,6 +180,36 @@ static const struct genapic __initconstrel apic_x2apic_cluster = { - .send_IPI_self = send_IPI_self_x2apic - }; - -+/* -+ * Mixed x2APIC mode: use physical for external (device) interrupts, and -+ * cluster for inter processor interrupts. Such mode has the benefits of not -+ * sharing the vector space with all CPUs on the cluster, while still allowing -+ * IPIs to be more efficiently delivered by not having to perform an ICR write -+ * for each target CPU. -+ */ -+static const struct genapic __initconstrel apic_x2apic_mixed = { -+ APIC_INIT("x2apic_mixed", NULL), -+ -+ /* -+ * The following fields are exclusively used by external interrupts and -+ * hence are set to use Physical destination mode handlers. -+ */ -+ .int_delivery_mode = dest_Fixed, -+ .int_dest_mode = 0 /* physical delivery */, -+ .vector_allocation_cpumask = vector_allocation_cpumask_phys, -+ .cpu_mask_to_apicid = cpu_mask_to_apicid_phys, -+ -+ /* -+ * The following fields are exclusively used by IPIs and hence are set to -+ * use Cluster Logical destination mode handlers. Note that init_apic_ldr -+ * is not used by IPIs, but the per-CPU fields it initializes are only used -+ * by the IPI hooks. -+ */ -+ .init_apic_ldr = init_apic_ldr_x2apic_cluster, -+ .send_IPI_mask = send_IPI_mask_x2apic_cluster, -+ .send_IPI_self = send_IPI_self_x2apic, -+}; -+ - static int cf_check update_clusterinfo( - struct notifier_block *nfb, unsigned long action, void *hcpu) - { -@@ -220,38 +250,56 @@ static struct notifier_block x2apic_cpu_nfb = { - static int8_t __initdata x2apic_phys = -1; - boolean_param("x2apic_phys", x2apic_phys); - -+enum { -+ unset, physical, cluster, mixed -+} static __initdata x2apic_mode = unset; -+ -+static int __init cf_check parse_x2apic_mode(const char *s) -+{ -+ if ( !cmdline_strcmp(s, "physical") ) -+ x2apic_mode = physical; -+ else if ( !cmdline_strcmp(s, "cluster") ) -+ x2apic_mode = cluster; -+ else if ( !cmdline_strcmp(s, "mixed") ) -+ x2apic_mode = mixed; -+ else -+ return -EINVAL; -+ -+ return 0; -+} -+custom_param("x2apic-mode", parse_x2apic_mode); -+ - const struct genapic *__init apic_x2apic_probe(void) - { -- if ( x2apic_phys < 0 ) -+ /* Honour the legacy cmdline setting if it's the only one provided. */ -+ if ( x2apic_mode == unset && x2apic_phys >= 0 ) -+ x2apic_mode = x2apic_phys ? 
physical : cluster; -+ -+ if ( x2apic_mode == unset ) - { -- /* -- * Force physical mode if there's no (full) interrupt remapping support: -- * The ID in clustered mode requires a 32 bit destination field due to -- * the usage of the high 16 bits to hold the cluster ID. -- */ -- x2apic_phys = iommu_intremap != iommu_intremap_full || -- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) || -- IS_ENABLED(CONFIG_X2APIC_PHYSICAL); -- } -- else if ( !x2apic_phys ) -- switch ( iommu_intremap ) -+ if ( acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL ) - { -- case iommu_intremap_off: -- case iommu_intremap_restricted: -- printk("WARNING: x2APIC cluster mode is not supported %s interrupt remapping -" -- " forcing phys mode\n", -- iommu_intremap == iommu_intremap_off ? "without" -- : "with restricted"); -- x2apic_phys = true; -- break; -- -- case iommu_intremap_full: -- break; -+ printk(XENLOG_INFO "ACPI FADT forcing x2APIC physical mode\n"); -+ x2apic_mode = physical; - } -+ else -+ x2apic_mode = IS_ENABLED(CONFIG_X2APIC_MIXED) ? mixed -+ : (IS_ENABLED(CONFIG_X2APIC_PHYSICAL) ? physical -+ : cluster); -+ } - -- if ( x2apic_phys ) -+ if ( x2apic_mode == physical ) - return &apic_x2apic_phys; - -+ if ( x2apic_mode == cluster && iommu_intremap != iommu_intremap_full ) -+ { -+ printk("WARNING: x2APIC cluster mode is not supported %s interrupt remapping -" -+ " forcing mixed mode\n", -+ iommu_intremap == iommu_intremap_off ? "without" -+ : "with restricted"); -+ x2apic_mode = mixed; -+ } -+ - if ( !this_cpu(cluster_cpus) ) - { - update_clusterinfo(NULL, CPU_UP_PREPARE, -@@ -260,7 +308,7 @@ const struct genapic *__init apic_x2apic_probe(void) - register_cpu_notifier(&x2apic_cpu_nfb); - } - -- return &apic_x2apic_cluster; -+ return x2apic_mode == cluster ? &apic_x2apic_cluster : &apic_x2apic_mixed; - } - - void __init check_x2apic_preenabled(void) --- -2.41.0 diff --git a/0313-xen-sched-fix-sched_move_domain.patch b/0313-xen-sched-fix-sched_move_domain.patch deleted file mode 100644 index d2f72713..00000000 --- a/0313-xen-sched-fix-sched_move_domain.patch +++ /dev/null @@ -1,79 +0,0 @@ -From e1f9cb16e2efbb202f2f8a9aa7c5ff1d392ece33 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Thu, 23 Nov 2023 12:24:12 +0100 -Subject: [PATCH] xen/sched: fix sched_move_domain() - -When moving a domain out of a cpupool running with the credit2 -scheduler and having multiple run-queues, the following ASSERT() can -be observed: - -(XEN) Xen call trace: -(XEN) [] R credit2.c#csched2_unit_remove+0xe3/0xe7 -(XEN) [] S sched_move_domain+0x2f3/0x5b1 -(XEN) [] S cpupool.c#cpupool_move_domain_locked+0x1d/0x3b -(XEN) [] S cpupool_move_domain+0x24/0x35 -(XEN) [] S domain_kill+0xa5/0x116 -(XEN) [] S do_domctl+0xe5f/0x1951 -(XEN) [] S timer.c#timer_lock+0x69/0x143 -(XEN) [] S pv_hypercall+0x44e/0x4a9 -(XEN) [] S lstar_enter+0x137/0x140 -(XEN) -(XEN) -(XEN) **************************************** -(XEN) Panic on CPU 1: -(XEN) Assertion 'svc->rqd == c2rqd(sched_unit_master(unit))' failed at common/sched/credit2.c:1159 -(XEN) **************************************** - -This is happening as sched_move_domain() is setting a different cpu -for a scheduling unit without telling the scheduler. When this unit is -removed from the scheduler, the ASSERT() will trigger. - -In non-debug builds the result is usually a clobbered pointer, leading -to another crash a short time later. - -Fix that by swapping the two involved actions (setting another cpu and -removing the unit from the scheduler). 
- -Link: https://github.com/Dasharo/dasharo-issues/issues/488 -Fixes: 70fadc41635b ("xen/cpupool: support moving domain between cpupools with different granularity") -Signed-off-by: Juergen Gross -Reviewed-by: George Dunlap -master commit: 4709ec82917668c2df958ef91b4f21c049c76bee -master date: 2023-11-20 10:49:29 +0100 ---- - xen/common/sched/core.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c -index 43132ff6e030..4bba16d315b9 100644 ---- a/xen/common/sched/core.c -+++ b/xen/common/sched/core.c -@@ -732,18 +732,20 @@ int sched_move_domain(struct domain *d, struct cpupool *c) - old_domdata = d->sched_priv; - - /* -- * Temporarily move all units to same processor to make locking -- * easier when moving the new units to the new processors. -+ * Remove all units from the old scheduler, and temporarily move them to -+ * the same processor to make locking easier when moving the new units to -+ * new processors. - */ - new_p = cpumask_first(d->cpupool->cpu_valid); - for_each_sched_unit ( d, unit ) - { -- spinlock_t *lock = unit_schedule_lock_irq(unit); -+ spinlock_t *lock; -+ -+ sched_remove_unit(old_ops, unit); - -+ lock = unit_schedule_lock_irq(unit); - sched_set_res(unit, get_sched_res(new_p)); - spin_unlock_irq(lock); -- -- sched_remove_unit(old_ops, unit); - } - - old_units = d->sched_unit_list; --- -2.41.0 - diff --git a/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch b/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch deleted file mode 100644 index 3553e86f..00000000 --- a/0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 785ae49f9a2305766ff9198ac780e0ffbae99c1f Mon Sep 17 00:00:00 2001 -Message-Id: <785ae49f9a2305766ff9198ac780e0ffbae99c1f.1674598796.git.demi@invisiblethingslab.com> -From: Andrew Cooper -Date: Fri, 11 Nov 2022 19:36:59 -0500 -Subject: [PATCH] x86: Activate Data Operand Invariant Timing Mode by default -Cc: Marek Marczykowski-Górecki - -Intel IceLake and later CPUs have microarchitectural behaviors which cause -data-dependent timing behavior. This is not an issue for 99% of software, -but it is a problem for cryptography routines. On these CPUs, a new -architectural feature, DOITM, was retrofitted in microcode. - -For now, Xen can't enumerate DOITM to guest kernels; getting this working is -still in progress. The consequence is that guest kernels will incorrectly -conclude that they are safe. - -To maintain the safety of current software, activate DOITM unilaterally. This -will be relaxed in the future when we can enumerate the feature properly to -guests. 
- -[ Demi: add missing MSR values and drop a stopgap command-line option ] - -Signed-off-by: Andrew Cooper -Co-authored-by: Demi Marie Obenour -Signed-off-by: Demi Marie Obenour ---- - xen/arch/x86/cpu/common.c | 27 +++++++++++++++++++++++++++ - xen/arch/x86/include/asm/msr-index.h | 3 +++ - 2 files changed, 30 insertions(+) - -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 0412dbc915e51a518c4541e50c9690b6afcb79bb..9e35ff506646c474f78971f7abc911b3660f9066 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -209,6 +209,32 @@ void ctxt_switch_levelling(const struct vcpu *next) - alternative_vcall(ctxt_switch_masking, next); - } - -+static void doitm_init(void) -+{ -+ uint64_t val; -+ -+ if ( !cpu_has_arch_caps ) -+ return; -+ -+ rdmsrl(MSR_ARCH_CAPABILITIES, val); -+ if ( !(val & ARCH_CAPS_DOITM) ) -+ return; -+ -+ /* -+ * We are currently unable to enumerate MSR_ARCH_CAPS to guest. As a -+ * consequence, guest kernels will believe they're safe even when they are -+ * not. -+ * -+ * Until we can enumerate DOITM for guests, set it unilaterally. -+ * This prevents otherwise-correct crypto from becoming vulnerable to -+ * timing sidechannels. -+ */ -+ -+ rdmsrl(MSR_UARCH_MISC_CTRL, val); -+ val |= UARCH_CTRL_DOITM; -+ wrmsrl(MSR_UARCH_MISC_CTRL, val); -+} -+ - bool_t opt_cpu_info; - boolean_param("cpuinfo", opt_cpu_info); - -@@ -532,6 +558,7 @@ void identify_cpu(struct cpuinfo_x86 *c) - /* Now the feature flags better reflect actual CPU features! */ - - xstate_init(c); -+ doitm_init(); - - #ifdef NOISY_CAPS - printk(KERN_DEBUG "CPU: After all inits, caps:"); --- -Sincerely, -Demi Marie Obenour (she/her/hers) -Invisible Things Lab - diff --git a/0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch deleted file mode 100644 index 2b2df9db..00000000 --- a/0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch +++ /dev/null @@ -1,48 +0,0 @@ -From d2d2dcae879c6cc05227c9620f0a772f35fe6886 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 23 Aug 2023 09:26:36 +0200 -Subject: [PATCH 501/510] x86/AMD: extend Zenbleed check to models "good" ucode - isn't known for - -Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and -0x91, (quoting the respective Linux commit) is similarly affected. Put -another instance of our Zen1 vs Zen2 distinction checks in -amd_check_zenbleed(), forcing use of the chickenbit irrespective of -ucode version (building upon real hardware never surfacing a version of -0xffffffff). - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302) ---- - xen/arch/x86/cpu/amd.c | 13 ++++++++++--- - 1 file changed, 10 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 3ea214fc2e84..1bb3044be15b 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -909,10 +909,17 @@ void amd_check_zenbleed(void) - case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; - default: - /* -- * With the Fam17h check above, parts getting here are Zen1. -- * They're not affected. -+ * With the Fam17h check above, most parts getting here are -+ * Zen1. They're not affected. Assume Zen2 ones making it -+ * here are affected regardless of microcode version. -+ * -+ * Zen1 vs Zen2 isn't a simple model number comparison, so use -+ * STIBP as a heuristic to distinguish. 
- */ -- return; -+ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ return; -+ good_rev = ~0U; -+ break; - } - - rdmsrl(MSR_AMD64_DE_CFG, val); --- -2.41.0 - diff --git a/0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch deleted file mode 100644 index f0898558..00000000 --- a/0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch +++ /dev/null @@ -1,74 +0,0 @@ -From dc28aba565f226f9bec24cfde993e78478acfb4e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 12 Sep 2023 15:06:49 +0100 -Subject: [PATCH 502/510] x86/spec-ctrl: Fix confusion between - SPEC_CTRL_EXIT_TO_XEN{,_IST} - -c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths") -dropped the only user, leaving behind the (incorrect) implication that Xen had -split exit paths. - -Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST -to SPEC_CTRL_EXIT_TO_XEN for consistency. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 ++-------- - xen/arch/x86/x86_64/entry.S | 2 +- - 2 files changed, 3 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index f23bb105c51e..e8fd01243ce7 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -79,7 +79,6 @@ - * - SPEC_CTRL_ENTRY_FROM_PV - * - SPEC_CTRL_ENTRY_FROM_INTR - * - SPEC_CTRL_ENTRY_FROM_INTR_IST -- * - SPEC_CTRL_EXIT_TO_XEN_IST - * - SPEC_CTRL_EXIT_TO_XEN - * - SPEC_CTRL_EXIT_TO_PV - * -@@ -268,11 +267,6 @@ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV - --/* Use when exiting to Xen context. */ --#define SPEC_CTRL_EXIT_TO_XEN \ -- ALTERNATIVE "", \ -- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV -- - /* Use when exiting to PV guest context. */ - #define SPEC_CTRL_EXIT_TO_PV \ - ALTERNATIVE "", \ -@@ -339,8 +333,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - UNLIKELY_END(\@_serialise) - .endm - --/* Use when exiting to Xen in IST context. */ --.macro SPEC_CTRL_EXIT_TO_XEN_IST -+/* Use when exiting to Xen context. */ -+.macro SPEC_CTRL_EXIT_TO_XEN - /* - * Requires %rbx=stack_end - * Clobbers %rax, %rcx, %rdx -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 7675a59ff057..b45a09823a03 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -673,7 +673,7 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ - - RESTORE_ALL adj=8 - iretq --- -2.41.0 - diff --git a/0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch deleted file mode 100644 index 96b6e4c4..00000000 --- a/0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 84690fb82c4f4aecb72a6789d8994efa74841e09 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 12 Sep 2023 17:03:16 +0100 -Subject: [PATCH 503/510] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into - it's single user - -With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that -there's only a single EXIT_TO_XEN path. 
Fold DO_SPEC_CTRL_EXIT_TO_XEN into -SPEC_CTRL_EXIT_TO_XEN to simplify further fixes. - -When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own -is going to be too generic shortly. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 40 ++++++++++-------------- - 1 file changed, 16 insertions(+), 24 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index e8fd01243ce7..d5f65d80eafb 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -211,27 +211,6 @@ - wrmsr - .endm - --.macro DO_SPEC_CTRL_EXIT_TO_XEN --/* -- * Requires %rbx=stack_end -- * Clobbers %rax, %rcx, %rdx -- * -- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is -- * in effect, and reload the shadow value. This covers race conditions which -- * exist with an NMI/MCE/etc hitting late in the return-to-guest path. -- */ -- xor %edx, %edx -- -- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -- jz .L\@_skip -- -- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -- mov $MSR_SPEC_CTRL, %ecx -- wrmsr -- --.L\@_skip: --.endm -- - .macro DO_SPEC_CTRL_EXIT_TO_GUEST - /* - * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo -@@ -340,11 +319,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * Clobbers %rax, %rcx, %rdx - */ - testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -- jz .L\@_skip -+ jz .L\@_skip_sc_msr - -- DO_SPEC_CTRL_EXIT_TO_XEN -+ /* -+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing -+ * is in effect, and reload the shadow value. This covers race conditions -+ * which exist with an NMI/MCE/etc hitting late in the return-to-guest -+ * path. -+ */ -+ xor %edx, %edx - --.L\@_skip: -+ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ jz .L\@_skip_sc_msr -+ -+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -+ mov $MSR_SPEC_CTRL, %ecx -+ wrmsr -+ -+.L\@_skip_sc_msr: - .endm - - #endif /* __ASSEMBLY__ */ --- -2.41.0 - diff --git a/0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch deleted file mode 100644 index 93c96563..00000000 --- a/0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 3952c73bdbd05f0e666986fce633a591237b3c88 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 1 Sep 2023 11:38:44 +0100 -Subject: [PATCH 504/510] x86/spec-ctrl: Turn the remaining - SPEC_CTRL_{ENTRY,EXIT}_* into asm macros - -These have grown more complex over time, with some already having been -converted. - -Provide full Requires/Clobbers comments, otherwise missing at this level of -indirection. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 37 ++++++++++++++++++------ - 1 file changed, 28 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index d5f65d80eafb..c6d5f2ad0142 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -231,26 +231,45 @@ - .endm - - /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). 
*/ --#define SPEC_CTRL_ENTRY_FROM_PV \ -+.macro SPEC_CTRL_ENTRY_FROM_PV -+/* -+ * Requires %rsp=regs/cpuinfo, %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ */ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ -- X86_FEATURE_IBPB_ENTRY_PV; \ -- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ -+ X86_FEATURE_IBPB_ENTRY_PV -+ -+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV -+ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV -+.endm - - /* Use in interrupt/exception context. May interrupt Xen or PV context. */ --#define SPEC_CTRL_ENTRY_FROM_INTR \ -+.macro SPEC_CTRL_ENTRY_FROM_INTR -+/* -+ * Requires %rsp=regs, %r14=stack_end, %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ */ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ -- X86_FEATURE_IBPB_ENTRY_PV; \ -- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ -+ X86_FEATURE_IBPB_ENTRY_PV -+ -+ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV -+ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV -+.endm - - /* Use when exiting to PV guest context. */ --#define SPEC_CTRL_EXIT_TO_PV \ -- ALTERNATIVE "", \ -- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ -+.macro SPEC_CTRL_EXIT_TO_PV -+/* -+ * Requires %rax=spec_ctrl, %rsp=regs/info -+ * Clobbers %rcx, %rdx -+ */ -+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV -+ - DO_SPEC_CTRL_COND_VERW -+.endm - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. --- -2.41.0 - diff --git a/0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch deleted file mode 100644 index 4a0b5a00..00000000 --- a/0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch +++ /dev/null @@ -1,106 +0,0 @@ -From ba023e93d0b1e60b80251bf080bab694efb9f8e3 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 30 Aug 2023 20:11:50 +0100 -Subject: [PATCH 505/510] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_* - comments - -... to better explain how they're used. - -Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the -corner case when e.g. an NMI hits late in an exit-to-guest path. - -Leave a TODO, which will be addressed in subsequent patches which arrange for -VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++++++---- - 1 file changed, 31 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index c6d5f2ad0142..97c4db31cde9 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -230,7 +230,10 @@ - wrmsr - .endm - --/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ -+/* -+ * Used after an entry from PV context: SYSCALL, SYSENTER, INT, -+ * etc. There is always a guest speculation state in context. -+ */ - .macro SPEC_CTRL_ENTRY_FROM_PV - /* - * Requires %rsp=regs/cpuinfo, %rdx=0 -@@ -245,7 +248,11 @@ - X86_FEATURE_SC_MSR_PV - .endm - --/* Use in interrupt/exception context. May interrupt Xen or PV context. */ -+/* -+ * Used after an exception or maskable interrupt, hitting Xen or PV context. 
-+ * There will either be a guest speculation context, or (barring fatal -+ * exceptions) a well-formed Xen speculation context. -+ */ - .macro SPEC_CTRL_ENTRY_FROM_INTR - /* - * Requires %rsp=regs, %r14=stack_end, %rdx=0 -@@ -260,7 +267,10 @@ - X86_FEATURE_SC_MSR_PV - .endm - --/* Use when exiting to PV guest context. */ -+/* -+ * Used when exiting from any entry context, back to PV context. This -+ * includes from an IST entry which moved onto the primary stack. -+ */ - .macro SPEC_CTRL_EXIT_TO_PV - /* - * Requires %rax=spec_ctrl, %rsp=regs/info -@@ -272,7 +282,13 @@ - .endm - - /* -- * Use in IST interrupt/exception context. May interrupt Xen or PV context. -+ * Used after an IST entry hitting Xen or PV context. Special care is needed, -+ * because when hitting Xen context, there may not be a well-formed -+ * speculation context. (i.e. it can hit in the middle of -+ * SPEC_CTRL_{ENTRY,EXIT}_* regions.) -+ * -+ * An IST entry which hits PV context moves onto the primary stack and leaves -+ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN. - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* -@@ -331,7 +347,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - UNLIKELY_END(\@_serialise) - .endm - --/* Use when exiting to Xen context. */ -+/* -+ * Use when exiting from any entry context, back to Xen context. This -+ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an -+ * incomplete speculation context. -+ * -+ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we -+ * need to treat this as if it were an EXIT_TO_$GUEST case too. -+ */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* - * Requires %rbx=stack_end -@@ -356,6 +379,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - wrmsr - - .L\@_skip_sc_msr: -+ -+ /* TODO VERW */ -+ - .endm - - #endif /* __ASSEMBLY__ */ --- -2.41.0 - diff --git a/0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch deleted file mode 100644 index 2b46cec5..00000000 --- a/0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 5f7efd47c8273fde972637d0360851802f76eca9 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 13 Sep 2023 13:48:16 +0100 -Subject: [PATCH 506/510] x86/entry: Adjust restore_all_xen to hold stack_end - in %r14 - -All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it -for consistency. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 8 ++++---- - xen/arch/x86/x86_64/entry.S | 8 ++++---- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 97c4db31cde9..66c706496f94 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -357,10 +357,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %rbx=stack_end -+ * Requires %r14=stack_end - * Clobbers %rax, %rcx, %rdx - */ -- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - jz .L\@_skip_sc_msr - - /* -@@ -371,10 +371,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - xor %edx, %edx - -- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - jz .L\@_skip_sc_msr - -- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax -+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax - mov $MSR_SPEC_CTRL, %ecx - wrmsr - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index b45a09823a03..92279a225dd6 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -665,15 +665,15 @@ restore_all_xen: - * Check whether we need to switch to the per-CPU page tables, in - * case we return to late PV exit code (from an NMI or #MC). - */ -- GET_STACK_END(bx) -- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) -+ GET_STACK_END(14) -+ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) - UNLIKELY_START(ne, exit_cr3) -- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax -+ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax - mov %rax, %cr3 - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ - - RESTORE_ALL adj=8 - iretq --- -2.41.0 - diff --git a/0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch deleted file mode 100644 index 3de9cd4f..00000000 --- a/0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch +++ /dev/null @@ -1,109 +0,0 @@ -From e4a71bc0da0baf7464bb0d8e33053f330e5ea366 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 13 Sep 2023 12:20:12 +0100 -Subject: [PATCH 507/510] x86/entry: Track the IST-ness of an entry for the - exit paths - -Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the -entry/exit asm, so it only needs setting in the IST path. - -As this is subtle and fragile, add check_ist_exit() to be used in debugging -builds to cross-check that the ist_exit boolean matches the entry vector. - -Write check_ist_exit() it in C, because it's debug only and the logic more -complicated than I care to maintain in asm. - -For now, we only need to use this signal in the exit-to-Xen path, but some -exit-to-guest paths happen in IST context too. Check the correctness in all -exit paths to avoid the logic bit-rotting. 
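
To make the cross-check concrete, here is a minimal stand-alone C sketch of the idea; the vector numbers are the architectural IST users, and the real check_ist_exit() in the hunk below additionally range-checks against TRAP_nr:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* The IST-using vectors: #DB (1), NMI (2), #DF (8) and #MC (18). */
    static bool vector_uses_ist(uint8_t vec)
    {
        const uint32_t ist_mask =
            (1u << 1) | (1u << 2) | (1u << 8) | (1u << 18);

        return vec < 32 && (ist_mask & (1u << vec));
    }

    /* Debug-build cross-check: the ist_exit boolean carried in %r12 must
     * match what the entry vector implies, or the asm has bit-rotted. */
    static void check_ist_exit_sketch(uint8_t entry_vector, bool ist_exit)
    {
        assert(vector_uses_ist(entry_vector) == ist_exit);
    }
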
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c) - -x86/entry: Partially revert IST-exit checks - -The patch adding check_ist_exit() didn't account for the fact that -reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in -%r12 doesn't survive into the next context, and is a stale value C. - -This shows up in Gitlab CI for the Clang build: - - https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827 - -and in OSSTest for GCC 8: - - http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log - -There's no straightforward way to reconstruct the IST-exit-ness on the -exit-to-guest path after a context switch. For now, we only need IST-exit on -the return-to-Xen path. - -Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9) ---- - xen/arch/x86/traps.c | 13 +++++++++++++ - xen/arch/x86/x86_64/entry.S | 13 ++++++++++++- - 2 files changed, 25 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index d12004b1c6fc..e65cc6004148 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -2315,6 +2315,19 @@ void asm_domain_crash_synchronous(unsigned long addr) - do_softirq(); - } - -+#ifdef CONFIG_DEBUG -+void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit) -+{ -+ const unsigned int ist_mask = -+ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) | -+ (1U << X86_EXC_DF) | (1U << X86_EXC_MC); -+ uint8_t ev = regs->entry_vector; -+ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask); -+ -+ ASSERT(is_ist == ist_exit); -+} -+#endif -+ - /* - * Local variables: - * mode: C -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 92279a225dd6..4cebc4fbe33f 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -659,8 +659,15 @@ ENTRY(early_page_fault) - .section .text.entry, "ax", @progbits - - ALIGN --/* No special register assumptions. */ -+/* %r12=ist_exit */ - restore_all_xen: -+ -+#ifdef CONFIG_DEBUG -+ mov %rsp, %rdi -+ mov %r12, %rsi -+ call check_ist_exit -+#endif -+ - /* - * Check whether we need to switch to the per-CPU page tables, in - * case we return to late PV exit code (from an NMI or #MC). -@@ -1091,6 +1098,10 @@ handle_ist_exception: - .L_ist_dispatch_done: - mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) -+ -+ /* This is an IST exit */ -+ mov $1, %r12d -+ - cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) - jne ret_from_intr - --- -2.41.0 - diff --git a/0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch deleted file mode 100644 index c21173a0..00000000 --- a/0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 2e2c3efcfc9f183674a8de6ed954ffbe7188b70d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 13 Sep 2023 13:53:33 +0100 -Subject: [PATCH 508/510] x86/spec-ctrl: Issue VERW during IST exit to Xen - -There is a corner case where e.g. an NMI hitting an exit-to-guest path after -SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW -flush to scrub potentially sensitive data from uarch buffers. - -In order to compensate, issue VERW when exiting to Xen from an IST entry. 
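
The shape of the eventual fix can be sketched in C before reading the asm; SCF_verw's bit position and the function name here are illustrative stand-ins, not Xen's real definitions:

    #include <stdbool.h>
    #include <stdint.h>

    #define SCF_verw (1u << 3)        /* illustrative bit position only */

    static uint16_t verw_sel;         /* selector whose descriptor VERW loads */

    /* When leaving an IST entry back to Xen, scrub the uarch buffers if
     * VERW flushing is in use: the interrupted context may already have
     * issued its own VERW before the NMI/#MC arrived. */
    static void ist_exit_to_xen(bool ist_exit, uint8_t scf)
    {
        if ( ist_exit && (scf & SCF_verw) )
            asm volatile ( "verw %0" :: "m" (verw_sel) );
    }
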
- -SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack, -and we're about to add a third. Load the field into %ebx, and list the -register as clobbered. - -%r12 has been arranged to be the ist_exit signal, so add this as an input -dependency and use it to identify when to issue a VERW. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 20 +++++++++++++++----- - xen/arch/x86/x86_64/entry.S | 2 +- - 2 files changed, 16 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 66c706496f94..28a75796e652 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -357,10 +357,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %r14=stack_end -- * Clobbers %rax, %rcx, %rdx -+ * Requires %r12=ist_exit, %r14=stack_end -+ * Clobbers %rax, %rbx, %rcx, %rdx - */ -- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -+ -+ testb $SCF_ist_sc_msr, %bl - jz .L\@_skip_sc_msr - - /* -@@ -371,7 +373,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - xor %edx, %edx - -- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ testb $SCF_use_shadow, %bl - jz .L\@_skip_sc_msr - - mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax -@@ -380,8 +382,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - - .L\@_skip_sc_msr: - -- /* TODO VERW */ -+ test %r12, %r12 -+ jz .L\@_skip_ist_exit -+ -+ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ -+ testb $SCF_verw, %bl -+ jz .L\@_skip_verw -+ verw STACK_CPUINFO_FIELD(verw_sel)(%r14) -+.L\@_skip_verw: - -+.L\@_skip_ist_exit: - .endm - - #endif /* __ASSEMBLY__ */ -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 4cebc4fbe33f..c12e011b4d2a 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -680,7 +680,7 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ - - RESTORE_ALL adj=8 - iretq --- -2.41.0 - diff --git a/0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch deleted file mode 100644 index 3bdff7f5..00000000 --- a/0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 19ee1e1faa32b79274b3484cb1170a5970f1e602 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 15 Sep 2023 12:13:51 +0100 -Subject: [PATCH 509/510] x86/amd: Introduce is_zen{1,2}_uarch() predicates - -We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to -introduce a 4th. Wrap the heuristic into a pair of predicates rather than -opencoding it, and the explanation of the heuristic, at each usage site. 
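
As a usage illustration, a hedged sketch of the call-site shape this buys (the chickenbit helper is a placeholder; the predicate bodies match the amd.h hunk further down):

    #include <stdbool.h>

    /* Zen1 vs Zen2 within Fam17h/18h: STIBP is the distinguishing feature.
     * Vendor/family checks remain the caller's responsibility. */
    static bool cpu_has_amd_stibp;    /* stand-in for boot_cpu_has(...) */

    #define is_zen1_uarch() (!cpu_has_amd_stibp)
    #define is_zen2_uarch() cpu_has_amd_stibp

    static void apply_zen2_chickenbit(void) { /* placeholder */ }

    static void example_callsite(unsigned int family)
    {
        if ( family == 0x17 && is_zen2_uarch() )
            apply_zen2_chickenbit();
    }
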
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705) ---- - xen/arch/x86/cpu/amd.c | 18 ++++-------------- - xen/arch/x86/include/asm/amd.h | 11 +++++++++++ - 2 files changed, 15 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 1bb3044be15b..e94ba5a0e006 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -855,15 +855,13 @@ void amd_set_legacy_ssbd(bool enable) - * non-branch instructions to be ignored. It is to be set unilaterally in - * newer microcode. - * -- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a -- * simple model number comparison, so use STIBP as a heuristic to separate the -- * two uarches in Fam17h(AMD)/18h(Hygon). -+ * This chickenbit is something unrelated on Zen1. - */ - void amd_init_spectral_chicken(void) - { - uint64_t val, chickenbit = 1 << 1; - -- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ if (cpu_has_hypervisor || !is_zen2_uarch()) - return; - - if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) -@@ -912,11 +910,8 @@ void amd_check_zenbleed(void) - * With the Fam17h check above, most parts getting here are - * Zen1. They're not affected. Assume Zen2 ones making it - * here are affected regardless of microcode version. -- * -- * Zen1 vs Zen2 isn't a simple model number comparison, so use -- * STIBP as a heuristic to distinguish. - */ -- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ if (is_zen1_uarch()) - return; - good_rev = ~0U; - break; -@@ -1277,12 +1272,7 @@ static int __init cf_check zen2_c6_errata_check(void) - */ - s_time_t delta; - -- /* -- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as -- * a heuristic to separate the two uarches in Fam17h. -- */ -- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || -- !boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) - return 0; - - /* -diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h -index a975d3de2688..82324110abdf 100644 ---- a/xen/arch/x86/include/asm/amd.h -+++ b/xen/arch/x86/include/asm/amd.h -@@ -140,6 +140,17 @@ - AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \ - AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf)) - -+/* -+ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and -+ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP -+ * as a heuristic that distinguishes the two. -+ * -+ * The caller is required to perform the appropriate vendor/family checks -+ * first. -+ */ -+#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP) -+ - struct cpuinfo_x86; - int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); - --- -2.41.0 - diff --git a/0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch deleted file mode 100644 index 3655b5ac..00000000 --- a/0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch +++ /dev/null @@ -1,228 +0,0 @@ -From 9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 30 Aug 2023 20:24:25 +0100 -Subject: [PATCH 510/510] x86/spec-ctrl: Mitigate the Zen1 DIV leakage - -In the Zen1 microarchitecure, there is one divider in the pipeline which -services uops from both threads. 
In the case of #DE, the latched result from -the previous DIV to execute will be forwarded speculatively. - -This is an interesting covert channel that allows two threads to communicate -without any system calls. In also allows userspace to obtain the result of -the most recent DIV instruction executed (even speculatively) in the core, -which can be from a higher privilege context. - -Scrub the result from the divider by executing a non-faulting divide. This -needs performing on the exit-to-guest paths, and ist_exit-to-Xen. - -Alternatives in IST context is believed safe now that it's done in NMI -context. - -This is XSA-439 / CVE-2023-20588. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315) ---- - docs/misc/xen-command-line.pandoc | 6 ++- - xen/arch/x86/hvm/svm/entry.S | 1 + - xen/arch/x86/include/asm/cpufeatures.h | 2 +- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 17 +++++++++ - xen/arch/x86/spec_ctrl.c | 48 +++++++++++++++++++++++- - 5 files changed, 71 insertions(+), 3 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index d9dae740ccba..b92c8f969c33 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2315,7 +2315,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - > {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, --> unpriv-mmio,gds-mit}= ]` -+> unpriv-mmio,gds-mit,div-scrub}= ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2437,6 +2437,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate - GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use - of the AVX2 Gather instructions. - -+On all hardware, the `div-scrub=` option can be used to force or prevent Xen -+from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate -+DIV-leakage on hardware believed to be vulnerable. -+ - ### sync_console - > `= ` - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 981cd82e7c0b..934f12cf5cdd 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -74,6 +74,7 @@ __UNLIKELY_END(nsvm_hap) - 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. */ - .endm - ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM -+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - - pop %r15 - pop %r14 -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index da0593de8542..c3aad21c3b43 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ - XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ --/* Bits 23 unused. */ -+XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */ - XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. 
*/ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 28a75796e652..f4b8b9d9561c 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -177,6 +177,19 @@ - .L\@_verw_skip: - .endm - -+.macro DO_SPEC_CTRL_DIV -+/* -+ * Requires nothing -+ * Clobbers %rax -+ * -+ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any -+ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber -+ * %rdx. -+ */ -+ mov $1, %eax -+ div %al -+.endm -+ - .macro DO_SPEC_CTRL_ENTRY maybexen:req - /* - * Requires %rsp=regs (also cpuinfo if !maybexen) -@@ -279,6 +292,8 @@ - ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV - - DO_SPEC_CTRL_COND_VERW -+ -+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - .endm - - /* -@@ -391,6 +406,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - verw STACK_CPUINFO_FIELD(verw_sel)(%r14) - .L\@_skip_verw: - -+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV -+ - .L\@_skip_ist_exit: - .endm - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 79b98f0fe7ba..0ff3c895ac72 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -79,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1; - static bool __initdata opt_unpriv_mmio; - static bool __ro_after_init opt_fb_clear_mmio; - static int8_t __initdata opt_gds_mit = -1; -+static int8_t __initdata opt_div_scrub = -1; - - static int __init cf_check parse_spec_ctrl(const char *s) - { -@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_srb_lock = 0; - opt_unpriv_mmio = false; - opt_gds_mit = 0; -+ opt_div_scrub = 0; - } - else if ( val > 0 ) - rc = -EINVAL; -@@ -285,6 +287,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_unpriv_mmio = val; - else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 ) - opt_gds_mit = val; -+ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 ) -+ opt_div_scrub = val; - else - rc = -EINVAL; - -@@ -485,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n", -+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? "LFENCE" : -@@ -510,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk) - opt_l1d_flush ? " L1D_FLUSH" : "", - opt_md_clear_pv || opt_md_clear_hvm || - opt_fb_clear_mmio ? " VERW" : "", -+ opt_div_scrub ? " DIV" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ -@@ -967,6 +972,45 @@ static void __init srso_calculations(bool hw_smt_enabled) - setup_force_cpu_cap(X86_FEATURE_SRSO_NO); - } - -+/* -+ * The Div leakage issue is specific to the AMD Zen1 microarchitecure. -+ * -+ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no -+ * hope of spotting the case where we might move to vulnerable hardware. We -+ * also can't make any useful conclusion about SMT-ness. 
-+ * -+ * Don't check the hypervisor bit, so at least we do the safe thing when -+ * booting on something that looks like a Zen1 CPU. -+ */ -+static bool __init has_div_vuln(void) -+{ -+ if ( !(boot_cpu_data.x86_vendor & -+ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) -+ return false; -+ -+ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 ) -+ return false; -+ -+ return is_zen1_uarch(); -+} -+ -+static void __init div_calculations(bool hw_smt_enabled) -+{ -+ bool cpu_bug_div = has_div_vuln(); -+ -+ if ( opt_div_scrub == -1 ) -+ opt_div_scrub = cpu_bug_div; -+ -+ if ( opt_div_scrub ) -+ setup_force_cpu_cap(X86_FEATURE_SC_DIV); -+ -+ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled ) -+ warning_add( -+ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n" -+ "enabled. Please assess your configuration and choose an\n" -+ "explicit 'smt=' setting. See XSA-439.\n"); -+} -+ - static void __init ibpb_calculations(void) - { - bool def_ibpb_entry = false; -@@ -1726,6 +1770,8 @@ void __init init_speculation_mitigations(void) - - ibpb_calculations(); - -+ div_calculations(hw_smt_enabled); -+ - /* Check whether Eager FPU should be enabled by default. */ - if ( opt_eager_fpu == -1 ) - opt_eager_fpu = should_use_eager_fpu(); --- -2.41.0 - diff --git a/0511-xsa442-4.17.patch b/0511-xsa442-4.17.patch deleted file mode 100644 index a78bfdd2..00000000 --- a/0511-xsa442-4.17.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 5b2ccb60ff22fbff44dd66214c2956a434ee6271 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Tue, 13 Jun 2023 15:01:05 +0200 -Subject: [PATCH] iommu/amd-vi: flush IOMMU TLB when flushing the DTE -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The caching invalidation guidelines from the AMD-Vi specification (48882—Rev -3.07-PUB—Oct 2022) seem to be misleading on some hardware, as devices will -malfunction (see stale DMA mappings) if some fields of the DTE are updated but -the IOMMU TLB is not flushed. This has been observed in practice on AMD -systems. Due to the lack of guidance from the currently published -specification this patch aims to increase the flushing done in order to prevent -device malfunction. - -In order to fix, issue an INVALIDATE_IOMMU_PAGES command from -amd_iommu_flush_device(), flushing all the address space. Note this requires -callers to be adjusted in order to pass the DomID on the DTE previous to the -modification. - -Some call sites don't provide a valid DomID to amd_iommu_flush_device() in -order to avoid the flush. That's because the device had address translations -disabled and hence the previous DomID on the DTE is not valid. Note the -current logic relies on the entity disabling address translations to also flush -the TLB of the in use DomID. - -Device I/O TLB flushing when ATS are enabled is not covered by the current -change, as ATS usage is not security supported. 
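
Before the diff, a hedged C sketch of the reworked helper's contract (the invalidation primitives are reduced to placeholders; DOMID_INVALID is Xen's reserved sentinel value):

    #include <stdint.h>

    typedef uint16_t domid_t;
    #define DOMID_INVALID 0x7ff4U

    static void invalidate_dev_table_entry(uint16_t bdf) { /* placeholder */ }
    static void invalidate_iommu_pages(domid_t domid)     { /* placeholder */ }
    static void flush_command_buffer(void)                { /* placeholder */ }

    /* Callers pass the DomID that was in the DTE *before* the update, or
     * DOMID_INVALID when translations were disabled (nothing cached). */
    static void amd_iommu_flush_device_sketch(uint16_t bdf, domid_t prev_domid)
    {
        invalidate_dev_table_entry(bdf);
        flush_command_buffer();

        if ( prev_domid != DOMID_INVALID )
        {
            invalidate_iommu_pages(prev_domid);   /* whole address space */
            flush_command_buffer();
        }
    }
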
- -This is XSA-442 / CVE-2023-34326 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/drivers/passthrough/amd/iommu.h | 3 ++- - xen/drivers/passthrough/amd/iommu_cmd.c | 10 +++++++++- - xen/drivers/passthrough/amd/iommu_guest.c | 5 +++-- - xen/drivers/passthrough/amd/iommu_init.c | 6 +++++- - xen/drivers/passthrough/amd/pci_amd_iommu.c | 14 ++++++++++---- - 5 files changed, 29 insertions(+), 9 deletions(-) - -diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h -index 5429ada58ef5..a58be28bf96d 100644 ---- a/xen/drivers/passthrough/amd/iommu.h -+++ b/xen/drivers/passthrough/amd/iommu.h -@@ -283,7 +283,8 @@ void amd_iommu_flush_pages(struct domain *d, unsigned long dfn, - unsigned int order); - void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev, - uint64_t gaddr, unsigned int order); --void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf); -+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf, -+ domid_t domid); - void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf); - void amd_iommu_flush_all_caches(struct amd_iommu *iommu); - -diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c -index 40ddf366bb4d..cb28b36abc38 100644 ---- a/xen/drivers/passthrough/amd/iommu_cmd.c -+++ b/xen/drivers/passthrough/amd/iommu_cmd.c -@@ -363,10 +363,18 @@ void amd_iommu_flush_pages(struct domain *d, - _amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order); - } - --void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf) -+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf, -+ domid_t domid) - { - invalidate_dev_table_entry(iommu, bdf); - flush_command_buffer(iommu, 0); -+ -+ /* Also invalidate IOMMU TLB entries when flushing the DTE. 
*/ -+ if ( domid != DOMID_INVALID ) -+ { -+ invalidate_iommu_pages(iommu, INV_IOMMU_ALL_PAGES_ADDRESS, domid, 0); -+ flush_command_buffer(iommu, 0); -+ } - } - - void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf) -diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c -index 80a331f546ed..be86bce6fb03 100644 ---- a/xen/drivers/passthrough/amd/iommu_guest.c -+++ b/xen/drivers/passthrough/amd/iommu_guest.c -@@ -385,7 +385,7 @@ static int do_completion_wait(struct domain *d, cmd_entry_t *cmd) - - static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) - { -- uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id; -+ uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id, prev_domid; - struct amd_iommu_dte *gdte, *mdte, *dte_base; - struct amd_iommu *iommu = NULL; - struct guest_iommu *g_iommu; -@@ -445,13 +445,14 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) - req_id = get_dma_requestor_id(iommu->seg, mbdf); - dte_base = iommu->dev_table.buffer; - mdte = &dte_base[req_id]; -+ prev_domid = mdte->domain_id; - - spin_lock_irqsave(&iommu->lock, flags); - dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx); - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ amd_iommu_flush_device(iommu, req_id, prev_domid); - - return 0; - } -diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c -index 166570648d26..101a60ce1794 100644 ---- a/xen/drivers/passthrough/amd/iommu_init.c -+++ b/xen/drivers/passthrough/amd/iommu_init.c -@@ -1547,7 +1547,11 @@ static int cf_check _invalidate_all_devices( - req_id = ivrs_mappings[bdf].dte_requestor_id; - if ( iommu ) - { -- amd_iommu_flush_device(iommu, req_id); -+ /* -+ * IOMMU TLB flush performed separately (see -+ * invalidate_all_domain_pages()). -+ */ -+ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID); - amd_iommu_flush_intremap(iommu, req_id); - } - } -diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c -index 94e37755064b..8641b84712a0 100644 ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c -@@ -192,10 +192,13 @@ static int __must_check amd_iommu_setup_domain_device( - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ /* DTE didn't have DMA translations enabled, do not flush the TLB. */ -+ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID); - } - else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) ) - { -+ domid_t prev_domid = dte->domain_id; -+ - /* - * Strictly speaking if the device is the only one with this requestor - * ID, it could be allowed to be re-assigned regardless of unity map -@@ -252,7 +255,7 @@ static int __must_check amd_iommu_setup_domain_device( - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ amd_iommu_flush_device(iommu, req_id, prev_domid); - } - else - spin_unlock_irqrestore(&iommu->lock, flags); -@@ -421,6 +424,8 @@ static void amd_iommu_disable_domain_device(const struct domain *domain, - spin_lock_irqsave(&iommu->lock, flags); - if ( dte->tv || dte->v ) - { -+ domid_t prev_domid = dte->domain_id; -+ - /* See the comment in amd_iommu_setup_device_table(). 
*/ - dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED; - smp_wmb(); -@@ -439,7 +444,7 @@ static void amd_iommu_disable_domain_device(const struct domain *domain, - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, req_id); -+ amd_iommu_flush_device(iommu, req_id, prev_domid); - - AMD_IOMMU_DEBUG("Disable: device id = %#x, " - "domain = %d, paging mode = %d\n", -@@ -610,7 +615,8 @@ static int cf_check amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) - - spin_unlock_irqrestore(&iommu->lock, flags); - -- amd_iommu_flush_device(iommu, bdf); -+ /* DTE didn't have DMA translations enabled, do not flush the TLB. */ -+ amd_iommu_flush_device(iommu, bdf, DOMID_INVALID); - } - - if ( amd_iommu_reserve_domain_unity_map( --- -2.42.0 - diff --git a/0512-xsa446.patch b/0512-xsa446.patch deleted file mode 100644 index acf1d0f7..00000000 --- a/0512-xsa446.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 80d5aada598c3a800a350003d5d582931545e13c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 26 Oct 2023 14:37:38 +0100 -Subject: [PATCH] x86/spec-ctrl: Remove conditional IRQs-on-ness for INT - $0x80/0x82 paths -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Before speculation defences, some paths in Xen could genuinely get away with -being IRQs-on at entry. But XPTI invalidated this property on most paths, and -attempting to maintain it on the remaining paths was a mistake. - -Fast forward, and DO_SPEC_CTRL_COND_IBPB (protection for AMD BTC/SRSO) is not -IRQ-safe, running with IRQs enabled in some cases. The other actions taken on -these paths happen to be IRQ-safe. - -Make entry_int82() and int80_direct_trap() unconditionally Interrupt Gates -rather than Trap Gates. Remove the conditional re-adjustment of -int80_direct_trap() in smp_prepare_cpus(), and have entry_int82() explicitly -enable interrupts when safe to do so. - -In smp_prepare_cpus(), with the conditional re-adjustment removed, the -clearing of pv_cr3 is the only remaining action gated on XPTI, and it is out -of place anyway, repeating work already done by smp_prepare_boot_cpu(). Drop -the entire if() condition to avoid leaving an incorrect vestigial remnant. - -Also drop comments which make incorrect statements about when its safe to -enable interrupts. - -This is XSA-446 / CVE-2023-46836 - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné ---- - xen/arch/x86/pv/traps.c | 4 ++-- - xen/arch/x86/smpboot.c | 14 -------------- - xen/arch/x86/x86_64/compat/entry.S | 2 ++ - xen/arch/x86/x86_64/entry.S | 1 - - 4 files changed, 4 insertions(+), 17 deletions(-) - -diff --git a/xen/arch/x86/pv/traps.c b/xen/arch/x86/pv/traps.c -index 74f333da7e1c..240d1a2db7a3 100644 ---- a/xen/arch/x86/pv/traps.c -+++ b/xen/arch/x86/pv/traps.c -@@ -139,11 +139,11 @@ void __init pv_trap_init(void) - #ifdef CONFIG_PV32 - /* The 32-on-64 hypercall vector is only accessible from ring 1. */ - _set_gate(idt_table + HYPERCALL_VECTOR, -- SYS_DESC_trap_gate, 1, entry_int82); -+ SYS_DESC_irq_gate, 1, entry_int82); - #endif - - /* Fast trap for int80 (faster than taking the #GP-fixup path). 
*/
--    _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_trap_gate, 3,
-+    _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
-              &int80_direct_trap);
- 
-     open_softirq(NMI_SOFTIRQ, nmi_softirq);
-diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
-index 3a1a659082c6..4c54ecbc91d7 100644
---- a/xen/arch/x86/smpboot.c
-+++ b/xen/arch/x86/smpboot.c
-@@ -1158,20 +1158,6 @@ void __init smp_prepare_cpus(void)
- 
-     stack_base[0] = (void *)((unsigned long)stack_start & ~(STACK_SIZE - 1));
- 
--    if ( opt_xpti_hwdom || opt_xpti_domu )
--    {
--        get_cpu_info()->pv_cr3 = 0;
--
--#ifdef CONFIG_PV
--        /*
--         * All entry points which may need to switch page tables have to start
--         * with interrupts off. Re-write what pv_trap_init() has put there.
--         */
--        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
--                  &int80_direct_trap);
--#endif
--    }
--
-     set_nr_sockets();
- 
-     socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index bd5abd8040bd..fcc3a721f147 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -21,6 +21,8 @@ ENTRY(entry_int82)
-         SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
-         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
- 
-+        sti
-+
-         CR4_PV32_RESTORE
- 
-         GET_CURRENT(bx)
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 5ca74f5f62b2..9a7b129aa7e4 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -327,7 +327,6 @@ ENTRY(sysenter_entry)
- #ifdef CONFIG_XEN_SHSTK
-         ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
- #endif
--        /* sti could live here when we don't switch page tables below. */
-         pushq $FLAT_USER_SS
-         pushq $0
-         pushfq
- 
-base-commit: 7befef87cc9b1bb8ca15d866ce1ecd9165ccb58c
-prerequisite-patch-id: 142a87c707411d49e136c3fb76f1b14963ec6dc8
--- 
-2.30.2
-
diff --git a/1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch b/1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch
index a6c29668..a49f262f 100644
--- a/1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch
+++ b/1100-Define-build-dates-time-based-on-SOURCE_DATE_EPOCH.patch
@@ -1,4 +1,4 @@
-From b66d690cfae5e75e18ce29057df6fa82467fc3ed Mon Sep 17 00:00:00 2001
+From ff55b35b061b87748304b3776042394b9af01364 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Pierret=20=28fepitre=29?=
 
 Date: Sat, 31 Oct 2020 14:57:49 +0100
@@ -13,11 +13,11 @@ Improvements from Jan Beulich .
INSTALL | 6 ++++++ tools/firmware/hvmloader/Makefile | 2 +- tools/firmware/vgabios/Makefile | 3 ++- - xen/Makefile | 5 +++-- - 5 files changed, 14 insertions(+), 4 deletions(-) + xen/Makefile | 4 ++-- + 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Config.mk b/Config.mk -index e0ce59346896..21b300008b18 100644 +index 8d91e4c4d5b5..b4985f044913 100644 --- a/Config.mk +++ b/Config.mk @@ -253,3 +253,5 @@ QEMU_UPSTREAM_LOC ?= $(call or,$(wildcard $(QEMU_UPSTREAM_INTREE)),\ @@ -72,21 +72,22 @@ index 3284812fdec8..4350ef402127 100644 VGABIOS_DATE = "-DVGABIOS_DATE=\"$(VGABIOS_REL_DATE)\"" diff --git a/xen/Makefile b/xen/Makefile -index 9d0df5e2c543..f8e3db7f128e 100644 +index a46e6330dbca..9ac87ea9b27c 100644 --- a/xen/Makefile +++ b/xen/Makefile -@@ -12,8 +12,9 @@ export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) - - export XEN_WHOAMI ?= $(USER) - export XEN_DOMAIN ?= $(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown])) --export XEN_BUILD_DATE ?= $(shell LC_ALL=C date) --export XEN_BUILD_TIME ?= $(shell LC_ALL=C date +%T) -+ -+export XEN_BUILD_DATE ?= $(shell LC_ALL=C date $(DATE_EPOCH_OPTS)) -+export XEN_BUILD_TIME ?= $(shell LC_ALL=C date $(DATE_EPOCH_OPTS) +%T) - export XEN_BUILD_HOST ?= $(shell hostname) - - # Best effort attempt to find a python interpreter, defaulting to Python 3 if +@@ -15,10 +15,10 @@ ifeq ($(origin XEN_DOMAIN), undefined) + export XEN_DOMAIN := $(shell ([ -x /bin/dnsdomainname ] && /bin/dnsdomainname) || ([ -x /bin/domainname ] && /bin/domainname || echo [unknown])) + endif + ifeq ($(origin XEN_BUILD_DATE), undefined) +-export XEN_BUILD_DATE := $(shell LC_ALL=C date) ++export XEN_BUILD_DATE := $(shell LC_ALL=C date $(DATE_EPOCH_OPTS)) + endif + ifeq ($(origin XEN_BUILD_TIME), undefined) +-export XEN_BUILD_TIME := $(shell LC_ALL=C date +%T) ++export XEN_BUILD_TIME := $(shell LC_ALL=C date $(DATE_EPOCH_OPTS) +%T) + endif + ifeq ($(origin XEN_BUILD_HOST), undefined) + export XEN_BUILD_HOST := $(shell hostname) -- -2.37.3 +2.41.0 diff --git a/1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch b/1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch index 1f2faa48..2a96a1ec 100644 --- a/1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch +++ b/1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch @@ -1,4 +1,4 @@ -From 8b9073d98074295d08bab005d9a6f5b470ff9f6c Mon Sep 17 00:00:00 2001 +From 4c7ed3308ce0270d47c196c910e7aad0f6032d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Pierret=20=28fepitre=29?= Date: Sun, 8 Nov 2020 16:17:29 +0100 @@ -14,7 +14,7 @@ properly pathes in strings sections 3 files changed, 5 insertions(+) diff --git a/tools/Rules.mk b/tools/Rules.mk -index 6e135387bd7e..c75695fc5cfc 100644 +index a5229bb5acb4..e4ec18fc7782 100644 --- a/tools/Rules.mk +++ b/tools/Rules.mk @@ -176,6 +176,8 @@ endif @@ -27,10 +27,10 @@ index 6e135387bd7e..c75695fc5cfc 100644 INSTALL_PYTHON_PROG = \ diff --git a/xen/Makefile b/xen/Makefile -index f8e3db7f128e..0ec7602ddf89 100644 +index 9ac87ea9b27c..bc9656b8ab41 100644 --- a/xen/Makefile +++ b/xen/Makefile -@@ -400,6 +400,8 @@ ifneq ($(CONFIG_CC_IS_CLANG),y) +@@ -404,6 +404,8 @@ ifneq ($(CONFIG_CC_IS_CLANG),y) CFLAGS += -Wa,--strip-local-absolute endif @@ -38,12 +38,12 @@ index f8e3db7f128e..0ec7602ddf89 100644 + AFLAGS += -D__ASSEMBLY__ - $(call cc-option-add,AFLAGS,CC,-Wa$(comma)--noexecstack) + $(call cc-option-add,AFLAGS,CC,-Wa$$(comma)--noexecstack) 
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index 177a2ff74272..7340d44a9705 100644 +index f213a6b56a4d..4eec76510612 100644 --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile -@@ -132,6 +132,7 @@ $(TARGET): $(TARGET)-syms $(efi-y) $(obj)/boot/mkelf32 +@@ -133,6 +133,7 @@ $(TARGET): $(TARGET)-syms $(efi-y) $(obj)/boot/mkelf32 mv $(TMP) $(TARGET) CFLAGS-$(XEN_BUILD_EFI) += -DXEN_BUILD_EFI @@ -52,5 +52,5 @@ index 177a2ff74272..7340d44a9705 100644 $(TARGET)-syms: $(objtree)/prelink.o $(obj)/xen.lds $(LD) $(XEN_LDFLAGS) -T $(obj)/xen.lds -N $< $(build_id_linker) \ -- -2.37.3 +2.41.0 diff --git a/rel b/rel index 45a4fb75..d00491fd 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -8 +1 diff --git a/version b/version index 1c8fd19f..ab268c20 100644 --- a/version +++ b/version @@ -1 +1 @@ -4.17.2 +4.17.3 diff --git a/xen.spec.in b/xen.spec.in index 5033873d..4d3180c7 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -103,26 +103,9 @@ Patch0306: 0306-x86-Replace-PAT_-with-X86_MT_.patch Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch -Patch0310: 0310-x86-amd-do-not-expose-HWCR.TscFreqSel-to-guests.patch -Patch0311: 0311-x86-x2apic-remove-usage-of-ACPI_FADT_APIC_CLUSTER.patch -Patch0312: 0312-x86-x2apic-introduce-a-mixed-physical-cluster-mode.patch -Patch0313: 0313-xen-sched-fix-sched_move_domain.patch Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch # Security fixes -Patch0500: 0500-x86-Activate-Data-Operand-Invariant-Timing-Mode-by-d.patch -Patch0501: 0501-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch -Patch0502: 0502-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch -Patch0503: 0503-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch -Patch0504: 0504-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch -Patch0505: 0505-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch -Patch0506: 0506-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch -Patch0507: 0507-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch -Patch0508: 0508-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch -Patch0509: 0509-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch -Patch0510: 0510-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch -Patch0511: 0511-xsa442-4.17.patch -Patch0512: 0512-xsa446.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From cacb1a165abd906971e2d1762409f2d71e62a8a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Jan 2024 14:45:53 +0100 Subject: [PATCH 37/64] Update Xen config to 4.17.3, enable DOITM --- config | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/config b/config index 1d49e89d..b8758bf5 100644 --- a/config +++ b/config @@ -1,9 +1,9 @@ # # Automatically generated file; DO NOT EDIT. 
-# Xen/x86 4.17-rc Configuration +# Xen/x86 4.17.3 Configuration # CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=120201 +CONFIG_GCC_VERSION=120301 CONFIG_CLANG_VERSION=0 CONFIG_LD_IS_GNU=y CONFIG_CC_HAS_VISIBILITY_ATTRIBUTE=y @@ -50,6 +50,7 @@ CONFIG_ALTERNATIVE_CALL=y CONFIG_ARCH_MAP_DOMAIN_PAGE=y CONFIG_HAS_ALTERNATIVE=y CONFIG_HAS_COMPAT=y +CONFIG_HAS_DIT=y CONFIG_HAS_EX_TABLE=y CONFIG_HAS_FAST_MULTIPLY=y CONFIG_HAS_IOPORTS=y @@ -71,6 +72,7 @@ CONFIG_SPECULATIVE_HARDEN_BRANCH=y CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS=y # end of Speculative hardening +CONFIG_DIT_DEFAULT=y CONFIG_HYPFS=y CONFIG_HYPFS_CONFIG=y CONFIG_IOREQ_SERVER=y From a98e7e7a7e9ffb198c7b0b8614f13e147022f58c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Jan 2024 14:46:28 +0100 Subject: [PATCH 38/64] Apply XSA-449 patch --- 0500-xsa449.patch | 89 +++++++++++++++++++++++++++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 90 insertions(+) create mode 100644 0500-xsa449.patch diff --git a/0500-xsa449.patch b/0500-xsa449.patch new file mode 100644 index 00000000..80aeac29 --- /dev/null +++ b/0500-xsa449.patch @@ -0,0 +1,89 @@ +From d8b92b21b224126860978e4c604302f3c1e3bf75 Mon Sep 17 00:00:00 2001 +From: Roger Pau Monne +Date: Wed, 13 Dec 2023 15:51:59 +0100 +Subject: [PATCH] pci: fail device assignment if phantom functions cannot be + assigned +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current behavior is that no error is reported if (some) phantom functions +fail to be assigned during device add or assignment, so the operation succeeds +even if some phantom functions are not correctly setup. + +This can lead to devices possibly being successfully assigned to a domU while +some of the device phantom functions are still assigned to dom0. Even when the +device is assigned domIO before being assigned to a domU phantom functions +might fail to be assigned to domIO, and also fail to be assigned to the domU, +leaving them assigned to dom0. + +Since the device can generate requests using the IDs of those phantom +functions, given the scenario above a device in such state would be in control +of a domU, but still capable of generating transactions that use a context ID +targeting dom0 owned memory. + +Modify device assign in order to attempt to deassign the device if phantom +functions failed to be assigned. + +Note that device addition is not modified in the same way, as in that case the +device is assigned to a trusted domain, and hence partial assign can lead to +device malfunction but not a security issue. 
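
The resulting control flow can be sketched as follows (struct fields and helpers are reduced to placeholders; PCI_SLOT() is devfn >> 3):

    #include <stdbool.h>
    #include <stdint.h>

    struct pdev_sketch {
        uint8_t devfn;
        uint8_t phantom_stride;
        bool broken;
    };

    static int assign_one(uint8_t devfn) { return 0; /* placeholder */ }
    static int deassign_all(void)        { return 0; /* placeholder */ }

    /* Assign the base function, then each phantom function in the same
     * slot; on a failure past the base function, attempt a full rollback,
     * and if that fails too, mark the device broken so the caller can
     * quarantine it and crash the target domU. */
    static int assign_device_sketch(struct pdev_sketch *pdev)
    {
        uint8_t devfn = pdev->devfn;
        int rc = assign_one(devfn);

        while ( pdev->phantom_stride && !rc )
        {
            devfn += pdev->phantom_stride;
            if ( (devfn >> 3) != (pdev->devfn >> 3) )
                break;                            /* left the slot: done */
            rc = assign_one(devfn);
        }

        if ( rc && devfn != pdev->devfn && deassign_all() )
            pdev->broken = true;

        return rc;
    }
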
+ +This is XSA-449 / CVE-2023-46839 + +Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +--- + xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c +index 1439d1ef2b26..47c0eee7bdcc 100644 +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -1488,11 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) + + pdev->fault.count = 0; + +- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn, +- pci_to_dev(pdev), flag)) ) +- goto done; ++ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev), ++ flag); + +- for ( ; pdev->phantom_stride; rc = 0 ) ++ while ( pdev->phantom_stride && !rc ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) +@@ -1503,8 +1502,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) + + done: + if ( rc ) +- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", +- d, &PCI_SBDF(seg, bus, devfn), rc); ++ { ++ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n", ++ d, devfn != pdev->devfn ? "phantom function " : "", ++ &PCI_SBDF(seg, bus, devfn), rc); ++ ++ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) ) ++ { ++ /* ++ * Device with phantom functions that failed to both assign and ++ * rollback. Mark the device as broken and crash the target domain, ++ * as the state of the functions at this point is unknown and Xen ++ * has no way to assert consistent context assignment among them. ++ */ ++ pdev->broken = true; ++ if ( !is_hardware_domain(d) && d != dom_io ) ++ domain_crash(d); ++ } ++ } + /* The device is assigned to dom_io so mark it as quarantined */ + else if ( d == dom_io ) + pdev->quarantine = true; +-- +2.43.0 + diff --git a/xen.spec.in b/xen.spec.in index 4d3180c7..af0bffde 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -106,6 +106,7 @@ Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch # Security fixes +Patch0500: 0500-xsa449.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From f67c3d809570027d7432cda8a18390a0921b141a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 30 Jan 2024 14:58:08 +0100 Subject: [PATCH 39/64] version 4.17.3-2 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index d00491fd..0cfbf088 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -1 +2 From 4ae2033b6ca4a3bec4cbb161e57cd85fdc59dd66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Fri, 2 Feb 2024 22:36:29 +0100 Subject: [PATCH 40/64] Backport IVMD fix See patch description --- 0315-amd-vi-fix-IVMD-memory-type-checks.patch | 46 +++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 47 insertions(+) create mode 100644 0315-amd-vi-fix-IVMD-memory-type-checks.patch diff --git a/0315-amd-vi-fix-IVMD-memory-type-checks.patch b/0315-amd-vi-fix-IVMD-memory-type-checks.patch new file mode 100644 index 00000000..419c5d58 --- /dev/null +++ b/0315-amd-vi-fix-IVMD-memory-type-checks.patch @@ -0,0 +1,46 @@ +From: Roger Pau Monne +Subject: [PATCH 1/4] amd-vi: fix IVMD memory type checks +Date: Thu, 1 Feb 2024 18:01:56 +0100 + +The current code that parses the IVMD blocks is relaxed 
with regard to the
+restriction that such unity regions should always fall into memory ranges
+marked as reserved in the memory map.
+
+However the type checks for the IVMD addresses are inverted, and as a result
+IVMD ranges falling into RAM areas are accepted. Note that having such ranges
+in the first place is a firmware bug, as IVMD should always fall into reserved
+ranges.
+
+Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved')
+Signed-off-by: Roger Pau Monné
+---
+ xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c
+index 2e3b83014beb..ca70f4f3ae2c 100644
+--- a/xen/drivers/passthrough/amd/iommu_acpi.c
++++ b/xen/drivers/passthrough/amd/iommu_acpi.c
+@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block)
+         return -EIO;
+     }
+ 
+-    /* Types which won't be handed out are considered good enough. */
+-    if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI |
+-                   RAM_TYPE_UNUSABLE)) )
++    /*
++     * Types which aren't RAM are considered good enough.
++     * Note that a page being partially RESERVED, ACPI or UNUSABLE will
++     * force Xen into assuming the whole page as having that type in
++     * practice.
++     */
++    if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI |
++                 RAM_TYPE_UNUSABLE) )
+         continue;
+ 
+     AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr);
+-- 
+2.43.0
+
+
+
diff --git a/xen.spec.in b/xen.spec.in
index af0bffde..72d19a1a 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -104,6 +104,7 @@ Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch
 Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch
 Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch
 Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch
+Patch0315: 0315-amd-vi-fix-IVMD-memory-type-checks.patch
 
 # Security fixes
 Patch0500: 0500-xsa449.patch

From 8eeb295af3f7f3d14d13257ea33cb4a72dcae374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Tue, 6 Feb 2024 00:54:33 +0100
Subject: [PATCH 41/64] version 4.17.3-3

---
 rel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rel b/rel
index 0cfbf088..00750edc 100644
--- a/rel
+++ b/rel
@@ -1 +1 @@
-2
+3

From dd51e0ad61ce5c5fe718f624cf423436ce6673e8 Mon Sep 17 00:00:00 2001
From: Simon Gaiser
Date: Mon, 19 Feb 2024 12:18:04 +0100
Subject: [PATCH 42/64] Basic S0ix support

This adds some basic support, but even if working as intended it doesn't
reach satisfactory residency values yet. More importantly, in Qubes'
default configuration common devices (at least Intel integrated USB and
Thunderbolt) need more work to not block residency entirely.
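
Residency debugging of this kind mostly boils down to sampling the package C-state counters around an idle period. A hedged user-space sketch (the MSR index matches the patch below; /dev/cpu/*/msr is the Linux msr driver, and dom0 reads additionally rely on the MSR read-through patch in this series):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_PKG_C10_RESIDENCY 0x632

    /* The Linux msr driver maps pread() offsets to MSR indexes. */
    static uint64_t rd_pkg_c10(int fd)
    {
        uint64_t v = 0;

        pread(fd, &v, sizeof(v), MSR_PKG_C10_RESIDENCY);
        return v;
    }

    int main(void)
    {
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if ( fd < 0 )
            return 1;

        uint64_t before = rd_pkg_c10(fd);
        sleep(10);                                /* let the system idle */
        uint64_t after = rd_pkg_c10(fd);

        printf("PC10 residency delta: %llu\n",
               (unsigned long long)(after - before));
        close(fd);
        return 0;
    }
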
--- ...-legacy-replacement-mode-after-test-.patch | 60 ++++ ...8.10-counters-for-Tiger-and-Alder-La.patch | 59 ++++ ...entries-marked-as-unusable-when-pars.patch | 104 ++++++ ...rdware-domain-to-read-C-state-reside.patch | 75 ++++ ...se-ACPI-for-CPUs-without-hardcoded-C.patch | 326 ++++++++++++++++++ 0675-libxl_pci-Pass-power_mgmt-via-QMP.patch | 29 ++ xen.spec.in | 8 + 7 files changed, 661 insertions(+) create mode 100644 0670-x86-hpet-Disable-legacy-replacement-mode-after-test-.patch create mode 100644 0671-x86-idle-Get-PC-8.10-counters-for-Tiger-and-Alder-La.patch create mode 100644 0672-x86-ACPI-Ignore-entries-marked-as-unusable-when-pars.patch create mode 100644 0673-x86-msr-Allow-hardware-domain-to-read-C-state-reside.patch create mode 100644 0674-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch create mode 100644 0675-libxl_pci-Pass-power_mgmt-via-QMP.patch diff --git a/0670-x86-hpet-Disable-legacy-replacement-mode-after-test-.patch b/0670-x86-hpet-Disable-legacy-replacement-mode-after-test-.patch new file mode 100644 index 00000000..d71f5834 --- /dev/null +++ b/0670-x86-hpet-Disable-legacy-replacement-mode-after-test-.patch @@ -0,0 +1,60 @@ +From a112f0fbbb333fc29a35d0a81853d59409a33fde Mon Sep 17 00:00:00 2001 +: +: Upstreaming should be done soon: +: https://lore.kernel.org/xen-devel/20230807113117.1277-1-simon@invisiblethingslab.com/ +: +From: Simon Gaiser +To: xen-devel@lists.xenproject.org +Cc: Jan Beulich +Cc: Andrew Cooper +Cc: "Roger Pau Monné" +Cc: Wei Liu +Cc: Marek Marczykowski-Górecki +Date: Mon, 31 Jul 2023 12:13:42 +0200 +Subject: [XEN PATCH v3] x86/hpet: Disable legacy replacement mode after IRQ test + +As far as I understand the HPET legacy mode is not required after the +timer IRQ test. For previous discussion see [1] and [2]. Keeping it +enabled prevents reaching deeper C-states on some systems and thereby +also S0ix residency. So disable it after the timer IRQ test worked. Note +that this code path is only reached when opt_hpet_legacy_replacement < 0, +so explicit user choice is still honored. + +Link: https://lore.kernel.org/xen-devel/cb408368-077d-edb5-b4ad-f80086db48c1@invisiblethingslab.com/ # [1] +Link: https://lore.kernel.org/xen-devel/20230718122603.2002-1-simon@invisiblethingslab.com/ # [2] +Signed-off-by: Simon Gaiser +--- + +[ Resending v3, now with a unique Message-ID, sorry. ] + +Changes in v3: + + - Edit log message and downgrade it to XENLOG_DEBUG. + +Changes in v2: + + - Always disable legacy mode after test, not only when ARAT is + available. See [3] for reasoning. 
+ +[3]: https://lore.kernel.org/xen-devel/ac77ecba-6804-1d16-60dc-f184e5d31dcb@invisiblethingslab.com/ + +--- + xen/arch/x86/io_apic.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c +index 041233b9b7..b4b4cd5939 100644 +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -1967,6 +1967,8 @@ static void __init check_timer(void) + + if ( timer_irq_works() ) + { ++ printk(XENLOG_DEBUG "IRQ test with HPET Legacy Replacement Mode worked - disabling it again\n"); ++ hpet_disable_legacy_replacement_mode(); + local_irq_restore(flags); + return; + } +-- +2.40.1 + diff --git a/0671-x86-idle-Get-PC-8.10-counters-for-Tiger-and-Alder-La.patch b/0671-x86-idle-Get-PC-8.10-counters-for-Tiger-and-Alder-La.patch new file mode 100644 index 00000000..8b7d9024 --- /dev/null +++ b/0671-x86-idle-Get-PC-8.10-counters-for-Tiger-and-Alder-La.patch @@ -0,0 +1,59 @@ +From 6ca7bb0e831200eb9bec6cb850489be514591e7f Mon Sep 17 00:00:00 2001 +: +: Upstreaming needs more verbose log message and since Intel's SDM doesn't +: properly document those MSRs we need to argue about that. +: +: https://lore.kernel.org/xen-devel/20230718132334.2087-1-simon@invisiblethingslab.com/ +: +From: Simon Gaiser +To: xen-devel@lists.xenproject.org +Cc: Jan Beulich +Cc: Andrew Cooper +Cc: "Roger Pau Monné" +Cc: Wei Liu +Cc: Marek Marczykowski-Górecki +Subject: [XEN PATCH v2] x86/idle: Get PC{8..10} counters for Tiger and Alder Lake + +TODO + +Signed-off-by: Simon Gaiser +--- +Changes in v2: + - Fix wrong subject prefix + - Add missing Signed-off-by + - TODO +--- + + xen/arch/x86/acpi/cpu_idle.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c +index 557bc6ef86..a6d3175156 100644 +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -155,6 +155,12 @@ static void cf_check do_get_hw_residencies(void *arg) + + switch ( c->x86_model ) + { ++ /* Tiger Lake */ ++ case 0x8C: ++ case 0x8D: ++ /* Alder Lake */ ++ case 0x97: ++ case 0x9A: + /* 4th generation Intel Core (Haswell) */ + case 0x45: + GET_PC8_RES(hw_res->pc8); +@@ -185,9 +191,6 @@ static void cf_check do_get_hw_residencies(void *arg) + case 0x6C: + case 0x7D: + case 0x7E: +- /* Tiger Lake */ +- case 0x8C: +- case 0x8D: + /* Kaby Lake */ + case 0x8E: + case 0x9E: +-- +2.40.1 + diff --git a/0672-x86-ACPI-Ignore-entries-marked-as-unusable-when-pars.patch b/0672-x86-ACPI-Ignore-entries-marked-as-unusable-when-pars.patch new file mode 100644 index 00000000..5deac4d9 --- /dev/null +++ b/0672-x86-ACPI-Ignore-entries-marked-as-unusable-when-pars.patch @@ -0,0 +1,104 @@ +From 5d7a1a552199908db046b47893c9312ecf119e62 Mon Sep 17 00:00:00 2001 +: +: Upstreaming in progress +: https://lore.kernel.org/xen-devel/ac77ecba-6804-1d16-60dc-f184e5d31dcb@invisiblethingslab.com/ +: (And also see Link [5] for previous upstream discussion) +: +From: Simon Gaiser +Date: Mon, 14 Aug 2023 10:21:38 +0200 +Subject: [XEN PATCH] x86/ACPI: Ignore entries marked as unusable when parsing MADT + +Up to version 6.2 Errata B [2] the ACPI spec only defines +ACPI_MADT_ENABLE as: + + If zero, this processor is unusable, and the operating system + support will not attempt to use it. + +The bit that later will be ACPI_MADT_ONLINE_CAPABLE is reserved with +"Must be zero". + +Version 6.3 [3] then adds ACPI_MADT_ONLINE_CAPABLE and changes the +meaning of ACPI_MADT_ENABLE: + + Enabled + If this bit is set the processor is ready for use. 
If this bit
+        is clear and the Online Capable bit is set, system hardware
+        supports enabling this processor during OS runtime. If this bit
+        is clear and the Online Capable bit is also clear, this
+        processor is unusable, and OSPM shall ignore the contents of the
+        Processor Local APIC Structure.
+
+    Online Capable
+        The information conveyed by this bit depends on the value of the
+        Enabled bit. If the Enabled bit is set, this bit is reserved and
+        must be zero. Otherwise, if this bit is set, system
+        hardware supports enabling this processor during OS runtime.
+
+So with conforming firmwares it should be safe to simply ignore the
+entry if !ACPI_MADT_ENABLED && !ACPI_MADT_ONLINE_CAPABLE
+
+As a precaution against buggy firmwares this change, like Linux [4],
+ignores ACPI_MADT_ONLINE_CAPABLE completely if MADT revision < 5. Note
+that the MADT revision was already increased to 5 with spec version 6.2
+Errata A [1], so before the online capable flag was introduced. But it
+wasn't changed for the new flag, so this is the best we can do here.
+
+For previous discussion see thread [5].
+
+Link: http://www.uefi.org/sites/default/files/resources/ACPI%206_2_A_Sept29.pdf # [1]
+Link: https://uefi.org/sites/default/files/resources/ACPI_6_2_B_final_Jan30.pdf # [2]
+Link: https://uefi.org/sites/default/files/resources/ACPI_6_3_May16.pdf # [3]
+Link: https://git.kernel.org/torvalds/c/e2869bd7af608c343988429ceb1c2fe99644a01f # [4]
+Link: https://lore.kernel.org/xen-devel/80bae614-052e-0f90-cf13-0e5e4ed1a5cd@invisiblethingslab.com/ # [5]
+Signed-off-by: Simon Gaiser
+---
+ xen/arch/x86/acpi/boot.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
+index 54b72d716b..df5e0c4f25 100644
+--- a/xen/arch/x86/acpi/boot.c
++++ b/xen/arch/x86/acpi/boot.c
+@@ -77,6 +77,17 @@ static int __init cf_check acpi_parse_madt(struct acpi_table_header *table)
+ 	return 0;
+ }
+ 
++static bool __init acpi_is_processor_usable(uint32_t lapic_flags)
++{
++	if (lapic_flags & ACPI_MADT_ENABLED)
++		return true;
++
++	if (madt_revision >= 5 && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
++		return true;
++
++	return false;
++}
++
+ static int __init cf_check
+ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
+ {
+@@ -88,9 +99,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
+ 		return -EINVAL;
+ 
+ 	/* Don't register processors that cannot be onlined. */
+-	if (madt_revision >= 5 &&
+-	    !(processor->lapic_flags & ACPI_MADT_ENABLED) &&
+-	    !(processor->lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
++	if (!acpi_is_processor_usable(processor->lapic_flags))
+ 		return 0;
+ 
+ 	if ((processor->lapic_flags & ACPI_MADT_ENABLED) ||
+@@ -144,9 +153,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
+ 		return -EINVAL;
+ 
+ 	/* Don't register processors that cannot be onlined.
+-    if (madt_revision >= 5 &&
+-        !(processor->lapic_flags & ACPI_MADT_ENABLED) &&
+-        !(processor->lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
++    if (!acpi_is_processor_usable(processor->lapic_flags))
+         return 0;
+
+     if ((processor->lapic_flags & ACPI_MADT_ENABLED) ||
+--
+2.40.1
+
diff --git a/0673-x86-msr-Allow-hardware-domain-to-read-C-state-reside.patch b/0673-x86-msr-Allow-hardware-domain-to-read-C-state-reside.patch
new file mode 100644
index 00000000..807e57e3
--- /dev/null
+++ b/0673-x86-msr-Allow-hardware-domain-to-read-C-state-reside.patch
@@ -0,0 +1,75 @@
+From 946e5494801866c93332cc5d9ec0fa03a4df00d7 Mon Sep 17 00:00:00 2001
+:
+: NAKed by upstream: https://lore.kernel.org/xen-devel/15a30769-4a61-ca22-7b5a-6249186cd8a2@suse.com/
+: Keep it in Qubes for now since it's very helpful for debugging with
+: existing software.
+:
+From: Simon Gaiser
+To: xen-devel@lists.xenproject.org
+Cc: Jan Beulich
+Cc: Andrew Cooper
+Cc: "Roger Pau Monné"
+Cc: Wei Liu
+Cc: Marek Marczykowski-Górecki
+Subject: [XEN PATCH] x86/msr: Allow hardware domain to read package C-state
+ residency counters
+
+Since it's limited to the hardware domain it should be safe, and it's
+very useful to have access to this directly in dom0 when debugging
+power-related things, for example S0ix.
+---
+ xen/arch/x86/include/asm/msr-index.h |  9 +++++++++
+ xen/arch/x86/pv/emul-priv-op.c       | 19 +++++++++++++++++++
+ 2 files changed, 28 insertions(+)
+
+diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
+index 4f861c0bb4..7e7255383d 100644
+--- a/xen/arch/x86/include/asm/msr-index.h
++++ b/xen/arch/x86/include/asm/msr-index.h
+@@ -704,4 +704,13 @@
+ #define MSR_PKGC9_IRTL 0x00000634
+ #define MSR_PKGC10_IRTL 0x00000635
+
++/* Package C-state residency counters */
++#define MSR_PKG_C2_RESIDENCY 0x0000060d
++#define MSR_PKG_C3_RESIDENCY 0x000003f8
++#define MSR_PKG_C6_RESIDENCY 0x000003f9
++#define MSR_PKG_C7_RESIDENCY 0x000003fa
++#define MSR_PKG_C8_RESIDENCY 0x00000630
++#define MSR_PKG_C9_RESIDENCY 0x00000631
++#define MSR_PKG_C10_RESIDENCY 0x00000632
++
+ #endif /* __ASM_MSR_INDEX_H */
+diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
+index 5da00e24e4..9e0e582c5d 100644
+--- a/xen/arch/x86/pv/emul-priv-op.c
++++ b/xen/arch/x86/pv/emul-priv-op.c
+@@ -979,6 +979,25 @@ static int cf_check read_msr(
+         *val = 0;
+         return X86EMUL_OKAY;
+
++    case MSR_PKG_C2_RESIDENCY:
++    case MSR_PKG_C3_RESIDENCY:
++    case MSR_PKG_C6_RESIDENCY:
++    case MSR_PKG_C7_RESIDENCY:
++    case MSR_PKG_C8_RESIDENCY:
++    case MSR_PKG_C9_RESIDENCY:
++    case MSR_PKG_C10_RESIDENCY:
++        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
++            break;
++        if ( !is_hardware_domain(currd) )
++            break;
++        if ( nr_sockets > 1 ) {
++            // When being rescheduled the VM might see inconsistent data when
++            // running on a system with multiple sockets, since those MSRs are
++            // per package.
++            break;
++        }
++        goto normal;
++
+     case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
+     case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
+     case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
+--
+2.40.1
+
diff --git a/0674-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch b/0674-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch
new file mode 100644
index 00000000..5b2b6059
--- /dev/null
+++ b/0674-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch
@@ -0,0 +1,326 @@
+From ed046789908a154d98cdd8202050810aade9c6a6 Mon Sep 17 00:00:00 2001
+:
+: It's currently unclear if it's really needed or not. Keep it disabled
+: by default but compile it in such that testing with it is easy,
+: especially because this is closer to what Linux does. If it turns out
+: to be needed, we need to check what exactly is needed and then discuss
+: this again [1] with upstream.
+:
+: [1]: https://lore.kernel.org/xen-devel/10f37ea5-7fa5-976f-3e7b-fc675db20ec0@suse.com/
+:
+From: Simon Gaiser
+Date: Mon, 14 Aug 2023 10:09:59 +0200
+Subject: [RFC XEN PATCH] x86/mwait-idle: Use ACPI for CPUs without hardcoded
+ C-state table
+
+mwait-idle includes a hardcoded config for many CPUs. But some are
+missing, for example Tiger Lake. Linux' driver reads the config from
+ACPI in those cases. This adds the same to Xen's implementation.
+
+The Linux driver also has a feature to combine the internal table with
+the info from ACPI. This is not implemented here; for CPUs with an
+internal config nothing is changed.
+
+Signed-off-by: Simon Gaiser
+---
+ xen/arch/x86/acpi/cpu_idle.c       |  58 ++++++++++-----
+ xen/arch/x86/cpu/mwait-idle.c      | 116 +++++++++++++++++++++++++----
+ xen/arch/x86/include/asm/cpuidle.h |   2 +-
+ 3 files changed, 142 insertions(+), 34 deletions(-)
+
+diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
+index cfce4cc040..ca8f25fe2f 100644
+--- a/xen/arch/x86/acpi/cpu_idle.c
++++ b/xen/arch/x86/acpi/cpu_idle.c
+@@ -78,6 +78,7 @@
+ static void cf_check lapic_timer_nop(void) { }
+ void (*__read_mostly lapic_timer_off)(void);
+ void (*__read_mostly lapic_timer_on)(void);
++static struct notifier_block cpu_nfb;
+
+ bool lapic_timer_init(void)
+ {
+@@ -1313,6 +1314,26 @@ static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
+ #define print_cx_pminfo(c, p)
+ #endif
+
++
++static void repark_cpu(int cpu_id)
++{
++    uint32_t apic_id = x86_cpu_to_apicid[cpu_id];
++
++    /*
++     * If we've just learned of more available C states, wake the CPU if
++     * it's parked, so it can go back to sleep in perhaps a deeper state.
++     */
++    if ( park_offline_cpus && apic_id != BAD_APICID )
++    {
++        unsigned long flags;
++
++        local_irq_save(flags);
++        apic_wait_icr_idle();
++        apic_icr_write(APIC_DM_NMI | APIC_DEST_PHYSICAL, apic_id);
++        local_irq_restore(flags);
++    }
++}
++
+ long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power)
+ {
+     XEN_GUEST_HANDLE(xen_processor_cx_t) states;
+@@ -1360,24 +1381,27 @@ long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power)
+         set_cx(acpi_power, &xen_cx);
+     }
+
+-    if ( !cpu_online(cpu_id) )
+-    {
+-        uint32_t apic_id = x86_cpu_to_apicid[cpu_id];
+-
+-        /*
+-         * If we've just learned of more available C states, wake the CPU if
+-         * it's parked, so it can go back to sleep in perhaps a deeper state.
+-         */
+-        if ( park_offline_cpus && apic_id != BAD_APICID )
+-        {
+-            unsigned long flags;
+-
+-            local_irq_save(flags);
+-            apic_wait_icr_idle();
+-            apic_icr_write(APIC_DM_NMI | APIC_DEST_PHYSICAL, apic_id);
+-            local_irq_restore(flags);
++    if ( cpu_id == 0 && pm_idle_save == NULL ) {
++        /* Now that we have the ACPI info from dom0, try again to set up
++         * mwait-idle */
++        ret = mwait_idle_init(&cpu_nfb, true);
++        if (ret >= 0) {
++            unsigned int cpu;
++            /* mwait-idle took over, call its initializer for all CPUs */
++            for_each_present_cpu ( cpu )
++            {
++                cpu_nfb.notifier_call(&cpu_nfb, CPU_UP_PREPARE, (void *)(long)cpu);
++                cpu_nfb.notifier_call(&cpu_nfb, CPU_ONLINE, (void *)(long)cpu);
++                if ( !cpu_online(cpu) ) {
++                    repark_cpu(cpu);
++                }
++            }
++            return 0;
+         }
+     }
++
++    if ( !cpu_online(cpu_id) )
++        repark_cpu(cpu_id);
+     else if ( cpuidle_current_governor->enable )
+     {
+         ret = cpuidle_current_governor->enable(acpi_power);
+@@ -1677,7 +1701,7 @@ static int __init cf_check cpuidle_presmp_init(void)
+     if ( !xen_cpuidle )
+         return 0;
+
+-    mwait_idle_init(&cpu_nfb);
++    mwait_idle_init(&cpu_nfb, false);
+     cpu_nfb.notifier_call(&cpu_nfb, CPU_UP_PREPARE, cpu);
+     cpu_nfb.notifier_call(&cpu_nfb, CPU_ONLINE, cpu);
+     register_cpu_notifier(&cpu_nfb);
+diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
+index ff5c808bc9..b8fe28c3d7 100644
+--- a/xen/arch/x86/cpu/mwait-idle.c
++++ b/xen/arch/x86/cpu/mwait-idle.c
+@@ -60,15 +60,20 @@
+ #undef PREFIX
+ #define PREFIX "mwait-idle: "
+
++#define pr_err(fmt...) printk(KERN_ERR fmt)
++
+ #ifdef DEBUG
+ # define pr_debug(fmt...) printk(KERN_DEBUG fmt)
+ #else
+ # define pr_debug(fmt...)
+ #endif
+
+-static __initdata bool opt_mwait_idle = true;
++static bool opt_mwait_idle = true;
+ boolean_param("mwait-idle", opt_mwait_idle);
+
++static bool opt_mwait_idle_acpi = false;
++boolean_param("mwait-idle-acpi", opt_mwait_idle_acpi);
++
+ static unsigned int mwait_substates;
+
+ /*
+@@ -81,7 +86,7 @@ static unsigned int mwait_substates;
+  * exclusive C-states, this parameter has no effect.
+  */
+ static unsigned int __ro_after_init preferred_states_mask;
+-static char __initdata preferred_states[64];
++static char preferred_states[64];
+ string_param("preferred-cstates", preferred_states);
+
+ #define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
+@@ -1140,6 +1145,9 @@ static const struct idle_cpu idle_cpu_snr = {
+     .c1e_promotion = C1E_PROMOTION_DISABLE,
+ };
+
++static struct idle_cpu __read_mostly idle_cpu_acpi = {
++};
++
+ #define ICPU(model, cpu) \
+     { X86_VENDOR_INTEL, 6, INTEL_FAM6_ ## model, X86_FEATURE_ALWAYS, \
+       &idle_cpu_ ## cpu}
+@@ -1425,21 +1433,92 @@ static void __init mwait_idle_state_table_update(void)
+     }
+ }
+
+-static int __init mwait_idle_probe(void)
++static int mwait_idle_state_table_from_acpi(void) {
++    // Linux tries every CPU until it finds one that declares FFH as entry
++    // method for all C-states in its ACPI table. It assumes that the
++    // config is identical for all CPUs. So let's just check the first CPU.
++
++    int rc = -EINVAL;
++    struct acpi_processor_power *acpi_power = processor_powers[0];
++    struct cpuidle_state *state_table = xzalloc_array(
++        struct cpuidle_state,
++        acpi_power->count + 1 /* NULL at end */ - 1 /* no C0 */
++    );
++
++    if (state_table == NULL) {
++        pr_err(PREFIX "failed to allocate state table\n");
++        rc = -ENOMEM;
++        goto ret;
++    }
++
++    for (unsigned int cstate = 1; cstate < acpi_power->count; ++cstate) {
++        struct acpi_processor_cx *acpi_cx = &acpi_power->states[cstate];
++        struct cpuidle_state *idle_cx = &state_table[cstate - 1];
++        if (acpi_cx->entry_method != ACPI_CSTATE_EM_FFH) {
++            pr_debug(PREFIX "ACPI based config not usable: Entry method for C-state %u isn't FFH\n", cstate);
++            rc = -ENODEV;
++            goto ret;
++        }
++
++        snprintf(idle_cx->name, sizeof(idle_cx->name), "C%u", cstate);
++
++        idle_cx->flags = MWAIT2flg(acpi_cx->address);
++        if (acpi_cx->type > ACPI_STATE_C2)
++            idle_cx->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
++        // Like Linux we don't set CPUIDLE_FLAG_IBRS
++
++        idle_cx->exit_latency = acpi_cx->latency;
++
++        idle_cx->target_residency = acpi_cx->latency;
++        if (acpi_cx->type > ACPI_STATE_C1)
++            idle_cx->target_residency *= 3;
++    }
++
++    idle_cpu_acpi.state_table = state_table;
++    rc = 0;
++    pr_debug(PREFIX "config read from ACPI\n");
++
++ret:
++    if (rc < 0 && state_table != NULL) {
++        xfree(state_table);
++    }
++    return rc;
++}
++
++static int mwait_idle_probe(bool from_acpi)
+ {
+     unsigned int eax, ebx, ecx;
+-    const struct x86_cpu_id *id = x86_match_cpu(intel_idle_ids);
+     const char *str;
+
+-    if (!id) {
+-        pr_debug(PREFIX "does not run on family %d model %d\n",
+-            boot_cpu_data.x86, boot_cpu_data.x86_model);
+-        return -ENODEV;
+-    }
++    if (from_acpi) {
++        int rc;
+
+-    if (!boot_cpu_has(X86_FEATURE_MONITOR)) {
+-        pr_debug(PREFIX "Please enable MWAIT in BIOS SETUP\n");
+-        return -ENODEV;
++        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
++            boot_cpu_data.x86 != 6 ||
++            !boot_cpu_has(X86_FEATURE_MONITOR)) {
++            pr_debug(PREFIX "skipping ACPI check on unsupported CPU\n");
++            return -ENODEV;
++        }
++
++        rc = mwait_idle_state_table_from_acpi();
++        if (rc < 0)
++            return rc;
++
++        icpu = &idle_cpu_acpi;
++    } else {
++        const struct x86_cpu_id *id = x86_match_cpu(intel_idle_ids);
++        if (!id) {
++            pr_debug(PREFIX "no internal config for family %d model %d\n",
++                boot_cpu_data.x86, boot_cpu_data.x86_model);
++            return -ENODEV;
++        }
++
++        if (!boot_cpu_has(X86_FEATURE_MONITOR)) {
++            pr_debug(PREFIX "Please enable MWAIT in BIOS SETUP\n");
++            return -ENODEV;
++        }
++
++        icpu = id->driver_data;
+     }
+
+     if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+@@ -1459,7 +1538,6 @@ static int __init mwait_idle_probe(void)
+
+     pr_debug(PREFIX "MWAIT substates: %#x\n", mwait_substates);
+
+-    icpu = id->driver_data;
+     cpuidle_state_table = icpu->state_table;
+
+     if (boot_cpu_has(X86_FEATURE_ARAT))
+@@ -1504,7 +1582,8 @@ static int __init mwait_idle_probe(void)
+     if (str[0])
+         printk("unrecognized \"preferred-cstates=%s\"\n", str);
+
+-    mwait_idle_state_table_update();
++    if (!from_acpi)
++        mwait_idle_state_table_update();
+
+     return 0;
+ }
+@@ -1613,14 +1692,19 @@ static int cf_check mwait_idle_cpu_init(
+     return NOTIFY_DONE;
+ }
+
+-int __init mwait_idle_init(struct notifier_block *nfb)
++int mwait_idle_init(struct notifier_block *nfb, bool from_acpi)
+ {
+     int err;
+
++    if (from_acpi && !opt_mwait_idle_acpi) {
++        pr_debug(PREFIX "ACPI based config disabled\n");
++        return -EPERM;
++    }
++
+     if (pm_idle_save)
+         return -ENODEV;
+
+-    err = mwait_idle_probe();
++    err = mwait_idle_probe(from_acpi);
+     if (!err && !boot_cpu_has(X86_FEATURE_ARAT)) {
+         hpet_broadcast_init();
+         if (xen_cpuidle < 0 && !hpet_broadcast_is_available())
+diff --git a/xen/arch/x86/include/asm/cpuidle.h b/xen/arch/x86/include/asm/cpuidle.h
+index 3edd7a75d2..f8913c7304 100644
+--- a/xen/arch/x86/include/asm/cpuidle.h
++++ b/xen/arch/x86/include/asm/cpuidle.h
+@@ -15,7 +15,7 @@ extern void (*lapic_timer_on)(void);
+
+ extern uint64_t (*cpuidle_get_tick)(void);
+
+-int mwait_idle_init(struct notifier_block *);
++int mwait_idle_init(struct notifier_block *, bool);
+ int cpuidle_init_cpu(unsigned int cpu);
+ void cf_check default_dead_idle(void);
+ void cf_check acpi_dead_idle(void);
+--
+2.40.1
+
diff --git a/0675-libxl_pci-Pass-power_mgmt-via-QMP.patch b/0675-libxl_pci-Pass-power_mgmt-via-QMP.patch
new file mode 100644
index 00000000..034d7785
--- /dev/null
+++ b/0675-libxl_pci-Pass-power_mgmt-via-QMP.patch
@@ -0,0 +1,29 @@
+From cace989bbe09b45b6936f32a8a06d03d6cf5d5d7 Mon Sep 17 00:00:00 2001
+:
+: Upstreaming only makes sense once QEMU has proper support for this.
+:
+From: Simon Gaiser
+Date: Mon, 12 Feb 2024 11:18:40 +0100
+Subject: [PATCH] libxl_pci: Pass power_mgmt via QMP
+
+Signed-off-by: Simon Gaiser
+---
+ tools/libs/light/libxl_pci.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/libs/light/libxl_pci.c b/tools/libs/light/libxl_pci.c
+index f4c4f17545..4b48698f18 100644
+--- a/tools/libs/light/libxl_pci.c
++++ b/tools/libs/light/libxl_pci.c
+@@ -1240,6 +1240,8 @@ static void pci_add_qmp_device_add(libxl__egc *egc, pci_add_state *pas)
+      */
+     if (pci->permissive)
+         libxl__qmp_param_add_bool(gc, &args, "permissive", true);
++    if (pci->power_mgmt)
++        libxl__qmp_param_add_bool(gc, &args, "power_mgmt", true);
+
+     qmp->ao = pas->aodev->ao;
+     qmp->domid = domid;
+--
+2.43.0
+
diff --git a/xen.spec.in b/xen.spec.in
index 72d19a1a..d1708078 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -156,6 +156,14 @@ PAtch0651: 0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch
 PAtch0652: 0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch
 Patch0653: 0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch
 
+# S0ix support
+Patch0670: 0670-x86-hpet-Disable-legacy-replacement-mode-after-test-.patch
+Patch0671: 0671-x86-idle-Get-PC-8.10-counters-for-Tiger-and-Alder-La.patch
+Patch0672: 0672-x86-ACPI-Ignore-entries-marked-as-unusable-when-pars.patch
+Patch0673: 0673-x86-msr-Allow-hardware-domain-to-read-C-state-reside.patch
+Patch0674: 0674-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch
+Patch0675: 0675-libxl_pci-Pass-power_mgmt-via-QMP.patch
+
 # Qubes specific patches
 Patch1000: 1000-Do-not-access-network-during-the-build.patch
 Patch1001: 1001-hotplug-store-block-params-for-cleanup.patch

From f22008ff1f41a91213383b6ce532548bf2c26b4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Tue, 12 Mar 2024 03:11:55 +0100
Subject: [PATCH 43/64] Include patches from stable-4.17 branch

Some of them were already backported, but there are a few more relevant
fixes. This may also ease applying future patches.
--- ...assignment-if-phantom-functions-cann.patch | 20 +- ...-VT-d-Fix-else-vs-endif-misplacement.patch | 70 ++++++ ...PU-erratum-1474-fix-to-more-affected.patch | 123 ++++++++++ 0318-CirrusCI-drop-FreeBSD-12.patch | 39 ++++ ...-Global-Performance-Counter-Control-.patch | 74 ++++++ ...ix-IRQ-handling-for-EXIT_REASON_INIT.patch | 65 ++++++ ...isallow-the-use-of-inactivity-states.patch | 126 +++++++++++ ...-lib-fdt-elf-temp.o-and-their-deps-t.patch | 70 ++++++ ...fix-off-by-one-in-entry-check-assert.patch | 36 +++ ...top-fix-sorting-bug-for-some-columns.patch | 67 ++++++ 0325-amd-vi-fix-IVMD-memory-type-checks.patch | 53 +++++ ...ix-fast-singlestep-state-persistence.patch | 86 +++++++ ...te-on-hvmemul_map_linear_addr-s-erro.patch | 63 ++++++ 0328-build-Replace-which-with-command-v.patch | 57 +++++ ...locating-memory-for-qemu-xen-in-stub.patch | 16 +- ...build-fails-when-running-kconfig-fai.patch | 59 +++++ 0331-x86emul-add-missing-EVEX.R-checks.patch | 50 +++++ ...ch-fix-norevert-test-hook-setup-typo.patch | 36 +++ ...printf-format-specifier-in-no_config.patch | 38 ++++ ...a-union-as-register-type-for-functio.patch | 141 ++++++++++++ ...NCH_HARDEN-option-to-only-be-set-whe.patch | 57 +++++ ...shadow-stack-in-exception-from-stub-.patch | 212 ++++++++++++++++++ ...n-arm-Fix-UBSAN-failure-in-start_xen.patch | 52 +++++ ...-VMX-when-their-enabling-is-prohibit.patch | 67 ++++++ ...-Fix-UB-shift-in-compat_set_timer_op.patch | 86 +++++++ ...he-built-in-SPECULATIVE_HARDEN_-opti.patch | 54 +++++ ...IRECT_THUNK-option-to-only-be-set-wh.patch | 67 ++++++ ...print-thunk-option-selection-if-not-.patch | 50 +++++ ...gister-livepatch-regions-when-loaded.patch | 159 +++++++++++++ ...arch-for-symbols-in-all-loaded-paylo.patch | 149 ++++++++++++ ...x-norevert-test-attempt-to-open-code.patch | 186 +++++++++++++++ ...operly-build-the-noapply-and-norever.patch | 43 ++++ ...gfault-in-device_model_spawn_outcome.patch | 39 ++++ ...ys-use-a-temporary-parameter-stashin.patch | 197 ++++++++++++++++ ...llow-for-levelling-of-VERW-side-effe.patch | 102 +++++++++ xen.spec.in | 38 +++- 36 files changed, 2829 insertions(+), 18 deletions(-) rename 0500-xsa449.patch => 0315-pci-fail-device-assignment-if-phantom-functions-cann.patch (86%) create mode 100644 0316-VT-d-Fix-else-vs-endif-misplacement.patch create mode 100644 0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch create mode 100644 0318-CirrusCI-drop-FreeBSD-12.patch create mode 100644 0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch create mode 100644 0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch create mode 100644 0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch create mode 100644 0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch create mode 100644 0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch create mode 100644 0324-tools-xentop-fix-sorting-bug-for-some-columns.patch create mode 100644 0325-amd-vi-fix-IVMD-memory-type-checks.patch create mode 100644 0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch create mode 100644 0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch create mode 100644 0328-build-Replace-which-with-command-v.patch rename 0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch => 0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch (80%) create mode 100644 0330-build-make-sure-build-fails-when-running-kconfig-fai.patch create mode 100644 0331-x86emul-add-missing-EVEX.R-checks.patch create mode 100644 
0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch create mode 100644 0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch create mode 100644 0334-x86-altcall-use-a-union-as-register-type-for-functio.patch create mode 100644 0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch create mode 100644 0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch create mode 100644 0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch create mode 100644 0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch create mode 100644 0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch create mode 100644 0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch create mode 100644 0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch create mode 100644 0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch create mode 100644 0343-xen-livepatch-register-livepatch-regions-when-loaded.patch create mode 100644 0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch create mode 100644 0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch create mode 100644 0346-xen-livepatch-properly-build-the-noapply-and-norever.patch create mode 100644 0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch create mode 100644 0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch create mode 100644 0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch diff --git a/0500-xsa449.patch b/0315-pci-fail-device-assignment-if-phantom-functions-cann.patch similarity index 86% rename from 0500-xsa449.patch rename to 0315-pci-fail-device-assignment-if-phantom-functions-cann.patch index 80aeac29..88e78bb5 100644 --- a/0500-xsa449.patch +++ b/0315-pci-fail-device-assignment-if-phantom-functions-cann.patch @@ -1,8 +1,8 @@ -From d8b92b21b224126860978e4c604302f3c1e3bf75 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 13 Dec 2023 15:51:59 +0100 -Subject: [PATCH] pci: fail device assignment if phantom functions cannot be - assigned +From f9e1ed51bdba31017ea17e1819eb2ade6b5c8615 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 30 Jan 2024 14:37:39 +0100 +Subject: [PATCH 315/349] pci: fail device assignment if phantom functions + cannot be assigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -34,15 +34,17 @@ This is XSA-449 / CVE-2023-46839 Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support') Signed-off-by: Roger Pau Monné Reviewed-by: Jan Beulich +master commit: cb4ecb3cc17b02c2814bc817efd05f3f3ba33d1e +master date: 2024-01-30 14:28:01 +0100 --- xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 1439d1ef2b26..47c0eee7bdcc 100644 +index 07d1986d33..8c62b14d19 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c -@@ -1488,11 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) +@@ -1444,11 +1444,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) pdev->fault.count = 0; @@ -57,7 +59,7 @@ index 1439d1ef2b26..47c0eee7bdcc 100644 { devfn += pdev->phantom_stride; if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) -@@ -1503,8 +1502,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) +@@ -1459,8 +1458,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 
flag) done: if ( rc ) @@ -85,5 +87,5 @@ index 1439d1ef2b26..47c0eee7bdcc 100644 else if ( d == dom_io ) pdev->quarantine = true; -- -2.43.0 +2.44.0 diff --git a/0316-VT-d-Fix-else-vs-endif-misplacement.patch b/0316-VT-d-Fix-else-vs-endif-misplacement.patch new file mode 100644 index 00000000..ca037af3 --- /dev/null +++ b/0316-VT-d-Fix-else-vs-endif-misplacement.patch @@ -0,0 +1,70 @@ +From 6b1864afc14d484cdbc9754ce3172ac3dc189846 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 30 Jan 2024 14:38:38 +0100 +Subject: [PATCH 316/349] VT-d: Fix "else" vs "#endif" misplacement + +In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This +generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body +is executed unconditionally. + +Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's +clearer to follow. This in turn involves adjusting p2m_get_pagetable() to +compile when CONFIG_HVM is disabled. + +This is XSA-450 / CVE-2023-46840. + +Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only") +Reported-by: Teddy Astie +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: cc6ba68edf6dcd18c3865e7d7c0f1ed822796426 +master date: 2024-01-30 14:29:15 +0100 +--- + xen/arch/x86/include/asm/p2m.h | 9 ++++++++- + xen/drivers/passthrough/vtd/iommu.c | 4 +--- + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h +index cd43d8621a..4f691533d5 100644 +--- a/xen/arch/x86/include/asm/p2m.h ++++ b/xen/arch/x86/include/asm/p2m.h +@@ -447,7 +447,14 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m) + return p2m->p2m_class == p2m_alternate; + } + +-#define p2m_get_pagetable(p2m) ((p2m)->phys_table) ++#ifdef CONFIG_HVM ++static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m) ++{ ++ return p2m->phys_table; ++} ++#else ++pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m); ++#endif + + /* + * Ensure any deferred p2m TLB flush has been completed on all VCPUs. +diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c +index b4c11a6b48..908b3ba6ee 100644 +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -441,15 +441,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr, + + if ( pgd_maddr ) + /* nothing */; +-#ifdef CONFIG_HVM +- else if ( iommu_use_hap_pt(d) ) ++ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) ) + { + pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d)); + + pgd_maddr = pagetable_get_paddr(pgt); + } + else +-#endif + { + if ( !hd->arch.vtd.pgd_maddr ) + { +-- +2.44.0 + diff --git a/0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch b/0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch new file mode 100644 index 00000000..b0867d66 --- /dev/null +++ b/0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch @@ -0,0 +1,123 @@ +From abcc32f0634627fe21117a48bd10e792bfbdd6dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Fri, 2 Feb 2024 08:01:09 +0100 +Subject: [PATCH 317/349] x86/amd: Extend CPU erratum #1474 fix to more + affected models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Erratum #1474 has now been extended to cover models from family 17h ranges +00-2Fh, so the errata now covers all the models released under Family +17h (Zen, Zen+ and Zen2). 
+ +Additionally extend the workaround to Family 18h (Hygon), since it's based on +the Zen architecture and very likely affected. + +Rename all the zen2 related symbols to fam17, since the errata doesn't +exclusively affect Zen2 anymore. + +Reported-by: Andrew Cooper +Signed-off-by: Roger Pau Monné +Reviewed-by: Andrew Cooper +master commit: 23db507a01a4ec5259ec0ab43d296a41b1c326ba +master date: 2023-12-21 12:19:40 +0000 +--- + xen/arch/x86/cpu/amd.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 29ae97e7c0..3d85e9797d 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -54,7 +54,7 @@ bool __read_mostly amd_acpi_c1e_quirk; + bool __ro_after_init amd_legacy_ssbd; + bool __initdata amd_virt_spec_ctrl; + +-static bool __read_mostly zen2_c6_disabled; ++static bool __read_mostly fam17_c6_disabled; + + static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, + unsigned int *hi) +@@ -951,24 +951,24 @@ void amd_check_zenbleed(void) + val & chickenbit ? "chickenbit" : "microcode"); + } + +-static void cf_check zen2_disable_c6(void *arg) ++static void cf_check fam17_disable_c6(void *arg) + { + /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */ + const uint64_t mask = ~((1ul << 6) | (1ul << 14) | (1ul << 22)); + uint64_t val; + +- if (!zen2_c6_disabled) { ++ if (!fam17_c6_disabled) { + printk(XENLOG_WARNING + "Disabling C6 after 1000 days apparent uptime due to AMD errata 1474\n"); +- zen2_c6_disabled = true; ++ fam17_c6_disabled = true; + /* + * Prevent CPU hotplug so that started CPUs will either see +- * zen2_c6_disabled set, or will be handled by ++ * zen_c6_disabled set, or will be handled by + * smp_call_function(). + */ + while (!get_cpu_maps()) + process_pending_softirqs(); +- smp_call_function(zen2_disable_c6, NULL, 0); ++ smp_call_function(fam17_disable_c6, NULL, 0); + put_cpu_maps(); + } + +@@ -1273,8 +1273,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) + amd_check_zenbleed(); + amd_check_erratum_1485(); + +- if (zen2_c6_disabled) +- zen2_disable_c6(NULL); ++ if (fam17_c6_disabled) ++ fam17_disable_c6(NULL); + + check_syscfg_dram_mod_en(); + +@@ -1286,7 +1286,7 @@ const struct cpu_dev amd_cpu_dev = { + .c_init = init_amd, + }; + +-static int __init cf_check zen2_c6_errata_check(void) ++static int __init cf_check amd_check_erratum_1474(void) + { + /* + * Errata #1474: A Core May Hang After About 1044 Days +@@ -1294,7 +1294,8 @@ static int __init cf_check zen2_c6_errata_check(void) + */ + s_time_t delta; + +- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) ++ if (cpu_has_hypervisor || ++ (boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18)) + return 0; + + /* +@@ -1309,10 +1310,10 @@ static int __init cf_check zen2_c6_errata_check(void) + if (delta > 0) { + static struct timer errata_c6; + +- init_timer(&errata_c6, zen2_disable_c6, NULL, 0); ++ init_timer(&errata_c6, fam17_disable_c6, NULL, 0); + set_timer(&errata_c6, NOW() + delta); + } else +- zen2_disable_c6(NULL); ++ fam17_disable_c6(NULL); + + return 0; + } +@@ -1320,4 +1321,4 @@ static int __init cf_check zen2_c6_errata_check(void) + * Must be executed after early_time_init() for tsc_ticks2ns() to have been + * calibrated. That prevents us doing the check in init_amd(). 
+ */ +-presmp_initcall(zen2_c6_errata_check); ++presmp_initcall(amd_check_erratum_1474); +-- +2.44.0 + diff --git a/0318-CirrusCI-drop-FreeBSD-12.patch b/0318-CirrusCI-drop-FreeBSD-12.patch new file mode 100644 index 00000000..10332e0b --- /dev/null +++ b/0318-CirrusCI-drop-FreeBSD-12.patch @@ -0,0 +1,39 @@ +From 0ef1fb43ddd61b3c4c953e833e012ac21ad5ca0f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Fri, 2 Feb 2024 08:01:50 +0100 +Subject: [PATCH 318/349] CirrusCI: drop FreeBSD 12 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Went EOL by the end of December 2023, and the pkg repos have been shut down. + +Reported-by: Andrew Cooper +Signed-off-by: Roger Pau Monné +Acked-by: Andrew Cooper +master commit: c2ce3466472e9c9eda79f5dc98eb701bc6fdba20 +master date: 2024-01-15 12:20:11 +0100 +--- + .cirrus.yml | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/.cirrus.yml b/.cirrus.yml +index 7e0beb200d..63f3afb104 100644 +--- a/.cirrus.yml ++++ b/.cirrus.yml +@@ -14,12 +14,6 @@ freebsd_template: &FREEBSD_TEMPLATE + - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin + - gmake -j`sysctl -n hw.ncpu` clang=y + +-task: +- name: 'FreeBSD 12' +- freebsd_instance: +- image_family: freebsd-12-4 +- << : *FREEBSD_TEMPLATE +- + task: + name: 'FreeBSD 13' + freebsd_instance: +-- +2.44.0 + diff --git a/0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch b/0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch new file mode 100644 index 00000000..502e6aba --- /dev/null +++ b/0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch @@ -0,0 +1,74 @@ +From d0ad2cc5eac1b5d3cfd14204d377ce2384f52607 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Fri, 2 Feb 2024 08:02:20 +0100 +Subject: [PATCH 319/349] x86/intel: ensure Global Performance Counter Control + is setup correctly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL +MSR contains per-counter enable bits that is ANDed with the enable bit in the +counter EVNTSEL MSR in order for a PMC counter to be enabled. + +So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable +bits being set by default, but at least on some Intel Sapphire and Emerald +Rapids this is no longer the case, and Xen reports: + +Testing NMI watchdog on all CPUs: 0 40 stuck + +The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0 +doesn't start counting when the enable bit in EVNTSEL0 is set, due to the +relevant enable bit in PERF_GLOBAL_CTRL not being set. + +Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the +general-purpose PMCs are enabled. Doing so brings the state of the package-BSP +PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system. 
+ +Signed-off-by: Roger Pau Monné +Acked-by: Jan Beulich +master commit: 6bdb965178bbb3fc50cd4418d4770a7789956e2c +master date: 2024-01-17 10:40:52 +0100 +--- + xen/arch/x86/cpu/intel.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index b40ac696e6..96723b5d44 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -528,9 +528,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c) + init_intel_cacheinfo(c); + if (c->cpuid_level > 9) { + unsigned eax = cpuid_eax(10); ++ unsigned int cnt = (eax >> 8) & 0xff; ++ + /* Check for version and the number of counters */ +- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) ++ if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) { ++ uint64_t global_ctrl; ++ unsigned int cnt_mask = (1UL << cnt) - 1; ++ ++ /* ++ * On (some?) Sapphire/Emerald Rapids platforms each ++ * package-BSP starts with all the enable bits for the ++ * general-purpose PMCs cleared. Adjust so counters ++ * can be enabled from EVNTSEL. ++ */ ++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); ++ if ((global_ctrl & cnt_mask) != cnt_mask) { ++ printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#" ++ PRIx64 " adjusting to %#" PRIx64 "\n", ++ smp_processor_id(), global_ctrl, ++ global_ctrl | cnt_mask); ++ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ++ global_ctrl | cnt_mask); ++ } + __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); ++ } + } + + if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) +-- +2.44.0 + diff --git a/0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch b/0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch new file mode 100644 index 00000000..a2acd839 --- /dev/null +++ b/0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch @@ -0,0 +1,65 @@ +From eca5416f9b0e179de9553900de8de660ab09199d Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 2 Feb 2024 08:02:51 +0100 +Subject: [PATCH 320/349] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT + +When receiving an INIT, a prior bugfix tried to ignore the INIT and continue +onwards. + +Unfortunately it's not safe to return at that point in vmx_vmexit_handler(). +Just out of context in the first hunk is a local_irqs_enabled() which is +depended-upon by the return-to-guest path, causing the following checklock +failure in debug builds: + + (XEN) Error: INIT received - ignoring + (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1 + (XEN) Xen BUG at common/spinlock.c:132 + (XEN) ----[ Xen-4.19-unstable x86_64 debug=y Tainted: H ]---- + ... + (XEN) Xen call trace: + (XEN) [] R check_lock+0xcd/0xe1 + (XEN) [] F _spin_lock+0x1b/0x60 + (XEN) [] F pt_update_irq+0x32/0x3bb + (XEN) [] F vmx_intr_assist+0x3b/0x51d + (XEN) [] F vmx_asm_vmexit_handler+0xf7/0x210 + +Luckily, this is benign in release builds. Accidentally having IRQs disabled +when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern. + +Drop the problematic early return. In hindsight, it's wrong to skip other +normal VMExit steps. 
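+
+(In outline, and heavily simplified rather than the actual code, the
+flow at fault was:
+
+    switch ( exit_reason )
+    {
+    case EXIT_REASON_INIT:
+        printk(XENLOG_ERR "Error: INIT received - ignoring\n");
+        return;           /* skips everything below, including... */
+    }
+
+    local_irq_enable();   /* ...which the return-to-guest path needs */
+
+hence vmx_intr_assist() later took an IRQs-on lock with IRQs still
+disabled.)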
+ +Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is received") +Reported-by: Reima ISHII +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: d1f8883aebe00f6a9632d77ab0cd5c6d02c9cbe4 +master date: 2024-01-18 20:59:06 +0000 +--- + xen/arch/x86/hvm/vmx/vmx.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index 072288a5ef..31f4a861c6 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -4037,7 +4037,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) + + case EXIT_REASON_INIT: + printk(XENLOG_ERR "Error: INIT received - ignoring\n"); +- return; /* Renter the guest without further processing */ ++ break; + } + + /* Now enable interrupts so it's safe to take locks. */ +@@ -4323,6 +4323,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) + break; + } + case EXIT_REASON_EXTERNAL_INTERRUPT: ++ case EXIT_REASON_INIT: + /* Already handled above. */ + break; + case EXIT_REASON_TRIPLE_FAULT: +-- +2.44.0 + diff --git a/0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch b/0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch new file mode 100644 index 00000000..f6830472 --- /dev/null +++ b/0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch @@ -0,0 +1,126 @@ +From 7bd612727df792671e44152a8205f0cf821ad984 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 2 Feb 2024 08:03:26 +0100 +Subject: [PATCH 321/349] x86/vmx: Disallow the use of inactivity states + +Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and +enter the vCPU. Luckily for us, nested-virt is explicitly unsupported for +security bugs. + +The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the +SDM in Vol3 27.7 "Special Features of VM Entry": + + If VM entry ends with the logical processor in an inactive activity state, + the VM entry generates any special bus cycle that is normally generated when + that activity state is entered from the active state. + +Also, + + Some activity states unconditionally block certain events. + +I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a +VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than +SIPIs. + +Both of these activity states are for the TXT ACM to use, not for regular +hypervisors, and Xen doesn't support dropping the HLT intercept either. + +There are two paths in Xen which operate on ACTIVITY_STATE. + +1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork. + + As regular VMs can't use any inactivity states, this is just duplicating + the 0 from construct_vmcs(). Retain the ability to query activity_state, + but crash the domain on any attempt to set an inactivity state. + +2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[]. + + Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC, + and remove ACTIVITY_STATE from vmcs_gstate_field[]. + + In virtual_vmentry(), we should trigger a VMEntry failure for the use of + any inactivity states, but there's no support for that in the code at all + so leave a TODO for when we finally start working on nested-virt in + earnest. 
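+
+(For reference, summarizing the SDM rather than adding new behaviour:
+the guest activity state encoding is, in sketch form,
+
+    /* illustrative only */
+    enum vmx_activity_state {
+        ACTIVITY_STATE_ACTIVE        = 0,
+        ACTIVITY_STATE_HLT           = 1,
+        ACTIVITY_STATE_SHUTDOWN      = 2,
+        ACTIVITY_STATE_WAIT_FOR_SIPI = 3,
+    };
+
+and VMX_MISC_ACTIVITY_MASK below, 0x1c0 i.e. bits 6-8 of IA32_VMX_MISC,
+holds one reporting bit per inactivity state 1-3.)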
+ +Reported-by: Reima Ishii +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Reviewed-by: Tamas K Lengyel +master commit: 3643bb53a05b7c8fbac072c63bef1538f2a6d0d2 +master date: 2024-01-18 20:59:06 +0000 +--- + xen/arch/x86/hvm/vmx/vmx.c | 8 +++++++- + xen/arch/x86/hvm/vmx/vvmx.c | 9 +++++++-- + xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 1 + + 3 files changed, 15 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index 31f4a861c6..35d391d8e5 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1499,7 +1499,13 @@ static void cf_check vmx_set_nonreg_state(struct vcpu *v, + { + vmx_vmcs_enter(v); + +- __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state); ++ if ( nrs->vmx.activity_state ) ++ { ++ printk("Attempt to set %pv activity_state %#lx\n", ++ v, nrs->vmx.activity_state); ++ domain_crash(v->domain); ++ } ++ + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info); + __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg); + +diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c +index f8fe8d0c14..515cb5ae77 100644 +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -910,7 +910,10 @@ static const u16 vmcs_gstate_field[] = { + GUEST_LDTR_AR_BYTES, + GUEST_TR_AR_BYTES, + GUEST_INTERRUPTIBILITY_INFO, ++ /* ++ * ACTIVITY_STATE is handled specially. + GUEST_ACTIVITY_STATE, ++ */ + GUEST_SYSENTER_CS, + GUEST_PREEMPTION_TIMER, + /* natural */ +@@ -1211,6 +1214,8 @@ static void virtual_vmentry(struct cpu_user_regs *regs) + nvcpu->nv_vmentry_pending = 0; + nvcpu->nv_vmswitch_in_progress = 1; + ++ /* TODO: Fail VMentry for GUEST_ACTIVITY_STATE != 0 */ ++ + /* + * EFER handling: + * hvm_set_efer won't work if CR0.PG = 1, so we change the value +@@ -2327,8 +2332,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) + data = hvm_cr4_guest_valid_bits(d); + break; + case MSR_IA32_VMX_MISC: +- /* Do not support CR3-target feature now */ +- data = host_data & ~VMX_MISC_CR3_TARGET; ++ /* Do not support CR3-targets or activity states. */ ++ data = host_data & ~(VMX_MISC_CR3_TARGET | VMX_MISC_ACTIVITY_MASK); + break; + case MSR_IA32_VMX_EPT_VPID_CAP: + data = nept_get_ept_vpid_cap(); +diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h +index 78404e42b3..0af021d5f5 100644 +--- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h ++++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h +@@ -288,6 +288,7 @@ extern u32 vmx_secondary_exec_control; + #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL + extern u64 vmx_ept_vpid_cap; + ++#define VMX_MISC_ACTIVITY_MASK 0x000001c0 + #define VMX_MISC_PROC_TRACE 0x00004000 + #define VMX_MISC_CR3_TARGET 0x01ff0000 + #define VMX_MISC_VMWRITE_ALL 0x20000000 +-- +2.44.0 + diff --git a/0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch b/0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch new file mode 100644 index 00000000..c6dee11b --- /dev/null +++ b/0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch @@ -0,0 +1,70 @@ +From afb85cf1e8f165abf88de9d8a6df625692a753b1 Mon Sep 17 00:00:00 2001 +From: Michal Orzel +Date: Fri, 2 Feb 2024 08:04:07 +0100 +Subject: [PATCH 322/349] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps + to $(targets) + +At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op +under the hood) results in a crash. 
This is due to a profiler trying to +access data in the .init.* sections (libfdt for Arm and libelf for x86) +that are stripped after boot. Normally, the build system compiles any +*.init.o file without COV_FLAGS. However, these two libraries are +handled differently as sections will be renamed to init after linking. + +To override COV_FLAGS to empty for these libraries, lib{fdt,elf}.o were +added to nocov-y. This worked until e321576f4047 ("xen/build: start using +if_changed") that added lib{fdt,elf}-temp.o and their deps to extra-y. +This way, even though these objects appear as prerequisites of +lib{fdt,elf}.o and the settings should propagate to them, make can also +build them as a prerequisite of __build, in which case COV_FLAGS would +still have the unwanted flags. Fix it by switching to $(targets) instead. + +Also, for libfdt, append libfdt.o to nocov-y only if CONFIG_OVERLAY_DTB +is not set. Otherwise, there is no section renaming and we should be able +to run the coverage. + +Fixes: e321576f4047 ("xen/build: start using if_changed") +Signed-off-by: Michal Orzel +Reviewed-by: Anthony PERARD +Acked-by: Jan Beulich +master commit: 79519fcfa0605bbf19d8c02b979af3a2c8afed68 +master date: 2024-01-23 12:02:44 +0100 +--- + xen/common/libelf/Makefile | 2 +- + xen/common/libfdt/Makefile | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile +index 8a4522e4e1..917d12b006 100644 +--- a/xen/common/libelf/Makefile ++++ b/xen/common/libelf/Makefile +@@ -13,4 +13,4 @@ $(obj)/libelf.o: $(obj)/libelf-temp.o FORCE + $(obj)/libelf-temp.o: $(addprefix $(obj)/,$(libelf-objs)) FORCE + $(call if_changed,ld) + +-extra-y += libelf-temp.o $(libelf-objs) ++targets += libelf-temp.o $(libelf-objs) +diff --git a/xen/common/libfdt/Makefile b/xen/common/libfdt/Makefile +index 75aaefa2e3..4d14fd61ba 100644 +--- a/xen/common/libfdt/Makefile ++++ b/xen/common/libfdt/Makefile +@@ -2,9 +2,9 @@ include $(src)/Makefile.libfdt + + SECTIONS := text data $(SPECIAL_DATA_SECTIONS) + OBJCOPYFLAGS := $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s)) ++nocov-y += libfdt.o + + obj-y += libfdt.o +-nocov-y += libfdt.o + + CFLAGS-y += -I$(srctree)/include/xen/libfdt/ + +@@ -14,4 +14,4 @@ $(obj)/libfdt.o: $(obj)/libfdt-temp.o FORCE + $(obj)/libfdt-temp.o: $(addprefix $(obj)/,$(LIBFDT_OBJS)) FORCE + $(call if_changed,ld) + +-extra-y += libfdt-temp.o $(LIBFDT_OBJS) ++targets += libfdt-temp.o $(LIBFDT_OBJS) +-- +2.44.0 + diff --git a/0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch b/0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch new file mode 100644 index 00000000..57dc04da --- /dev/null +++ b/0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch @@ -0,0 +1,36 @@ +From 091466ba55d1e2e75738f751818ace2e3ed08ccf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Fri, 2 Feb 2024 08:04:33 +0100 +Subject: [PATCH 323/349] x86/p2m-pt: fix off by one in entry check assert +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The MMIO RO rangeset overlap check is bogus: the rangeset is inclusive so the +passed end mfn should be the last mfn to be mapped (not last + 1). 
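+
+(A quick worked example of the inclusive semantics: for a 2M entry the
+order is 9, so the mfns covered are
+
+    mfn ... mfn + (1UL << 9) - 1    /* i.e. mfn .. mfn + 511 */
+
+and the pre-fix assert passed mfn + (1UL << 9) as the range end,
+checking one mfn too many.)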
+ +Fixes: 6fa1755644d0 ('amd/npt/shadow: replace assert that prevents creating 2M/1G MMIO entries') +Signed-off-by: Roger Pau Monné +Reviewed-by: George Dunlap +master commit: 610775d0dd61c1bd2f4720c755986098e6a5bafd +master date: 2024-01-25 16:09:04 +0100 +--- + xen/arch/x86/mm/p2m-pt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c +index eaba2b0fb4..f02ebae372 100644 +--- a/xen/arch/x86/mm/p2m-pt.c ++++ b/xen/arch/x86/mm/p2m-pt.c +@@ -564,7 +564,7 @@ static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old, + if ( new == p2m_mmio_direct ) + ASSERT(!mfn_eq(mfn, INVALID_MFN) && + !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), +- mfn_x(mfn) + (1ul << order))); ++ mfn_x(mfn) + (1UL << order) - 1)); + else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid || + new == p2m_mmio_dm ) + ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN)); +-- +2.44.0 + diff --git a/0324-tools-xentop-fix-sorting-bug-for-some-columns.patch b/0324-tools-xentop-fix-sorting-bug-for-some-columns.patch new file mode 100644 index 00000000..770ebd2b --- /dev/null +++ b/0324-tools-xentop-fix-sorting-bug-for-some-columns.patch @@ -0,0 +1,67 @@ +From 61da71968ea44964fd1dd2e449b053c77eb83139 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Cyril=20R=C3=A9bert=20=28zithro=29?= +Date: Tue, 27 Feb 2024 14:06:53 +0100 +Subject: [PATCH 324/349] tools/xentop: fix sorting bug for some columns +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Sort doesn't work on columns VBD_OO, VBD_RD, VBD_WR and VBD_RSECT. +Fix by adjusting variables names in compare functions. +Bug fix only. No functional change. + +Fixes: 91c3e3dc91d6 ("tools/xentop: Display '-' when stats are not available.") +Signed-off-by: Cyril Rébert (zithro) +Reviewed-by: Anthony PERARD +master commit: 29f17d837421f13c0e0010802de1b2d51d2ded4a +master date: 2024-02-05 17:58:23 +0000 +--- + tools/xentop/xentop.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c +index 950e8935c4..545bd5e96d 100644 +--- a/tools/xentop/xentop.c ++++ b/tools/xentop/xentop.c +@@ -684,7 +684,7 @@ static int compare_vbd_oo(xenstat_domain *domain1, xenstat_domain *domain2) + unsigned long long dom1_vbd_oo = 0, dom2_vbd_oo = 0; + + tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom1_vbd_oo); +- tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom2_vbd_oo); ++ tot_vbd_reqs(domain2, FIELD_VBD_OO, &dom2_vbd_oo); + + return -compare(dom1_vbd_oo, dom2_vbd_oo); + } +@@ -711,9 +711,9 @@ static int compare_vbd_rd(xenstat_domain *domain1, xenstat_domain *domain2) + unsigned long long dom1_vbd_rd = 0, dom2_vbd_rd = 0; + + tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom1_vbd_rd); +- tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom2_vbd_rd); ++ tot_vbd_reqs(domain2, FIELD_VBD_RD, &dom2_vbd_rd); + +- return -compare(dom1_vbd_rd, dom1_vbd_rd); ++ return -compare(dom1_vbd_rd, dom2_vbd_rd); + } + + /* Prints number of total VBD READ requests statistic */ +@@ -738,7 +738,7 @@ static int compare_vbd_wr(xenstat_domain *domain1, xenstat_domain *domain2) + unsigned long long dom1_vbd_wr = 0, dom2_vbd_wr = 0; + + tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom1_vbd_wr); +- tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom2_vbd_wr); ++ tot_vbd_reqs(domain2, FIELD_VBD_WR, &dom2_vbd_wr); + + return -compare(dom1_vbd_wr, dom2_vbd_wr); + } +@@ -765,7 +765,7 @@ static int compare_vbd_rsect(xenstat_domain *domain1, xenstat_domain *domain2) + unsigned long long 
dom1_vbd_rsect = 0, dom2_vbd_rsect = 0; + + tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom1_vbd_rsect); +- tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom2_vbd_rsect); ++ tot_vbd_reqs(domain2, FIELD_VBD_RSECT, &dom2_vbd_rsect); + + return -compare(dom1_vbd_rsect, dom2_vbd_rsect); + } +-- +2.44.0 + diff --git a/0325-amd-vi-fix-IVMD-memory-type-checks.patch b/0325-amd-vi-fix-IVMD-memory-type-checks.patch new file mode 100644 index 00000000..f2547f59 --- /dev/null +++ b/0325-amd-vi-fix-IVMD-memory-type-checks.patch @@ -0,0 +1,53 @@ +From 463aaf3fbf62d24e898ae0c2ba53d85ca0f94d3f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 27 Feb 2024 14:07:12 +0100 +Subject: [PATCH 325/349] amd-vi: fix IVMD memory type checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current code that parses the IVMD blocks is relaxed with regard to the +restriction that such unity regions should always fall into memory ranges +marked as reserved in the memory map. + +However the type checks for the IVMD addresses are inverted, and as a result +IVMD ranges falling into RAM areas are accepted. Note that having such ranges +in the first place is a firmware bug, as IVMD should always fall into reserved +ranges. + +Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved') +Reported-by: Ox +Signed-off-by: Roger Pau Monné +Tested-by: oxjo +Reviewed-by: Jan Beulich +master commit: 83afa313583019d9f159c122cecf867735d27ec5 +master date: 2024-02-06 11:56:13 +0100 +--- + xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c +index 3b577c9b39..3a7045c39b 100644 +--- a/xen/drivers/passthrough/amd/iommu_acpi.c ++++ b/xen/drivers/passthrough/amd/iommu_acpi.c +@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block) + return -EIO; + } + +- /* Types which won't be handed out are considered good enough. */ +- if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | +- RAM_TYPE_UNUSABLE)) ) ++ /* ++ * Types which aren't RAM are considered good enough. ++ * Note that a page being partially RESERVED, ACPI or UNUSABLE will ++ * force Xen into assuming the whole page as having that type in ++ * practice. ++ */ ++ if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | ++ RAM_TYPE_UNUSABLE) ) + continue; + + AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr); +-- +2.44.0 + diff --git a/0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch b/0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch new file mode 100644 index 00000000..8dafe387 --- /dev/null +++ b/0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch @@ -0,0 +1,86 @@ +From 415f770d23f9fcbc02436560fa6583dcd8e1343f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Petr=20Bene=C5=A1?= +Date: Tue, 27 Feb 2024 14:07:45 +0100 +Subject: [PATCH 326/349] x86/hvm: Fix fast singlestep state persistence +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch addresses an issue where the fast singlestep setting would persist +despite xc_domain_debug_control being called with XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF. +Specifically, if fast singlestep was enabled in a VMI session and that session +stopped before the MTF trap occurred, the fast singlestep setting remained +active even though MTF itself was disabled. 
This led to a situation where, upon +starting a new VMI session, the first event to trigger an EPT violation would +cause the corresponding EPT event callback to be skipped due to the lingering +fast singlestep setting. + +The fix ensures that the fast singlestep setting is properly reset when +disabling single step debugging operations. + +Signed-off-by: Petr Beneš +Reviewed-by: Tamas K Lengyel +master commit: 897def94b56175ce569673a05909d2f223e1e749 +master date: 2024-02-12 09:37:58 +0100 +--- + xen/arch/x86/hvm/hvm.c | 34 ++++++++++++++++++++++++---------- + 1 file changed, 24 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index d6c6ab8897..558dc3eddc 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -5153,26 +5153,40 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) + + int hvm_debug_op(struct vcpu *v, int32_t op) + { +- int rc; ++ int rc = 0; + + switch ( op ) + { + case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: + case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: +- rc = -EOPNOTSUPP; + if ( !cpu_has_monitor_trap_flag ) +- break; +- rc = 0; +- vcpu_pause(v); +- v->arch.hvm.single_step = +- (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON); +- vcpu_unpause(v); /* guest will latch new state */ ++ return -EOPNOTSUPP; + break; + default: +- rc = -ENOSYS; +- break; ++ return -ENOSYS; ++ } ++ ++ vcpu_pause(v); ++ ++ switch ( op ) ++ { ++ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: ++ v->arch.hvm.single_step = true; ++ break; ++ ++ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: ++ v->arch.hvm.single_step = false; ++ v->arch.hvm.fast_single_step.enabled = false; ++ v->arch.hvm.fast_single_step.p2midx = 0; ++ break; ++ ++ default: /* Excluded above */ ++ ASSERT_UNREACHABLE(); ++ return -ENOSYS; + } + ++ vcpu_unpause(v); /* guest will latch new state */ ++ + return rc; + } + +-- +2.44.0 + diff --git a/0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch b/0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch new file mode 100644 index 00000000..2fc1929d --- /dev/null +++ b/0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch @@ -0,0 +1,63 @@ +From b3ae0e6201495216b12157bd8b2382b28fdd7dae Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 27 Feb 2024 14:08:20 +0100 +Subject: [PATCH 327/349] x86/HVM: tidy state on hvmemul_map_linear_addr()'s + error path + +While in the vast majority of cases failure of the function will not +be followed by re-invocation with the same emulation context, a few +very specific insns - involving multiple independent writes, e.g. ENTER +and PUSHA - exist where this can happen. Since failure of the function +only signals to the caller that it ought to try an MMIO write instead, +such failure also cannot be assumed to result in wholesale failure of +emulation of the current insn. Instead we have to maintain internal +state such that another invocation of the function with the same +emulation context remains possible. To achieve that we need to reset MFN +slots after putting page references on the error path. + +Note that all of this affects debugging code only, in causing an +assertion to trigger (higher up in the function). There's otherwise no +misbehavior - such a "leftover" slot would simply be overwritten by new +contents in a release build. + +Also extend the related unmap() assertion, to further check for MFN 0. 
+ +Fixes: 8cbd4fb0b7ea ("x86/hvm: implement hvmemul_write() using real mappings") +Reported-by: Manuel Andreas +Signed-off-by: Jan Beulich +Acked-by: Paul Durrant +master commit: e72f951df407bc3be82faac64d8733a270036ba1 +master date: 2024-02-13 09:36:14 +0100 +--- + xen/arch/x86/hvm/emulate.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c +index 275451dd36..27928dc3f3 100644 +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -697,7 +697,12 @@ static void *hvmemul_map_linear_addr( + out: + /* Drop all held references. */ + while ( mfn-- > hvmemul_ctxt->mfn ) ++ { + put_page(mfn_to_page(*mfn)); ++#ifndef NDEBUG /* Clean slot for a subsequent map()'s error checking. */ ++ *mfn = _mfn(0); ++#endif ++ } + + return err; + } +@@ -719,7 +724,7 @@ static void hvmemul_unmap_linear_addr( + + for ( i = 0; i < nr_frames; i++ ) + { +- ASSERT(mfn_valid(*mfn)); ++ ASSERT(mfn_x(*mfn) && mfn_valid(*mfn)); + paging_mark_dirty(currd, *mfn); + put_page(mfn_to_page(*mfn)); + +-- +2.44.0 + diff --git a/0328-build-Replace-which-with-command-v.patch b/0328-build-Replace-which-with-command-v.patch new file mode 100644 index 00000000..5f0bf92d --- /dev/null +++ b/0328-build-Replace-which-with-command-v.patch @@ -0,0 +1,57 @@ +From 1330a5fe44ca91f98857b53fe8bbe06522d9db27 Mon Sep 17 00:00:00 2001 +From: Anthony PERARD +Date: Tue, 27 Feb 2024 14:08:50 +0100 +Subject: [PATCH 328/349] build: Replace `which` with `command -v` +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The `which` command is not standard, may not exist on the build host, +or may not behave as expected by the build system. It is recommended +to use `command -v` to find out if a command exist and have its path, +and it's part of a POSIX shell standard (at least, it seems to be +mandatory since IEEE Std 1003.1-2008, but was optional before). + +Fixes: c8a8645f1efe ("xen/build: Automatically locate a suitable python interpreter") +Fixes: 3b47bcdb6d38 ("xen/build: Use a distro version of figlet") +Signed-off-by: Anthony PERARD +Tested-by: Marek Marczykowski-Górecki +Acked-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: f93629b18b528a5ab1b1092949c5420069c7226c +master date: 2024-02-19 12:45:48 +0100 +--- + xen/Makefile | 4 ++-- + xen/build.mk | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xen/Makefile b/xen/Makefile +index dd0b004e1c..7ea13a6791 100644 +--- a/xen/Makefile ++++ b/xen/Makefile +@@ -25,8 +25,8 @@ export XEN_BUILD_HOST := $(shell hostname) + endif + + # Best effort attempt to find a python interpreter, defaulting to Python 3 if +-# available. Fall back to just `python` if `which` is nowhere to be found. +-PYTHON_INTERPRETER := $(word 1,$(shell which python3 python python2 2>/dev/null) python) ++# available. Fall back to just `python`. 
++PYTHON_INTERPRETER := $(word 1,$(shell command -v python3 || command -v python || command -v python2) python) + export PYTHON ?= $(PYTHON_INTERPRETER) + + export CHECKPOLICY ?= checkpolicy +diff --git a/xen/build.mk b/xen/build.mk +index 9ecb104f1e..b489f77b7c 100644 +--- a/xen/build.mk ++++ b/xen/build.mk +@@ -1,6 +1,6 @@ + quiet_cmd_banner = BANNER $@ + define cmd_banner +- if which figlet >/dev/null 2>&1 ; then \ ++ if command -v figlet >/dev/null 2>&1 ; then \ + echo " Xen $(XEN_FULLVERSION)" | figlet -f $< > $@.tmp; \ + else \ + echo " Xen $(XEN_FULLVERSION)" > $@.tmp; \ +-- +2.44.0 + diff --git a/0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch b/0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch similarity index 80% rename from 0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch rename to 0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch index e4486b9b..db46c5a6 100644 --- a/0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch +++ b/0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch @@ -1,9 +1,9 @@ -From 02ef8192418af1b2b9d13658a5dd46a2e938d721 Mon Sep 17 00:00:00 2001 +From b9745280736ee526374873aa3c4142596e2ba10b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= -Date: Wed, 27 Dec 2023 03:24:00 +0100 -Subject: [PATCH] libxl: Disable relocating memory for qemu-xen in stubdomain - too +Date: Tue, 27 Feb 2024 14:09:19 +0100 +Subject: [PATCH 329/349] libxl: Disable relocating memory for qemu-xen in + stubdomain too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -16,12 +16,16 @@ be consistent in this regard. Reported-by: Neowutran Signed-off-by: Marek Marczykowski-Górecki +Reviewed-by: Jason Andryuk +Acked-by: Anthony PERARD +master commit: 97883aa269f6745a6ded232be3a855abb1297e0d +master date: 2024-02-22 11:48:22 +0100 --- tools/libs/light/libxl_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index 14b593110f7c..ed620a9d8e14 100644 +index 14b593110f..ed620a9d8e 100644 --- a/tools/libs/light/libxl_dm.c +++ b/tools/libs/light/libxl_dm.c @@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss) @@ -42,5 +46,5 @@ index 14b593110f7c..ed620a9d8e14 100644 if (ret<0) { LOGED(ERROR, guest_domid, "setting target domain %d -> %d", -- -2.41.0 +2.44.0 diff --git a/0330-build-make-sure-build-fails-when-running-kconfig-fai.patch b/0330-build-make-sure-build-fails-when-running-kconfig-fai.patch new file mode 100644 index 00000000..8b65b919 --- /dev/null +++ b/0330-build-make-sure-build-fails-when-running-kconfig-fai.patch @@ -0,0 +1,59 @@ +From ea869977271f93945451908be9b6117ffd1fb02d Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 27 Feb 2024 14:09:37 +0100 +Subject: [PATCH 330/349] build: make sure build fails when running kconfig + fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Because of using "-include", failure to (re)build auto.conf (with +auto.conf.cmd produced as a secondary target) won't stop make from +continuing the build. Arrange for it being possible to drop the - from +Rules.mk, requiring that the include be skipped for tools-only targets. +Note that relying on the inclusion in those cases wouldn't be correct +anyway, as it might be a stale file (yet to be rebuilt) which would be +included, while during initial build, the file would be absent +altogether. 
+ +Fixes: 8d4c17a90b0a ("xen/build: silence make warnings about missing auto.conf*") +Reported-by: Roger Pau Monné +Signed-off-by: Jan Beulich +Reviewed-by: Anthony PERARD +master commit: d34e5fa2e8db19f23081f46a3e710bb122130691 +master date: 2024-02-22 11:52:47 +0100 +--- + xen/Makefile | 1 + + xen/Rules.mk | 4 +++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/xen/Makefile b/xen/Makefile +index 7ea13a6791..bac3684a36 100644 +--- a/xen/Makefile ++++ b/xen/Makefile +@@ -374,6 +374,7 @@ $(KCONFIG_CONFIG): tools_fixdep + # This exploits the 'multi-target pattern rule' trick. + # The syncconfig should be executed only once to make all the targets. + include/config/%.conf include/config/%.conf.cmd: $(KCONFIG_CONFIG) ++ $(Q)rm -f include/config/auto.conf + $(Q)$(MAKE) $(build)=tools/kconfig syncconfig + + ifeq ($(CONFIG_DEBUG),y) +diff --git a/xen/Rules.mk b/xen/Rules.mk +index 8af3dd7277..d759cccee3 100644 +--- a/xen/Rules.mk ++++ b/xen/Rules.mk +@@ -15,7 +15,9 @@ srcdir := $(srctree)/$(src) + PHONY := __build + __build: + +--include $(objtree)/include/config/auto.conf ++ifneq ($(firstword $(subst /, ,$(obj))),tools) ++include $(objtree)/include/config/auto.conf ++endif + + include $(XEN_ROOT)/Config.mk + include $(srctree)/scripts/Kbuild.include +-- +2.44.0 + diff --git a/0331-x86emul-add-missing-EVEX.R-checks.patch b/0331-x86emul-add-missing-EVEX.R-checks.patch new file mode 100644 index 00000000..765e946a --- /dev/null +++ b/0331-x86emul-add-missing-EVEX.R-checks.patch @@ -0,0 +1,50 @@ +From 16f2e47eb1207d866f95cf694a60a7ceb8f96a36 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 27 Feb 2024 14:09:55 +0100 +Subject: [PATCH 331/349] x86emul: add missing EVEX.R' checks + +EVEX.R' is not ignored in 64-bit code when encoding a GPR or mask +register. While for mask registers suitable checks are in place (there +also covering EVEX.R), they were missing for the few cases where in +EVEX-encoded instructions ModR/M.reg encodes a GPR. While for VPEXTRW +the bit is replaced before an emulation stub is invoked, for +VCVT{,T}{S,D,H}2{,U}SI this actually would have led to #UD from inside +an emulation stub, in turn raising #UD to the guest, but accompanied by +log messages indicating something's wrong in Xen nevertheless. 
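+
+For illustration (a sketch only; evex.R is taken to be the field under
+which the emulator's decode stage exposes the EVEX.R' bit, and the
+precise predicates are in the hunks below), the added constraint for
+the affected insns amounts to:
+
+/* ModR/M.reg names a GPR: EVEX.R' may not select a register above 15. */
+generate_exception_if(!evex.R, EXC_UD);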
+ +Fixes: 001bd91ad864 ("x86emul: support AVX512{F,BW,DQ} extract insns") +Fixes: baf4a376f550 ("x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns") +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +master commit: cb319824bfa8d3c9ea0410cc71daaedc3e11aa2a +master date: 2024-02-22 11:54:07 +0100 +--- + xen/arch/x86/x86_emulate/x86_emulate.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c +index 0c0336f737..995670cbc8 100644 +--- a/xen/arch/x86/x86_emulate/x86_emulate.c ++++ b/xen/arch/x86/x86_emulate/x86_emulate.c +@@ -6829,7 +6829,8 @@ x86_emulate( + CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */ + CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */ + CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */ +- generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk || ++ generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R || ++ evex.opmsk || + (ea.type != OP_REG && evex.brs)), + EXC_UD); + host_and_vcpu_must_have(avx512f); +@@ -10705,7 +10706,7 @@ x86_emulate( + goto pextr; + + case X86EMUL_OPC_EVEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ +- generate_exception_if(ea.type != OP_REG, EXC_UD); ++ generate_exception_if(ea.type != OP_REG || !evex.R, EXC_UD); + /* Convert to alternative encoding: We want to use a memory operand. */ + evex.opcx = ext_0f3a; + b = 0x15; +-- +2.44.0 + diff --git a/0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch b/0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch new file mode 100644 index 00000000..050b5093 --- /dev/null +++ b/0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch @@ -0,0 +1,36 @@ +From f6b12792542e372f36a71ea4c2563e6dd6e4fa57 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 27 Feb 2024 14:10:24 +0100 +Subject: [PATCH 332/349] xen/livepatch: fix norevert test hook setup typo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The test code has a typo in using LIVEPATCH_APPLY_HOOK() instead of +LIVEPATCH_REVERT_HOOK(). 
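+
+In other words, each hook has to be registered via its matching macro
+(hook function names as used by the test below):
+
+LIVEPATCH_REVERT_HOOK(revert_hook);  /* was: LIVEPATCH_APPLY_HOOK() */
+LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook);
+LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook);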
+ +Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') +Signed-off-by: Roger Pau Monné +Reviewed-by: Ross Lagerwall +master commit: f0622dd4fd6ae6ddb523a45d89ed9b8f3a9a8f36 +master date: 2024-02-26 10:13:46 +0100 +--- + xen/test/livepatch/xen_action_hooks_norevert.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c +index 3e21ade6ab..c173855192 100644 +--- a/xen/test/livepatch/xen_action_hooks_norevert.c ++++ b/xen/test/livepatch/xen_action_hooks_norevert.c +@@ -120,7 +120,7 @@ static void post_revert_hook(livepatch_payload_t *payload) + printk(KERN_DEBUG "%s: Hook done.\n", __func__); + } + +-LIVEPATCH_APPLY_HOOK(revert_hook); ++LIVEPATCH_REVERT_HOOK(revert_hook); + + LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook); + LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook); +-- +2.44.0 + diff --git a/0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch b/0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch new file mode 100644 index 00000000..1812da57 --- /dev/null +++ b/0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch @@ -0,0 +1,38 @@ +From 229e8a72ee4cde5698aaf42cc59ae57446dce60f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 27 Feb 2024 14:10:39 +0100 +Subject: [PATCH 333/349] xen/cmdline: fix printf format specifier in + no_config_param() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +'*' sets the width field, which is the minimum number of characters to output, +but what we want in no_config_param() is the precision instead, which is '.*' +as it imposes a maximum limit on the output. + +Fixes: 68d757df8dd2 ('x86/pv: Options to disable and/or compile out 32bit PV support') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: ef101f525173cf51dc70f4c77862f6f10a8ddccf +master date: 2024-02-26 10:17:40 +0100 +--- + xen/include/xen/param.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h +index 93c3fe7cb7..e02e49635c 100644 +--- a/xen/include/xen/param.h ++++ b/xen/include/xen/param.h +@@ -191,7 +191,7 @@ static inline void no_config_param(const char *cfg, const char *param, + { + int len = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); + +- printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%*s' setting\n", ++ printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%.*s' setting\n", + cfg, param, len, s); + } + +-- +2.44.0 + diff --git a/0334-x86-altcall-use-a-union-as-register-type-for-functio.patch b/0334-x86-altcall-use-a-union-as-register-type-for-functio.patch new file mode 100644 index 00000000..c5dddc16 --- /dev/null +++ b/0334-x86-altcall-use-a-union-as-register-type-for-functio.patch @@ -0,0 +1,141 @@ +From 1aafe054e7d1efbf8e8482a9cdd4be5753b79e2f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 27 Feb 2024 14:11:04 +0100 +Subject: [PATCH 334/349] x86/altcall: use a union as register type for + function parameters on clang +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current code for alternative calls uses the caller parameter types as the +types for the register variables that serve as function parameters: + +uint8_t foo; +[...] 
+alternative_call(myfunc, foo); + +Would expand roughly into: + +register unint8_t a1_ asm("rdi") = foo; +register unsigned long a2_ asm("rsi"); +[...] +asm volatile ("call *%c[addr](%%rip)"...); + +However with -O2 clang will generate incorrect code, given the following +example: + +unsigned int func(uint8_t t) +{ + return t; +} + +static void bar(uint8_t b) +{ + int ret_; + register uint8_t di asm("rdi") = b; + register unsigned long si asm("rsi"); + register unsigned long dx asm("rdx"); + register unsigned long cx asm("rcx"); + register unsigned long r8 asm("r8"); + register unsigned long r9 asm("r9"); + register unsigned long r10 asm("r10"); + register unsigned long r11 asm("r11"); + + asm volatile ( "call %c[addr]" + : "+r" (di), "=r" (si), "=r" (dx), + "=r" (cx), "=r" (r8), "=r" (r9), + "=r" (r10), "=r" (r11), "=a" (ret_) + : [addr] "i" (&(func)), "g" (func) + : "memory" ); +} + +void foo(unsigned int a) +{ + bar(a); +} + +Clang generates the following assembly code: + +func: # @func + movl %edi, %eax + retq +foo: # @foo + callq func + retq + +Note the truncation of the unsigned int parameter 'a' of foo() to uint8_t when +passed into bar() is lost. clang doesn't zero extend the parameters in the +callee when required, as the psABI mandates. + +The above can be worked around by using a union when defining the register +variables, so that `di` becomes: + +register union { + uint8_t e; + unsigned long r; +} di asm("rdi") = { .e = b }; + +Which results in following code generated for `foo()`: + +foo: # @foo + movzbl %dil, %edi + callq func + retq + +So the truncation is not longer lost. Apply such workaround only when built +with clang. + +Reported-by: Matthew Grooms +Link: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=277200 +Link: https://github.com/llvm/llvm-project/issues/12579 +Link: https://github.com/llvm/llvm-project/issues/82598 +Signed-off-by: Roger Pau Monné +Acked-by: Jan Beulich +master commit: 2ce562b2a413cbdb2e1128989ed1722290a27c4e +master date: 2024-02-26 10:18:01 +0100 +--- + xen/arch/x86/include/asm/alternative.h | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h +index a7a82c2c03..bcb1dc94f4 100644 +--- a/xen/arch/x86/include/asm/alternative.h ++++ b/xen/arch/x86/include/asm/alternative.h +@@ -167,9 +167,34 @@ extern void alternative_branches(void); + #define ALT_CALL_arg5 "r8" + #define ALT_CALL_arg6 "r9" + ++#ifdef CONFIG_CC_IS_CLANG ++/* ++ * Use a union with an unsigned long in order to prevent clang from ++ * skipping a possible truncation of the value. By using the union any ++ * truncation is carried before the call instruction, in turn covering ++ * for ABI-non-compliance in that the necessary clipping / extension of ++ * the value is supposed to be carried out in the callee. ++ * ++ * Note this behavior is not mandated by the standard, and hence could ++ * stop being a viable workaround, or worse, could cause a different set ++ * of code-generation issues in future clang versions. 
++ * ++ * This has been reported upstream: ++ * https://github.com/llvm/llvm-project/issues/12579 ++ * https://github.com/llvm/llvm-project/issues/82598 ++ */ ++#define ALT_CALL_ARG(arg, n) \ ++ register union { \ ++ typeof(arg) e; \ ++ unsigned long r; \ ++ } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \ ++ .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \ ++ } ++#else + #define ALT_CALL_ARG(arg, n) \ + register typeof(arg) a ## n ## _ asm ( ALT_CALL_arg ## n ) = \ + ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) ++#endif + #define ALT_CALL_NO_ARG(n) \ + register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n ) + +-- +2.44.0 + diff --git a/0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch b/0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch new file mode 100644 index 00000000..285c2c7e --- /dev/null +++ b/0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch @@ -0,0 +1,57 @@ +From 91650010815f3da0834bc9781c4359350d1162a5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 27 Feb 2024 14:11:40 +0100 +Subject: [PATCH 335/349] x86/spec: fix BRANCH_HARDEN option to only be set + when build-enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current logic to handle the BRANCH_HARDEN option will report it as enabled +even when build-time disabled. Fix this by only allowing the option to be set +when support for it is built into Xen. + +Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3 +master date: 2024-02-26 16:06:42 +0100 +--- + xen/arch/x86/spec_ctrl.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 56e07d7536..661716d695 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -62,7 +62,8 @@ int8_t __initdata opt_psfd = -1; + int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; + int8_t __read_mostly opt_eager_fpu = -1; + int8_t __read_mostly opt_l1d_flush = -1; +-static bool __initdata opt_branch_harden = true; ++static bool __initdata opt_branch_harden = ++ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); + + bool __initdata bsp_delay_spec_ctrl; + uint8_t __read_mostly default_xen_spec_ctrl; +@@ -280,7 +281,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) + else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) + opt_l1d_flush = val; + else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 ) +- opt_branch_harden = val; ++ { ++ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ) ++ opt_branch_harden = val; ++ else ++ { ++ no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s, ++ ss); ++ rc = -EINVAL; ++ } ++ } + else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) + opt_srb_lock = val; + else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) +-- +2.44.0 + diff --git a/0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch b/0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch new file mode 100644 index 00000000..133451db --- /dev/null +++ b/0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch @@ -0,0 +1,212 @@ +From 49f77602373b58b7bbdb40cea2b49d2f88d4003d Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 27 Feb 2024 14:12:11 +0100 +Subject: [PATCH 336/349] x86: account for shadow stack in exception-from-stub + 
recovery + +Dealing with exceptions raised from within emulation stubs involves +discarding return address (replaced by exception related information). +Such discarding of course also requires removing the corresponding entry +from the shadow stack. + +Also amend the comment in fixup_exception_return(), to further clarify +why use of ptr[1] can't be an out-of-bounds access. + +This is CVE-2023-46841 / XSA-451. + +Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible") +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +master commit: 91f5f7a9154919a765c3933521760acffeddbf28 +master date: 2024-02-27 13:49:22 +0100 +--- + xen/arch/x86/extable.c | 20 ++++++---- + xen/arch/x86/include/asm/uaccess.h | 3 +- + xen/arch/x86/traps.c | 63 +++++++++++++++++++++++++++--- + 3 files changed, 71 insertions(+), 15 deletions(-) + +diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c +index 6758ba1dca..dd9583f2a5 100644 +--- a/xen/arch/x86/extable.c ++++ b/xen/arch/x86/extable.c +@@ -86,26 +86,29 @@ search_one_extable(const struct exception_table_entry *first, + } + + unsigned long +-search_exception_table(const struct cpu_user_regs *regs) ++search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra) + { + const struct virtual_region *region = find_text_region(regs->rip); + unsigned long stub = this_cpu(stubs.addr); + + if ( region && region->ex ) ++ { ++ *stub_ra = 0; + return search_one_extable(region->ex, region->ex_end, regs->rip); ++ } + + if ( regs->rip >= stub + STUB_BUF_SIZE / 2 && + regs->rip < stub + STUB_BUF_SIZE && + regs->rsp > (unsigned long)regs && + regs->rsp < (unsigned long)get_cpu_info() ) + { +- unsigned long retptr = *(unsigned long *)regs->rsp; ++ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup; + +- region = find_text_region(retptr); +- retptr = region && region->ex +- ? search_one_extable(region->ex, region->ex_end, retptr) +- : 0; +- if ( retptr ) ++ region = find_text_region(retaddr); ++ fixup = region && region->ex ++ ? search_one_extable(region->ex, region->ex_end, retaddr) ++ : 0; ++ if ( fixup ) + { + /* + * Put trap number and error code on the stack (in place of the +@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_user_regs *regs) + }; + + *(unsigned long *)regs->rsp = token.raw; +- return retptr; ++ *stub_ra = retaddr; ++ return fixup; + } + } + +diff --git a/xen/arch/x86/include/asm/uaccess.h b/xen/arch/x86/include/asm/uaccess.h +index 684fccd95c..74bb222c03 100644 +--- a/xen/arch/x86/include/asm/uaccess.h ++++ b/xen/arch/x86/include/asm/uaccess.h +@@ -421,7 +421,8 @@ union stub_exception_token { + unsigned long raw; + }; + +-extern unsigned long search_exception_table(const struct cpu_user_regs *regs); ++extern unsigned long search_exception_table(const struct cpu_user_regs *regs, ++ unsigned long *stub_ra); + extern void sort_exception_tables(void); + extern void sort_exception_table(struct exception_table_entry *start, + const struct exception_table_entry *stop); +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index 06c4f3868b..7599bee361 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -856,7 +856,7 @@ void do_unhandled_trap(struct cpu_user_regs *regs) + } + + static void fixup_exception_return(struct cpu_user_regs *regs, +- unsigned long fixup) ++ unsigned long fixup, unsigned long stub_ra) + { + if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) + { +@@ -873,7 +873,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, + /* + * Search for %rip. 
The shstk currently looks like this: + * +- * ... [Likely pointed to by SSP] ++ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive] ++ * ... [Pointed to by SSP for most exceptions, empty in IST cases] + * %cs [== regs->cs] + * %rip [== regs->rip] + * SSP [Likely points to 3 slots higher, above %cs] +@@ -891,7 +892,56 @@ static void fixup_exception_return(struct cpu_user_regs *regs, + */ + if ( ptr[0] == regs->rip && ptr[1] == regs->cs ) + { ++ unsigned long primary_shstk = ++ (ssp & ~(STACK_SIZE - 1)) + ++ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; ++ + wrss(fixup, ptr); ++ ++ if ( !stub_ra ) ++ goto shstk_done; ++ ++ /* ++ * Stub recovery ought to happen only when the outer context ++ * was on the main shadow stack. We need to also "pop" the ++ * stub's return address from the interrupted context's shadow ++ * stack. That is, ++ * - if we're still on the main stack, we need to move the ++ * entire stack (up to and including the exception frame) ++ * up by one slot, incrementing the original SSP in the ++ * exception frame, ++ * - if we're on an IST stack, we need to increment the ++ * original SSP. ++ */ ++ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT); ++ ++ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT ) ++ { ++ /* ++ * We're on an IST stack. First make sure the two return ++ * addresses actually match. Then increment the interrupted ++ * context's SSP. ++ */ ++ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]); ++ wrss(ptr[-1] + 8, &ptr[-1]); ++ goto shstk_done; ++ } ++ ++ /* Make sure the two return addresses actually match. */ ++ BUG_ON(stub_ra != ptr[2]); ++ ++ /* Move exception frame, updating SSP there. */ ++ wrss(ptr[1], &ptr[2]); /* %cs */ ++ wrss(ptr[0], &ptr[1]); /* %rip */ ++ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */ ++ ++ /* Move all newer entries. */ ++ while ( --ptr != _p(ssp) ) ++ wrss(ptr[-1], &ptr[0]); ++ ++ /* Finally account for our own stack having shifted up. 
*/ ++ asm volatile ( "incsspd %0" :: "r" (2) ); ++ + goto shstk_done; + } + } +@@ -912,7 +962,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, + + static bool extable_fixup(struct cpu_user_regs *regs, bool print) + { +- unsigned long fixup = search_exception_table(regs); ++ unsigned long stub_ra = 0; ++ unsigned long fixup = search_exception_table(regs, &stub_ra); + + if ( unlikely(fixup == 0) ) + return false; +@@ -926,7 +977,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print) + vector_name(regs->entry_vector), regs->error_code, + _p(regs->rip), _p(regs->rip), _p(fixup)); + +- fixup_exception_return(regs, fixup); ++ fixup_exception_return(regs, fixup, stub_ra); + this_cpu(last_extable_addr) = regs->rip; + + return true; +@@ -1214,7 +1265,7 @@ void do_invalid_op(struct cpu_user_regs *regs) + void (*fn)(struct cpu_user_regs *) = bug_ptr(bug); + + fn(regs); +- fixup_exception_return(regs, (unsigned long)eip); ++ fixup_exception_return(regs, (unsigned long)eip, 0); + return; + } + +@@ -1235,7 +1286,7 @@ void do_invalid_op(struct cpu_user_regs *regs) + case BUGFRAME_warn: + printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno); + show_execution_state(regs); +- fixup_exception_return(regs, (unsigned long)eip); ++ fixup_exception_return(regs, (unsigned long)eip, 0); + return; + + case BUGFRAME_bug: +-- +2.44.0 + diff --git a/0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch b/0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch new file mode 100644 index 00000000..91ac7a04 --- /dev/null +++ b/0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch @@ -0,0 +1,52 @@ +From 6cbccc4071ef49a8c591ecaddfdcb1cc26d28411 Mon Sep 17 00:00:00 2001 +From: Michal Orzel +Date: Thu, 8 Feb 2024 11:43:39 +0100 +Subject: [PATCH 337/349] xen/arm: Fix UBSAN failure in start_xen() + +When running Xen on arm32, in scenario where Xen is loaded at an address +such as boot_phys_offset >= 2GB, UBSAN reports the following: + +(XEN) UBSAN: Undefined behaviour in arch/arm/setup.c:739:58 +(XEN) pointer operation underflowed 00200000 to 86800000 +(XEN) Xen WARN at common/ubsan/ubsan.c:172 +(XEN) ----[ Xen-4.19-unstable arm32 debug=y ubsan=y Not tainted ]---- +... +(XEN) Xen call trace: +(XEN) [<0031b4c0>] ubsan.c#ubsan_epilogue+0x18/0xf0 (PC) +(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 (LR) +(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 +(XEN) [<004d15a8>] start_xen+0xe0/0xbe0 +(XEN) [<0020007c>] head.o#primary_switched+0x4/0x30 + +The failure is reported for the following line: +(paddr_t)(uintptr_t)(_start + boot_phys_offset) + +This occurs because the compiler treats (ptr + size) with size bigger than +PTRDIFF_MAX as undefined behavior. To address this, switch to macro +virt_to_maddr(), given the future plans to eliminate boot_phys_offset. + +Signed-off-by: Michal Orzel +Reviewed-by: Luca Fancellu +Tested-by: Luca Fancellu +Acked-by: Julien Grall +(cherry picked from commit e11f5766503c0ff074b4e0f888bbfc931518a169) +--- + xen/arch/arm/setup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c +index 4395640019..9ee19c2bc1 100644 +--- a/xen/arch/arm/setup.c ++++ b/xen/arch/arm/setup.c +@@ -1025,7 +1025,7 @@ void __init start_xen(unsigned long boot_phys_offset, + + /* Register Xen's load address as a boot module. 
*/ + xen_bootmodule = add_boot_module(BOOTMOD_XEN, +- (paddr_t)(uintptr_t)(_start + boot_phys_offset), ++ virt_to_maddr(_start), + (paddr_t)(uintptr_t)(_end - _start), false); + BUG_ON(!xen_bootmodule); + +-- +2.44.0 + diff --git a/0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch b/0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch new file mode 100644 index 00000000..b76fef91 --- /dev/null +++ b/0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch @@ -0,0 +1,67 @@ +From 9c0d518eb8dc69430e6a8d767bd101dad19b846a Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 5 Mar 2024 11:56:31 +0100 +Subject: [PATCH 338/349] x86/HVM: hide SVM/VMX when their enabling is + prohibited by firmware +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +... or we fail to enable the functionality on the BSP for other reasons. +The only place where hardware announcing the feature is recorded is the +raw CPU policy/featureset. + +Inspired by https://lore.kernel.org/all/20230921114940.957141-1-pbonzini@redhat.com/. + +Signed-off-by: Jan Beulich +Acked-by: Roger Pau Monné +master commit: 0b5f149338e35a795bf609ce584640b0977f9e6c +master date: 2024-01-09 14:06:34 +0100 +--- + xen/arch/x86/hvm/svm/svm.c | 1 + + xen/arch/x86/hvm/vmx/vmcs.c | 17 +++++++++++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c +index fd32600ae3..3c17464550 100644 +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1669,6 +1669,7 @@ const struct hvm_function_table * __init start_svm(void) + + if ( _svm_cpu_up(true) ) + { ++ setup_clear_cpu_cap(X86_FEATURE_SVM); + printk("SVM: failed to initialise.\n"); + return NULL; + } +diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c +index bcbecc6945..b5ecc51b43 100644 +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -2163,6 +2163,23 @@ int __init vmx_vmcs_init(void) + + if ( !ret ) + register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1); ++ else ++ { ++ setup_clear_cpu_cap(X86_FEATURE_VMX); ++ ++ /* ++ * _vmx_vcpu_up() may have made it past feature identification. ++ * Make sure all dependent features are off as well. ++ */ ++ vmx_basic_msr = 0; ++ vmx_pin_based_exec_control = 0; ++ vmx_cpu_based_exec_control = 0; ++ vmx_secondary_exec_control = 0; ++ vmx_vmexit_control = 0; ++ vmx_vmentry_control = 0; ++ vmx_ept_vpid_cap = 0; ++ vmx_vmfunc = 0; ++ } + + return ret; + } +-- +2.44.0 + diff --git a/0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch b/0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch new file mode 100644 index 00000000..3ed6b9ea --- /dev/null +++ b/0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch @@ -0,0 +1,86 @@ +From b75bee183210318150e678e14b35224d7c73edb6 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 5 Mar 2024 11:57:02 +0100 +Subject: [PATCH 339/349] xen/sched: Fix UB shift in compat_set_timer_op() + +Tamas reported this UBSAN failure from fuzzing: + + (XEN) ================================================================================ + (XEN) UBSAN: Undefined behaviour in common/sched/compat.c:48:37 + (XEN) left shift of negative value -2147425536 + (XEN) ----[ Xen-4.19-unstable x86_64 debug=y ubsan=y Not tainted ]---- + ... 
+ (XEN) Xen call trace: + (XEN) [] R ubsan.c#ubsan_epilogue+0xa/0xd9 + (XEN) [] F __ubsan_handle_shift_out_of_bounds+0x11a/0x1c5 + (XEN) [] F compat_set_timer_op+0x41/0x43 + (XEN) [] F hvm_do_multicall_call+0x77f/0xa75 + (XEN) [] F arch_do_multicall_call+0xec/0xf1 + (XEN) [] F do_multicall+0x1dc/0xde3 + (XEN) [] F hvm_hypercall+0xa00/0x149a + (XEN) [] F vmx_vmexit_handler+0x1596/0x279c + (XEN) [] F vmx_asm_vmexit_handler+0xdb/0x200 + +Left-shifting any negative value is strictly undefined behaviour in C, and +the two parameters here come straight from the guest. + +The fuzzer happened to choose lo 0xf, hi 0x8000e300. + +Switch everything to be unsigned values, making the shift well defined. + +As GCC documents: + + As an extension to the C language, GCC does not use the latitude given in + C99 and C11 only to treat certain aspects of signed '<<' as undefined. + However, -fsanitize=shift (and -fsanitize=undefined) will diagnose such + cases. + +this was deemed not to need an XSA. + +Note: The unsigned -> signed conversion for do_set_timer_op()'s s_time_t +parameter is also well defined. C makes it implementation defined, and GCC +defines it as reduction modulo 2^N to be within range of the new type. + +Fixes: 2942f45e09fb ("Enable compatibility mode operation for HYPERVISOR_sched_op and HYPERVISOR_set_timer_op.") +Reported-by: Tamas K Lengyel +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: ae6d4fd876765e6d623eec67d14f5d0464be09cb +master date: 2024-02-01 19:52:44 +0000 +--- + xen/common/sched/compat.c | 4 ++-- + xen/include/hypercall-defs.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xen/common/sched/compat.c b/xen/common/sched/compat.c +index 040b4caca2..b827fdecb8 100644 +--- a/xen/common/sched/compat.c ++++ b/xen/common/sched/compat.c +@@ -39,9 +39,9 @@ static int compat_poll(struct compat_sched_poll *compat) + + #include "core.c" + +-int compat_set_timer_op(u32 lo, s32 hi) ++int compat_set_timer_op(uint32_t lo, uint32_t hi) + { +- return do_set_timer_op(((s64)hi << 32) | lo); ++ return do_set_timer_op(((uint64_t)hi << 32) | lo); + } + + /* +diff --git a/xen/include/hypercall-defs.c b/xen/include/hypercall-defs.c +index 1896121074..c442dee284 100644 +--- a/xen/include/hypercall-defs.c ++++ b/xen/include/hypercall-defs.c +@@ -127,7 +127,7 @@ xenoprof_op(int op, void *arg) + + #ifdef CONFIG_COMPAT + prefix: compat +-set_timer_op(uint32_t lo, int32_t hi) ++set_timer_op(uint32_t lo, uint32_t hi) + multicall(multicall_entry_compat_t *call_list, uint32_t nr_calls) + memory_op(unsigned int cmd, void *arg) + #ifdef CONFIG_IOREQ_SERVER +-- +2.44.0 + diff --git a/0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch b/0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch new file mode 100644 index 00000000..e5de8a10 --- /dev/null +++ b/0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch @@ -0,0 +1,54 @@ +From 76ea2aab3652cc34e474de0905f0a9cd4df7d087 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:57:41 +0100 +Subject: [PATCH 340/349] x86/spec: print the built-in SPECULATIVE_HARDEN_* + options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Just like it's done for INDIRECT_THUNK and SHADOW_PAGING. 
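+
+The reporting pattern (a condensed sketch of the hunk below, showing a
+single option only) gates the whole line on at least one relevant
+option being built in, then emits one tag per enabled Kconfig symbol:
+
+if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) /* || ... */ )
+    printk(" Compiled-in support:"
+#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY
+           " HARDEN_ARRAY"
+#endif
+           "\n");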
+ +Reported-by: Jan Beulich +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54 +master date: 2024-02-27 14:57:52 +0100 +--- + xen/arch/x86/spec_ctrl.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 661716d695..93f1cf3bb5 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -488,13 +488,25 @@ static void __init print_details(enum ind_thunk thunk) + (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); + + /* Compiled-in support which pertains to mitigations. */ +- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) ++ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || ++ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || ++ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || ++ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) + printk(" Compiled-in support:" + #ifdef CONFIG_INDIRECT_THUNK + " INDIRECT_THUNK" + #endif + #ifdef CONFIG_SHADOW_PAGING + " SHADOW_PAGING" ++#endif ++#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY ++ " HARDEN_ARRAY" ++#endif ++#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH ++ " HARDEN_BRANCH" ++#endif ++#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS ++ " HARDEN_GUEST_ACCESS" + #endif + "\n"); + +-- +2.44.0 + diff --git a/0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch b/0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch new file mode 100644 index 00000000..350d0344 --- /dev/null +++ b/0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch @@ -0,0 +1,67 @@ +From 693455c3c370e535eb6cd065800ff91e147815fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:58:04 +0100 +Subject: [PATCH 341/349] x86/spec: fix INDIRECT_THUNK option to only be set + when build-enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Attempt to provide a more helpful error message when the user attempts to set +spec-ctrl=bti-thunk option but the support is build-time disabled. + +While there also adjust the command line documentation to mention +CONFIG_INDIRECT_THUNK instead of INDIRECT_THUNK. + +Reported-by: Andrew Cooper +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: 8441fa806a3b778867867cd0159fa1722e90397e +master date: 2024-02-27 14:58:20 +0100 +--- + docs/misc/xen-command-line.pandoc | 10 +++++----- + xen/arch/x86/spec_ctrl.c | 7 ++++++- + 2 files changed, 11 insertions(+), 6 deletions(-) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 05f613c71c..2006697226 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2378,11 +2378,11 @@ guests to use. + performance reasons dom0 is unprotected by default. If it is necessary to + protect dom0 too, boot with `spec-ctrl=ibpb-entry`. + +-If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to +-select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` +-locations. The default thunk is `retpoline` (generally preferred), with the +-alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and +-`lfence` (an `lfence; jmp *%reg` gadget). ++If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be ++used to select which of the thunks gets patched into the ++`__x86_indirect_thunk_%reg` locations. 
The default thunk is `retpoline` ++(generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget, ++minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget). + + On hardware supporting IBRS (Indirect Branch Restricted Speculation), the + `ibrs=` option can be used to force or prevent Xen using the feature itself. +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 93f1cf3bb5..098fa3184d 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -253,7 +253,12 @@ static int __init cf_check parse_spec_ctrl(const char *s) + { + s += 10; + +- if ( !cmdline_strcmp(s, "retpoline") ) ++ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) ++ { ++ no_config_param("INDIRECT_THUNK", "spec-ctrl", s - 10, ss); ++ rc = -EINVAL; ++ } ++ else if ( !cmdline_strcmp(s, "retpoline") ) + opt_thunk = THUNK_RETPOLINE; + else if ( !cmdline_strcmp(s, "lfence") ) + opt_thunk = THUNK_LFENCE; +-- +2.44.0 + diff --git a/0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch b/0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch new file mode 100644 index 00000000..5789f1f9 --- /dev/null +++ b/0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch @@ -0,0 +1,50 @@ +From 0ce25b46ab2fb53a1b58f7682ca14971453f4f2c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:58:36 +0100 +Subject: [PATCH 342/349] x86/spec: do not print thunk option selection if not + built-in +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Since the thunk built-in enable is printed as part of the "Compiled-in +support:" line, avoid printing anything in "Xen settings:" if the thunk is +disabled at build time. + +Note the BTI-Thunk option printing is also adjusted to print a colon in the +same way the other options on the line do. + +Requested-by: Jan Beulich +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +master commit: 576528a2a742069af203e90c613c5c93e23c9755 +master date: 2024-02-27 14:58:40 +0100 +--- + xen/arch/x86/spec_ctrl.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 098fa3184d..25a18ac598 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -516,11 +516,12 @@ static void __init print_details(enum ind_thunk thunk) + "\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", +- thunk == THUNK_NONE ? "N/A" : +- thunk == THUNK_RETPOLINE ? "RETPOLINE" : +- thunk == THUNK_LFENCE ? "LFENCE" : +- thunk == THUNK_JMP ? "JMP" : "?", ++ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", ++ thunk != THUNK_NONE ? "BTI-Thunk: " : "", ++ thunk == THUNK_NONE ? "" : ++ thunk == THUNK_RETPOLINE ? "RETPOLINE, " : ++ thunk == THUNK_LFENCE ? "LFENCE, " : ++ thunk == THUNK_JMP ? "JMP, " : "?, ", + (!boot_cpu_has(X86_FEATURE_IBRSB) && + !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : + (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? 
"IBRS+" : "IBRS-", +-- +2.44.0 + diff --git a/0343-xen-livepatch-register-livepatch-regions-when-loaded.patch b/0343-xen-livepatch-register-livepatch-regions-when-loaded.patch new file mode 100644 index 00000000..f7affef0 --- /dev/null +++ b/0343-xen-livepatch-register-livepatch-regions-when-loaded.patch @@ -0,0 +1,159 @@ +From b11917de0cd261a878beaf50c18a689bde0b2f50 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:59:26 +0100 +Subject: [PATCH 343/349] xen/livepatch: register livepatch regions when loaded +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently livepatch regions are registered as virtual regions only after the +livepatch has been applied. + +This can lead to issues when using the pre-apply or post-revert hooks, as at +that point the livepatch is not in the virtual regions list. If a livepatch +pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the +code to handle the bug frame won't be able to find the instruction pointer that +triggered the #UD in any of the registered virtual regions, and hence crash. + +Fix this by adding the livepatch payloads as virtual regions as soon as loaded, +and only remove them once the payload is unloaded. This requires some changes +to the virtual regions code, as the removal of the virtual regions is no longer +done in stop machine context, and hence an RCU barrier is added in order to +make sure there are no users of the virtual region after it's been removed from +the list. + +Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') +Signed-off-by: Roger Pau Monné +Reviewed-by: Ross Lagerwall +master commit: a57b4074ab39bee78b6c116277f0a9963bd8e687 +master date: 2024-02-28 16:57:25 +0000 +--- + xen/common/livepatch.c | 4 ++-- + xen/common/virtual_region.c | 44 ++++++++++++++----------------------- + 2 files changed, 19 insertions(+), 29 deletions(-) + +diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c +index c2ae84d18b..537e9f33e4 100644 +--- a/xen/common/livepatch.c ++++ b/xen/common/livepatch.c +@@ -1015,6 +1015,7 @@ static int build_symbol_table(struct payload *payload, + static void free_payload(struct payload *data) + { + ASSERT(spin_is_locked(&payload_lock)); ++ unregister_virtual_region(&data->region); + list_del(&data->list); + payload_cnt--; + payload_version++; +@@ -1114,6 +1115,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) + INIT_LIST_HEAD(&data->list); + INIT_LIST_HEAD(&data->applied_list); + ++ register_virtual_region(&data->region); + list_add_tail(&data->list, &payload_list); + payload_cnt++; + payload_version++; +@@ -1330,7 +1332,6 @@ static inline void apply_payload_tail(struct payload *data) + * The applied_list is iterated by the trap code. + */ + list_add_tail_rcu(&data->applied_list, &applied_list); +- register_virtual_region(&data->region); + + data->state = LIVEPATCH_STATE_APPLIED; + } +@@ -1376,7 +1377,6 @@ static inline void revert_payload_tail(struct payload *data) + * The applied_list is iterated by the trap code. 
+ */ + list_del_rcu(&data->applied_list); +- unregister_virtual_region(&data->region); + + data->reverted = true; + data->state = LIVEPATCH_STATE_CHECKED; +diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c +index 5f89703f51..9f12c30efe 100644 +--- a/xen/common/virtual_region.c ++++ b/xen/common/virtual_region.c +@@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = { + }; + + /* +- * RCU locking. Additions are done either at startup (when there is only +- * one CPU) or when all CPUs are running without IRQs. +- * +- * Deletions are bit tricky. We do it when Live Patch (all CPUs running +- * without IRQs) or during bootup (when clearing the init). +- * +- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock +- * on deletion. ++ * RCU locking. Modifications to the list must be done in exclusive mode, and ++ * hence need to hold the spinlock. + * + * All readers of virtual_region_list MUST use list_for_each_entry_rcu. + */ +@@ -58,41 +52,36 @@ const struct virtual_region *find_text_region(unsigned long addr) + + void register_virtual_region(struct virtual_region *r) + { +- ASSERT(!local_irq_is_enabled()); ++ unsigned long flags; + ++ spin_lock_irqsave(&virtual_region_lock, flags); + list_add_tail_rcu(&r->list, &virtual_region_list); ++ spin_unlock_irqrestore(&virtual_region_lock, flags); + } + +-static void remove_virtual_region(struct virtual_region *r) ++/* ++ * Suggest inline so when !CONFIG_LIVEPATCH the function is not left ++ * unreachable after init code is removed. ++ */ ++static void inline remove_virtual_region(struct virtual_region *r) + { + unsigned long flags; + + spin_lock_irqsave(&virtual_region_lock, flags); + list_del_rcu(&r->list); + spin_unlock_irqrestore(&virtual_region_lock, flags); +- /* +- * We do not need to invoke call_rcu. +- * +- * This is due to the fact that on the deletion we have made sure +- * to use spinlocks (to guard against somebody else calling +- * unregister_virtual_region) and list_deletion spiced with +- * memory barrier. +- * +- * That protects us from corrupting the list as the readers all +- * use list_for_each_entry_rcu which is safe against concurrent +- * deletions. +- */ + } + ++#ifdef CONFIG_LIVEPATCH + void unregister_virtual_region(struct virtual_region *r) + { +- /* Expected to be called from Live Patch - which has IRQs disabled. */ +- ASSERT(!local_irq_is_enabled()); +- + remove_virtual_region(r); ++ ++ /* Assert that no CPU might be using the removed region. 
*/ ++ rcu_barrier(); + } + +-#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86) ++#ifdef CONFIG_X86 + void relax_virtual_region_perms(void) + { + const struct virtual_region *region; +@@ -116,7 +105,8 @@ void tighten_virtual_region_perms(void) + PAGE_HYPERVISOR_RX); + rcu_read_unlock(&rcu_virtual_region_lock); + } +-#endif ++#endif /* CONFIG_X86 */ ++#endif /* CONFIG_LIVEPATCH */ + + void __init unregister_init_virtual_region(void) + { +-- +2.44.0 + diff --git a/0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch b/0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch new file mode 100644 index 00000000..d14413ab --- /dev/null +++ b/0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch @@ -0,0 +1,149 @@ +From c54cf903b06fb1933fad053cc547580c92c856ea Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:59:35 +0100 +Subject: [PATCH 344/349] xen/livepatch: search for symbols in all loaded + payloads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When checking if an address belongs to a patch, or when resolving a symbol, +take into account all loaded livepatch payloads, even if not applied. + +This is required in order for the pre-apply and post-revert hooks to work +properly, or else Xen won't detect the instruction pointer belonging to those +hooks as being part of the currently active text. + +Move the RCU handling to be used for payload_list instead of applied_list, as +now the calls from trap code will iterate over the payload_list. + +Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') +Signed-off-by: Roger Pau Monné +Reviewed-by: Ross Lagerwall +master commit: d2daa40fb3ddb8f83e238e57854bd878924cde90 +master date: 2024-02-28 16:57:25 +0000 +--- + xen/common/livepatch.c | 49 +++++++++++++++--------------------------- + 1 file changed, 17 insertions(+), 32 deletions(-) + +diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c +index 537e9f33e4..a129ab9973 100644 +--- a/xen/common/livepatch.c ++++ b/xen/common/livepatch.c +@@ -36,13 +36,14 @@ + * caller in schedule_work. + */ + static DEFINE_SPINLOCK(payload_lock); +-static LIST_HEAD(payload_list); +- + /* +- * Patches which have been applied. Need RCU in case we crash (and then +- * traps code would iterate via applied_list) when adding entries on the list. ++ * Need RCU in case we crash (and then traps code would iterate via ++ * payload_list) when adding entries on the list. + */ +-static DEFINE_RCU_READ_LOCK(rcu_applied_lock); ++static DEFINE_RCU_READ_LOCK(rcu_payload_lock); ++static LIST_HEAD(payload_list); ++ ++/* Patches which have been applied. Only modified from stop machine context. */ + static LIST_HEAD(applied_list); + + static unsigned int payload_cnt; +@@ -111,12 +112,8 @@ bool_t is_patch(const void *ptr) + const struct payload *data; + bool_t r = 0; + +- /* +- * Only RCU locking since this list is only ever changed during apply +- * or revert context. And in case it dies there we need an safe list. 
+- */ +- rcu_read_lock(&rcu_applied_lock); +- list_for_each_entry_rcu ( data, &applied_list, applied_list ) ++ rcu_read_lock(&rcu_payload_lock); ++ list_for_each_entry_rcu ( data, &payload_list, list ) + { + if ( (ptr >= data->rw_addr && + ptr < (data->rw_addr + data->rw_size)) || +@@ -130,7 +127,7 @@ bool_t is_patch(const void *ptr) + } + + } +- rcu_read_unlock(&rcu_applied_lock); ++ rcu_read_unlock(&rcu_payload_lock); + + return r; + } +@@ -166,12 +163,8 @@ static const char *cf_check livepatch_symbols_lookup( + const void *va = (const void *)addr; + const char *n = NULL; + +- /* +- * Only RCU locking since this list is only ever changed during apply +- * or revert context. And in case it dies there we need an safe list. +- */ +- rcu_read_lock(&rcu_applied_lock); +- list_for_each_entry_rcu ( data, &applied_list, applied_list ) ++ rcu_read_lock(&rcu_payload_lock); ++ list_for_each_entry_rcu ( data, &payload_list, list ) + { + if ( va < data->text_addr || + va >= (data->text_addr + data->text_size) ) +@@ -200,7 +193,7 @@ static const char *cf_check livepatch_symbols_lookup( + n = data->symtab[best].name; + break; + } +- rcu_read_unlock(&rcu_applied_lock); ++ rcu_read_unlock(&rcu_payload_lock); + + return n; + } +@@ -1016,7 +1009,8 @@ static void free_payload(struct payload *data) + { + ASSERT(spin_is_locked(&payload_lock)); + unregister_virtual_region(&data->region); +- list_del(&data->list); ++ list_del_rcu(&data->list); ++ rcu_barrier(); + payload_cnt--; + payload_version++; + free_payload_data(data); +@@ -1116,7 +1110,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) + INIT_LIST_HEAD(&data->applied_list); + + register_virtual_region(&data->region); +- list_add_tail(&data->list, &payload_list); ++ list_add_tail_rcu(&data->list, &payload_list); + payload_cnt++; + payload_version++; + } +@@ -1327,11 +1321,7 @@ static int apply_payload(struct payload *data) + + static inline void apply_payload_tail(struct payload *data) + { +- /* +- * We need RCU variant (which has barriers) in case we crash here. +- * The applied_list is iterated by the trap code. +- */ +- list_add_tail_rcu(&data->applied_list, &applied_list); ++ list_add_tail(&data->applied_list, &applied_list); + + data->state = LIVEPATCH_STATE_APPLIED; + } +@@ -1371,12 +1361,7 @@ static int revert_payload(struct payload *data) + + static inline void revert_payload_tail(struct payload *data) + { +- +- /* +- * We need RCU variant (which has barriers) in case we crash here. +- * The applied_list is iterated by the trap code. +- */ +- list_del_rcu(&data->applied_list); ++ list_del(&data->applied_list); + + data->reverted = true; + data->state = LIVEPATCH_STATE_CHECKED; +-- +2.44.0 + diff --git a/0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch b/0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch new file mode 100644 index 00000000..feeb4329 --- /dev/null +++ b/0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch @@ -0,0 +1,186 @@ +From 5564323f643715f9d364df88e0eb9c7d6fd2c22b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:59:43 +0100 +Subject: [PATCH 345/349] xen/livepatch: fix norevert test attempt to open-code + revert +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The purpose of the norevert test is to install a dummy handler that replaces +the internal Xen revert code, and then perform the revert in the post-revert +hook. 
For that purpose the usage of the previous common_livepatch_revert() is +not enough, as that just reverts specific functions, but not the whole state of +the payload. + +Remove both common_livepatch_{apply,revert}() and instead expose +revert_payload{,_tail}() in order to perform the patch revert from the +post-revert hook. + +Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') +Signed-off-by: Roger Pau Monné +Reviewed-by: Ross Lagerwall +master commit: cdae267ce10d04d71d1687b5701ff2911a96b6dc +master date: 2024-02-28 16:57:25 +0000 +--- + xen/common/livepatch.c | 41 +++++++++++++++++-- + xen/include/xen/livepatch.h | 32 ++------------- + .../livepatch/xen_action_hooks_norevert.c | 22 +++------- + 3 files changed, 46 insertions(+), 49 deletions(-) + +diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c +index a129ab9973..a5068a2217 100644 +--- a/xen/common/livepatch.c ++++ b/xen/common/livepatch.c +@@ -1310,7 +1310,22 @@ static int apply_payload(struct payload *data) + ASSERT(!local_irq_is_enabled()); + + for ( i = 0; i < data->nfuncs; i++ ) +- common_livepatch_apply(&data->funcs[i], &data->fstate[i]); ++ { ++ const struct livepatch_func *func = &data->funcs[i]; ++ struct livepatch_fstate *state = &data->fstate[i]; ++ ++ /* If the action has been already executed on this function, do nothing. */ ++ if ( state->applied == LIVEPATCH_FUNC_APPLIED ) ++ { ++ printk(XENLOG_WARNING LIVEPATCH ++ "%s: %s has been already applied before\n", ++ __func__, func->name); ++ continue; ++ } ++ ++ arch_livepatch_apply(func, state); ++ state->applied = LIVEPATCH_FUNC_APPLIED; ++ } + + arch_livepatch_revive(); + +@@ -1326,7 +1341,7 @@ static inline void apply_payload_tail(struct payload *data) + data->state = LIVEPATCH_STATE_APPLIED; + } + +-static int revert_payload(struct payload *data) ++int revert_payload(struct payload *data) + { + unsigned int i; + int rc; +@@ -1341,7 +1356,25 @@ static int revert_payload(struct payload *data) + } + + for ( i = 0; i < data->nfuncs; i++ ) +- common_livepatch_revert(&data->funcs[i], &data->fstate[i]); ++ { ++ const struct livepatch_func *func = &data->funcs[i]; ++ struct livepatch_fstate *state = &data->fstate[i]; ++ ++ /* ++ * If the apply action hasn't been executed on this function, do ++ * nothing. ++ */ ++ if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) ++ { ++ printk(XENLOG_WARNING LIVEPATCH ++ "%s: %s has not been applied before\n", ++ __func__, func->name); ++ continue; ++ } ++ ++ arch_livepatch_revert(func, state); ++ state->applied = LIVEPATCH_FUNC_NOT_APPLIED; ++ } + + /* + * Since we are running with IRQs disabled and the hooks may call common +@@ -1359,7 +1392,7 @@ static int revert_payload(struct payload *data) + return 0; + } + +-static inline void revert_payload_tail(struct payload *data) ++void revert_payload_tail(struct payload *data) + { + list_del(&data->applied_list); + +diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h +index 537d3d58b6..c9ee58fd37 100644 +--- a/xen/include/xen/livepatch.h ++++ b/xen/include/xen/livepatch.h +@@ -136,35 +136,11 @@ void arch_livepatch_post_action(void); + void arch_livepatch_mask(void); + void arch_livepatch_unmask(void); + +-static inline void common_livepatch_apply(const struct livepatch_func *func, +- struct livepatch_fstate *state) +-{ +- /* If the action has been already executed on this function, do nothing. 
*/ +- if ( state->applied == LIVEPATCH_FUNC_APPLIED ) +- { +- printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n", +- __func__, func->name); +- return; +- } +- +- arch_livepatch_apply(func, state); +- state->applied = LIVEPATCH_FUNC_APPLIED; +-} ++/* Only for testing purposes. */ ++struct payload; ++int revert_payload(struct payload *data); ++void revert_payload_tail(struct payload *data); + +-static inline void common_livepatch_revert(const struct livepatch_func *func, +- struct livepatch_fstate *state) +-{ +- /* If the apply action hasn't been executed on this function, do nothing. */ +- if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) +- { +- printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n", +- __func__, func->name); +- return; +- } +- +- arch_livepatch_revert(func, state); +- state->applied = LIVEPATCH_FUNC_NOT_APPLIED; +-} + #else + + /* +diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c +index c173855192..c5fbab1746 100644 +--- a/xen/test/livepatch/xen_action_hooks_norevert.c ++++ b/xen/test/livepatch/xen_action_hooks_norevert.c +@@ -96,26 +96,14 @@ static int revert_hook(livepatch_payload_t *payload) + + static void post_revert_hook(livepatch_payload_t *payload) + { +- int i; ++ unsigned long flags; + + printk(KERN_DEBUG "%s: Hook starting.\n", __func__); + +- for (i = 0; i < payload->nfuncs; i++) +- { +- const struct livepatch_func *func = &payload->funcs[i]; +- struct livepatch_fstate *fstate = &payload->fstate[i]; +- +- BUG_ON(revert_cnt != 1); +- BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); +- +- /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */ +- arch_livepatch_quiesce(); +- common_livepatch_revert(payload); +- arch_livepatch_revive(); +- BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); +- +- printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name); +- } ++ local_irq_save(flags); ++ BUG_ON(revert_payload(payload)); ++ revert_payload_tail(payload); ++ local_irq_restore(flags); + + printk(KERN_DEBUG "%s: Hook done.\n", __func__); + } +-- +2.44.0 + diff --git a/0346-xen-livepatch-properly-build-the-noapply-and-norever.patch b/0346-xen-livepatch-properly-build-the-noapply-and-norever.patch new file mode 100644 index 00000000..1063767e --- /dev/null +++ b/0346-xen-livepatch-properly-build-the-noapply-and-norever.patch @@ -0,0 +1,43 @@ +From a59106b27609b6ae2873bd6755949b1258290872 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 5 Mar 2024 11:59:51 +0100 +Subject: [PATCH 346/349] xen/livepatch: properly build the noapply and + norevert tests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +It seems the build variables for those tests where copy-pasted from +xen_action_hooks_marker-objs and not adjusted to use the correct source files. 
+ +Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') +Signed-off-by: Roger Pau Monné +Reviewed-by: Ross Lagerwall +master commit: e579677095782c7dec792597ba8b037b7d716b32 +master date: 2024-02-28 16:57:25 +0000 +--- + xen/test/livepatch/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile +index c258ab0b59..d987a8367f 100644 +--- a/xen/test/livepatch/Makefile ++++ b/xen/test/livepatch/Makefile +@@ -118,12 +118,12 @@ xen_action_hooks_marker-objs := xen_action_hooks_marker.o xen_hello_world_func.o + $(obj)/xen_action_hooks_noapply.o: $(obj)/config.h + + extra-y += xen_action_hooks_noapply.livepatch +-xen_action_hooks_noapply-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o ++xen_action_hooks_noapply-objs := xen_action_hooks_noapply.o xen_hello_world_func.o note.o xen_note.o + + $(obj)/xen_action_hooks_norevert.o: $(obj)/config.h + + extra-y += xen_action_hooks_norevert.livepatch +-xen_action_hooks_norevert-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o ++xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o + + EXPECT_BYTES_COUNT := 8 + CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g') +-- +2.44.0 + diff --git a/0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch b/0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch new file mode 100644 index 00000000..6f85b4f3 --- /dev/null +++ b/0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch @@ -0,0 +1,39 @@ +From c4ee68eda9937743527fff41f4ede0f6a3228080 Mon Sep 17 00:00:00 2001 +From: Jason Andryuk +Date: Tue, 5 Mar 2024 12:00:30 +0100 +Subject: [PATCH 347/349] libxl: Fix segfault in device_model_spawn_outcome + +libxl__spawn_qdisk_backend() explicitly sets guest_config to NULL when +starting QEMU (the usual launch through libxl__spawn_local_dm() has a +guest_config though). + +Bail early on a NULL guest_config/d_config. This skips the QMP queries +for chardevs and VNC, but this xenpv QEMU instance isn't expected to +provide those - only qdisk (or 9pfs backends after an upcoming change). + +Signed-off-by: Jason Andryuk +Acked-by: Anthony PERARD +master commit: d4f3d35f043f6ef29393166b0dd131c8102cf255 +master date: 2024-02-29 08:18:38 +0100 +--- + tools/libs/light/libxl_dm.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c +index ed620a9d8e..29b43ed20a 100644 +--- a/tools/libs/light/libxl_dm.c ++++ b/tools/libs/light/libxl_dm.c +@@ -3172,8 +3172,8 @@ static void device_model_spawn_outcome(libxl__egc *egc, + + /* Check if spawn failed */ + if (rc) goto out; +- +- if (d_config->b_info.device_model_version ++ /* d_config is NULL for xl devd/libxl__spawn_qemu_xenpv_backend(). 
*/
++ if (d_config && d_config->b_info.device_model_version
+ == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
+ rc = libxl__ev_time_register_rel(ao, &dmss->timeout,
+ device_model_postconfig_timeout,
+--
+2.44.0
+
diff --git a/0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch b/0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch
new file mode 100644
index 00000000..0b7bda09
--- /dev/null
+++ b/0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch
@@ -0,0 +1,197 @@
+From 2f49d9f89c14519d4cb1e06ab8370cf4ba50fab7 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
+Date: Tue, 5 Mar 2024 12:00:47 +0100
+Subject: [PATCH 348/349] x86/altcall: always use a temporary parameter
+ stashing variable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The usage in ALT_CALL_ARG() on clang of:
+
+register union {
+ typeof(arg) e;
+ const unsigned long r;
+} ...
+
+when `arg` is the first argument to alternative_{,v}call() and
+const_vlapic_vcpu() is used, results in clang 3.5.0 complaining with:
+
+arch/x86/hvm/vlapic.c:141:47: error: non-const static data member must be initialized out of line
+ alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) )
+
+Work around this by pulling `arg1` into a local variable, like it's done for
+further arguments (arg2, arg3...)
+
+Originally arg1 wasn't pulled into a variable because for the a1_ register
+local variable the possible clobbering as a result of operators on other
+variables doesn't matter:
+
+https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+
+Note clang version 3.8.1 seems to already be fixed and doesn't require the
+workaround, but since it's harmless do it uniformly everywhere.
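+
+To make the pattern concrete, here is a reduced, compile-only sketch of the
+workaround (the macro shape and names below are simplified assumptions for
+illustration only, not the real ALT_CALL_ARG() definition from
+asm/alternative.h):
+
+/* Simplified stand-in for ALT_CALL_ARG(arg, 1); illustrative only. */
+#define CALL_ARG1(arg) \
+ register union { \
+ typeof(arg) e; \
+ const unsigned long r; \
+ } a1_ asm ("rdi") = { .e = (arg) }
+
+unsigned long demo(unsigned long *p)
+{
+ typeof(*p) v1_ = *p; /* temporary parameter stashing variable */
+ CALL_ARG1(v1_); /* typeof() is now applied to a plain variable */
+ return a1_.r;
+}
+
+With the stashing variable, the typeof() inside the union never sees the
+problematic call expression directly, which is what old clang tripped over.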
+ +Reported-by: Andrew Cooper +Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang') +Signed-off-by: Roger Pau Monné +Acked-by: Jan Beulich +master commit: c20850540ad6a32f4fc17bde9b01c92b0df18bf0 +master date: 2024-02-29 08:21:49 +0100 +--- + xen/arch/x86/include/asm/alternative.h | 36 +++++++++++++++++--------- + 1 file changed, 24 insertions(+), 12 deletions(-) + +diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h +index bcb1dc94f4..fa04481316 100644 +--- a/xen/arch/x86/include/asm/alternative.h ++++ b/xen/arch/x86/include/asm/alternative.h +@@ -253,21 +253,24 @@ extern void alternative_branches(void); + }) + + #define alternative_vcall1(func, arg) ({ \ +- ALT_CALL_ARG(arg, 1); \ ++ typeof(arg) v1_ = (arg); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_NO_ARG2; \ + (void)sizeof(func(arg)); \ + (void)alternative_callN(1, int, func); \ + }) + + #define alternative_call1(func, arg) ({ \ +- ALT_CALL_ARG(arg, 1); \ ++ typeof(arg) v1_ = (arg); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_NO_ARG2; \ + alternative_callN(1, typeof(func(arg)), func); \ + }) + + #define alternative_vcall2(func, arg1, arg2) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + ALT_CALL_NO_ARG3; \ + (void)sizeof(func(arg1, arg2)); \ +@@ -275,17 +278,19 @@ extern void alternative_branches(void); + }) + + #define alternative_call2(func, arg1, arg2) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + ALT_CALL_NO_ARG3; \ + alternative_callN(2, typeof(func(arg1, arg2)), func); \ + }) + + #define alternative_vcall3(func, arg1, arg2, arg3) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ + typeof(arg3) v3_ = (arg3); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + ALT_CALL_ARG(v3_, 3); \ + ALT_CALL_NO_ARG4; \ +@@ -294,9 +299,10 @@ extern void alternative_branches(void); + }) + + #define alternative_call3(func, arg1, arg2, arg3) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ + typeof(arg3) v3_ = (arg3); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + ALT_CALL_ARG(v3_, 3); \ + ALT_CALL_NO_ARG4; \ +@@ -305,10 +311,11 @@ extern void alternative_branches(void); + }) + + #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ + typeof(arg3) v3_ = (arg3); \ + typeof(arg4) v4_ = (arg4); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + ALT_CALL_ARG(v3_, 3); \ + ALT_CALL_ARG(v4_, 4); \ +@@ -318,10 +325,11 @@ extern void alternative_branches(void); + }) + + #define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ + typeof(arg3) v3_ = (arg3); \ + typeof(arg4) v4_ = (arg4); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + ALT_CALL_ARG(v3_, 3); \ + ALT_CALL_ARG(v4_, 4); \ +@@ -332,11 +340,12 @@ extern void alternative_branches(void); + }) + + #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \ ++ typeof(arg1) v1_ = (arg1); \ + typeof(arg2) v2_ = (arg2); \ + typeof(arg3) v3_ = (arg3); \ + typeof(arg4) v4_ = (arg4); \ + typeof(arg5) v5_ = (arg5); \ +- ALT_CALL_ARG(arg1, 1); \ ++ ALT_CALL_ARG(v1_, 1); \ + ALT_CALL_ARG(v2_, 2); \ + 
ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+@@ -347,11 +356,12 @@ extern void alternative_branches(void);
+ })
+
+ #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \
++ typeof(arg1) v1_ = (arg1); \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+- ALT_CALL_ARG(arg1, 1); \
++ ALT_CALL_ARG(v1_, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+@@ -363,12 +373,13 @@ extern void alternative_branches(void);
+ })
+
+ #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
++ typeof(arg1) v1_ = (arg1); \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+ typeof(arg6) v6_ = (arg6); \
+- ALT_CALL_ARG(arg1, 1); \
++ ALT_CALL_ARG(v1_, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+@@ -379,12 +390,13 @@ extern void alternative_branches(void);
+ })
+
+ #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
++ typeof(arg1) v1_ = (arg1); \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+ typeof(arg6) v6_ = (arg6); \
+- ALT_CALL_ARG(arg1, 1); \
++ ALT_CALL_ARG(v1_, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+--
+2.44.0
+
diff --git a/0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch b/0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
new file mode 100644
index 00000000..e233ca51
--- /dev/null
+++ b/0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
@@ -0,0 +1,102 @@
+From 54dacb5c02cba4676879ed077765734326b78e39 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Tue, 5 Mar 2024 12:01:22 +0100
+Subject: [PATCH 349/349] x86/cpu-policy: Allow for levelling of VERW side
+ effects
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by
+having them unconditionally set in max, with the host values reflected in
+default. Annotate the bits as having special properties.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Roger Pau Monné
+master commit: de17162cafd27f2865a3102a2ec0f386a02ed03d
+master date: 2024-03-01 20:14:19 +0000
+---
+ xen/arch/x86/cpu-policy.c | 24 +++++++++++++++++++++
+ xen/arch/x86/include/asm/cpufeature.h | 1 +
+ xen/include/public/arch-x86/cpufeatureset.h | 4 ++--
+ 3 files changed, 27 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index f0f2c8a1c0..7b875a7221 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -435,6 +435,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+ __set_bit(X86_FEATURE_RSBA, fs);
+ __set_bit(X86_FEATURE_RRSBA, fs);
+
++ /*
++ * These bits indicate that the VERW instruction may have gained
++ * scrubbing side effects. With pooling, they mean "you might migrate
++ * somewhere where scrubbing is necessary", and may need exposing on
++ * unaffected hardware. This is fine, because the VERW instruction
++ * has been around since the 286.
++ */
++ __set_bit(X86_FEATURE_MD_CLEAR, fs);
++ __set_bit(X86_FEATURE_FB_CLEAR, fs);
++
+ /*
+ * The Gather Data Sampling microcode mitigation (August 2023) has an
+ * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+@@ -469,6 +479,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) + cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) + __clear_bit(X86_FEATURE_RDRAND, fs); + ++ /* ++ * These bits indicate that the VERW instruction may have gained ++ * scrubbing side effects. The max policy has them set for migration ++ * reasons, so reset the default policy back to the host values in ++ * case we're unaffected. ++ */ ++ __clear_bit(X86_FEATURE_MD_CLEAR, fs); ++ if ( cpu_has_md_clear ) ++ __set_bit(X86_FEATURE_MD_CLEAR, fs); ++ ++ __clear_bit(X86_FEATURE_FB_CLEAR, fs); ++ if ( cpu_has_fb_clear ) ++ __set_bit(X86_FEATURE_FB_CLEAR, fs); ++ + /* + * The Gather Data Sampling microcode mitigation (August 2023) has an + * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. +diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h +index 9ef7756593..ec824e8954 100644 +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -136,6 +136,7 @@ + #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) + #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT) + #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ++#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR) + #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) + #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) + #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 94d211df2f..aec1407613 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -260,7 +260,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single + XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ + XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ + XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */ +-XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ ++XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ + XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ + XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ + XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ +@@ -321,7 +321,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing + XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ + XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ + XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ +-XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ ++XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */ + XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ + XEN_CPUFEATURE(RRSBA, 16*32+19) /*! 
Restricted RSB Alternative */ + XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ +-- +2.44.0 + diff --git a/xen.spec.in b/xen.spec.in index 72d19a1a..3bca658f 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -104,10 +104,43 @@ Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch -Patch0315: 0315-amd-vi-fix-IVMD-memory-type-checks.patch +Patch0315: 0315-pci-fail-device-assignment-if-phantom-functions-cann.patch +Patch0316: 0316-VT-d-Fix-else-vs-endif-misplacement.patch +Patch0317: 0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch +Patch0318: 0318-CirrusCI-drop-FreeBSD-12.patch +Patch0319: 0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch +Patch0320: 0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch +Patch0321: 0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch +Patch0322: 0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch +Patch0323: 0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch +Patch0324: 0324-tools-xentop-fix-sorting-bug-for-some-columns.patch +Patch0325: 0325-amd-vi-fix-IVMD-memory-type-checks.patch +Patch0326: 0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch +Patch0327: 0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch +Patch0328: 0328-build-Replace-which-with-command-v.patch +Patch0329: 0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch +Patch0330: 0330-build-make-sure-build-fails-when-running-kconfig-fai.patch +Patch0331: 0331-x86emul-add-missing-EVEX.R-checks.patch +Patch0332: 0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch +Patch0333: 0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch +Patch0334: 0334-x86-altcall-use-a-union-as-register-type-for-functio.patch +Patch0335: 0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch +Patch0336: 0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch +Patch0337: 0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch +Patch0338: 0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch +Patch0339: 0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch +Patch0340: 0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch +Patch0341: 0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch +Patch0342: 0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch +Patch0343: 0343-xen-livepatch-register-livepatch-regions-when-loaded.patch +Patch0344: 0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch +Patch0345: 0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch +Patch0346: 0346-xen-livepatch-properly-build-the-noapply-and-norever.patch +Patch0347: 0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch +Patch0348: 0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch +Patch0349: 0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch # Security fixes -Patch0500: 0500-xsa449.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch @@ -154,7 +187,6 @@ Patch0643: 0643-cpufreq-enable-HWP-by-default.patch PAtch0651: 0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch PAtch0652: 0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch -Patch0653: 0653-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch # Qubes specific patches Patch1000: 
1000-Do-not-access-network-during-the-build.patch From 606f99ae86189ece9a9ec394de1f56529947f7d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 12 Mar 2024 03:44:14 +0100 Subject: [PATCH 44/64] Temporarily switch to a hash file for download verification The signing key still uses SHA1, which is rejected by sequoia-sq. Switch to hash file until the key is updated. --- .qubesbuilder | 7 ++++--- xen-4.17.3.tar.gz.sha512 | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 xen-4.17.3.tar.gz.sha512 diff --git a/.qubesbuilder b/.qubesbuilder index 8700b019..e87fd428 100644 --- a/.qubesbuilder +++ b/.qubesbuilder @@ -9,6 +9,7 @@ vm: source: files: - url: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz - signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig - pubkeys: - - xen.org-key.asc +# signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig +# pubkeys: +# - xen.org-key.asc + sha512: xen-@VERSION@.tar.gz.sha512 diff --git a/xen-4.17.3.tar.gz.sha512 b/xen-4.17.3.tar.gz.sha512 new file mode 100644 index 00000000..b9f48101 --- /dev/null +++ b/xen-4.17.3.tar.gz.sha512 @@ -0,0 +1 @@ +56ead90af00b4c1aa452b5edba980a2873e4c76e8c518220a88089a2771cd76fe2478e15a41abf5d247e4bc5af8415d53614dff7eb028ced80701c1a1263f91f From 323d22707cf56529f667229eefd259dca6fe25ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 12 Mar 2024 18:48:24 +0100 Subject: [PATCH 45/64] Apply XSA-452 patches --- 0500-xsa452-4.17-1.patch | 304 ++++++++++++++++++++++++++++++++++++++ 0501-xsa452-4.17-2.patch | 90 ++++++++++++ 0502-xsa452-4.17-3.patch | 135 +++++++++++++++++ 0503-xsa452-4.17-4.patch | 197 +++++++++++++++++++++++++ 0504-xsa452-4.17-5.patch | 239 ++++++++++++++++++++++++++++++ 0505-xsa452-4.17-6.patch | 163 +++++++++++++++++++++ 0506-xsa452-4.17-7.patch | 307 +++++++++++++++++++++++++++++++++++++++ xen.spec.in | 7 + 8 files changed, 1442 insertions(+) create mode 100644 0500-xsa452-4.17-1.patch create mode 100644 0501-xsa452-4.17-2.patch create mode 100644 0502-xsa452-4.17-3.patch create mode 100644 0503-xsa452-4.17-4.patch create mode 100644 0504-xsa452-4.17-5.patch create mode 100644 0505-xsa452-4.17-6.patch create mode 100644 0506-xsa452-4.17-7.patch diff --git a/0500-xsa452-4.17-1.patch b/0500-xsa452-4.17-1.patch new file mode 100644 index 00000000..cdec10eb --- /dev/null +++ b/0500-xsa452-4.17-1.patch @@ -0,0 +1,304 @@ +From: Andrew Cooper +Subject: x86/entry: Introduce EFRAME_* constants + +restore_all_guest() does a lot of manipulation of the stack after popping the +GPRs, and uses raw %rsp displacements to do so. Also, almost all entrypaths +use raw %rsp displacements prior to pushing GPRs. + +Provide better mnemonics, to aid readability and reduce the chance of errors +when editing. + +No functional change. The resulting binary is identical. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035) + +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 287dac101ad4..31fa63b77fd1 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -51,6 +51,23 @@ void __dummy__(void) + OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es); + BLANK(); + ++ /* ++ * EFRAME_* is for the entry/exit logic where %rsp is pointing at ++ * UREGS_error_code and GPRs are still/already guest values. 
++ */ ++#define OFFSET_EF(sym, mem) \ ++ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ ++ offsetof(struct cpu_user_regs, error_code)) ++ ++ OFFSET_EF(EFRAME_entry_vector, entry_vector); ++ OFFSET_EF(EFRAME_rip, rip); ++ OFFSET_EF(EFRAME_cs, cs); ++ OFFSET_EF(EFRAME_eflags, eflags); ++ OFFSET_EF(EFRAME_rsp, rsp); ++ BLANK(); ++ ++#undef OFFSET_EF ++ + OFFSET(VCPU_processor, struct vcpu, processor); + OFFSET(VCPU_domain, struct vcpu, domain); + OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info); +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index 253bb1688c4f..7c211314d885 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -15,7 +15,7 @@ ENTRY(entry_int82) + ENDBR64 + ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP + pushq $0 +- movl $HYPERCALL_VECTOR, 4(%rsp) ++ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) + SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 585b0c955191..412cbeb3eca4 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -190,15 +190,15 @@ restore_all_guest: + SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + + RESTORE_ALL +- testw $TRAP_syscall,4(%rsp) ++ testw $TRAP_syscall, EFRAME_entry_vector(%rsp) + jz iret_exit_to_guest + +- movq 24(%rsp),%r11 # RFLAGS ++ mov EFRAME_eflags(%rsp), %r11 + andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11 + orq $X86_EFLAGS_IF,%r11 + + /* Don't use SYSRET path if the return address is not canonical. */ +- movq 8(%rsp),%rcx ++ mov EFRAME_rip(%rsp), %rcx + sarq $47,%rcx + incl %ecx + cmpl $1,%ecx +@@ -213,20 +213,20 @@ restore_all_guest: + ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK + #endif + +- movq 8(%rsp), %rcx # RIP +- cmpw $FLAT_USER_CS32,16(%rsp)# CS +- movq 32(%rsp),%rsp # RSP ++ mov EFRAME_rip(%rsp), %rcx ++ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) ++ mov EFRAME_rsp(%rsp), %rsp + je 1f + sysretq + 1: sysretl + + ALIGN + .Lrestore_rcx_iret_exit_to_guest: +- movq 8(%rsp), %rcx # RIP ++ mov EFRAME_rip(%rsp), %rcx + /* No special register assumptions. 
*/ + iret_exit_to_guest: +- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp) +- orl $X86_EFLAGS_IF,24(%rsp) ++ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) ++ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) + addq $8,%rsp + .Lft0: iretq + _ASM_PRE_EXTABLE(.Lft0, handle_exception) +@@ -257,7 +257,7 @@ ENTRY(lstar_enter) + pushq $FLAT_KERNEL_CS64 + pushq %rcx + pushq $0 +- movl $TRAP_syscall, 4(%rsp) ++ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ +@@ -294,7 +294,7 @@ ENTRY(cstar_enter) + pushq $FLAT_USER_CS32 + pushq %rcx + pushq $0 +- movl $TRAP_syscall, 4(%rsp) ++ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ +@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved) + pushq $3 /* ring 3 null cs */ + pushq $0 /* null rip */ + pushq $0 +- movl $TRAP_syscall, 4(%rsp) ++ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ +@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap) + ENDBR64 + ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP + pushq $0 +- movl $0x80, 4(%rsp) ++ movl $0x80, EFRAME_entry_vector(%rsp) + SAVE_ALL + + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ +@@ -649,7 +649,7 @@ ret_from_intr: + .section .init.text, "ax", @progbits + ENTRY(early_page_fault) + ENDBR64 +- movl $TRAP_page_fault, 4(%rsp) ++ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp) + SAVE_ALL + movq %rsp, %rdi + call do_early_page_fault +@@ -716,7 +716,7 @@ ENTRY(common_interrupt) + + ENTRY(page_fault) + ENDBR64 +- movl $TRAP_page_fault,4(%rsp) ++ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp) + /* No special register assumptions. 
*/ + GLOBAL(handle_exception) + ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP +@@ -892,90 +892,90 @@ FATAL_exception_with_ints_disabled: + ENTRY(divide_error) + ENDBR64 + pushq $0 +- movl $TRAP_divide_error,4(%rsp) ++ movl $TRAP_divide_error, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(coprocessor_error) + ENDBR64 + pushq $0 +- movl $TRAP_copro_error,4(%rsp) ++ movl $TRAP_copro_error, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(simd_coprocessor_error) + ENDBR64 + pushq $0 +- movl $TRAP_simd_error,4(%rsp) ++ movl $TRAP_simd_error, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(device_not_available) + ENDBR64 + pushq $0 +- movl $TRAP_no_device,4(%rsp) ++ movl $TRAP_no_device, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(debug) + ENDBR64 + pushq $0 +- movl $TRAP_debug,4(%rsp) ++ movl $TRAP_debug, EFRAME_entry_vector(%rsp) + jmp handle_ist_exception + + ENTRY(int3) + ENDBR64 + pushq $0 +- movl $TRAP_int3,4(%rsp) ++ movl $TRAP_int3, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(overflow) + ENDBR64 + pushq $0 +- movl $TRAP_overflow,4(%rsp) ++ movl $TRAP_overflow, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(bounds) + ENDBR64 + pushq $0 +- movl $TRAP_bounds,4(%rsp) ++ movl $TRAP_bounds, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(invalid_op) + ENDBR64 + pushq $0 +- movl $TRAP_invalid_op,4(%rsp) ++ movl $TRAP_invalid_op, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(invalid_TSS) + ENDBR64 +- movl $TRAP_invalid_tss,4(%rsp) ++ movl $TRAP_invalid_tss, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(segment_not_present) + ENDBR64 +- movl $TRAP_no_segment,4(%rsp) ++ movl $TRAP_no_segment, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(stack_segment) + ENDBR64 +- movl $TRAP_stack_error,4(%rsp) ++ movl $TRAP_stack_error, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(general_protection) + ENDBR64 +- movl $TRAP_gp_fault,4(%rsp) ++ movl $TRAP_gp_fault, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(alignment_check) + ENDBR64 +- movl $TRAP_alignment_check,4(%rsp) ++ movl $TRAP_alignment_check, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(entry_CP) + ENDBR64 +- movl $X86_EXC_CP, 4(%rsp) ++ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp) + jmp handle_exception + + ENTRY(double_fault) + ENDBR64 +- movl $TRAP_double_fault,4(%rsp) ++ movl $TRAP_double_fault, EFRAME_entry_vector(%rsp) + /* Set AC to reduce chance of further SMAP faults */ + ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP + SAVE_ALL +@@ -1001,7 +1001,7 @@ ENTRY(double_fault) + ENTRY(nmi) + ENDBR64 + pushq $0 +- movl $TRAP_nmi,4(%rsp) ++ movl $TRAP_nmi, EFRAME_entry_vector(%rsp) + handle_ist_exception: + ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP + SAVE_ALL +@@ -1134,7 +1134,7 @@ handle_ist_exception: + ENTRY(machine_check) + ENDBR64 + pushq $0 +- movl $TRAP_machine_check,4(%rsp) ++ movl $TRAP_machine_check, EFRAME_entry_vector(%rsp) + jmp handle_ist_exception + + /* No op trap handler. Required for kexec crash path. */ +@@ -1171,7 +1171,7 @@ autogen_stubs: /* Automatically generated stubs. */ + 1: + ENDBR64 + pushq $0 +- movb $vec,4(%rsp) ++ movb $vec, EFRAME_entry_vector(%rsp) + jmp common_interrupt + + entrypoint 1b +@@ -1185,7 +1185,7 @@ autogen_stubs: /* Automatically generated stubs. */ + test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */ + jz 2f /* size is 8 bytes. 
Check whether the processor gave us an */ + pushq $0 /* error code, and insert an empty one if not. */ +-2: movb $vec,4(%rsp) ++2: movb $vec, EFRAME_entry_vector(%rsp) + jmp handle_exception + + entrypoint 1b diff --git a/0501-xsa452-4.17-2.patch b/0501-xsa452-4.17-2.patch new file mode 100644 index 00000000..45353971 --- /dev/null +++ b/0501-xsa452-4.17-2.patch @@ -0,0 +1,90 @@ +From: Andrew Cooper +Subject: x86: Resync intel-family.h from Linux + +From v6.8-rc6 + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8) + +diff --git a/xen/arch/x86/include/asm/intel-family.h b/xen/arch/x86/include/asm/intel-family.h +index ffc49151befe..b65e9c46b922 100644 +--- a/xen/arch/x86/include/asm/intel-family.h ++++ b/xen/arch/x86/include/asm/intel-family.h +@@ -26,6 +26,9 @@ + * _G - parts with extra graphics on + * _X - regular server parts + * _D - micro server parts ++ * _N,_P - other mobile parts ++ * _H - premium mobile parts ++ * _S - other client parts + * + * Historical OPTDIFFs: + * +@@ -37,6 +40,9 @@ + * their own names :-( + */ + ++/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ ++#define INTEL_FAM6_ANY X86_MODEL_ANY ++ + #define INTEL_FAM6_CORE_YONAH 0x0E + + #define INTEL_FAM6_CORE2_MEROM 0x0F +@@ -93,8 +99,6 @@ + #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ + #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ + +-#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ +- + #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ + + #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ +@@ -102,12 +106,31 @@ + + #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ + ++#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF ++ ++#define INTEL_FAM6_GRANITERAPIDS_X 0xAD ++#define INTEL_FAM6_GRANITERAPIDS_D 0xAE ++ ++/* "Hybrid" Processors (P-Core/E-Core) */ ++ ++#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ ++ + #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ + #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ + +-#define INTEL_FAM6_RAPTORLAKE 0xB7 ++#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ ++#define INTEL_FAM6_RAPTORLAKE_P 0xBA ++#define INTEL_FAM6_RAPTORLAKE_S 0xBF ++ ++#define INTEL_FAM6_METEORLAKE 0xAC ++#define INTEL_FAM6_METEORLAKE_L 0xAA ++ ++#define INTEL_FAM6_ARROWLAKE_H 0xC5 ++#define INTEL_FAM6_ARROWLAKE 0xC6 ++ ++#define INTEL_FAM6_LUNARLAKE_M 0xBD + +-/* "Small Core" Processors (Atom) */ ++/* "Small Core" Processors (Atom/E-Core) */ + + #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ + #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +@@ -134,6 +157,13 @@ + #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ + #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ + ++#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ ++ ++#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ ++#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ ++ ++#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ ++ + /* Xeon Phi */ + + #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ diff --git a/0502-xsa452-4.17-3.patch b/0502-xsa452-4.17-3.patch new file mode 100644 index 00000000..0a39333e --- /dev/null +++ b/0502-xsa452-4.17-3.patch @@ -0,0 +1,135 @@ +From: Andrew Cooper +Subject: x86/vmx: Perform VERW flushing later in the VMExit path + +Broken out of the following patch because this change is subtle enough on its +own. 
See it for the rationale of why we're moving VERW.
+
+As for how, extend the trick already used to hold one condition in
+flags (RESUME vs LAUNCH) through the POPing of GPRs.
+
+Move the MOV CR earlier. Intel specify flags to be undefined across it.
+
+Encode the two conditions we want using SF and PF. See the code comment for
+exactly how.
+
+Leave a comment to explain the lack of any content around
+SPEC_CTRL_EXIT_TO_VMX, but leave the block in place. Sod's law says if we
+delete it, we'll need to reintroduce it.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper
+Reviewed-by: Jan Beulich
+(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f)
+
+diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
+index 5f5de45a1309..cdde76e13892 100644
+--- a/xen/arch/x86/hvm/vmx/entry.S
++++ b/xen/arch/x86/hvm/vmx/entry.S
+@@ -87,17 +87,39 @@ UNLIKELY_END(realmode)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */
+- DO_SPEC_CTRL_COND_VERW
++ /*
++ * All speculation safety work happens to be elsewhere. VERW is after
++ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left
++ * to the MSR load list.
++ */
+
+ mov VCPU_hvm_guest_cr2(%rbx),%rax
++ mov %rax, %cr2
++
++ /*
++ * We need to perform two conditional actions (VERW, and Resume vs
++ * Launch) after popping GPRs. With some cunning, we can encode both
++ * of these in eflags together.
++ *
++ * Parity is only calculated over the bottom byte of the answer, while
++ * Sign is simply the top bit.
++ *
++ * Therefore, the final OR instruction ends up producing:
++ * SF = VCPU_vmx_launched
++ * PF = !SCF_verw
++ */
++ BUILD_BUG_ON(SCF_verw & ~0xff)
++ movzbl VCPU_vmx_launched(%rbx), %ecx
++ shl $31, %ecx
++ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax
++ and $SCF_verw, %eax
++ or %eax, %ecx
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+- mov %rax,%cr2
+- cmpb $0,VCPU_vmx_launched(%rbx)
+ pop %rbx
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rsi
+ pop %rdi
+- je .Lvmx_launch
++
++ jpe .L_skip_verw
++ /* VERW clobbers ZF, but preserves all others, including SF. */
++ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp)
++.L_skip_verw:
++
++ jns .Lvmx_launch
+
+ /*.Lvmx_resume:*/
+ VMRESUME
+diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h
+index d9431180cfba..abc6822b08c8 100644
+--- a/xen/arch/x86/include/asm/asm_defns.h
++++ b/xen/arch/x86/include/asm/asm_defns.h
+@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp");
+
+ #ifdef __ASSEMBLY__
+
++.macro BUILD_BUG_ON condstr, cond:vararg
++ .if \cond
++ .error "Condition \"\condstr\" not satisfied"
++ .endif
++.endm
++/* preprocessor macro to make error message more user friendly */
++#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond
++
+ #ifdef HAVE_AS_QUOTED_SYM
+ #define SUBSECTION_LBL(tag) \
+ .ifndef .L.tag; \
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index f4b8b9d9561c..ca9cb0f5dd1d 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -164,6 +164,13 @@
+ #endif
+ .endm
+
++/*
++ * Helper to improve the readability of stack displacements with %rsp in
++ * unusual positions. Both @field and @top_of_stk should be constants from
++ * the same object. @top_of_stk should be where %rsp is currently pointing.
++ */ ++#define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) ++ + .macro DO_SPEC_CTRL_COND_VERW + /* + * Requires %rsp=cpuinfo +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 31fa63b77fd1..a4e94d693024 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -135,6 +135,7 @@ void __dummy__(void) + #endif + + OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); ++ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); + OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); + OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); + OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); diff --git a/0503-xsa452-4.17-4.patch b/0503-xsa452-4.17-4.patch new file mode 100644 index 00000000..fa9bb127 --- /dev/null +++ b/0503-xsa452-4.17-4.patch @@ -0,0 +1,197 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Perform VERW flushing later in exit paths + +On parts vulnerable to RFDS, VERW's side effects are extended to scrub all +non-architectural entries in various Physical Register Files. To remove all +of Xen's values, the VERW must be after popping the GPRs. + +Rework SPEC_CTRL_COND_VERW to default to an CPUINFO_error_code %rsp position, +but with overrides for other contexts. Identify that it clobbers eflags; this +is particularly relevant for the SYSRET path. + +For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a +shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to +issue the VERW. + +This is part of XSA-452 / CVE-2023-28746. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index ca9cb0f5dd1d..97a97b2b82c9 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -171,16 +171,23 @@ + */ + #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) + +-.macro DO_SPEC_CTRL_COND_VERW ++.macro SPEC_CTRL_COND_VERW \ ++ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ ++ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) + /* +- * Requires %rsp=cpuinfo ++ * Requires \scf and \sel as %rsp-relative expressions ++ * Clobbers eflags ++ * ++ * VERW needs to run after guest GPRs have been restored, where only %rsp is ++ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code. ++ * Contexts where this is not true must provide an alternative \scf and \sel. + * + * Issue a VERW for its flushing side effect, if indicated. This is a Spectre + * v1 gadget, but the IRET/VMEntry is serialising. 
+ */ +- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) ++ testb $SCF_verw, \scf(%rsp) + jz .L\@_verw_skip +- verw CPUINFO_verw_sel(%rsp) ++ verw \sel(%rsp) + .L\@_verw_skip: + .endm + +@@ -298,8 +305,6 @@ + */ + ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV + +- DO_SPEC_CTRL_COND_VERW +- + ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV + .endm + +@@ -379,7 +384,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* +- * Requires %r12=ist_exit, %r14=stack_end ++ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs + * Clobbers %rax, %rbx, %rcx, %rdx + */ + movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx +@@ -407,11 +412,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + test %r12, %r12 + jz .L\@_skip_ist_exit + +- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ +- testb $SCF_verw, %bl +- jz .L\@_skip_verw +- verw STACK_CPUINFO_FIELD(verw_sel)(%r14) +-.L\@_skip_verw: ++ /* ++ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The ++ * VERW logic needs to run after guest GPRs have been restored; i.e. where ++ * we cannot use %r12 or %r14 for the purposes they have here. ++ * ++ * When the CPU pushed this exception frame, it zero-extended eflags. ++ * Therefore it is safe for the VERW logic to look at the stashed SCF ++ * outside of the ist_exit condition. Also, this stashing won't influence ++ * any other restore_all_guest() paths. ++ */ ++ or $(__HYPERVISOR_DS32 << 16), %ebx ++ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */ + + ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV + +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index a4e94d693024..4cd5938d7b9d 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -55,14 +55,22 @@ void __dummy__(void) + * EFRAME_* is for the entry/exit logic where %rsp is pointing at + * UREGS_error_code and GPRs are still/already guest values. + */ +-#define OFFSET_EF(sym, mem) \ ++#define OFFSET_EF(sym, mem, ...) \ + DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ +- offsetof(struct cpu_user_regs, error_code)) ++ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__) + + OFFSET_EF(EFRAME_entry_vector, entry_vector); + OFFSET_EF(EFRAME_rip, rip); + OFFSET_EF(EFRAME_cs, cs); + OFFSET_EF(EFRAME_eflags, eflags); ++ ++ /* ++ * These aren't real fields. They're spare space, used by the IST ++ * exit-to-xen path. ++ */ ++ OFFSET_EF(EFRAME_shadow_scf, eflags, +4); ++ OFFSET_EF(EFRAME_shadow_sel, eflags, +6); ++ + OFFSET_EF(EFRAME_rsp, rsp); + BLANK(); + +@@ -136,6 +144,7 @@ void __dummy__(void) + + OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); + OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); ++ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip); + OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); + OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); + OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index 7c211314d885..3b2fbcd8733a 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest) + SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ + + RESTORE_ALL adj=8 compat=1 ++ ++ /* Account for ev/ec having already been popped off the stack. 
*/ ++ SPEC_CTRL_COND_VERW \ ++ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ ++ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) ++ + .Lft0: iretq + _ASM_PRE_EXTABLE(.Lft0, handle_exception) + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 412cbeb3eca4..ef517e2945b0 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -214,6 +214,9 @@ restore_all_guest: + #endif + + mov EFRAME_rip(%rsp), %rcx ++ ++ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ ++ + cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) + mov EFRAME_rsp(%rsp), %rsp + je 1f +@@ -227,6 +230,9 @@ restore_all_guest: + iret_exit_to_guest: + andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) + orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) ++ ++ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ ++ + addq $8,%rsp + .Lft0: iretq + _ASM_PRE_EXTABLE(.Lft0, handle_exception) +@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3) + UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */ + + RESTORE_ALL adj=8 ++ ++ /* ++ * When the CPU pushed this exception frame, it zero-extended eflags. ++ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of ++ * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, ++ * and we're at a random place on the stack, not in a CPUFINFO block. ++ * ++ * Account for ev/ec having already been popped off the stack. ++ */ ++ SPEC_CTRL_COND_VERW \ ++ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \ ++ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip) ++ + iretq + + ENTRY(common_interrupt) diff --git a/0504-xsa452-4.17-5.patch b/0504-xsa452-4.17-5.patch new file mode 100644 index 00000000..0230b338 --- /dev/null +++ b/0504-xsa452-4.17-5.patch @@ -0,0 +1,239 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Rename VERW related options + +VERW is going to be used for a 3rd purpose, and the existing nomenclature +didn't survive the Stale MMIO issues terribly well. + +Rename the command line option from `md-clear=` to `verw=`. This is more +consistent with other options which tend to be named based on what they're +doing, not which feature enumeration they use behind the scenes. Retain +`md-clear=` as a deprecated alias. + +Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio}, +which has a side effect of making spec_ctrl_init_domain() rather clearer to +follow. + +No functional change. + +This is part of XSA-452 / CVE-2023-28746. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 2006697226de..d909ec94fe7c 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2324,7 +2324,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). + + ### spec-ctrl (x86) + > `= List of [ , xen=, {pv,hvm}=, +-> {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, ++> {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=, + > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, + > eager-fpu,l1d-flush,branch-harden,srb-lock, + > unpriv-mmio,gds-mit,div-scrub}= ]` +@@ -2349,7 +2349,7 @@ in place for guests to use. + + Use of a positive boolean value for either of these options is invalid. 
+ +-The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options ++The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options + offer fine grained control over the primitives by Xen. These impact Xen's + ability to protect itself, and/or Xen's ability to virtualise support for + guests to use. +@@ -2366,11 +2366,12 @@ guests to use. + guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. + * `rsb=` offers control over whether to overwrite the Return Stack Buffer / + Return Address Stack on entry to Xen and on idle. +-* `md-clear=` offers control over whether to use VERW to flush +- microarchitectural buffers on idle and exit from Xen. *Note: For +- compatibility with development versions of this fix, `mds=` is also accepted +- on Xen 4.12 and earlier as an alias. Consult vendor documentation in +- preference to here.* ++* `verw=` offers control over whether to use VERW for its scrubbing side ++ effects at appropriate privilege transitions. The exact side effects are ++ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as ++ a deprecated alias. For compatibility with development versions of XSA-297, ++ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor ++ documentation in preference to here.* + * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction + Barrier) is used on entry to Xen. This is used by default on hardware + vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 25a18ac598fa..e12ec9930cf7 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -37,8 +37,8 @@ static bool __initdata opt_msr_sc_pv = true; + static bool __initdata opt_msr_sc_hvm = true; + static int8_t __initdata opt_rsb_pv = -1; + static bool __initdata opt_rsb_hvm = true; +-static int8_t __ro_after_init opt_md_clear_pv = -1; +-static int8_t __ro_after_init opt_md_clear_hvm = -1; ++static int8_t __ro_after_init opt_verw_pv = -1; ++static int8_t __ro_after_init opt_verw_hvm = -1; + + static int8_t __ro_after_init opt_ibpb_entry_pv = -1; + static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; +@@ -78,7 +78,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. 
+ + static int8_t __initdata opt_srb_lock = -1; + static bool __initdata opt_unpriv_mmio; +-static bool __ro_after_init opt_fb_clear_mmio; ++static bool __ro_after_init opt_verw_mmio; + static int8_t __initdata opt_gds_mit = -1; + static int8_t __initdata opt_div_scrub = -1; + +@@ -120,8 +120,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) + disable_common: + opt_rsb_pv = false; + opt_rsb_hvm = false; +- opt_md_clear_pv = 0; +- opt_md_clear_hvm = 0; ++ opt_verw_pv = 0; ++ opt_verw_hvm = 0; + opt_ibpb_entry_pv = 0; + opt_ibpb_entry_hvm = 0; + opt_ibpb_entry_dom0 = false; +@@ -152,14 +152,14 @@ static int __init cf_check parse_spec_ctrl(const char *s) + { + opt_msr_sc_pv = val; + opt_rsb_pv = val; +- opt_md_clear_pv = val; ++ opt_verw_pv = val; + opt_ibpb_entry_pv = val; + } + else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) + { + opt_msr_sc_hvm = val; + opt_rsb_hvm = val; +- opt_md_clear_hvm = val; ++ opt_verw_hvm = val; + opt_ibpb_entry_hvm = val; + } + else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) +@@ -204,21 +204,22 @@ static int __init cf_check parse_spec_ctrl(const char *s) + break; + } + } +- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) ++ else if ( (val = parse_boolean("verw", s, ss)) != -1 || ++ (val = parse_boolean("md-clear", s, ss)) != -1 ) + { + switch ( val ) + { + case 0: + case 1: +- opt_md_clear_pv = opt_md_clear_hvm = val; ++ opt_verw_pv = opt_verw_hvm = val; + break; + + case -2: +- s += strlen("md-clear="); ++ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear="); + if ( (val = parse_boolean("pv", s, ss)) >= 0 ) +- opt_md_clear_pv = val; ++ opt_verw_pv = val; + else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) +- opt_md_clear_hvm = val; ++ opt_verw_hvm = val; + else + default: + rc = -EINVAL; +@@ -540,8 +541,8 @@ static void __init print_details(enum ind_thunk thunk) + opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", + opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", + opt_l1d_flush ? " L1D_FLUSH" : "", +- opt_md_clear_pv || opt_md_clear_hvm || +- opt_fb_clear_mmio ? " VERW" : "", ++ opt_verw_pv || opt_verw_hvm || ++ opt_verw_mmio ? " VERW" : "", + opt_div_scrub ? " DIV" : "", + opt_branch_harden ? " BRANCH_HARDEN" : ""); + +@@ -562,13 +563,13 @@ static void __init print_details(enum ind_thunk thunk) + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || + amd_virt_spec_ctrl || +- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None", ++ opt_eager_fpu || opt_verw_hvm) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || + amd_virt_spec_ctrl) ? " MSR_VIRT_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", + opt_eager_fpu ? " EAGER_FPU" : "", +- opt_md_clear_hvm ? " MD_CLEAR" : "", ++ opt_verw_hvm ? " VERW" : "", + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); + + #endif +@@ -577,11 +578,11 @@ static void __init print_details(enum ind_thunk thunk) + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + boot_cpu_has(X86_FEATURE_SC_RSB_PV) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || +- opt_eager_fpu || opt_md_clear_pv) ? "" : " None", ++ opt_eager_fpu || opt_verw_pv) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", + opt_eager_fpu ? " EAGER_FPU" : "", +- opt_md_clear_pv ? " MD_CLEAR" : "", ++ opt_verw_pv ? " VERW" : "", + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? 
" IBPB-entry" : ""); + + printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", +@@ -1514,8 +1515,8 @@ void spec_ctrl_init_domain(struct domain *d) + { + bool pv = is_pv_domain(d); + +- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || +- (opt_fb_clear_mmio && is_iommu_enabled(d))); ++ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) || ++ (opt_verw_mmio && is_iommu_enabled(d))); + + bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && + (d->domain_id != 0 || opt_ibpb_entry_dom0)); +@@ -1878,19 +1879,20 @@ void __init init_speculation_mitigations(void) + * the return-to-guest path. + */ + if ( opt_unpriv_mmio ) +- opt_fb_clear_mmio = cpu_has_fb_clear; ++ opt_verw_mmio = cpu_has_fb_clear; + + /* + * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. + * This will only be a token effort for MLPDS/MFBDS when HT is enabled, + * but it is somewhat better than nothing. + */ +- if ( opt_md_clear_pv == -1 ) +- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && +- boot_cpu_has(X86_FEATURE_MD_CLEAR)); +- if ( opt_md_clear_hvm == -1 ) +- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && +- boot_cpu_has(X86_FEATURE_MD_CLEAR)); ++ if ( opt_verw_pv == -1 ) ++ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && ++ cpu_has_md_clear); ++ ++ if ( opt_verw_hvm == -1 ) ++ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && ++ cpu_has_md_clear); + + /* + * Enable MDS/MMIO defences as applicable. The Idle blocks need using if +@@ -1903,12 +1905,12 @@ void __init init_speculation_mitigations(void) + * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) + * + * After calculating the appropriate idle setting, simplify +- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM ++ * opt_verw_hvm to mean just "should we VERW on the way into HVM + * guests", so spec_ctrl_init_domain() can calculate suitable settings. + */ +- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) ++ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) + setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); +- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; ++ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; + + /* + * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT diff --git a/0505-xsa452-4.17-6.patch b/0505-xsa452-4.17-6.patch new file mode 100644 index 00000000..bbe617eb --- /dev/null +++ b/0505-xsa452-4.17-6.patch @@ -0,0 +1,163 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: VERW-handling adjustments + +... before we add yet more complexity to this logic. Mostly expanded +comments, but with three minor changes. + +1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and + future ones. + +2) We only ever need SC_VERW_IDLE when SMT is active. If SMT isn't active, + then there's no re-partition of pipeline resources based on thread-idleness + to worry about. + +3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as + it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush + is the relevant decision of whether to use L1D_FLUSH based on + susceptibility and user preference. + + Rewrite the logic so it can be followed, and incorporate the fact that when + FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution. + +This is part of XSA-452 / CVE-2023-28746. 
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index e12ec9930cf7..adb6bc74e8e6 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -1531,7 +1531,7 @@ void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; + bool has_spec_ctrl, ibrs = false, hw_smt_enabled; +- bool cpu_has_bug_taa, retpoline_safe; ++ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe; + + hw_smt_enabled = check_smt_enabled(); + +@@ -1867,50 +1867,97 @@ void __init init_speculation_mitigations(void) + "enabled. Please assess your configuration and choose an\n" + "explicit 'smt=' setting. See XSA-273.\n"); + ++ /* ++ * A brief summary of VERW-related changes. ++ * ++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html ++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html ++ * ++ * Relevant ucodes: ++ * ++ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side ++ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR ++ * exists architecturally, even when the side effects have been removed. ++ * ++ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to ++ * mitigate L1TF have the same side effect, so no need to do both. ++ * ++ * Various Atoms suffer from Store-buffer sampling only. Store buffers ++ * are statically partitioned between non-idle threads, so scrubbing is ++ * wanted when going idle too. ++ * ++ * Load ports and Fill buffers are competitively shared between threads. ++ * SMT must be disabled for VERW scrubbing to be fully effective. ++ * ++ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled ++ * MDS_NO parts. ++ * ++ * - February 2022, for Client TSX de-feature. Removed VERW side effects ++ * from Client CPUs only. ++ * ++ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing ++ * on all MMIO-affected parts which didn't already have it for MDS ++ * reasons, enumerating FB_CLEAR on those parts only. ++ * ++ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing ++ * side effects as VERW and cannot be used in its place. ++ */ + mds_calculations(); + + /* +- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have +- * reintroduced the VERW fill buffer flushing side effect because of a +- * susceptibility to FBSDP. ++ * Parts which enumerate FB_CLEAR are those with now-updated microcode ++ * which weren't susceptible to the original MFBDS (and therefore didn't ++ * have Fill Buffer scrubbing side effects to begin with, or were Client ++ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had ++ * the scrubbing reintroduced because of a susceptibility to FBSDP. + * + * If unprivileged guests have (or will have) MMIO mappings, we can + * mitigate cross-domain leakage of fill buffer data by issuing VERW on +- * the return-to-guest path. ++ * the return-to-guest path. This is only a token effort if SMT is ++ * active. + */ + if ( opt_unpriv_mmio ) + opt_verw_mmio = cpu_has_fb_clear; + + /* +- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. 
+- * This will only be a token effort for MLPDS/MFBDS when HT is enabled, +- * but it is somewhat better than nothing. ++ * MD_CLEAR is enumerated architecturally forevermore, even after the ++ * scrubbing side effects have been removed. Create ourselves an version ++ * which expressed whether we think MD_CLEAR is having any useful side ++ * effect. ++ */ ++ cpu_has_useful_md_clear = (cpu_has_md_clear && ++ (cpu_has_bug_mds || cpu_has_bug_msbds_only)); ++ ++ /* ++ * By default, use VERW scrubbing on applicable hardware, if we think it's ++ * going to have an effect. This will only be a token effort for ++ * MLPDS/MFBDS when SMT is enabled. + */ + if ( opt_verw_pv == -1 ) +- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && +- cpu_has_md_clear); ++ opt_verw_pv = cpu_has_useful_md_clear; + + if ( opt_verw_hvm == -1 ) +- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && +- cpu_has_md_clear); ++ opt_verw_hvm = cpu_has_useful_md_clear; + + /* +- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if +- * either the PV or HVM MDS defences are used, or if we may give MMIO +- * access to untrusted guests. +- * +- * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with +- * equivalent semantics to avoid needing to perform both flushes on the +- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for +- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) +- * +- * After calculating the appropriate idle setting, simplify +- * opt_verw_hvm to mean just "should we VERW on the way into HVM +- * guests", so spec_ctrl_init_domain() can calculate suitable settings. ++ * If SMT is active, and we're protecting against MDS or MMIO stale data, ++ * we need to scrub before going idle as well as on return to guest. ++ * Various pipeline resources are repartitioned amongst non-idle threads. + */ +- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) ++ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || ++ opt_verw_mmio) && hw_smt_enabled ) + setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); +- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; ++ ++ /* ++ * After calculating the appropriate idle setting, simplify opt_verw_hvm ++ * to mean just "should we VERW on the way into HVM guests", so ++ * spec_ctrl_init_domain() can calculate suitable settings. ++ * ++ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the ++ * only *_CLEAR we can see. ++ */ ++ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) ++ opt_verw_hvm = false; + + /* + * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT diff --git a/0506-xsa452-4.17-7.patch b/0506-xsa452-4.17-7.patch new file mode 100644 index 00000000..26ba4ebd --- /dev/null +++ b/0506-xsa452-4.17-7.patch @@ -0,0 +1,307 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Mitigation Register File Data Sampling + +RFDS affects Atom cores, also branded E-cores, between the Goldmont and +Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid +clien systems which have a mix of Gracemont and other types of cores. + +Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side +effets, and RFDS_NO to incidate that the system is unaffected. Plenty of +unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we +synthesise it. 
Alder Lake and Raptor Lake Xeon-E's are unaffected due to +their platform configuration, and we must use the Hybrid CPUID bit to +distinguish them from their non-Xeon counterparts. + +Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so +set it in the max policies and reflect the host setting in default. + +This is part of XSA-452 / CVE-2023-28746. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31) + +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index aefc140d6651..5ceea8be073b 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -172,7 +172,7 @@ static const char *const str_7d0[32] = + [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl", + [10] = "md-clear", [11] = "rtm-always-abort", + /* 12 */ [13] = "tsx-force-abort", +- [14] = "serialize", ++ [14] = "serialize", [15] = "hybrid", + [16] = "tsxldtrk", + [18] = "pconfig", + [20] = "cet-ibt", +@@ -237,7 +237,8 @@ static const char *const str_m10Al[32] = + [20] = "bhi-no", [21] = "xapic-status", + /* 22 */ [23] = "ovrclk-status", + [24] = "pbrsb-no", [25] = "gds-ctrl", +- [26] = "gds-no", ++ [26] = "gds-no", [27] = "rfds-no", ++ [28] = "rfds-clear", + }; + + static const char *const str_m10Ah[32] = +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index 7b875a722142..96c2cee1a857 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -444,6 +444,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) + */ + __set_bit(X86_FEATURE_MD_CLEAR, fs); + __set_bit(X86_FEATURE_FB_CLEAR, fs); ++ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); + + /* + * The Gather Data Sampling microcode mitigation (August 2023) has an +@@ -493,6 +494,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) + if ( cpu_has_fb_clear ) + __set_bit(X86_FEATURE_FB_CLEAR, fs); + ++ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs); ++ if ( cpu_has_rfds_clear ) ++ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); ++ + /* + * The Gather Data Sampling microcode mitigation (August 2023) has an + * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. +diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h +index ec824e895498..a6b8af12964c 100644 +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -140,6 +140,7 @@ + #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) + #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) + #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) ++#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID) + #define cpu_has_avx512_fp16 boot_cpu_has(X86_FEATURE_AVX512_FP16) + #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS) + +@@ -161,6 +162,8 @@ + #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) + #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL) + #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) ++#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) ++#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) + + /* Synthesized. 
*/ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) +diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h +index 6abf7bc34a4f..9b5f67711f0c 100644 +--- a/xen/arch/x86/include/asm/msr-index.h ++++ b/xen/arch/x86/include/asm/msr-index.h +@@ -88,6 +88,8 @@ + #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24) + #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25) + #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26) ++#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27) ++#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28) + + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index adb6bc74e8e6..1ee81e2dfe79 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -24,6 +24,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -447,7 +448,7 @@ static void __init print_details(enum ind_thunk thunk) + * Hardware read-only information, stating immunity to certain issues, or + * suggestions of which mitigation to use. + */ +- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", +@@ -463,6 +464,7 @@ static void __init print_details(enum ind_thunk thunk) + (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", + (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", + (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "", ++ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", +@@ -473,7 +475,7 @@ static void __init print_details(enum ind_thunk thunk) + (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : ""); + + /* Hardware features which need driving to mitigate issues. */ +- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || +@@ -491,6 +493,7 @@ static void __init print_details(enum ind_thunk thunk) + (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", + (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "", + (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "", ++ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "", + (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); + + /* Compiled-in support which pertains to mitigations. */ +@@ -1359,6 +1362,83 @@ static __init void mds_calculations(void) + } + } + ++/* ++ * Register File Data Sampling affects Atom cores from the Goldmont to ++ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to ++ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still ++ * in support. ++ * ++ * Alder Lake and Raptor Lake client CPUs have a mix of P cores ++ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont, ++ * vulnerable), and both enumerate RFDS_CLEAR. ++ * ++ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by ++ * platform configuration, and enumerate RFDS_NO. ++ * ++ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when ++ * safe to do so. 
++ * ++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html ++ */ ++static void __init rfds_calculations(void) ++{ ++ /* RFDS is only known to affect Intel Family 6 processors at this time. */ ++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || ++ boot_cpu_data.x86 != 6 ) ++ return; ++ ++ /* ++ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable ++ * microcode, or an RFDS-aware hypervisor is levelling us in a pool. ++ */ ++ if ( cpu_has_rfds_no || cpu_has_rfds_clear ) ++ return; ++ ++ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */ ++ if ( cpu_has_hypervisor ) ++ return; ++ ++ /* ++ * Not all CPUs are expected to get a microcode update enumerating one of ++ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode. ++ */ ++ switch ( boot_cpu_data.x86_model ) ++ { ++ case INTEL_FAM6_ALDERLAKE: ++ case INTEL_FAM6_RAPTORLAKE: ++ /* ++ * Alder Lake and Raptor Lake might be a client SKU (with the ++ * Gracemont cores active, and therefore vulnerable) or might be a ++ * server SKU (with the Gracemont cores disabled, and therefore not ++ * vulnerable). ++ * ++ * See if the CPU identifies as hybrid to distinguish the two cases. ++ */ ++ if ( !cpu_has_hybrid ) ++ break; ++ fallthrough; ++ case INTEL_FAM6_ALDERLAKE_L: ++ case INTEL_FAM6_RAPTORLAKE_P: ++ case INTEL_FAM6_RAPTORLAKE_S: ++ ++ case INTEL_FAM6_ATOM_GOLDMONT: /* Apollo Lake */ ++ case INTEL_FAM6_ATOM_GOLDMONT_D: /* Denverton */ ++ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */ ++ case INTEL_FAM6_ATOM_TREMONT_D: /* Snow Ridge / Parker Ridge */ ++ case INTEL_FAM6_ATOM_TREMONT: /* Elkhart Lake */ ++ case INTEL_FAM6_ATOM_TREMONT_L: /* Jasper Lake */ ++ case INTEL_FAM6_ATOM_GRACEMONT: /* Alder Lake N */ ++ return; ++ } ++ ++ /* ++ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO, ++ * perhaps because of it's age or because of out-of-date microcode. ++ * Synthesise it. ++ */ ++ setup_force_cpu_cap(X86_FEATURE_RFDS_NO); ++} ++ + static bool __init cpu_has_gds(void) + { + /* +@@ -1872,6 +1952,7 @@ void __init init_speculation_mitigations(void) + * + * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html + * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html ++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html + * + * Relevant ucodes: + * +@@ -1901,8 +1982,12 @@ void __init init_speculation_mitigations(void) + * + * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing + * side effects as VERW and cannot be used in its place. ++ * ++ * - March 2023, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now ++ * scrubs non-architectural entries from certain register files. + */ + mds_calculations(); ++ rfds_calculations(); + + /* + * Parts which enumerate FB_CLEAR are those with now-updated microcode +@@ -1934,15 +2019,19 @@ void __init init_speculation_mitigations(void) + * MLPDS/MFBDS when SMT is enabled. 
+ */ + if ( opt_verw_pv == -1 ) +- opt_verw_pv = cpu_has_useful_md_clear; ++ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear; + + if ( opt_verw_hvm == -1 ) +- opt_verw_hvm = cpu_has_useful_md_clear; ++ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear; + + /* + * If SMT is active, and we're protecting against MDS or MMIO stale data, + * we need to scrub before going idle as well as on return to guest. + * Various pipeline resources are repartitioned amongst non-idle threads. ++ * ++ * We don't need to scrub on idle for RFDS. There are no affected cores ++ * which support SMT, despite there being affected cores in hybrid systems ++ * which have SMT elsewhere in the platform. + */ + if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || + opt_verw_mmio) && hw_smt_enabled ) +@@ -1956,7 +2045,8 @@ void __init init_speculation_mitigations(void) + * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the + * only *_CLEAR we can see. + */ +- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) ++ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear && ++ !cpu_has_rfds_clear ) + opt_verw_hvm = false; + + /* +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index aec1407613c3..113e6cadc17d 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -264,6 +264,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe + XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ + XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ + XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ ++XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ + XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */ + XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */ + XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */ +@@ -330,6 +331,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ + XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ + XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */ + XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */ ++XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ ++XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */ + + /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ + diff --git a/xen.spec.in b/xen.spec.in index 3bca658f..1f3eecc1 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -141,6 +141,13 @@ Patch0348: 0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch Patch0349: 0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch # Security fixes +Patch0500: 0500-xsa452-4.17-1.patch +Patch0501: 0501-xsa452-4.17-2.patch +Patch0502: 0502-xsa452-4.17-3.patch +Patch0503: 0503-xsa452-4.17-4.patch +Patch0504: 0504-xsa452-4.17-5.patch +Patch0505: 0505-xsa452-4.17-6.patch +Patch0506: 0506-xsa452-4.17-7.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From 7ebe19ebf18511a8989ef8e38b4ff952a5aee506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 12 Mar 2024 21:01:05 +0100 Subject: [PATCH 46/64] version 4.17.3-4 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 00750edc..b8626c4c 100644 --- a/rel +++ 
b/rel @@ -1 +1 @@ -3 +4 From 16560c669b8b90e4ff8ea9a42793f46a8479ca93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 9 Apr 2024 21:13:08 +0200 Subject: [PATCH 47/64] Apply XSA-455 and XSA-456 fixes The patches are conflicting (contextually and semantically) with 1017-Disable-TSX-by-default.patch so drop the latter. --- ...pu-policy-Hide-x2APIC-from-PV-guests.patch | 90 ++++ ...ix-visibility-of-HTT-CMP_LEGACY-in-m.patch | 85 ++++ 0507-xsa455-4.17.patch | 48 ++ ...finish-genapic-conversion-to-altcall.patch | 111 +++++ ...cpufreq-finish-conversion-to-altcall.patch | 157 +++++++ 0510-x86-HPET-avoid-an-indirect-call.patch | 107 +++++ 0511-core-parking-use-alternative_call.patch | 95 ++++ ...86-MTRR-avoid-several-indirect-calls.patch | 366 +++++++++++++++ ...irect-call-for-I-O-emulation-quirk-h.patch | 102 +++++ ...MCE-separate-BSP-only-initialization.patch | 192 ++++++++ ...some-callback-invocations-to-altcall.patch | 405 +++++++++++++++++ 0516-IRQ-generalize-gs-et_irq_regs.patch | 134 ++++++ ...pec-ctrl-Expose-IPRED_CTRL-to-guests.patch | 78 ++++ ...pec-ctrl-Expose-RRSBA_CTRL-to-guests.patch | 66 +++ ...-spec-ctrl-Expose-BHI_CTRL-to-guests.patch | 65 +++ ...ENDBR-zapping-from-vendor-_ctxt_swit.patch | 69 +++ ...6-guest-finish-conversion-to-altcall.patch | 84 ++++ ...t-vendor-hook-invocations-to-altcall.patch | 152 +++++++ ...ary-execution-control-infrastructure.patch | 256 +++++++++++ ...ve-__read_mostly-data-into-__ro_afte.patch | 71 +++ ...ith-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch | 123 +++++ ...-fix-.init-section-reference-in-_app.patch | 43 ++ ...expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch | 42 ++ ...c-ctrl-Rename-spec_ctrl_flags-to-scf.patch | 425 ++++++++++++++++++ ...work-conditional-safety-for-SPEC_CTR.patch | 196 ++++++++ ...e-for-r14-to-be-STACK_END-across-SPE.patch | 171 +++++++ ...ld-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch | 122 +++++ ...-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch | 95 ++++ ...tail-the-safety-properties-in-SPEC_C.patch | 183 ++++++++ ...Add-support-for-virtualize-SPEC_CTRL.patch | 206 +++++++++ ...den-the-xen-last-default-_spec_ctrl-.patch | 145 ++++++ ...-calls-in-reset-stack-infrastructure.patch | 111 +++++ 0537-x86-Drop-INDIRECT_JMP.patch | 68 +++ ...sx-Expose-RTM_ALWAYS_ABORT-to-guests.patch | 189 ++++++++ ...pport-BHI_DIS_S-in-order-to-mitigate.patch | 177 ++++++++ ...ctrl-Software-BHB-clearing-sequences.patch | 349 ++++++++++++++ ...re-up-the-Native-BHI-software-sequen.patch | 347 ++++++++++++++ ...l-Support-the-long-BHB-loop-sequence.patch | 130 ++++++ 1017-Disable-TSX-by-default.patch | 68 --- xen.spec.in | 40 +- 40 files changed, 5894 insertions(+), 69 deletions(-) create mode 100644 0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch create mode 100644 0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch create mode 100644 0507-xsa455-4.17.patch create mode 100644 0508-x86-APIC-finish-genapic-conversion-to-altcall.patch create mode 100644 0509-cpufreq-finish-conversion-to-altcall.patch create mode 100644 0510-x86-HPET-avoid-an-indirect-call.patch create mode 100644 0511-core-parking-use-alternative_call.patch create mode 100644 0512-x86-MTRR-avoid-several-indirect-calls.patch create mode 100644 0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch create mode 100644 0514-x86-MCE-separate-BSP-only-initialization.patch create mode 100644 0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch create mode 100644 0516-IRQ-generalize-gs-et_irq_regs.patch create mode 100644 
0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch create mode 100644 0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch create mode 100644 0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch create mode 100644 0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch create mode 100644 0521-x86-guest-finish-conversion-to-altcall.patch create mode 100644 0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch create mode 100644 0523-VMX-tertiary-execution-control-infrastructure.patch create mode 100644 0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch create mode 100644 0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch create mode 100644 0526-x86-alternatives-fix-.init-section-reference-in-_app.patch create mode 100644 0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch create mode 100644 0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch create mode 100644 0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch create mode 100644 0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch create mode 100644 0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch create mode 100644 0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch create mode 100644 0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch create mode 100644 0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch create mode 100644 0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch create mode 100644 0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch create mode 100644 0537-x86-Drop-INDIRECT_JMP.patch create mode 100644 0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch create mode 100644 0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch create mode 100644 0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch create mode 100644 0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch create mode 100644 0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch delete mode 100644 1017-Disable-TSX-by-default.patch diff --git a/0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch b/0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch new file mode 100644 index 00000000..1ea5fc11 --- /dev/null +++ b/0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch @@ -0,0 +1,90 @@ +From bb27e11c56963e170d1f6d2fbddbc956f7164121 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 2 Apr 2024 16:17:25 +0200 +Subject: [PATCH] x86/cpu-policy: Hide x2APIC from PV guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +PV guests can't write to MSR_APIC_BASE (in order to set EXTD), nor can they +access any of the x2APIC MSR range. Therefore they mustn't see the x2APIC +CPUID bit saying that they can. + +Right now, the host x2APIC flag filters into PV guests, meaning that PV guests +generally see x2APIC except on Zen1-and-older AMD systems. + +Linux works around this by explicitly hiding the bit itself, and filtering +EXTD out of MSR_APIC_BASE reads. NetBSD behaves more in the spirit of PV +guests, and entirely ignores the APIC when built as a PV guest. + +Change the annotation from !A to !S. This has a consequence of stripping it +out of both PV featuremasks. However, as existing guests may have seen the +bit, set it back into the PV Max policy; a VM which saw the bit and is alive +enough to migrate will have ignored it one way or another. 
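+
+In policy terms the fix is two-sided: the !S annotation strips x2APIC from
+both PV featuremasks, while one hunk below compensates in the max policy
+only.  Condensed (a sketch of the effect, not the literal diff):
+
+    /* calculate_pv_max_policy(): reinstate the bit in PV max only, so a
+     * VM which already saw the (previously leaked) bit stays migratable;
+     * the default policy no longer offers it. */
+    __set_bit(X86_FEATURE_X2APIC, fs);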
+ +Hiding x2APIC does change the contents of leaf 0xb, but as the information is +nonsense to begin with, this is likely an improvement on the status quo. + +Xen's blind assumption that APIC_ID = vCPU_ID * 2 isn't interlinked with the +host's topology structure, where a PV guest may see real host values, and the +APIC_IDs are useless without an MADT to start with. Dom0 is the only PV VM to +get an MADT but it's the host one, meaning the two sets of APIC_IDs are from +different address spaces. + +Signed-off-by: Andrew Cooper +Reviewed-by: Roger Pau Monné +master commit: 5420aa165dfa5fe95dd84bb71cb96c15459935b1 +master date: 2024-03-01 20:14:19 +0000 +--- + xen/arch/x86/cpu-policy.c | 11 +++++++++-- + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + 2 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index 96c2cee1a8..ed64d56294 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -559,6 +559,14 @@ static void __init calculate_pv_max_policy(void) + for ( i = 0; i < ARRAY_SIZE(fs); ++i ) + fs[i] &= pv_max_featuremask[i]; + ++ /* ++ * Xen at the time of writing (Feb 2024, 4.19 dev cycle) used to leak the ++ * host x2APIC capability into PV guests, but never supported the guest ++ * trying to turn x2APIC mode on. Tolerate an incoming VM which saw the ++ * x2APIC CPUID bit and is alive enough to migrate. ++ */ ++ __set_bit(X86_FEATURE_X2APIC, fs); ++ + /* + * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional + * availability, or admin choice), hide the feature. +@@ -837,11 +845,10 @@ void recalculate_cpuid_policy(struct domain *d) + } + + /* +- * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits ++ * Allow the toolstack to set HTT and CMP_LEGACY. These bits + * affect how to interpret topology information in other cpuid leaves. 
+ */ + __set_bit(X86_FEATURE_HTT, max_fs); +- __set_bit(X86_FEATURE_X2APIC, max_fs); + __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); + + /* +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 113e6cadc1..bc971f3c6f 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -123,7 +123,7 @@ XEN_CPUFEATURE(PCID, 1*32+17) /*H Process Context ID */ + XEN_CPUFEATURE(DCA, 1*32+18) /* Direct Cache Access */ + XEN_CPUFEATURE(SSE4_1, 1*32+19) /*A Streaming SIMD Extensions 4.1 */ + XEN_CPUFEATURE(SSE4_2, 1*32+20) /*A Streaming SIMD Extensions 4.2 */ +-XEN_CPUFEATURE(X2APIC, 1*32+21) /*!A Extended xAPIC */ ++XEN_CPUFEATURE(X2APIC, 1*32+21) /*!S Extended xAPIC */ + XEN_CPUFEATURE(MOVBE, 1*32+22) /*A movbe instruction */ + XEN_CPUFEATURE(POPCNT, 1*32+23) /*A POPCNT instruction */ + XEN_CPUFEATURE(TSC_DEADLINE, 1*32+24) /*S TSC Deadline Timer */ +-- +2.44.0 + diff --git a/0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch b/0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch new file mode 100644 index 00000000..6f3b7de2 --- /dev/null +++ b/0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch @@ -0,0 +1,85 @@ +From 70ad9c5fdeac4814050080c87e06d44292ecf868 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 2 Apr 2024 16:18:05 +0200 +Subject: [PATCH] x86/cpu-policy: Fix visibility of HTT/CMP_LEGACY in max + policies +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The block in recalculate_cpuid_policy() predates the proper split between +default and max policies, and was a "slightly max for a toolstack which knows +about it" capability. It didn't get transformed properly in Xen 4.14. + +Because Xen will accept a VM with HTT/CMP_LEGACY seen, they should be visible +in the max polices. Keep the default policy matching host settings. + +This manifested as an incorrectly-rejected migration across XenServer's Xen +4.13 -> 4.17 upgrade, as Xapi is slowly growing the logic to check a VM +against the target max policy. + +Signed-off-by: Andrew Cooper +Reviewed-by: Roger Pau Monné +master commit: e2d8a652251660c3252d92b442e1a9c5d6e6a1e9 +master date: 2024-03-01 20:14:19 +0000 +--- + xen/arch/x86/cpu-policy.c | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index ed64d56294..24acd12ce2 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -458,6 +458,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) + raw_cpu_policy.feat.clwb ) + __set_bit(X86_FEATURE_CLWB, fs); + } ++ ++ /* ++ * Topology information inside the guest is entirely at the toolstack's ++ * discretion, and bears no relationship to the host we're running on. ++ * ++ * HTT identifies p->basic.lppp as valid ++ * CMP_LEGACY identifies p->extd.nc as valid ++ */ ++ __set_bit(X86_FEATURE_HTT, fs); ++ __set_bit(X86_FEATURE_CMP_LEGACY, fs); + } + + static void __init guest_common_default_feature_adjustments(uint32_t *fs) +@@ -512,6 +522,18 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) + __clear_bit(X86_FEATURE_CLWB, fs); + } + ++ /* ++ * Topology information is at the toolstack's discretion so these are ++ * unconditionally set in max, but pick a default which matches the host. 
++ */ ++ __clear_bit(X86_FEATURE_HTT, fs); ++ if ( cpu_has_htt ) ++ __set_bit(X86_FEATURE_HTT, fs); ++ ++ __clear_bit(X86_FEATURE_CMP_LEGACY, fs); ++ if ( cpu_has_cmp_legacy ) ++ __set_bit(X86_FEATURE_CMP_LEGACY, fs); ++ + /* + * On certain hardware, speculative or errata workarounds can result in + * TSX being placed in "force-abort" mode, where it doesn't actually +@@ -844,13 +866,6 @@ void recalculate_cpuid_policy(struct domain *d) + } + } + +- /* +- * Allow the toolstack to set HTT and CMP_LEGACY. These bits +- * affect how to interpret topology information in other cpuid leaves. +- */ +- __set_bit(X86_FEATURE_HTT, max_fs); +- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); +- + /* + * 32bit PV domains can't use any Long Mode features, and cannot use + * SYSCALL on non-AMD hardware. +-- +2.44.0 + diff --git a/0507-xsa455-4.17.patch b/0507-xsa455-4.17.patch new file mode 100644 index 00000000..c1deea56 --- /dev/null +++ b/0507-xsa455-4.17.patch @@ -0,0 +1,48 @@ +From 19887194865cff7d87650c323d5c6b185dfe3ddc Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 26 Mar 2024 22:47:25 +0000 +Subject: [PATCH] x86/spec-ctrl: Fix BTC/SRSO mitigations + +We were looking for SCF_entry_ibpb in the wrong variable in the top-of-stack +block, and xen_spec_ctrl won't have had bit 5 set because Xen doesn't +understand SPEC_CTRL_RRSBA_DIS_U yet. + +This is XSA-455 / CVE-2024-31142. + +Fixes: 53a570b28569 ("x86/spec-ctrl: Support IBPB-on-entry") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +--- + xen/arch/x86/hvm/svm/entry.S | 2 +- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index 934f12cf5c..c19e964bc6 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap) + /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ + + .macro svm_vmexit_cond_ibpb +- testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) ++ testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) + jz .L_skip_ibpb + + mov $MSR_PRED_CMD, %ecx +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 97a97b2b82..e85db1a329 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -102,7 +102,7 @@ + jz .L\@_skip + testb $3, UREGS_cs(%rsp) + .else +- testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) ++ testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) + .endif + jz .L\@_skip + +-- +2.44.0 + diff --git a/0508-x86-APIC-finish-genapic-conversion-to-altcall.patch b/0508-x86-APIC-finish-genapic-conversion-to-altcall.patch new file mode 100644 index 00000000..645ec55f --- /dev/null +++ b/0508-x86-APIC-finish-genapic-conversion-to-altcall.patch @@ -0,0 +1,111 @@ +From 88d5e21e165351feef0f17157005dece78275cea Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Wed, 17 Jan 2024 10:41:52 +0100 +Subject: [PATCH 508/542] x86/APIC: finish genapic conversion to altcall + +While .probe() doesn't need fiddling with for being run only very early, +init_apic_ldr() wants converting too despite not being on a frequently +executed path: This way all pre-filled struct genapic instances can +become __initconst_cf_clobber, thus allowing to eliminate 15 more ENDBR +during the 2nd phase of alternatives patching. + +While fiddling with section annotations here, also move "genapic" itself +to .data.ro_after_init. 
+ +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +(cherry picked from commit b1cc53753cba4c3253f2e1093a3a6a9a828314bf) +--- + xen/arch/x86/genapic/bigsmp.c | 2 +- + xen/arch/x86/genapic/default.c | 2 +- + xen/arch/x86/genapic/probe.c | 2 +- + xen/arch/x86/genapic/x2apic.c | 6 +++--- + xen/arch/x86/include/asm/mach-generic/mach_apic.h | 2 +- + 5 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/x86/genapic/bigsmp.c b/xen/arch/x86/genapic/bigsmp.c +index 2000383ab0..7219ec53b0 100644 +--- a/xen/arch/x86/genapic/bigsmp.c ++++ b/xen/arch/x86/genapic/bigsmp.c +@@ -41,7 +41,7 @@ static int __init cf_check probe_bigsmp(void) + return def_to_bigsmp; + } + +-const struct genapic __initconstrel apic_bigsmp = { ++const struct genapic __initconst_cf_clobber apic_bigsmp = { + APIC_INIT("bigsmp", probe_bigsmp), + GENAPIC_PHYS + }; +diff --git a/xen/arch/x86/genapic/default.c b/xen/arch/x86/genapic/default.c +index 2c63c1f917..a968836a18 100644 +--- a/xen/arch/x86/genapic/default.c ++++ b/xen/arch/x86/genapic/default.c +@@ -14,7 +14,7 @@ + #include + + /* should be called last. */ +-const struct genapic __initconstrel apic_default = { ++const struct genapic __initconst_cf_clobber apic_default = { + APIC_INIT("default", NULL), + GENAPIC_FLAT + }; +diff --git a/xen/arch/x86/genapic/probe.c b/xen/arch/x86/genapic/probe.c +index ad57912f50..10ceeae4d2 100644 +--- a/xen/arch/x86/genapic/probe.c ++++ b/xen/arch/x86/genapic/probe.c +@@ -16,7 +16,7 @@ + #include + #include + +-struct genapic __read_mostly genapic; ++struct genapic __ro_after_init genapic; + + static const struct genapic *const __initconstrel apic_probe[] = { + &apic_bigsmp, +diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c +index c64038adaa..eba09d7719 100644 +--- a/xen/arch/x86/genapic/x2apic.c ++++ b/xen/arch/x86/genapic/x2apic.c +@@ -169,7 +169,7 @@ static void cf_check send_IPI_mask_x2apic_cluster( + local_irq_restore(flags); + } + +-static const struct genapic __initconstrel apic_x2apic_phys = { ++static const struct genapic __initconst_cf_clobber apic_x2apic_phys = { + APIC_INIT("x2apic_phys", NULL), + .int_delivery_mode = dest_Fixed, + .int_dest_mode = 0 /* physical delivery */, +@@ -180,7 +180,7 @@ static const struct genapic __initconstrel apic_x2apic_phys = { + .send_IPI_self = send_IPI_self_x2apic + }; + +-static const struct genapic __initconstrel apic_x2apic_cluster = { ++static const struct genapic __initconst_cf_clobber apic_x2apic_cluster = { + APIC_INIT("x2apic_cluster", NULL), + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = 1 /* logical delivery */, +@@ -198,7 +198,7 @@ static const struct genapic __initconstrel apic_x2apic_cluster = { + * IPIs to be more efficiently delivered by not having to perform an ICR write + * for each target CPU. 
+ */ +-static const struct genapic __initconstrel apic_x2apic_mixed = { ++static const struct genapic __initconst_cf_clobber apic_x2apic_mixed = { + APIC_INIT("x2apic_mixed", NULL), + + /* +diff --git a/xen/arch/x86/include/asm/mach-generic/mach_apic.h b/xen/arch/x86/include/asm/mach-generic/mach_apic.h +index b6f6361c60..d9e02f0bc4 100644 +--- a/xen/arch/x86/include/asm/mach-generic/mach_apic.h ++++ b/xen/arch/x86/include/asm/mach-generic/mach_apic.h +@@ -13,7 +13,7 @@ + #define INT_DELIVERY_MODE (genapic.int_delivery_mode) + #define INT_DEST_MODE (genapic.int_dest_mode) + #define TARGET_CPUS ((const typeof(cpu_online_map) *)&cpu_online_map) +-#define init_apic_ldr (genapic.init_apic_ldr) ++#define init_apic_ldr() alternative_vcall(genapic.init_apic_ldr) + #define cpu_mask_to_apicid(mask) ({ \ + /* \ + * There are a number of places where the address of a local variable \ +-- +2.44.0 + diff --git a/0509-cpufreq-finish-conversion-to-altcall.patch b/0509-cpufreq-finish-conversion-to-altcall.patch new file mode 100644 index 00000000..ac0bb34d --- /dev/null +++ b/0509-cpufreq-finish-conversion-to-altcall.patch @@ -0,0 +1,157 @@ +From 653560e02d40c480d08032f3cf1e450db79f5d71 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Wed, 17 Jan 2024 10:42:27 +0100 +Subject: [PATCH 509/542] cpufreq: finish conversion to altcall + +Even functions used on infrequently executed paths want converting: This +way all pre-filled struct cpufreq_driver instances can become +__initconst_cf_clobber, thus allowing to eliminate another 15 ENDBR +during the 2nd phase of alternatives patching. + +For acpi-cpufreq's optionally populated .get hook make sure alternatives +patching can actually see the pointer. See also the code comment. + +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +(cherry picked from commit 467ae515caee491e9b6ae1da8b9b98d094955822) +--- + xen/arch/x86/acpi/cpufreq/cpufreq.c | 17 ++++++++++++++++- + xen/arch/x86/acpi/cpufreq/powernow.c | 3 ++- + xen/drivers/acpi/pmstat.c | 3 ++- + xen/drivers/cpufreq/cpufreq.c | 6 +++--- + xen/drivers/cpufreq/utility.c | 6 +++--- + 5 files changed, 26 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c +index c27cbb2304..5786943cfb 100644 +--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c ++++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c +@@ -622,12 +622,14 @@ static int cf_check acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) + return 0; + } + +-static const struct cpufreq_driver __initconstrel acpi_cpufreq_driver = { ++static const struct cpufreq_driver __initconst_cf_clobber ++acpi_cpufreq_driver = { + .name = "acpi-cpufreq", + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, ++ .get = get_cur_freq_on_cpu, + }; + + static int __init cf_check cpufreq_driver_init(void) +@@ -653,6 +655,19 @@ static int __init cf_check cpufreq_driver_init(void) + } + presmp_initcall(cpufreq_driver_init); + ++static int __init cf_check cpufreq_driver_late_init(void) ++{ ++ /* ++ * While acpi_cpufreq_driver wants to unconditionally have all hooks ++ * populated for __initconst_cf_clobber to have as much of an effect as ++ * possible, zap the .get hook here (but not in cpufreq_driver_init()), ++ * until acpi_cpufreq_cpu_init() knows whether it's wanted / needed. 
++ */ ++ cpufreq_driver.get = NULL; ++ return 0; ++} ++__initcall(cpufreq_driver_late_init); ++ + int cpufreq_cpu_init(unsigned int cpuid) + { + int ret; +diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c b/xen/arch/x86/acpi/cpufreq/powernow.c +index d4c7dcd5d9..497bf24470 100644 +--- a/xen/arch/x86/acpi/cpufreq/powernow.c ++++ b/xen/arch/x86/acpi/cpufreq/powernow.c +@@ -317,7 +317,8 @@ static int cf_check powernow_cpufreq_cpu_exit(struct cpufreq_policy *policy) + return 0; + } + +-static const struct cpufreq_driver __initconstrel powernow_cpufreq_driver = { ++static const struct cpufreq_driver __initconst_cf_clobber ++powernow_cpufreq_driver = { + .name = "powernow", + .verify = powernow_cpufreq_verify, + .target = powernow_cpufreq_target, +diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c +index 1bae635101..0c51c220a7 100644 +--- a/xen/drivers/acpi/pmstat.c ++++ b/xen/drivers/acpi/pmstat.c +@@ -255,7 +255,8 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op) + return ret; + + op->u.get_para.cpuinfo_cur_freq = +- cpufreq_driver.get ? cpufreq_driver.get(op->cpuid) : policy->cur; ++ cpufreq_driver.get ? alternative_call(cpufreq_driver.get, op->cpuid) ++ : policy->cur; + op->u.get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq; + op->u.get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq; + op->u.get_para.scaling_cur_freq = policy->cur; +diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c +index a94520ee57..daa399bbec 100644 +--- a/xen/drivers/cpufreq/cpufreq.c ++++ b/xen/drivers/cpufreq/cpufreq.c +@@ -240,7 +240,7 @@ int cpufreq_add_cpu(unsigned int cpu) + policy->cpu = cpu; + per_cpu(cpufreq_cpu_policy, cpu) = policy; + +- ret = cpufreq_driver.init(policy); ++ ret = alternative_call(cpufreq_driver.init, policy); + if (ret) { + free_cpumask_var(policy->cpus); + xfree(policy); +@@ -299,7 +299,7 @@ err1: + cpumask_clear_cpu(cpu, cpufreq_dom->map); + + if (cpumask_empty(policy->cpus)) { +- cpufreq_driver.exit(policy); ++ alternative_call(cpufreq_driver.exit, policy); + free_cpumask_var(policy->cpus); + xfree(policy); + } +@@ -363,7 +363,7 @@ int cpufreq_del_cpu(unsigned int cpu) + cpumask_clear_cpu(cpu, cpufreq_dom->map); + + if (cpumask_empty(policy->cpus)) { +- cpufreq_driver.exit(policy); ++ alternative_call(cpufreq_driver.exit, policy); + free_cpumask_var(policy->cpus); + xfree(policy); + } +diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c +index 9eb7ecedcd..ec7072078c 100644 +--- a/xen/drivers/cpufreq/utility.c ++++ b/xen/drivers/cpufreq/utility.c +@@ -412,7 +412,7 @@ int cpufreq_update_turbo(int cpuid, int new_state) + policy->turbo = new_state; + if (cpufreq_driver.update) + { +- ret = cpufreq_driver.update(cpuid, policy); ++ ret = alternative_call(cpufreq_driver.update, cpuid, policy); + if (ret) + policy->turbo = curr_state; + } +@@ -448,7 +448,7 @@ int __cpufreq_set_policy(struct cpufreq_policy *data, + return -EINVAL; + + /* verify the cpu speed can be set within this limit */ +- ret = cpufreq_driver.verify(policy); ++ ret = alternative_call(cpufreq_driver.verify, policy); + if (ret) + return ret; + +@@ -456,7 +456,7 @@ int __cpufreq_set_policy(struct cpufreq_policy *data, + data->max = policy->max; + data->limits = policy->limits; + if (cpufreq_driver.setpolicy) +- return cpufreq_driver.setpolicy(data); ++ return alternative_call(cpufreq_driver.setpolicy, data); + + if (policy->governor != data->governor) { + /* save old, working values */ +-- +2.44.0 + diff --git 
a/0510-x86-HPET-avoid-an-indirect-call.patch b/0510-x86-HPET-avoid-an-indirect-call.patch new file mode 100644 index 00000000..0d8c07af --- /dev/null +++ b/0510-x86-HPET-avoid-an-indirect-call.patch @@ -0,0 +1,107 @@ +From 06c81ea90c18c71725f51dfff79d4c4396b53d6c Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Wed, 17 Jan 2024 10:43:02 +0100 +Subject: [PATCH 510/542] x86/HPET: avoid an indirect call + +When this code was written, indirect branches still weren't considered +much of a problem (besides being a little slower). Instead of a function +pointer, pass a boolean to _disable_pit_irq(), thus allowing to +eliminate two ENDBR (one of them in .text). + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 730d2637a8e5b98dc8e4e366179b4cedc496b3ad) +--- + xen/arch/x86/hpet.c | 4 ++-- + xen/arch/x86/include/asm/hpet.h | 4 ++-- + xen/arch/x86/time.c | 12 ++++++------ + 3 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c +index bc164dd82c..50d788cb6e 100644 +--- a/xen/arch/x86/hpet.c ++++ b/xen/arch/x86/hpet.c +@@ -563,7 +563,7 @@ static void cf_check handle_rtc_once(uint8_t index, uint8_t value) + } + } + +-void __init cf_check hpet_broadcast_init(void) ++void __init hpet_broadcast_init(void) + { + u64 hpet_rate = hpet_setup(); + u32 hpet_id, cfg; +@@ -634,7 +634,7 @@ void __init cf_check hpet_broadcast_init(void) + hpet_events->flags = HPET_EVT_LEGACY; + } + +-void cf_check hpet_broadcast_resume(void) ++void hpet_broadcast_resume(void) + { + u32 cfg; + unsigned int i, n; +diff --git a/xen/arch/x86/include/asm/hpet.h b/xen/arch/x86/include/asm/hpet.h +index 9919f74730..f343fe4740 100644 +--- a/xen/arch/x86/include/asm/hpet.h ++++ b/xen/arch/x86/include/asm/hpet.h +@@ -89,8 +89,8 @@ void hpet_disable_legacy_replacement_mode(void); + * Temporarily use an HPET event counter for timer interrupt handling, + * rather than using the LAPIC timer. Used for Cx state entry. + */ +-void cf_check hpet_broadcast_init(void); +-void cf_check hpet_broadcast_resume(void); ++void hpet_broadcast_init(void); ++void hpet_broadcast_resume(void); + void cf_check hpet_broadcast_enter(void); + void cf_check hpet_broadcast_exit(void); + int hpet_broadcast_is_available(void); +diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c +index b664ae4c83..4d1766284f 100644 +--- a/xen/arch/x86/time.c ++++ b/xen/arch/x86/time.c +@@ -2288,7 +2288,7 @@ void __init early_time_init(void) + } + + /* keep pit enabled for pit_broadcast working while cpuidle enabled */ +-static int _disable_pit_irq(void(*hpet_broadcast_setup)(void)) ++static int _disable_pit_irq(bool init) + { + int ret = 1; + +@@ -2303,13 +2303,13 @@ static int _disable_pit_irq(void(*hpet_broadcast_setup)(void)) + */ + if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) ) + { +- hpet_broadcast_setup(); ++ init ? hpet_broadcast_init() : hpet_broadcast_resume(); + if ( !hpet_broadcast_is_available() ) + { + if ( xen_cpuidle > 0 ) + { +- printk("%ps() failed, turning to PIT broadcast\n", +- hpet_broadcast_setup); ++ printk("hpet_broadcast_%s() failed, turning to PIT broadcast\n", ++ init ? "init" : "resume"); + return -1; + } + ret = 0; +@@ -2326,7 +2326,7 @@ static int _disable_pit_irq(void(*hpet_broadcast_setup)(void)) + + static int __init cf_check disable_pit_irq(void) + { +- if ( !_disable_pit_irq(hpet_broadcast_init) ) ++ if ( !_disable_pit_irq(true) ) + { + xen_cpuidle = 0; + printk("CPUIDLE: disabled due to no HPET. 
" +@@ -2387,7 +2387,7 @@ int time_resume(void) + + resume_platform_timer(); + +- if ( !_disable_pit_irq(hpet_broadcast_resume) ) ++ if ( !_disable_pit_irq(false) ) + BUG(); + + init_percpu_time(); +-- +2.44.0 + diff --git a/0511-core-parking-use-alternative_call.patch b/0511-core-parking-use-alternative_call.patch new file mode 100644 index 00000000..2b39c920 --- /dev/null +++ b/0511-core-parking-use-alternative_call.patch @@ -0,0 +1,95 @@ +From f4efbcf156fb408dcedf1fc88d3be595ec722ad0 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 22 Jan 2024 13:38:24 +0100 +Subject: [PATCH 511/542] core-parking: use alternative_call() + +This way we can arrange for core_parking_{performance,power}()'s ENDBR +to also be zapped. + +For the decision to be taken before the 2nd alternative patching pass, +the initcall needs to become a pre-SMP one, though. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 1bc07ebcac3b1bb2a378732bc0f9a19940e76faf) +--- + xen/common/core_parking.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/xen/common/core_parking.c b/xen/common/core_parking.c +index c4f01291c0..a970ffeab8 100644 +--- a/xen/common/core_parking.c ++++ b/xen/common/core_parking.c +@@ -30,10 +30,11 @@ static DEFINE_SPINLOCK(accounting_lock); + static uint32_t cur_idle_nums; + static unsigned int core_parking_cpunum[NR_CPUS] = {[0 ... NR_CPUS-1] = -1}; + +-static const struct cp_policy { ++struct cp_policy { + char name[30]; + unsigned int (*next)(unsigned int event); +-} *__read_mostly core_parking_policy; ++}; ++static struct cp_policy __ro_after_init core_parking_policy; + + static enum core_parking_controller { + POWER_FIRST, +@@ -175,12 +176,13 @@ long cf_check core_parking_helper(void *data) + unsigned int cpu; + int ret = 0; + +- if ( !core_parking_policy ) ++ if ( !core_parking_policy.next ) + return -EINVAL; + + while ( cur_idle_nums < idle_nums ) + { +- cpu = core_parking_policy->next(CORE_PARKING_INCREMENT); ++ cpu = alternative_call(core_parking_policy.next, ++ CORE_PARKING_INCREMENT); + ret = cpu_down(cpu); + if ( ret ) + return ret; +@@ -193,7 +195,8 @@ long cf_check core_parking_helper(void *data) + + while ( cur_idle_nums > idle_nums ) + { +- cpu = core_parking_policy->next(CORE_PARKING_DECREMENT); ++ cpu = alternative_call(core_parking_policy.next, ++ CORE_PARKING_DECREMENT); + ret = cpu_up(cpu); + if ( ret ) + return ret; +@@ -239,12 +242,12 @@ uint32_t get_cur_idle_nums(void) + return cur_idle_nums; + } + +-static const struct cp_policy power_first = { ++static const struct cp_policy __initconst_cf_clobber power_first = { + .name = "power", + .next = core_parking_power, + }; + +-static const struct cp_policy performance_first = { ++static const struct cp_policy __initconst_cf_clobber performance_first = { + .name = "performance", + .next = core_parking_performance, + }; +@@ -254,7 +257,7 @@ static int __init register_core_parking_policy(const struct cp_policy *policy) + if ( !policy || !policy->next ) + return -EINVAL; + +- core_parking_policy = policy; ++ core_parking_policy = *policy; + return 0; + } + +@@ -269,4 +272,4 @@ static int __init cf_check core_parking_init(void) + + return ret; + } +-__initcall(core_parking_init); ++presmp_initcall(core_parking_init); +-- +2.44.0 + diff --git a/0512-x86-MTRR-avoid-several-indirect-calls.patch b/0512-x86-MTRR-avoid-several-indirect-calls.patch new file mode 100644 index 00000000..9958c052 --- /dev/null +++ b/0512-x86-MTRR-avoid-several-indirect-calls.patch 
@@ -0,0 +1,366 @@ +From aed8192f578fb02111f57eca0868c2262ada1341 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 22 Jan 2024 13:39:23 +0100 +Subject: [PATCH 512/542] x86/MTRR: avoid several indirect calls + +The use of (supposedly) vendor-specific hooks is a relic from the days +when Xen was still possible to build as 32-bit binary. There's no +expectation that a new need for such an abstraction would arise. Convert +mttr_if to a mere boolean and all prior calls through it to direct ones, +thus allowing to eliminate 6 ENDBR from .text. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit e9e0eb30d4d6565b411499ca826718b4b9acab68) +--- + xen/arch/x86/cpu/mtrr/generic.c | 26 ++++-------- + xen/arch/x86/cpu/mtrr/main.c | 66 +++++++++++-------------------- + xen/arch/x86/cpu/mtrr/mtrr.h | 37 +++++------------ + xen/arch/x86/platform_hypercall.c | 2 +- + 4 files changed, 40 insertions(+), 91 deletions(-) + +diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c +index 47aaf76226..837d3250f1 100644 +--- a/xen/arch/x86/cpu/mtrr/generic.c ++++ b/xen/arch/x86/cpu/mtrr/generic.c +@@ -287,7 +287,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) + } + } + +-int cf_check generic_get_free_region( ++int mtrr_get_free_region( + unsigned long base, unsigned long size, int replace_reg) + /* [SUMMARY] Get a free MTRR. + The starting (base) address of the region. +@@ -303,14 +303,14 @@ int cf_check generic_get_free_region( + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { +- mtrr_if->get(i, &lbase, &lsize, <ype); ++ mtrr_get(i, &lbase, &lsize, <ype); + if (lsize == 0) + return i; + } + return -ENOSPC; + } + +-static void cf_check generic_get_mtrr( ++void mtrr_get( + unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) + { + uint64_t _mask, _base; +@@ -500,7 +500,7 @@ static void post_set(bool pge) + spin_unlock(&set_atomicity_lock); + } + +-static void cf_check generic_set_all(void) ++void mtrr_set_all(void) + { + unsigned long mask, count; + unsigned long flags; +@@ -523,7 +523,7 @@ static void cf_check generic_set_all(void) + } + } + +-static void cf_check generic_set_mtrr( ++void mtrr_set( + unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) + /* [SUMMARY] Set variable MTRR register on the local CPU. + The register to set. +@@ -567,7 +567,7 @@ static void cf_check generic_set_mtrr( + local_irq_restore(flags); + } + +-int cf_check generic_validate_add_page( ++int mtrr_validate_add_page( + unsigned long base, unsigned long size, unsigned int type) + { + unsigned long lbase, last; +@@ -586,21 +586,9 @@ int cf_check generic_validate_add_page( + } + + +-static int cf_check generic_have_wrcomb(void) ++bool mtrr_have_wrcomb(void) + { + unsigned long config; + rdmsrl(MSR_MTRRcap, config); + return (config & (1ULL << 10)); + } +- +-/* generic structure... 
+- */ +-const struct mtrr_ops generic_mtrr_ops = { +- .use_intel_if = true, +- .set_all = generic_set_all, +- .get = generic_get_mtrr, +- .get_free_region = generic_get_free_region, +- .set = generic_set_mtrr, +- .validate_add_page = generic_validate_add_page, +- .have_wrcomb = generic_have_wrcomb, +-}; +diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c +index 4e01c8d6f9..dee59ea168 100644 +--- a/xen/arch/x86/cpu/mtrr/main.c ++++ b/xen/arch/x86/cpu/mtrr/main.c +@@ -57,7 +57,7 @@ static DEFINE_MUTEX(mtrr_mutex); + u64 __read_mostly size_or_mask; + u64 __read_mostly size_and_mask; + +-const struct mtrr_ops *__read_mostly mtrr_if = NULL; ++static bool __ro_after_init mtrr_if; + + static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); +@@ -78,23 +78,12 @@ static const char *mtrr_attrib_to_str(int x) + return (x <= 6) ? mtrr_strings[x] : "?"; + } + +-/* Returns non-zero if we have the write-combining memory type */ +-static int have_wrcomb(void) +-{ +- return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); +-} +- + /* This function returns the number of variable MTRRs */ + static void __init set_num_var_ranges(void) + { +- unsigned long config = 0; +- +- if (use_intel()) { +- rdmsrl(MSR_MTRRcap, config); +- } else if (is_cpu(AMD)) +- config = 2; +- else if (is_cpu(CENTAUR)) +- config = 8; ++ unsigned long config; ++ ++ rdmsrl(MSR_MTRRcap, config); + num_var_ranges = MASK_EXTR(config, MTRRcap_VCNT); + } + +@@ -149,10 +138,10 @@ static void cf_check ipi_handler(void *info) + if (data->smp_reg == ~0U) /* update all mtrr registers */ + /* At the cpu hot-add time this will reinitialize mtrr + * registres on the existing cpus. It is ok. */ +- mtrr_if->set_all(); ++ mtrr_set_all(); + else /* single mtrr register update */ +- mtrr_if->set(data->smp_reg, data->smp_base, +- data->smp_size, data->smp_type); ++ mtrr_set(data->smp_reg, data->smp_base, ++ data->smp_size, data->smp_type); + + atomic_dec(&data->count); + while(atomic_read(&data->gate)) +@@ -198,10 +187,9 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { + * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait + * until it hits 0 and proceed. We set the data.gate flag and reset data.count. + * Meanwhile, they are waiting for that flag to be set. Once it's set, each +- * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it +- * differently, so we call mtrr_if->set() callback and let them take care of it. +- * When they're done, they again decrement data->count and wait for data.gate to +- * be reset. ++ * CPU goes through the transition of updating MTRRs. ++ * When mtrr_set() is done, they again decrement data->count and wait for ++ * data.gate to be reset. + * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. + * Everyone then enables interrupts and we all continue on. + * +@@ -251,9 +239,9 @@ static void set_mtrr(unsigned int reg, unsigned long base, + if (reg == ~0U) /* update all mtrr registers */ + /* at boot or resume time, this will reinitialize the mtrrs on + * the bp. It is ok. 
*/ +- mtrr_if->set_all(); ++ mtrr_set_all(); + else /* update the single mtrr register */ +- mtrr_if->set(reg,base,size,type); ++ mtrr_set(reg, base, size, type); + + /* wait for the others */ + while (atomic_read(&data.count)) +@@ -319,7 +307,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, + if (!mtrr_if) + return -ENXIO; + +- if ((error = mtrr_if->validate_add_page(base,size,type))) ++ if ((error = mtrr_validate_add_page(base, size, type))) + return error; + + if (type >= MTRR_NUM_TYPES) { +@@ -328,7 +316,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, + } + + /* If the type is WC, check that this processor supports it */ +- if ((type == X86_MT_WC) && !have_wrcomb()) { ++ if ((type == X86_MT_WC) && mtrr_have_wrcomb()) { + printk(KERN_WARNING + "mtrr: your processor doesn't support write-combining\n"); + return -EOPNOTSUPP; +@@ -350,7 +338,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, + /* Search for existing MTRR */ + mutex_lock(&mtrr_mutex); + for (i = 0; i < num_var_ranges; ++i) { +- mtrr_if->get(i, &lbase, &lsize, <ype); ++ mtrr_get(i, &lbase, &lsize, <ype); + if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) + continue; + /* At this point we know there is some kind of overlap/enclosure */ +@@ -385,7 +373,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, + goto out; + } + /* Search for an empty MTRR */ +- i = mtrr_if->get_free_region(base, size, replace); ++ i = mtrr_get_free_region(base, size, replace); + if (i >= 0) { + set_mtrr(i, base, size, type); + if (likely(replace < 0)) +@@ -494,7 +482,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) + if (reg < 0) { + /* Search for existing MTRR */ + for (i = 0; i < max; ++i) { +- mtrr_if->get(i, &lbase, &lsize, <ype); ++ mtrr_get(i, &lbase, &lsize, <ype); + if (lbase == base && lsize == size) { + reg = i; + break; +@@ -510,7 +498,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) + printk(KERN_WARNING "mtrr: register: %d too big\n", reg); + goto out; + } +- mtrr_if->get(reg, &lbase, &lsize, <ype); ++ mtrr_get(reg, &lbase, &lsize, <ype); + if (lsize < 1) { + printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); + goto out; +@@ -568,7 +556,7 @@ struct mtrr_value { + void __init mtrr_bp_init(void) + { + if (cpu_has_mtrr) { +- mtrr_if = &generic_mtrr_ops; ++ mtrr_if = true; + size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1); + size_and_mask = ~size_or_mask & 0xfffff00000ULL; + } +@@ -576,14 +564,13 @@ void __init mtrr_bp_init(void) + if (mtrr_if) { + set_num_var_ranges(); + init_table(); +- if (use_intel()) +- get_mtrr_state(); ++ get_mtrr_state(); + } + } + + void mtrr_ap_init(void) + { +- if (!mtrr_if || !use_intel() || hold_mtrr_updates_on_aps) ++ if (!mtrr_if || hold_mtrr_updates_on_aps) + return; + /* + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, +@@ -612,32 +599,25 @@ void mtrr_save_state(void) + + void mtrr_aps_sync_begin(void) + { +- if (!use_intel()) +- return; + hold_mtrr_updates_on_aps = 1; + } + + void mtrr_aps_sync_end(void) + { +- if (!use_intel()) +- return; + set_mtrr(~0U, 0, 0, 0); + hold_mtrr_updates_on_aps = 0; + } + + void mtrr_bp_restore(void) + { +- if (!use_intel()) +- return; +- mtrr_if->set_all(); ++ mtrr_set_all(); + } + + static int __init cf_check mtrr_init_finialize(void) + { + if (!mtrr_if) + return 0; +- if (use_intel()) +- mtrr_state_warn(); ++ mtrr_state_warn(); + return 0; + } + __initcall(mtrr_init_finialize); +diff --git a/xen/arch/x86/cpu/mtrr/mtrr.h 
b/xen/arch/x86/cpu/mtrr/mtrr.h +index c7fd44daab..a9741e0cb0 100644 +--- a/xen/arch/x86/cpu/mtrr/mtrr.h ++++ b/xen/arch/x86/cpu/mtrr/mtrr.h +@@ -6,40 +6,21 @@ + #define MTRR_CHANGE_MASK_VARIABLE 0x02 + #define MTRR_CHANGE_MASK_DEFTYPE 0x04 + +- +-struct mtrr_ops { +- u32 vendor; +- bool use_intel_if; +-// void (*init)(void); +- void (*set)(unsigned int reg, unsigned long base, +- unsigned long size, mtrr_type type); +- void (*set_all)(void); +- +- void (*get)(unsigned int reg, unsigned long *base, +- unsigned long *size, mtrr_type * type); +- int (*get_free_region)(unsigned long base, unsigned long size, +- int replace_reg); +- int (*validate_add_page)(unsigned long base, unsigned long size, +- unsigned int type); +- int (*have_wrcomb)(void); +-}; +- +-int cf_check generic_get_free_region( ++void mtrr_get( ++ unsigned int reg, unsigned long *base, unsigned long *size, ++ mtrr_type *type); ++void mtrr_set( ++ unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); ++void mtrr_set_all(void); ++int mtrr_get_free_region( + unsigned long base, unsigned long size, int replace_reg); +-int cf_check generic_validate_add_page( ++int mtrr_validate_add_page( + unsigned long base, unsigned long size, unsigned int type); +- +-extern const struct mtrr_ops generic_mtrr_ops; ++bool mtrr_have_wrcomb(void); + + void get_mtrr_state(void); + +-extern void set_mtrr_ops(const struct mtrr_ops *); +- + extern u64 size_or_mask, size_and_mask; +-extern const struct mtrr_ops *mtrr_if; +- +-#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) +-#define use_intel() (mtrr_if && mtrr_if->use_intel_if) + + extern unsigned int num_var_ranges; + +diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c +index e7deee2268..27a799161a 100644 +--- a/xen/arch/x86/platform_hypercall.c ++++ b/xen/arch/x86/platform_hypercall.c +@@ -299,7 +299,7 @@ ret_t do_platform_op( + ret = -EINVAL; + if ( op->u.read_memtype.reg < num_var_ranges ) + { +- mtrr_if->get(op->u.read_memtype.reg, &mfn, &nr_mfns, &type); ++ mtrr_get(op->u.read_memtype.reg, &mfn, &nr_mfns, &type); + op->u.read_memtype.mfn = mfn; + op->u.read_memtype.nr_mfns = nr_mfns; + op->u.read_memtype.type = type; +-- +2.44.0 + diff --git a/0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch b/0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch new file mode 100644 index 00000000..85b96418 --- /dev/null +++ b/0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch @@ -0,0 +1,102 @@ +From 796959c8bd04672cb93ccbb1bc2f79e35209e30b Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 22 Jan 2024 13:40:00 +0100 +Subject: [PATCH 513/542] x86/PV: avoid indirect call for I/O emulation quirk + hook + +This way ioemul_handle_proliant_quirk() won't need ENDBR anymore. + +While touching this code, also +- arrange for it to not be built at all when !PV, +- add "const" to the last function parameter and bring the definition + in sync with the declaration (for Misra). 
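+
+As a self-contained sketch of the resulting shape (the names and the
+placeholder logic below are invented for illustration; they are not part
+of the change): a hook with only one possible implementation is demoted
+from a function pointer to a boolean, so the call becomes direct and the
+callee no longer needs to be an indirect-branch target:
+
+    #include <stdint.h>
+    #include <stdbool.h>
+
+    /* Before: indirect call through a pointer; the target needs ENDBR.
+     * static unsigned int (*quirk_hook)(uint8_t opcode);
+     */
+
+    /* After: a flag, set once at boot, selects the sole implementation. */
+    static bool quirk_active;
+
+    static unsigned int handle_quirk(uint8_t opcode)
+    {
+        return opcode == 0xe4 ? 9 : 0;   /* stand-in for the real fixup */
+    }
+
+    static unsigned int emulate(uint8_t opcode)
+    {
+        /* Direct call; handle_quirk() needs no ENDBR landing pad. */
+        return quirk_active ? handle_quirk(opcode) : 0;
+    }
+
+The same flag-plus-direct-call shape recurs in several later patches in
+this series.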
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 1212af3e8c4d3a1350046d4fe0ca3b97b51e67de) +--- + xen/arch/x86/Makefile | 2 +- + xen/arch/x86/include/asm/io.h | 10 +++++++--- + xen/arch/x86/ioport_emulate.c | 9 ++++----- + xen/arch/x86/pv/emul-priv-op.c | 2 +- + 4 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index f213a6b56a..cb9d952659 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -43,7 +43,7 @@ obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o + obj-y += msi.o + obj-y += msr.o + obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o +-obj-y += ioport_emulate.o ++obj-$(CONFIG_PV) += ioport_emulate.o + obj-y += irq.o + obj-$(CONFIG_KEXEC) += machine_kexec.o + obj-y += mm.o x86_64/mm.o +diff --git a/xen/arch/x86/include/asm/io.h b/xen/arch/x86/include/asm/io.h +index 92b784a861..9b19d2d389 100644 +--- a/xen/arch/x86/include/asm/io.h ++++ b/xen/arch/x86/include/asm/io.h +@@ -47,10 +47,14 @@ __OUT(b,"b",char) + __OUT(w,"w",short) + __OUT(l,,int) + +-/* Function pointer used to handle platform specific I/O port emulation. */ ++/* ++ * Boolean indicator and function used to handle platform specific I/O port ++ * emulation. ++ */ + #define IOEMUL_QUIRK_STUB_BYTES 9 ++extern bool ioemul_handle_quirk; + struct cpu_user_regs; +-extern unsigned int (*ioemul_handle_quirk)( +- u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs); ++unsigned int ioemul_handle_proliant_quirk( ++ uint8_t opcode, char *io_emul_stub, const struct cpu_user_regs *regs); + + #endif +diff --git a/xen/arch/x86/ioport_emulate.c b/xen/arch/x86/ioport_emulate.c +index 6caeb3d470..0c1e389bc8 100644 +--- a/xen/arch/x86/ioport_emulate.c ++++ b/xen/arch/x86/ioport_emulate.c +@@ -8,11 +8,10 @@ + #include + #include + +-unsigned int (*__read_mostly ioemul_handle_quirk)( +- uint8_t opcode, char *io_emul_stub, struct cpu_user_regs *regs); ++bool __ro_after_init ioemul_handle_quirk; + +-static unsigned int cf_check ioemul_handle_proliant_quirk( +- u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs) ++unsigned int ioemul_handle_proliant_quirk( ++ uint8_t opcode, char *io_emul_stub, const struct cpu_user_regs *regs) + { + static const char stub[] = { + 0x9c, /* pushf */ +@@ -103,7 +102,7 @@ static const struct dmi_system_id __initconstrel ioport_quirks_tbl[] = { + static int __init cf_check ioport_quirks_init(void) + { + if ( dmi_check_system(ioport_quirks_tbl) ) +- ioemul_handle_quirk = ioemul_handle_proliant_quirk; ++ ioemul_handle_quirk = true; + + return 0; + } +diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c +index 2c94beb10e..e429dfa4f0 100644 +--- a/xen/arch/x86/pv/emul-priv-op.c ++++ b/xen/arch/x86/pv/emul-priv-op.c +@@ -124,7 +124,7 @@ static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode, + /* Some platforms might need to quirk the stub for specific inputs. 
*/ + if ( unlikely(ioemul_handle_quirk) ) + { +- quirk_bytes = ioemul_handle_quirk(opcode, p, ctxt->ctxt.regs); ++ quirk_bytes = ioemul_handle_proliant_quirk(opcode, p, ctxt->ctxt.regs); + p += quirk_bytes; + } + +-- +2.44.0 + diff --git a/0514-x86-MCE-separate-BSP-only-initialization.patch b/0514-x86-MCE-separate-BSP-only-initialization.patch new file mode 100644 index 00000000..01ae23e8 --- /dev/null +++ b/0514-x86-MCE-separate-BSP-only-initialization.patch @@ -0,0 +1,192 @@ +From 2268aacc4324f6010058c2e3bbb214a280dc8078 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 22 Jan 2024 13:40:32 +0100 +Subject: [PATCH 514/542] x86/MCE: separate BSP-only initialization + +Several function pointers are registered over and over again, when +setting them once on the BSP suffices. Arrange for this in the vendor +init functions and mark involved registration functions __init. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 9f58616ddb1cc1870399de2202fafc7bf0d61694) +--- + xen/arch/x86/cpu/mcheck/mcaction.c | 2 +- + xen/arch/x86/cpu/mcheck/mce.c | 15 ++++++--------- + xen/arch/x86/cpu/mcheck/mce.h | 2 +- + xen/arch/x86/cpu/mcheck/mce_amd.c | 20 +++++++++++++------- + xen/arch/x86/cpu/mcheck/mce_intel.c | 10 +++++++--- + 5 files changed, 28 insertions(+), 21 deletions(-) + +diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c b/xen/arch/x86/cpu/mcheck/mcaction.c +index f4f265c1bc..695fb61d7d 100644 +--- a/xen/arch/x86/cpu/mcheck/mcaction.c ++++ b/xen/arch/x86/cpu/mcheck/mcaction.c +@@ -29,7 +29,7 @@ mci_action_add_pageoffline(int bank, struct mc_info *mi, + + mce_check_addr_t mc_check_addr = NULL; + +-void mce_register_addrcheck(mce_check_addr_t cbfunc) ++void __init mce_register_addrcheck(mce_check_addr_t cbfunc) + { + mc_check_addr = cbfunc; + } +diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c +index f68e31b643..0b164e2027 100644 +--- a/xen/arch/x86/cpu/mcheck/mce.c ++++ b/xen/arch/x86/cpu/mcheck/mce.c +@@ -84,7 +84,7 @@ static void cf_check unexpected_machine_check(const struct cpu_user_regs *regs) + + static x86_mce_vector_t _machine_check_vector = unexpected_machine_check; + +-void x86_mce_vector_register(x86_mce_vector_t hdlr) ++void __init x86_mce_vector_register(x86_mce_vector_t hdlr) + { + _machine_check_vector = hdlr; + } +@@ -107,7 +107,7 @@ void do_machine_check(const struct cpu_user_regs *regs) + */ + static x86_mce_callback_t mc_callback_bank_extended = NULL; + +-void x86_mce_callback_register(x86_mce_callback_t cbfunc) ++void __init x86_mce_callback_register(x86_mce_callback_t cbfunc) + { + mc_callback_bank_extended = cbfunc; + } +@@ -118,7 +118,7 @@ void x86_mce_callback_register(x86_mce_callback_t cbfunc) + */ + static mce_recoverable_t mc_recoverable_scan = NULL; + +-void mce_recoverable_register(mce_recoverable_t cbfunc) ++void __init mce_recoverable_register(mce_recoverable_t cbfunc) + { + mc_recoverable_scan = cbfunc; + } +@@ -182,7 +182,7 @@ static void mcabank_clear(int banknum) + */ + static mce_need_clearbank_t mc_need_clearbank_scan = NULL; + +-void mce_need_clearbank_register(mce_need_clearbank_t cbfunc) ++void __init mce_need_clearbank_register(mce_need_clearbank_t cbfunc) + { + mc_need_clearbank_scan = cbfunc; + } +@@ -799,7 +799,7 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp) + { + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: +- inited = amd_mcheck_init(c); ++ inited = amd_mcheck_init(c, bsp); + break; + + case X86_VENDOR_INTEL: +@@ -1913,11 +1913,8 @@ static void cf_check mce_softirq(void) + * will 
help to collect and log those MCE errors. + * Round2: Do all MCE processing logic as normal. + */ +-void mce_handler_init(void) ++void __init mce_handler_init(void) + { +- if ( smp_processor_id() != 0 ) +- return; +- + /* callback register, do we really need so many callback? */ + /* mce handler data initialization */ + spin_lock_init(&mce_logout_lock); +diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h +index bea08bdc74..10ed059f7c 100644 +--- a/xen/arch/x86/cpu/mcheck/mce.h ++++ b/xen/arch/x86/cpu/mcheck/mce.h +@@ -44,7 +44,7 @@ extern uint8_t cmci_apic_vector; + extern bool lmce_support; + + /* Init functions */ +-enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c); ++enum mcheck_type amd_mcheck_init(const struct cpuinfo_x86 *c, bool bsp); + enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp); + + void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); +diff --git a/xen/arch/x86/cpu/mcheck/mce_amd.c b/xen/arch/x86/cpu/mcheck/mce_amd.c +index d7ae8919df..cf80e1a275 100644 +--- a/xen/arch/x86/cpu/mcheck/mce_amd.c ++++ b/xen/arch/x86/cpu/mcheck/mce_amd.c +@@ -284,7 +284,7 @@ int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + } + + enum mcheck_type +-amd_mcheck_init(struct cpuinfo_x86 *ci) ++amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) + { + uint32_t i; + enum mcequirk_amd_flags quirkflag = 0; +@@ -294,9 +294,12 @@ amd_mcheck_init(struct cpuinfo_x86 *ci) + + /* Assume that machine check support is available. + * The minimum provided support is at least the K8. */ +- mce_handler_init(); +- x86_mce_vector_register(mcheck_cmn_handler); +- mce_need_clearbank_register(amd_need_clearbank_scan); ++ if ( bsp ) ++ { ++ mce_handler_init(); ++ x86_mce_vector_register(mcheck_cmn_handler); ++ mce_need_clearbank_register(amd_need_clearbank_scan); ++ } + + for ( i = 0; i < this_cpu(nr_mce_banks); i++ ) + { +@@ -336,9 +339,12 @@ amd_mcheck_init(struct cpuinfo_x86 *ci) + ppin_msr = MSR_AMD_PPIN; + } + +- x86_mce_callback_register(amd_f10_handler); +- mce_recoverable_register(mc_amd_recoverable_scan); +- mce_register_addrcheck(mc_amd_addrcheck); ++ if ( bsp ) ++ { ++ x86_mce_callback_register(amd_f10_handler); ++ mce_recoverable_register(mc_amd_recoverable_scan); ++ mce_register_addrcheck(mc_amd_addrcheck); ++ } + + return ci->x86_vendor == X86_VENDOR_HYGON ? 
+ mcheck_hygon : mcheck_amd_famXX; +diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c +index ce7678f242..837a8c6d0c 100644 +--- a/xen/arch/x86/cpu/mcheck/mce_intel.c ++++ b/xen/arch/x86/cpu/mcheck/mce_intel.c +@@ -814,7 +814,7 @@ static void intel_mce_post_reset(void) + return; + } + +-static void intel_init_mce(void) ++static void intel_init_mce(bool bsp) + { + uint64_t msr_content; + int i; +@@ -840,6 +840,9 @@ static void intel_init_mce(void) + if ( firstbank ) /* if cmci enabled, firstbank = 0 */ + wrmsrl(MSR_IA32_MC0_STATUS, 0x0ULL); + ++ if ( !bsp ) ++ return; ++ + x86_mce_vector_register(mcheck_cmn_handler); + mce_recoverable_register(intel_recoverable_scan); + mce_need_clearbank_register(intel_need_clearbank_scan); +@@ -979,9 +982,10 @@ enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp) + + intel_init_mca(c); + +- mce_handler_init(); ++ if ( bsp ) ++ mce_handler_init(); + +- intel_init_mce(); ++ intel_init_mce(bsp); + + intel_init_cmci(c); + +-- +2.44.0 + diff --git a/0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch b/0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch new file mode 100644 index 00000000..b849cf22 --- /dev/null +++ b/0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch @@ -0,0 +1,405 @@ +From 90275d1cbfa3cbb2380028753349bcb6bc6f0717 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 22 Jan 2024 13:41:07 +0100 +Subject: [PATCH 515/542] x86/MCE: switch some callback invocations to altcall + +While not performance critical, these hook invocations still would +better be converted: This way all pre-filled (and newly introduced) +struct mce_callback instances can become __initconst_cf_clobber, thus +allowing to eliminate another 9 ENDBR during the 2nd phase of +alternatives patching. + +While this means registering callbacks a little earlier, doing so is +perhaps even advantageous, for having pointers be non-NULL earlier on. +Only one set of callbacks would only ever be registered anyway, and +neither of the respective initialization function can (subsequently) +fail. 
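+
+In rough outline (names invented for the example; the real hooks take
+different arguments), the consolidation replaces per-callback registration
+functions with a single structure that is copied once at boot, which is
+what allows the vendor-specific source instances to live in discardable
+.init.* sections:
+
+    struct callbacks {
+        void (*handler)(unsigned int bank);
+        int (*recoverable)(unsigned long status);
+    };
+
+    /* The one live instance; filled exactly once while booting. */
+    static struct callbacks cbs;
+
+    static void handler_init(const struct callbacks *cb)
+    {
+        cbs = *cb;   /* copy by value: *cb may sit in init-only memory */
+    }
+
+    static void on_error(unsigned int bank, unsigned long status)
+    {
+        /* Xen routes these through alternative_call()/alternative_vcall()
+         * so that alternatives patching can later make the calls direct. */
+        if (cbs.recoverable && cbs.recoverable(status))
+            cbs.handler(bank);
+    }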
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 85ba4d050f9f3c4286164f21660ae88435b7e83c) +--- + xen/arch/x86/cpu/mcheck/mcaction.c | 10 +--- + xen/arch/x86/cpu/mcheck/mcaction.h | 5 -- + xen/arch/x86/cpu/mcheck/mce.c | 71 ++++++++-------------------- + xen/arch/x86/cpu/mcheck/mce.h | 72 +++++++++++++++-------------- + xen/arch/x86/cpu/mcheck/mce_amd.c | 26 ++++++----- + xen/arch/x86/cpu/mcheck/mce_intel.c | 14 +++--- + 6 files changed, 80 insertions(+), 118 deletions(-) + +diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c b/xen/arch/x86/cpu/mcheck/mcaction.c +index 695fb61d7d..bf7a0de965 100644 +--- a/xen/arch/x86/cpu/mcheck/mcaction.c ++++ b/xen/arch/x86/cpu/mcheck/mcaction.c +@@ -27,13 +27,6 @@ mci_action_add_pageoffline(int bank, struct mc_info *mi, + return rec; + } + +-mce_check_addr_t mc_check_addr = NULL; +- +-void __init mce_register_addrcheck(mce_check_addr_t cbfunc) +-{ +- mc_check_addr = cbfunc; +-} +- + void + mc_memerr_dhandler(struct mca_binfo *binfo, + enum mce_result *result, +@@ -48,7 +41,8 @@ mc_memerr_dhandler(struct mca_binfo *binfo, + int vmce_vcpuid; + unsigned int mc_vcpuid; + +- if ( !mc_check_addr(bank->mc_status, bank->mc_misc, MC_ADDR_PHYSICAL) ) ++ if ( !alternative_call(mce_callbacks.check_addr, bank->mc_status, ++ bank->mc_misc, MC_ADDR_PHYSICAL) ) + { + dprintk(XENLOG_WARNING, + "No physical address provided for memory error\n"); +diff --git a/xen/arch/x86/cpu/mcheck/mcaction.h b/xen/arch/x86/cpu/mcheck/mcaction.h +index 5cbe558fb0..6c79498cd2 100644 +--- a/xen/arch/x86/cpu/mcheck/mcaction.h ++++ b/xen/arch/x86/cpu/mcheck/mcaction.h +@@ -12,9 +12,4 @@ mc_memerr_dhandler(struct mca_binfo *binfo, + #define MC_ADDR_PHYSICAL 0 + #define MC_ADDR_VIRTUAL 1 + +-typedef bool (*mce_check_addr_t)(uint64_t status, uint64_t misc, int addr_type); +-extern void mce_register_addrcheck(mce_check_addr_t); +- +-extern mce_check_addr_t mc_check_addr; +- + #endif +diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c +index 0b164e2027..5b7b85a0b5 100644 +--- a/xen/arch/x86/cpu/mcheck/mce.c ++++ b/xen/arch/x86/cpu/mcheck/mce.c +@@ -82,47 +82,21 @@ static void cf_check unexpected_machine_check(const struct cpu_user_regs *regs) + fatal_trap(regs, 1); + } + +-static x86_mce_vector_t _machine_check_vector = unexpected_machine_check; +- +-void __init x86_mce_vector_register(x86_mce_vector_t hdlr) +-{ +- _machine_check_vector = hdlr; +-} ++struct mce_callbacks __ro_after_init mce_callbacks = { ++ .handler = unexpected_machine_check, ++}; ++static const typeof(mce_callbacks.handler) __initconst_cf_clobber __used ++ default_handler = unexpected_machine_check; + + /* Call the installed machine check handler for this CPU setup. */ + + void do_machine_check(const struct cpu_user_regs *regs) + { + mce_enter(); +- _machine_check_vector(regs); ++ alternative_vcall(mce_callbacks.handler, regs); + mce_exit(); + } + +-/* +- * Init machine check callback handler +- * It is used to collect additional information provided by newer +- * CPU families/models without the need to duplicate the whole handler. +- * This avoids having many handlers doing almost nearly the same and each +- * with its own tweaks ands bugs. 
+- */ +-static x86_mce_callback_t mc_callback_bank_extended = NULL; +- +-void __init x86_mce_callback_register(x86_mce_callback_t cbfunc) +-{ +- mc_callback_bank_extended = cbfunc; +-} +- +-/* +- * Machine check recoverable judgement callback handler +- * It is used to judge whether an UC error is recoverable by software +- */ +-static mce_recoverable_t mc_recoverable_scan = NULL; +- +-void __init mce_recoverable_register(mce_recoverable_t cbfunc) +-{ +- mc_recoverable_scan = cbfunc; +-} +- + struct mca_banks *mcabanks_alloc(unsigned int nr_mce_banks) + { + struct mca_banks *mb; +@@ -174,19 +148,6 @@ static void mcabank_clear(int banknum) + mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL); + } + +-/* +- * Judging whether to Clear Machine Check error bank callback handler +- * According to Intel latest MCA OS Recovery Writer's Guide, +- * whether the error MCA bank needs to be cleared is decided by the mca_source +- * and MCi_status bit value. +- */ +-static mce_need_clearbank_t mc_need_clearbank_scan = NULL; +- +-void __init mce_need_clearbank_register(mce_need_clearbank_t cbfunc) +-{ +- mc_need_clearbank_scan = cbfunc; +-} +- + /* + * mce_logout_lock should only be used in the trap handler, + * while MCIP has not been cleared yet in the global status +@@ -227,7 +188,8 @@ static void mca_init_bank(enum mca_source who, struct mc_info *mi, int bank) + + if ( (mib->mc_status & MCi_STATUS_MISCV) && + (mib->mc_status & MCi_STATUS_ADDRV) && +- (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) && ++ alternative_call(mce_callbacks.check_addr, mib->mc_status, ++ mib->mc_misc, MC_ADDR_PHYSICAL) && + (who == MCA_POLLER || who == MCA_CMCI_HANDLER) && + (mfn_valid(_mfn(paddr_to_pfn(mib->mc_addr)))) ) + { +@@ -327,7 +289,7 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, + * If no mc_recovery_scan callback handler registered, + * this error is not recoverable + */ +- recover = mc_recoverable_scan ? 1 : 0; ++ recover = mce_callbacks.recoverable_scan; + + for ( i = 0; i < this_cpu(nr_mce_banks); i++ ) + { +@@ -344,8 +306,9 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, + * decide whether to clear bank by MCi_STATUS bit value such as + * OVER/UC/EN/PCC/S/AR + */ +- if ( mc_need_clearbank_scan ) +- need_clear = mc_need_clearbank_scan(who, status); ++ if ( mce_callbacks.need_clearbank_scan ) ++ need_clear = alternative_call(mce_callbacks.need_clearbank_scan, ++ who, status); + + /* + * If this is the first bank with valid MCA DATA, then +@@ -381,12 +344,12 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, + + if ( recover && uc ) + /* uc = true, recover = true, we need not panic. */ +- recover = mc_recoverable_scan(status); ++ recover = alternative_call(mce_callbacks.recoverable_scan, status); + + mca_init_bank(who, mci, i); + +- if ( mc_callback_bank_extended ) +- mc_callback_bank_extended(mci, i, status); ++ if ( mce_callbacks.info_collect ) ++ alternative_vcall(mce_callbacks.info_collect, mci, i, status); + + /* By default, need_clear = true */ + if ( who != MCA_MCE_SCAN && need_clear ) +@@ -1913,9 +1876,11 @@ static void cf_check mce_softirq(void) + * will help to collect and log those MCE errors. + * Round2: Do all MCE processing logic as normal. + */ +-void __init mce_handler_init(void) ++void __init mce_handler_init(const struct mce_callbacks *cb) + { + /* callback register, do we really need so many callback? 
*/ ++ mce_callbacks = *cb; ++ + /* mce handler data initialization */ + spin_lock_init(&mce_logout_lock); + open_softirq(MACHINE_CHECK_SOFTIRQ, mce_softirq); +diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h +index 10ed059f7c..6bd25d4101 100644 +--- a/xen/arch/x86/cpu/mcheck/mce.h ++++ b/xen/arch/x86/cpu/mcheck/mce.h +@@ -62,20 +62,12 @@ void noreturn mc_panic(char *s); + void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *, + uint32_t *, uint32_t *, uint32_t *, uint32_t *); + +-/* Register a handler for machine check exceptions. */ +-typedef void (*x86_mce_vector_t)(const struct cpu_user_regs *regs); +-extern void x86_mce_vector_register(x86_mce_vector_t); +- + /* + * Common generic MCE handler that implementations may nominate + * via x86_mce_vector_register. + */ + void cf_check mcheck_cmn_handler(const struct cpu_user_regs *regs); + +-/* Register a handler for judging whether mce is recoverable. */ +-typedef bool (*mce_recoverable_t)(uint64_t status); +-extern void mce_recoverable_register(mce_recoverable_t); +- + /* Read an MSR, checking for an interposed value first */ + extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t, + uint64_t *); +@@ -134,30 +126,6 @@ extern void mcheck_mca_clearbanks(struct mca_banks *); + extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *, + struct mca_summary *, struct mca_banks *); + +-/* +- * Register callbacks to be made during bank telemetry logout. +- * Those callbacks are only available to those machine check handlers +- * that call to the common mcheck_cmn_handler or who use the common +- * telemetry logout function mcheck_mca_logout in error polling. +- */ +- +-/* Register a handler for judging whether the bank need to be cleared */ +-typedef bool (*mce_need_clearbank_t)(enum mca_source who, u64 status); +-extern void mce_need_clearbank_register(mce_need_clearbank_t); +- +-/* +- * Register a callback to collect additional information (typically non- +- * architectural) provided by newer CPU families/models without the need +- * to duplicate the whole handler resulting in various handlers each with +- * its own tweaks and bugs. The callback receives an struct mc_info pointer +- * which it can use with x86_mcinfo_reserve to add additional telemetry, +- * the current MCA bank number we are reading telemetry from, and the +- * MCi_STATUS value for that bank. +- */ +-typedef struct mcinfo_extended *(*x86_mce_callback_t) +- (struct mc_info *, uint16_t, uint64_t); +-extern void x86_mce_callback_register(x86_mce_callback_t); +- + void *x86_mcinfo_reserve(struct mc_info *mi, + unsigned int size, unsigned int type); + void x86_mcinfo_dump(struct mc_info *mi); +@@ -198,8 +166,44 @@ static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr) + return 0; + } + +-/* MC softirq */ +-void mce_handler_init(void); ++struct mce_callbacks { ++ void (*handler)(const struct cpu_user_regs *regs); ++ bool (*check_addr)(uint64_t status, uint64_t misc, int addr_type); ++ ++ /* Handler for judging whether mce is recoverable. */ ++ bool (*recoverable_scan)(uint64_t status); ++ ++ /* ++ * Callbacks to be made during bank telemetry logout. ++ * They are only available to those machine check handlers ++ * that call to the common mcheck_cmn_handler or who use the common ++ * telemetry logout function mcheck_mca_logout in error polling. ++ */ ++ ++ /* ++ * Judging whether to Clear Machine Check error bank callback handler. 
++ * According to Intel latest MCA OS Recovery Writer's Guide, whether ++ * the error MCA bank needs to be cleared is decided by the mca_source ++ * and MCi_status bit value. ++ */ ++ bool (*need_clearbank_scan)(enum mca_source who, u64 status); ++ ++ /* ++ * Callback to collect additional information (typically non- ++ * architectural) provided by newer CPU families/models without the need ++ * to duplicate the whole handler resulting in various handlers each with ++ * its own tweaks and bugs. The callback receives an struct mc_info pointer ++ * which it can use with x86_mcinfo_reserve to add additional telemetry, ++ * the current MCA bank number we are reading telemetry from, and the ++ * MCi_STATUS value for that bank. ++ */ ++ struct mcinfo_extended *(*info_collect) ++ (struct mc_info *mi, uint16_t bank, uint64_t status); ++}; ++ ++extern struct mce_callbacks mce_callbacks; ++ ++void mce_handler_init(const struct mce_callbacks *cb); + + extern const struct mca_error_handler *mce_dhandlers; + extern const struct mca_error_handler *mce_uhandlers; +diff --git a/xen/arch/x86/cpu/mcheck/mce_amd.c b/xen/arch/x86/cpu/mcheck/mce_amd.c +index cf80e1a275..f401f54fab 100644 +--- a/xen/arch/x86/cpu/mcheck/mce_amd.c ++++ b/xen/arch/x86/cpu/mcheck/mce_amd.c +@@ -283,6 +283,19 @@ int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + return 1; + } + ++static const struct mce_callbacks __initconst_cf_clobber k8_callbacks = { ++ .handler = mcheck_cmn_handler, ++ .need_clearbank_scan = amd_need_clearbank_scan, ++}; ++ ++static const struct mce_callbacks __initconst_cf_clobber k10_callbacks = { ++ .handler = mcheck_cmn_handler, ++ .check_addr = mc_amd_addrcheck, ++ .recoverable_scan = mc_amd_recoverable_scan, ++ .need_clearbank_scan = amd_need_clearbank_scan, ++ .info_collect = amd_f10_handler, ++}; ++ + enum mcheck_type + amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) + { +@@ -295,11 +308,7 @@ amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) + /* Assume that machine check support is available. + * The minimum provided support is at least the K8. */ + if ( bsp ) +- { +- mce_handler_init(); +- x86_mce_vector_register(mcheck_cmn_handler); +- mce_need_clearbank_register(amd_need_clearbank_scan); +- } ++ mce_handler_init(ci->x86 == 0xf ? &k8_callbacks : &k10_callbacks); + + for ( i = 0; i < this_cpu(nr_mce_banks); i++ ) + { +@@ -339,13 +348,6 @@ amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) + ppin_msr = MSR_AMD_PPIN; + } + +- if ( bsp ) +- { +- x86_mce_callback_register(amd_f10_handler); +- mce_recoverable_register(mc_amd_recoverable_scan); +- mce_register_addrcheck(mc_amd_addrcheck); +- } +- + return ci->x86_vendor == X86_VENDOR_HYGON ? + mcheck_hygon : mcheck_amd_famXX; + } +diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c +index 837a8c6d0c..dc7e1e61a6 100644 +--- a/xen/arch/x86/cpu/mcheck/mce_intel.c ++++ b/xen/arch/x86/cpu/mcheck/mce_intel.c +@@ -843,11 +843,6 @@ static void intel_init_mce(bool bsp) + if ( !bsp ) + return; + +- x86_mce_vector_register(mcheck_cmn_handler); +- mce_recoverable_register(intel_recoverable_scan); +- mce_need_clearbank_register(intel_need_clearbank_scan); +- mce_register_addrcheck(intel_checkaddr); +- + mce_dhandlers = intel_mce_dhandlers; + mce_dhandler_num = ARRAY_SIZE(intel_mce_dhandlers); + mce_uhandlers = intel_mce_uhandlers; +@@ -957,6 +952,13 @@ static int cf_check cpu_callback( + return !rc ? 
NOTIFY_DONE : notifier_from_errno(rc); + } + ++static const struct mce_callbacks __initconst_cf_clobber intel_callbacks = { ++ .handler = mcheck_cmn_handler, ++ .check_addr = intel_checkaddr, ++ .recoverable_scan = intel_recoverable_scan, ++ .need_clearbank_scan = intel_need_clearbank_scan, ++}; ++ + static struct notifier_block cpu_nfb = { + .notifier_call = cpu_callback + }; +@@ -983,7 +985,7 @@ enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp) + intel_init_mca(c); + + if ( bsp ) +- mce_handler_init(); ++ mce_handler_init(&intel_callbacks); + + intel_init_mce(bsp); + +-- +2.44.0 + diff --git a/0516-IRQ-generalize-gs-et_irq_regs.patch b/0516-IRQ-generalize-gs-et_irq_regs.patch new file mode 100644 index 00000000..c0d724c6 --- /dev/null +++ b/0516-IRQ-generalize-gs-et_irq_regs.patch @@ -0,0 +1,134 @@ +From e356ac136aac6cddf26f0287112813a9344a8aed Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Tue, 23 Jan 2024 12:03:23 +0100 +Subject: [PATCH 516/542] IRQ: generalize [gs]et_irq_regs() + +Move functions (and their data) to common code, and invoke the functions +on Arm as well. This is in preparation of dropping the register +parameters from handler functions. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Reviewed-by: Julien Grall +(cherry picked from commit f67bddf3bccd99a5fee968c3b3f288db6a57d3be) +--- + xen/arch/arm/irq.c | 2 ++ + xen/arch/x86/include/asm/irq.h | 21 --------------------- + xen/arch/x86/irq.c | 2 -- + xen/common/irq.c | 2 ++ + xen/include/xen/irq.h | 21 +++++++++++++++++++++ + 5 files changed, 25 insertions(+), 23 deletions(-) + +diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c +index fd0c15fffd..8649c636a3 100644 +--- a/xen/arch/arm/irq.c ++++ b/xen/arch/arm/irq.c +@@ -229,6 +229,7 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq) + { + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; ++ struct cpu_user_regs *old_regs = set_irq_regs(regs); + + perfc_incr(irqs); + +@@ -296,6 +297,7 @@ out: + out_no_end: + spin_unlock(&desc->lock); + irq_exit(); ++ set_irq_regs(old_regs); + } + + void release_irq(unsigned int irq, const void *dev_id) +diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h +index 823d627fd0..26850e5077 100644 +--- a/xen/arch/x86/include/asm/irq.h ++++ b/xen/arch/x86/include/asm/irq.h +@@ -70,27 +70,6 @@ extern bool opt_noirqbalance; + + extern int opt_irq_vector_map; + +-/* +- * Per-cpu current frame pointer - the location of the last exception frame on +- * the stack +- */ +-DECLARE_PER_CPU(struct cpu_user_regs *, __irq_regs); +- +-static inline struct cpu_user_regs *get_irq_regs(void) +-{ +- return this_cpu(__irq_regs); +-} +- +-static inline struct cpu_user_regs *set_irq_regs(struct cpu_user_regs *new_regs) +-{ +- struct cpu_user_regs *old_regs, **pp_regs = &this_cpu(__irq_regs); +- +- old_regs = *pp_regs; +- *pp_regs = new_regs; +- return old_regs; +-} +- +- + #define platform_legacy_irq(irq) ((irq) < 16) + + void cf_check event_check_interrupt(struct cpu_user_regs *regs); +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 51b4837cd3..abd6f577dd 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -53,8 +53,6 @@ static DEFINE_SPINLOCK(vector_lock); + + DEFINE_PER_CPU(vector_irq_t, vector_irq); + +-DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs); +- + static LIST_HEAD(irq_ratelimit_list); + static DEFINE_SPINLOCK(irq_ratelimit_lock); + static struct timer irq_ratelimit_timer; +diff --git a/xen/common/irq.c b/xen/common/irq.c +index 
727cf8bd22..236cf171e2 100644 +--- a/xen/common/irq.c ++++ b/xen/common/irq.c +@@ -1,6 +1,8 @@ + #include + #include + ++DEFINE_PER_CPU(struct cpu_user_regs *, irq_regs); ++ + int init_one_irq_desc(struct irq_desc *desc) + { + int err; +diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h +index 300625e56d..c93ef31a9c 100644 +--- a/xen/include/xen/irq.h ++++ b/xen/include/xen/irq.h +@@ -130,6 +130,27 @@ void cf_check irq_actor_none(struct irq_desc *); + #define irq_disable_none irq_actor_none + #define irq_enable_none irq_actor_none + ++/* ++ * Per-cpu interrupted context register state - the inner-most interrupt frame ++ * on the stack. ++ */ ++DECLARE_PER_CPU(struct cpu_user_regs *, irq_regs); ++ ++static inline struct cpu_user_regs *get_irq_regs(void) ++{ ++ return this_cpu(irq_regs); ++} ++ ++static inline struct cpu_user_regs *set_irq_regs(struct cpu_user_regs *new_regs) ++{ ++ struct cpu_user_regs *old_regs, **pp_regs = &this_cpu(irq_regs); ++ ++ old_regs = *pp_regs; ++ *pp_regs = new_regs; ++ ++ return old_regs; ++} ++ + struct domain; + struct vcpu; + +-- +2.44.0 + diff --git a/0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch b/0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch new file mode 100644 index 00000000..3418ab38 --- /dev/null +++ b/0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch @@ -0,0 +1,78 @@ +From 68468e5b0ff7f82518de6de8b74187f86a9b6e22 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 30 Jan 2024 10:13:58 +0100 +Subject: [PATCH 517/542] x86/spec-ctrl: Expose IPRED_CTRL to guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The CPUID feature bit signals the presence of the IPRED_DIS_{U,S} controls in +SPEC_CTRL MSR, first available in Intel AlderLake and Sapphire Rapids CPUs. + +Xen already knows how to context switch MSR_SPEC_CTRL properly between guest +and hypervisor context. + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 4dd6760706848de30f7c8b5f83462b9bcb070c91) +--- + xen/arch/x86/msr.c | 6 ++++-- + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + xen/tools/gen-cpuid.py | 3 ++- + 3 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index f7192c1ddf..ac01553598 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -316,8 +316,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) + + /* + * Caller to confirm that MSR_SPEC_CTRL is available. Intel and AMD have +- * separate CPUID features for this functionality, but only set will be +- * active. ++ * separate CPUID features for some of this functionality, but only one ++ * vendors-worth will be active on a single host. + */ + uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) + { +@@ -331,6 +331,8 @@ uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) + return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | + (ssbd ? SPEC_CTRL_SSBD : 0) | + (psfd ? SPEC_CTRL_PSFD : 0) | ++ (cp->feat.ipred_ctrl ++ ? 
(SPEC_CTRL_IPRED_DIS_U | SPEC_CTRL_IPRED_DIS_S) : 0) | + 0); + } + +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index bc971f3c6f..8708b934a0 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -295,7 +295,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory + + /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ + XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ +-XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /* MSR_SPEC_CTRL.IPRED_DIS_* */ ++XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ + XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /* MSR_SPEC_CTRL.RRSBA_DIS_* */ + XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */ + XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 636ff44c8b..9d1e47cfcd 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -318,7 +318,8 @@ def crunch_numbers(state): + # IBRSB/IBRS, and we pass this MSR directly to guests. Treating them + # as dependent features simplifies Xen's logic, and prevents the guest + # from seeing implausible configurations. +- IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS], ++ IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS, ++ IPRED_CTRL], + IBRS: [AMD_STIBP, AMD_SSBD, PSFD, + IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE], + IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE], +-- +2.44.0 + diff --git a/0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch b/0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch new file mode 100644 index 00000000..d938e285 --- /dev/null +++ b/0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch @@ -0,0 +1,66 @@ +From b7559a0488ac286d92a09fc00614207b32abb72d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 30 Jan 2024 10:13:59 +0100 +Subject: [PATCH 518/542] x86/spec-ctrl: Expose RRSBA_CTRL to guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The CPUID feature bit signals the presence of the RRSBA_DIS_{U,S} controls in +SPEC_CTRL MSR, first available in Intel AlderLake and Sapphire Rapids CPUs. + +Xen already knows how to context switch MSR_SPEC_CTRL properly between guest +and hypervisor context. + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 478e4787fa64b621061177a7843c452e9a19916d) +--- + xen/arch/x86/msr.c | 2 ++ + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + xen/tools/gen-cpuid.py | 2 +- + 3 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index ac01553598..615314f1e1 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -333,6 +333,8 @@ uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) + (psfd ? SPEC_CTRL_PSFD : 0) | + (cp->feat.ipred_ctrl + ? (SPEC_CTRL_IPRED_DIS_U | SPEC_CTRL_IPRED_DIS_S) : 0) | ++ (cp->feat.rrsba_ctrl ++ ? 
(SPEC_CTRL_RRSBA_DIS_U | SPEC_CTRL_RRSBA_DIS_S) : 0) | + 0); + } + +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 8708b934a0..0e1581cdac 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -296,7 +296,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory + /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ + XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ + XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ +-XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /* MSR_SPEC_CTRL.RRSBA_DIS_* */ ++XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*A MSR_SPEC_CTRL.RRSBA_DIS_* */ + XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */ + XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ + +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 9d1e47cfcd..09acb9764c 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -319,7 +319,7 @@ def crunch_numbers(state): + # as dependent features simplifies Xen's logic, and prevents the guest + # from seeing implausible configurations. + IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS, +- IPRED_CTRL], ++ IPRED_CTRL, RRSBA_CTRL], + IBRS: [AMD_STIBP, AMD_SSBD, PSFD, + IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE], + IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE], +-- +2.44.0 + diff --git a/0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch b/0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch new file mode 100644 index 00000000..54480584 --- /dev/null +++ b/0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch @@ -0,0 +1,65 @@ +From 363745e52dc758bdfb2fd42d32f12276c80ed447 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 30 Jan 2024 10:14:00 +0100 +Subject: [PATCH 519/542] x86/spec-ctrl: Expose BHI_CTRL to guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The CPUID feature bit signals the presence of the BHI_DIS_S control in +SPEC_CTRL MSR, first available in Intel AlderLake and Sapphire Rapids CPUs + +Xen already knows how to context switch MSR_SPEC_CTRL properly between guest +and hypervisor context. + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 583f1d0950529f3517b1741c2b21a028a82ba831) +--- + xen/arch/x86/msr.c | 1 + + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + xen/tools/gen-cpuid.py | 2 +- + 3 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 615314f1e1..51e6744e8f 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -335,6 +335,7 @@ uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) + ? (SPEC_CTRL_IPRED_DIS_U | SPEC_CTRL_IPRED_DIS_S) : 0) | + (cp->feat.rrsba_ctrl + ? (SPEC_CTRL_RRSBA_DIS_U | SPEC_CTRL_RRSBA_DIS_S) : 0) | ++ (cp->feat.bhi_ctrl ? 
SPEC_CTRL_BHI_DIS_S : 0) | + 0); + } + +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 0e1581cdac..51f238683c 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -297,7 +297,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory + XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ + XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ + XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*A MSR_SPEC_CTRL.RRSBA_DIS_* */ +-XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */ ++XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /*A MSR_SPEC_CTRL.BHI_DIS_S */ + XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 09acb9764c..a7c2ba3e5d 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -319,7 +319,7 @@ def crunch_numbers(state): + # as dependent features simplifies Xen's logic, and prevents the guest + # from seeing implausible configurations. + IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS, +- IPRED_CTRL, RRSBA_CTRL], ++ IPRED_CTRL, RRSBA_CTRL, BHI_CTRL], + IBRS: [AMD_STIBP, AMD_SSBD, PSFD, + IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE], + IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE], +-- +2.44.0 + diff --git a/0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch b/0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch new file mode 100644 index 00000000..fe10f3c7 --- /dev/null +++ b/0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch @@ -0,0 +1,69 @@ +From f2947a0da348eafc72e166dea14983f6d7c8300e Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 5 Feb 2024 10:44:46 +0100 +Subject: [PATCH 520/542] x86: arrange for ENDBR zapping from + _ctxt_switch_masking() + +While altcall is already used for them, the functions want announcing in +.init.rodata.cf_clobber, even if the resulting static variables aren't +otherwise used. + +While doing this also move ctxt_switch_masking to .data.ro_after_init. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 044168fa3a65b6542bda5c21e373742de1bd5980) +--- + xen/arch/x86/cpu/amd.c | 5 +++++ + xen/arch/x86/cpu/common.c | 2 +- + xen/arch/x86/cpu/intel.c | 5 +++++ + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 3d85e9797d..d5e9ad7598 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -258,6 +258,11 @@ static void cf_check amd_ctxt_switch_masking(const struct vcpu *next) + #undef LAZY + } + ++#ifdef CONFIG_XEN_IBT /* Announce the function to ENDBR clobbering logic. */ ++static const typeof(ctxt_switch_masking) __initconst_cf_clobber __used csm = ++ amd_ctxt_switch_masking; ++#endif ++ + /* + * Mask the features and extended features returned by CPUID. 
Parameters are + * set from the boot line via two methods: +diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c +index 54ea7fa831..60e472da26 100644 +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -121,7 +121,7 @@ static const struct cpu_dev default_cpu = { + static const struct cpu_dev *this_cpu = &default_cpu; + + static DEFINE_PER_CPU(uint64_t, msr_misc_features); +-void (* __read_mostly ctxt_switch_masking)(const struct vcpu *next); ++void (* __ro_after_init ctxt_switch_masking)(const struct vcpu *next); + + bool __init probe_cpuid_faulting(void) + { +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index 96723b5d44..532e845f66 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -220,6 +220,11 @@ static void cf_check intel_ctxt_switch_masking(const struct vcpu *next) + #undef LAZY + } + ++#ifdef CONFIG_XEN_IBT /* Announce the function to ENDBR clobbering logic. */ ++static const typeof(ctxt_switch_masking) __initconst_cf_clobber __used csm = ++ intel_ctxt_switch_masking; ++#endif ++ + /* + * opt_cpuid_mask_ecx/edx: cpuid.1[ecx, edx] feature mask. + * For example, E8400[Intel Core 2 Duo Processor series] ecx = 0x0008E3FD, +-- +2.44.0 + diff --git a/0521-x86-guest-finish-conversion-to-altcall.patch b/0521-x86-guest-finish-conversion-to-altcall.patch new file mode 100644 index 00000000..6b654b8f --- /dev/null +++ b/0521-x86-guest-finish-conversion-to-altcall.patch @@ -0,0 +1,84 @@ +From d11255f909e4b77ae1d1aa7e928cdfca5964a574 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 5 Feb 2024 10:45:31 +0100 +Subject: [PATCH 521/542] x86/guest: finish conversion to altcall + +While .setup() and .e820_fixup() don't need fiddling with for being run +only very early, both .ap_setup() and .resume() want converting too: +This way both pre-filled struct hypervisor_ops instances can become +__initconst_cf_clobber, thus allowing to eliminate up to 5 more ENDBR +(configuration dependent) during the 2nd phase of alternatives patching. + +While fiddling with section annotations here, also move "ops" itself to +.data.ro_after_init. 
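+
+A plain-C model of the dispatch being converted (illustrative only; the
+actual alternative_call() machinery patches the generated code and is not
+shown): hooks stay optional and NULL-checked, and only the ones invoked
+after alternatives patching gain anything from the conversion:
+
+    struct hv_ops {
+        const char *name;
+        void (*setup)(void);     /* runs before patching: plain call is fine */
+        int (*ap_setup)(void);   /* converted: called after patching */
+        void (*resume)(void);    /* converted: called after patching */
+    };
+
+    static struct hv_ops ops;    /* __ro_after_init in the real code */
+
+    static int hv_ap_setup(void)
+    {
+        /* alternative_call(ops.ap_setup) in Xen proper. */
+        return ops.ap_setup ? ops.ap_setup() : 0;
+    }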
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Paul Durrant +(cherry picked from commit e931edccc53c9dd6e9a505ad0ff3a03d985669bc) +--- + xen/arch/x86/guest/hyperv/hyperv.c | 2 +- + xen/arch/x86/guest/hypervisor.c | 6 +++--- + xen/arch/x86/guest/xen/xen.c | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c +index b101ba3080..5c58a0c457 100644 +--- a/xen/arch/x86/guest/hyperv/hyperv.c ++++ b/xen/arch/x86/guest/hyperv/hyperv.c +@@ -219,7 +219,7 @@ static int cf_check flush_tlb( + return hyperv_flush_tlb(mask, va, flags); + } + +-static const struct hypervisor_ops __initconstrel ops = { ++static const struct hypervisor_ops __initconst_cf_clobber ops = { + .name = "Hyper-V", + .setup = setup, + .ap_setup = ap_setup, +diff --git a/xen/arch/x86/guest/hypervisor.c b/xen/arch/x86/guest/hypervisor.c +index 366af1d650..c3e10c3586 100644 +--- a/xen/arch/x86/guest/hypervisor.c ++++ b/xen/arch/x86/guest/hypervisor.c +@@ -25,7 +25,7 @@ + #include + #include + +-static struct hypervisor_ops __read_mostly ops; ++static struct hypervisor_ops __ro_after_init ops; + + const char *__init hypervisor_probe(void) + { +@@ -61,7 +61,7 @@ void __init hypervisor_setup(void) + int hypervisor_ap_setup(void) + { + if ( ops.ap_setup ) +- return ops.ap_setup(); ++ return alternative_call(ops.ap_setup); + + return 0; + } +@@ -69,7 +69,7 @@ int hypervisor_ap_setup(void) + void hypervisor_resume(void) + { + if ( ops.resume ) +- ops.resume(); ++ alternative_vcall(ops.resume); + } + + void __init hypervisor_e820_fixup(struct e820map *e820) +diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c +index 9c2defaa66..c4cb16df38 100644 +--- a/xen/arch/x86/guest/xen/xen.c ++++ b/xen/arch/x86/guest/xen/xen.c +@@ -330,7 +330,7 @@ static int cf_check flush_tlb( + return xen_hypercall_hvm_op(HVMOP_flush_tlbs, NULL); + } + +-static const struct hypervisor_ops __initconstrel ops = { ++static const struct hypervisor_ops __initconst_cf_clobber ops = { + .name = "Xen", + .setup = setup, + .ap_setup = ap_setup, +-- +2.44.0 + diff --git a/0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch b/0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch new file mode 100644 index 00000000..7f668768 --- /dev/null +++ b/0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch @@ -0,0 +1,152 @@ +From 6b899fe735d040356ead7170e0fe10f6668624d0 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Mon, 5 Feb 2024 10:48:11 +0100 +Subject: [PATCH 522/542] x86/CPU: convert vendor hook invocations to altcall + +While not performance critical, these hook invocations still want +converting: This way all pre-filled struct cpu_dev instances can become +__initconst_cf_clobber, thus allowing to eliminate further 8 ENDBR +during the 2nd phase of alternatives patching (besides moving previously +resident data to .init.*). + +Since all use sites need touching anyway, take the opportunity and also +address a Misra C:2012 Rule 5.5 violation: Rename the this_cpu static +variable. 
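+
+Reduced to a sketch (vendor set trimmed, ids invented; the real table also
+covers Centaur, Shanghai and Hygon): rather than keeping a pointer to the
+selected vendor's cpu_dev, the instance is copied by value into a single
+write-once object, which is what lets every source instance move to
+__initconst_cf_clobber and be discarded after boot:
+
+    struct cpu_dev {
+        void (*c_early_init)(void);
+        void (*c_init)(void);
+    };
+
+    static void nop_init(void) { }
+
+    static const struct cpu_dev intel_dev = { nop_init, nop_init };
+    static const struct cpu_dev amd_dev = { nop_init, nop_init };
+    static const struct cpu_dev fallback_dev = { .c_init = nop_init };
+
+    static struct cpu_dev actual;   /* __ro_after_init in the real code */
+
+    static void select_vendor(int vendor)
+    {
+        switch (vendor) {
+        case 0: actual = intel_dev; break;    /* illustrative vendor ids */
+        case 1: actual = amd_dev; break;
+        default: actual = fallback_dev; break;
+        }
+    }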
+ +Signed-off-by: Jan Beulich +Acked-by: Andrew Cooper +(cherry picked from commit 660f8a75013c947fbe5358a640032a1f9f1eece5) +--- + xen/arch/x86/cpu/amd.c | 2 +- + xen/arch/x86/cpu/centaur.c | 2 +- + xen/arch/x86/cpu/common.c | 24 ++++++++++++------------ + xen/arch/x86/cpu/hygon.c | 2 +- + xen/arch/x86/cpu/intel.c | 2 +- + xen/arch/x86/cpu/shanghai.c | 2 +- + 6 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index d5e9ad7598..2838725bab 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -1286,7 +1286,7 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) + amd_log_freq(c); + } + +-const struct cpu_dev amd_cpu_dev = { ++const struct cpu_dev __initconst_cf_clobber amd_cpu_dev = { + .c_early_init = early_init_amd, + .c_init = init_amd, + }; +diff --git a/xen/arch/x86/cpu/centaur.c b/xen/arch/x86/cpu/centaur.c +index eac49d78db..750168d1e8 100644 +--- a/xen/arch/x86/cpu/centaur.c ++++ b/xen/arch/x86/cpu/centaur.c +@@ -54,6 +54,6 @@ static void cf_check init_centaur(struct cpuinfo_x86 *c) + init_c3(c); + } + +-const struct cpu_dev centaur_cpu_dev = { ++const struct cpu_dev __initconst_cf_clobber centaur_cpu_dev = { + .c_init = init_centaur, + }; +diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c +index 60e472da26..88855f5773 100644 +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -115,10 +115,10 @@ static void cf_check default_init(struct cpuinfo_x86 * c) + __clear_bit(X86_FEATURE_SEP, c->x86_capability); + } + +-static const struct cpu_dev default_cpu = { ++static const struct cpu_dev __initconst_cf_clobber __used default_cpu = { + .c_init = default_init, + }; +-static const struct cpu_dev *this_cpu = &default_cpu; ++static struct cpu_dev __ro_after_init actual_cpu; + + static DEFINE_PER_CPU(uint64_t, msr_misc_features); + void (* __ro_after_init ctxt_switch_masking)(const struct vcpu *next); +@@ -343,12 +343,13 @@ void __init early_cpu_init(void) + + c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); + switch (c->x86_vendor) { +- case X86_VENDOR_INTEL: this_cpu = &intel_cpu_dev; break; +- case X86_VENDOR_AMD: this_cpu = &amd_cpu_dev; break; +- case X86_VENDOR_CENTAUR: this_cpu = ¢aur_cpu_dev; break; +- case X86_VENDOR_SHANGHAI: this_cpu = &shanghai_cpu_dev; break; +- case X86_VENDOR_HYGON: this_cpu = &hygon_cpu_dev; break; ++ case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break; ++ case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break; ++ case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break; ++ case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break; ++ case X86_VENDOR_HYGON: actual_cpu = hygon_cpu_dev; break; + default: ++ actual_cpu = default_cpu; + printk(XENLOG_ERR + "Unrecognised or unsupported CPU vendor '%.12s'\n", + c->x86_vendor_id); +@@ -434,8 +435,8 @@ static void generic_identify(struct cpuinfo_x86 *c) + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + c->phys_proc_id = c->apicid; + +- if (this_cpu->c_early_init) +- this_cpu->c_early_init(c); ++ if (actual_cpu.c_early_init) ++ alternative_vcall(actual_cpu.c_early_init, c); + + /* c_early_init() may have adjusted cpuid levels/features. Reread. */ + c->cpuid_level = cpuid_eax(0); +@@ -540,9 +541,8 @@ void identify_cpu(struct cpuinfo_x86 *c) + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! 
+ */ +- if (this_cpu->c_init) +- this_cpu->c_init(c); +- ++ if (actual_cpu.c_init) ++ alternative_vcall(actual_cpu.c_init, c); + + if (c == &boot_cpu_data && !opt_pku) + setup_clear_cpu_cap(X86_FEATURE_PKU); +diff --git a/xen/arch/x86/cpu/hygon.c b/xen/arch/x86/cpu/hygon.c +index 361eb6fd41..0c7c97ebb7 100644 +--- a/xen/arch/x86/cpu/hygon.c ++++ b/xen/arch/x86/cpu/hygon.c +@@ -88,7 +88,7 @@ static void cf_check init_hygon(struct cpuinfo_x86 *c) + amd_log_freq(c); + } + +-const struct cpu_dev hygon_cpu_dev = { ++const struct cpu_dev __initconst_cf_clobber hygon_cpu_dev = { + .c_early_init = early_init_amd, + .c_init = init_hygon, + }; +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index 532e845f66..2d439e0bd2 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -598,7 +598,7 @@ static void cf_check init_intel(struct cpuinfo_x86 *c) + setup_clear_cpu_cap(X86_FEATURE_CLWB); + } + +-const struct cpu_dev intel_cpu_dev = { ++const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = { + .c_early_init = early_init_intel, + .c_init = init_intel, + }; +diff --git a/xen/arch/x86/cpu/shanghai.c b/xen/arch/x86/cpu/shanghai.c +index 95ae544f8c..910f2c32f3 100644 +--- a/xen/arch/x86/cpu/shanghai.c ++++ b/xen/arch/x86/cpu/shanghai.c +@@ -15,6 +15,6 @@ static void cf_check init_shanghai(struct cpuinfo_x86 *c) + init_intel_cacheinfo(c); + } + +-const struct cpu_dev shanghai_cpu_dev = { ++const struct cpu_dev __initconst_cf_clobber shanghai_cpu_dev = { + .c_init = init_shanghai, + }; +-- +2.44.0 + diff --git a/0523-VMX-tertiary-execution-control-infrastructure.patch b/0523-VMX-tertiary-execution-control-infrastructure.patch new file mode 100644 index 00000000..e8d0b62c --- /dev/null +++ b/0523-VMX-tertiary-execution-control-infrastructure.patch @@ -0,0 +1,256 @@ +From 91c2a92231af71a50557c65e32e2f838ae3aed14 Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Wed, 7 Feb 2024 13:46:11 +0100 +Subject: [PATCH 523/542] VMX: tertiary execution control infrastructure +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is a prereq to enabling e.g. the MSRLIST feature. + +Note that the PROCBASED_CTLS3 MSR is different from other VMX feature +reporting MSRs, in that all 64 bits report allowed 1-settings. + +vVMX code is left alone, though, for the time being. 
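+
+The adjustment logic being added can be shown in isolation (a sketch of
+adjust_vmx_controls2() with the rdmsrl() replaced by a parameter; note
+that, unlike most VMX capability MSRs, PROCBASED_CTLS3 reports allowed
+1-settings in all 64 bits, so no high/low split is needed):
+
+    #include <stdint.h>
+    #include <stdbool.h>
+
+    static uint64_t adjust_controls(uint64_t min, uint64_t opt,
+                                    uint64_t allowed1, bool *mismatch)
+    {
+        uint64_t ctl = (min | opt) & allowed1;  /* clear bit => keep zero */
+
+        if (min & ~ctl)   /* a required control is unavailable */
+            *mismatch = true;
+
+        return ctl;
+    }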
+ +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné +(cherry picked from commit 878159bf259bfbd7a40312829f1ea0ce1f6645e2) +--- + xen/arch/x86/hvm/vmx/vmcs.c | 57 ++++++++++++++++++++++--- + xen/arch/x86/hvm/vmx/vmx.c | 6 +++ + xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 13 ++++++ + xen/arch/x86/include/asm/hvm/vmx/vmx.h | 1 + + xen/arch/x86/include/asm/msr-index.h | 1 + + 5 files changed, 72 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c +index b5ecc51b43..49d51fb524 100644 +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -176,6 +176,7 @@ static int cf_check parse_ept_param_runtime(const char *s) + u32 vmx_pin_based_exec_control __read_mostly; + u32 vmx_cpu_based_exec_control __read_mostly; + u32 vmx_secondary_exec_control __read_mostly; ++uint64_t vmx_tertiary_exec_control __read_mostly; + u32 vmx_vmexit_control __read_mostly; + u32 vmx_vmentry_control __read_mostly; + u64 vmx_ept_vpid_cap __read_mostly; +@@ -241,10 +242,32 @@ static u32 adjust_vmx_controls( + return ctl; + } + +-static bool_t cap_check(const char *name, u32 expected, u32 saw) ++static uint64_t adjust_vmx_controls2( ++ const char *name, uint64_t ctl_min, uint64_t ctl_opt, unsigned int msr, ++ bool *mismatch) ++{ ++ uint64_t vmx_msr, ctl = ctl_min | ctl_opt; ++ ++ rdmsrl(msr, vmx_msr); ++ ++ ctl &= vmx_msr; /* bit == 0 ==> must be zero */ ++ ++ /* Ensure minimum (required) set of control bits are supported. */ ++ if ( ctl_min & ~ctl ) ++ { ++ *mismatch = true; ++ printk("VMX: CPU%u has insufficient %s (%#lx; requires %#lx)\n", ++ smp_processor_id(), name, ctl, ctl_min); ++ } ++ ++ return ctl; ++} ++ ++static bool cap_check( ++ const char *name, unsigned long expected, unsigned long saw) + { + if ( saw != expected ) +- printk("VMX %s: saw %#x expected %#x\n", name, saw, expected); ++ printk("VMX %s: saw %#lx expected %#lx\n", name, saw, expected); + return saw != expected; + } + +@@ -254,6 +277,7 @@ static int vmx_init_vmcs_config(bool bsp) + u32 _vmx_pin_based_exec_control; + u32 _vmx_cpu_based_exec_control; + u32 _vmx_secondary_exec_control = 0; ++ uint64_t _vmx_tertiary_exec_control = 0; + u64 _vmx_ept_vpid_cap = 0; + u64 _vmx_misc_cap = 0; + u32 _vmx_vmexit_control; +@@ -287,7 +311,8 @@ static int vmx_init_vmcs_config(bool bsp) + opt = (CPU_BASED_ACTIVATE_MSR_BITMAP | + CPU_BASED_TPR_SHADOW | + CPU_BASED_MONITOR_TRAP_FLAG | +- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); ++ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | ++ CPU_BASED_ACTIVATE_TERTIARY_CONTROLS); + _vmx_cpu_based_exec_control = adjust_vmx_controls( + "CPU-Based Exec Control", min, opt, + MSR_IA32_VMX_PROCBASED_CTLS, &mismatch); +@@ -351,6 +376,15 @@ static int vmx_init_vmcs_config(bool bsp) + MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch); + } + ++ if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS ) ++ { ++ uint64_t opt = 0; ++ ++ _vmx_tertiary_exec_control = adjust_vmx_controls2( ++ "Tertiary Exec Control", 0, opt, ++ MSR_IA32_VMX_PROCBASED_CTLS3, &mismatch); ++ } ++ + /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID available */ + if ( _vmx_secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT | + SECONDARY_EXEC_ENABLE_VPID) ) +@@ -481,6 +515,7 @@ static int vmx_init_vmcs_config(bool bsp) + vmx_pin_based_exec_control = _vmx_pin_based_exec_control; + vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control; + vmx_secondary_exec_control = _vmx_secondary_exec_control; ++ vmx_tertiary_exec_control = _vmx_tertiary_exec_control; + vmx_ept_vpid_cap = 
_vmx_ept_vpid_cap; + vmx_vmexit_control = _vmx_vmexit_control; + vmx_vmentry_control = _vmx_vmentry_control; +@@ -516,6 +551,9 @@ static int vmx_init_vmcs_config(bool bsp) + mismatch |= cap_check( + "Secondary Exec Control", + vmx_secondary_exec_control, _vmx_secondary_exec_control); ++ mismatch |= cap_check( ++ "Tertiary Exec Control", ++ vmx_tertiary_exec_control, _vmx_tertiary_exec_control); + mismatch |= cap_check( + "VMExit Control", + vmx_vmexit_control, _vmx_vmexit_control); +@@ -1092,6 +1130,7 @@ static int construct_vmcs(struct vcpu *v) + v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING; + + v->arch.hvm.vmx.secondary_exec_control = vmx_secondary_exec_control; ++ v->arch.hvm.vmx.tertiary_exec_control = vmx_tertiary_exec_control; + + /* + * Disable features which we don't want active by default: +@@ -1146,6 +1185,10 @@ static int construct_vmcs(struct vcpu *v) + __vmwrite(SECONDARY_VM_EXEC_CONTROL, + v->arch.hvm.vmx.secondary_exec_control); + ++ if ( cpu_has_vmx_tertiary_exec_control ) ++ __vmwrite(TERTIARY_VM_EXEC_CONTROL, ++ v->arch.hvm.vmx.tertiary_exec_control); ++ + /* MSR access bitmap. */ + if ( cpu_has_vmx_msr_bitmap ) + { +@@ -2069,10 +2112,12 @@ void vmcs_dump_vcpu(struct vcpu *v) + vmr(HOST_PERF_GLOBAL_CTRL)); + + printk("*** Control State ***\n"); +- printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", ++ printk("PinBased=%08x CPUBased=%08x\n", + vmr32(PIN_BASED_VM_EXEC_CONTROL), +- vmr32(CPU_BASED_VM_EXEC_CONTROL), +- vmr32(SECONDARY_VM_EXEC_CONTROL)); ++ vmr32(CPU_BASED_VM_EXEC_CONTROL)); ++ printk("SecondaryExec=%08x TertiaryExec=%016lx\n", ++ vmr32(SECONDARY_VM_EXEC_CONTROL), ++ vmr(TERTIARY_VM_EXEC_CONTROL)); + printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); + printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", + vmr32(EXCEPTION_BITMAP), +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index fed362bc32..26b6e4ca61 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -725,6 +725,12 @@ void vmx_update_secondary_exec_control(struct vcpu *v) + v->arch.hvm.vmx.secondary_exec_control); + } + ++void vmx_update_tertiary_exec_control(const struct vcpu *v) ++{ ++ __vmwrite(TERTIARY_VM_EXEC_CONTROL, ++ v->arch.hvm.vmx.tertiary_exec_control); ++} ++ + void vmx_update_exception_bitmap(struct vcpu *v) + { + u32 bitmap = unlikely(v->arch.hvm.vmx.vmx_realmode) +diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h +index 0af021d5f5..bbb0966fc3 100644 +--- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h ++++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h +@@ -125,6 +125,7 @@ struct vmx_vcpu { + /* Cache of cpu execution control. 
*/ + u32 exec_control; + u32 secondary_exec_control; ++ uint64_t tertiary_exec_control; + u32 exception_bitmap; + + uint64_t shadow_gs; +@@ -207,6 +208,7 @@ void vmx_vmcs_reload(struct vcpu *v); + #define CPU_BASED_RDTSC_EXITING 0x00001000 + #define CPU_BASED_CR3_LOAD_EXITING 0x00008000 + #define CPU_BASED_CR3_STORE_EXITING 0x00010000 ++#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS 0x00020000 + #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 + #define CPU_BASED_CR8_STORE_EXITING 0x00100000 + #define CPU_BASED_TPR_SHADOW 0x00200000 +@@ -271,6 +273,14 @@ extern u32 vmx_vmentry_control; + #define SECONDARY_EXEC_NOTIFY_VM_EXITING 0x80000000 + extern u32 vmx_secondary_exec_control; + ++#define TERTIARY_EXEC_LOADIWKEY_EXITING BIT(0, UL) ++#define TERTIARY_EXEC_ENABLE_HLAT BIT(1, UL) ++#define TERTIARY_EXEC_EPT_PAGING_WRITE BIT(2, UL) ++#define TERTIARY_EXEC_GUEST_PAGING_VERIFY BIT(3, UL) ++#define TERTIARY_EXEC_IPI_VIRT BIT(4, UL) ++#define TERTIARY_EXEC_VIRT_SPEC_CTRL BIT(7, UL) ++extern uint64_t vmx_tertiary_exec_control; ++ + #define VMX_EPT_EXEC_ONLY_SUPPORTED 0x00000001 + #define VMX_EPT_WALK_LENGTH_4_SUPPORTED 0x00000040 + #define VMX_EPT_MEMORY_TYPE_UC 0x00000100 +@@ -307,6 +317,8 @@ extern u64 vmx_ept_vpid_cap; + (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP) + #define cpu_has_vmx_secondary_exec_control \ + (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ++#define cpu_has_vmx_tertiary_exec_control \ ++ (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) + #define cpu_has_vmx_ept \ + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) + #define cpu_has_vmx_dt_exiting \ +@@ -430,6 +442,7 @@ enum vmcs_field { + VIRT_EXCEPTION_INFO = 0x0000202a, + XSS_EXIT_BITMAP = 0x0000202c, + TSC_MULTIPLIER = 0x00002032, ++ TERTIARY_VM_EXEC_CONTROL = 0x00002034, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + VMCS_LINK_POINTER = 0x00002800, + GUEST_IA32_DEBUGCTL = 0x00002802, +diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmx.h b/xen/arch/x86/include/asm/hvm/vmx/vmx.h +index 8e1e42ac47..4ff19488ea 100644 +--- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h ++++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h +@@ -102,6 +102,7 @@ void vmx_update_debug_state(struct vcpu *v); + void vmx_update_exception_bitmap(struct vcpu *v); + void vmx_update_cpu_exec_control(struct vcpu *v); + void vmx_update_secondary_exec_control(struct vcpu *v); ++void vmx_update_tertiary_exec_control(const struct vcpu *v); + + #define POSTED_INTR_ON 0 + #define POSTED_INTR_SN 1 +diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h +index 9b5f67711f..521079191a 100644 +--- a/xen/arch/x86/include/asm/msr-index.h ++++ b/xen/arch/x86/include/asm/msr-index.h +@@ -327,6 +327,7 @@ + #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x48f + #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x490 + #define MSR_IA32_VMX_VMFUNC 0x491 ++#define MSR_IA32_VMX_PROCBASED_CTLS3 0x492 + + /* K7/K8 MSRs. Not complete. See the architecture manual for a more + complete list. 
*/ +-- +2.44.0 + diff --git a/0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch b/0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch new file mode 100644 index 00000000..ef92b0bf --- /dev/null +++ b/0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch @@ -0,0 +1,71 @@ +From a0bd738f9cd158306e046c7a6f3726128219e4eb Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 28 Mar 2024 12:38:32 +0000 +Subject: [PATCH 524/542] x86/spec-ctrl: Move __read_mostly data into + __ro_after_init + +These variables predate the introduction of __ro_after_init, but all qualify. +Update them to be consistent with the rest of the file. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 7a09966e7b2823b70f6d56d0cf66c11124f4a3c1) +--- + xen/arch/x86/spec_ctrl.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index ac21af2c5c..0a2de88593 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -61,17 +61,17 @@ bool __ro_after_init opt_ssbd; + int8_t __initdata opt_psfd = -1; + + int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; +-int8_t __read_mostly opt_eager_fpu = -1; +-int8_t __read_mostly opt_l1d_flush = -1; ++int8_t __ro_after_init opt_eager_fpu = -1; ++int8_t __ro_after_init opt_l1d_flush = -1; + static bool __initdata opt_branch_harden = + IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); + + bool __initdata bsp_delay_spec_ctrl; +-uint8_t __read_mostly default_xen_spec_ctrl; +-uint8_t __read_mostly default_spec_ctrl_flags; ++uint8_t __ro_after_init default_xen_spec_ctrl; ++uint8_t __ro_after_init default_spec_ctrl_flags; + +-paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr; +-bool __read_mostly cpu_has_bug_l1tf; ++paddr_t __ro_after_init l1tf_addr_mask, __ro_after_init l1tf_safe_maddr; ++bool __ro_after_init cpu_has_bug_l1tf; + static unsigned int __initdata l1d_maxphysaddr; + + static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ +@@ -328,8 +328,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) + } + custom_param("spec-ctrl", parse_spec_ctrl); + +-int8_t __read_mostly opt_xpti_hwdom = -1; +-int8_t __read_mostly opt_xpti_domu = -1; ++int8_t __ro_after_init opt_xpti_hwdom = -1; ++int8_t __ro_after_init opt_xpti_domu = -1; + + static __init void xpti_init_default(void) + { +@@ -393,8 +393,8 @@ static int __init cf_check parse_xpti(const char *s) + } + custom_param("xpti", parse_xpti); + +-int8_t __read_mostly opt_pv_l1tf_hwdom = -1; +-int8_t __read_mostly opt_pv_l1tf_domu = -1; ++int8_t __ro_after_init opt_pv_l1tf_hwdom = -1; ++int8_t __ro_after_init opt_pv_l1tf_domu = -1; + + static int __init cf_check parse_pv_l1tf(const char *s) + { +-- +2.44.0 + diff --git a/0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch b/0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch new file mode 100644 index 00000000..a7f82222 --- /dev/null +++ b/0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch @@ -0,0 +1,123 @@ +From 81ebc5abe77223783da0ae567408d8addebd83a7 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Wed, 3 Apr 2024 17:43:42 +0100 +Subject: [PATCH 525/542] x86/tsx: Cope with RTM_ALWAYS_ABORT vs RTM mismatch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +It turns out there is something wonky on some but not all CPUs with +MSR_TSX_FORCE_ABORT. 
The presence of RTM_ALWAYS_ABORT causes Xen to think +it's safe to offer HLE/RTM to guests, but in this case, XBEGIN instructions +genuinely #UD. + +Spot this case and try to back out as cleanly as we can. + +Signed-off-by: Andrew Cooper +Tested-by: Marek Marczykowski-Górecki +Acked-by: Jan Beulich +(cherry picked from commit b33f191e3ca99458fdcea1cb5a29dfa4965d1604) +--- + xen/arch/x86/tsx.c | 55 +++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 45 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c +index 80c6f4cedd..a019400c96 100644 +--- a/xen/arch/x86/tsx.c ++++ b/xen/arch/x86/tsx.c +@@ -1,5 +1,6 @@ + #include + #include ++#include + #include + + /* +@@ -9,6 +10,7 @@ + * -1 => Default, altered to 0/1 (if unspecified) by: + * - TAA heuristics/settings for speculative safety + * - "TSX vs PCR3" select for TSX memory ordering safety ++ * -2 => Implicit tsx=0 (from RTM_ALWAYS_ABORT vs RTM mismatch) + * -3 => Implicit tsx=1 (feed-through from spec-ctrl=0) + * + * This is arranged such that the bottom bit encodes whether TSX is actually +@@ -122,11 +124,50 @@ void tsx_init(void) + + if ( cpu_has_tsx_force_abort ) + { ++ uint64_t val; ++ + /* +- * On an early TSX-enable Skylake part subject to the memory ++ * On an early TSX-enabled Skylake part subject to the memory + * ordering erratum, with at least the March 2019 microcode. + */ + ++ rdmsrl(MSR_TSX_FORCE_ABORT, val); ++ ++ /* ++ * At the time of writing (April 2024), it was discovered that ++ * some parts (e.g. CoffeeLake 8th Gen, 06-9e-0a, ucode 0xf6) ++ * advertise RTM_ALWAYS_ABORT, but XBEGIN instructions #UD. Other ++ * similar parts (e.g. KabyLake Xeon-E3, 06-9e-09, ucode 0xf8) ++ * operate as expected. ++ * ++ * In this case: ++ * - RTM_ALWAYS_ABORT and MSR_TSX_FORCE_ABORT are enumerated. ++ * - XBEGIN instructions genuinely #UD. ++ * - MSR_TSX_FORCE_ABORT appears to be write-discard and fails to ++ * hold its value. ++ * - HLE and RTM are not enumerated, despite ++ * MSR_TSX_FORCE_ABORT.TSX_CPUID_CLEAR being clear. ++ * ++ * Spot RTM being unavailable without CLEAR_CPUID being set, and ++ * treat it as if no TSX is available at all. This will prevent ++ * Xen from thinking it's safe to offer HLE/RTM to VMs. ++ */ ++ if ( val == 0 && cpu_has_rtm_always_abort && !cpu_has_rtm ) ++ { ++ printk(XENLOG_ERR ++ "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RTM_ALWAYS_ABORT vs RTM mismatch\n", ++ boot_cpu_data.x86, boot_cpu_data.x86_model, ++ boot_cpu_data.x86_mask, this_cpu(cpu_sig).rev); ++ ++ setup_clear_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT); ++ setup_clear_cpu_cap(X86_FEATURE_TSX_FORCE_ABORT); ++ ++ if ( opt_tsx < 0 ) ++ opt_tsx = -2; ++ ++ goto done_probe; ++ } ++ + /* + * Probe for the June 2021 microcode which de-features TSX on + * client parts. (Note - this is a subset of parts impacted by +@@ -136,15 +177,8 @@ void tsx_init(void) + * read as zero if TSX_FORCE_ABORT.ENABLE_RTM has been set before + * we run. + */ +- if ( !has_rtm_always_abort ) +- { +- uint64_t val; +- +- rdmsrl(MSR_TSX_FORCE_ABORT, val); +- +- if ( val & TSX_ENABLE_RTM ) +- has_rtm_always_abort = true; +- } ++ if ( val & TSX_ENABLE_RTM ) ++ has_rtm_always_abort = true; + + /* + * If no explicit tsx= option is provided, pick a default. +@@ -199,6 +233,7 @@ void tsx_init(void) + setup_force_cpu_cap(X86_FEATURE_RTM); + } + } ++ done_probe: + + /* + * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts. 
+-- +2.44.0 + diff --git a/0526-x86-alternatives-fix-.init-section-reference-in-_app.patch b/0526-x86-alternatives-fix-.init-section-reference-in-_app.patch new file mode 100644 index 00000000..00915b45 --- /dev/null +++ b/0526-x86-alternatives-fix-.init-section-reference-in-_app.patch @@ -0,0 +1,43 @@ +From e60fc805d8a2ee2822dc96715bca44ebed135a8c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= +Date: Tue, 9 Apr 2024 14:50:46 +0200 +Subject: [PATCH 526/542] x86/alternatives: fix .init section reference in + _apply_alternatives() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The code in _apply_alternatives() will unconditionally attempt to read +__initdata_cf_clobber_{start,end} when called as part of applying alternatives +to a livepatch payload when Xen is using IBT. + +That leads to a page-fault as __initdata_cf_clobber_{start,end} living in +.init section will have been unmapped by the time a livepatch gets loaded. + +Fix by adding a check that limits the clobbering of endbr64 instructions to +boot time only. + +Fixes: 37ed5da851b8 ('x86/altcall: Optimise away endbr64 instruction where possible') +Signed-off-by: Roger Pau Monné +Reviewed-by: Andrew Cooper +(cherry picked from commit 4be1fef1e6572c2be0bd378902ffb62a6e73faeb) +--- + xen/arch/x86/alternative.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c +index 1d59dffc46..8356414be7 100644 +--- a/xen/arch/x86/alternative.c ++++ b/xen/arch/x86/alternative.c +@@ -338,7 +338,7 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start, + * Clobber endbr64 instructions now that altcall has finished optimising + * all indirect branches to direct ones. + */ +- if ( force && cpu_has_xen_ibt ) ++ if ( force && cpu_has_xen_ibt && system_state < SYS_STATE_active ) + { + void *const *val; + unsigned int clobbered = 0; +-- +2.44.0 + diff --git a/0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch b/0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch new file mode 100644 index 00000000..3dae6b67 --- /dev/null +++ b/0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch @@ -0,0 +1,42 @@ +From d2b179ba6e308769f1b37637d1c746c3dbf55cc0 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 9 Apr 2024 15:03:05 +0100 +Subject: [PATCH 527/542] x86/cpuid: Don't expose {IPRED,RRSBA,BHI}_CTRL to PV + guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +All of these are prediction-mode (i.e. CPL) based. They don't operate as +advertised in PV context. 
+ +Fixes: 4dd676070684 ("x86/spec-ctrl: Expose IPRED_CTRL to guests") +Fixes: 478e4787fa64 ("x86/spec-ctrl: Expose RRSBA_CTRL to guests") +Fixes: 583f1d095052 ("x86/spec-ctrl: Expose BHI_CTRL to guests") +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné +(cherry picked from commit 4b3da946ad7e3452761478ae683da842e7ff20d6) +--- + xen/include/public/arch-x86/cpufeatureset.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 51f238683c..63c8ac8486 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -295,9 +295,9 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory + + /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ + XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ +-XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ +-XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*A MSR_SPEC_CTRL.RRSBA_DIS_* */ +-XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /*A MSR_SPEC_CTRL.BHI_DIS_S */ ++XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*S MSR_SPEC_CTRL.IPRED_DIS_* */ ++XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*S MSR_SPEC_CTRL.RRSBA_DIS_* */ ++XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /*S MSR_SPEC_CTRL.BHI_DIS_S */ + XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ +-- +2.44.0 + diff --git a/0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch b/0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch new file mode 100644 index 00000000..99073fca --- /dev/null +++ b/0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch @@ -0,0 +1,425 @@ +From 164c69bcee159b6f5c9f58d49fd3b715be75146f Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 28 Mar 2024 11:57:25 +0000 +Subject: [PATCH 528/542] x86/spec-ctrl: Rename spec_ctrl_flags to scf + +XSA-455 was ultimately caused by having fields with too-similar names. + +Both {xen,last}_spec_ctrl are fields containing an architectural MSR_SPEC_CTRL +value. The spec_ctrl_flags field contains Xen-internal flags. + +To more-obviously distinguish the two, rename spec_ctrl_flags to scf, which is +also the prefix of the constants used by the fields. + +No functional change. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit c62673c4334b3372ebd4292a7ac8185357e7ea27) +--- + xen/arch/x86/acpi/power.c | 4 ++-- + xen/arch/x86/domain.c | 8 ++++---- + xen/arch/x86/hvm/svm/entry.S | 2 +- + xen/arch/x86/hvm/vmx/entry.S | 2 +- + xen/arch/x86/hvm/vmx/vmcs.c | 2 +- + xen/arch/x86/include/asm/current.h | 2 +- + xen/arch/x86/include/asm/domain.h | 2 +- + xen/arch/x86/include/asm/spec_ctrl.h | 16 ++++++++-------- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 22 +++++++++++----------- + xen/arch/x86/setup.c | 2 +- + xen/arch/x86/spec_ctrl.c | 18 +++++++++--------- + xen/arch/x86/x86_64/asm-offsets.c | 2 +- + xen/arch/x86/x86_64/compat/entry.S | 4 ++-- + xen/arch/x86/x86_64/entry.S | 2 +- + 14 files changed, 44 insertions(+), 44 deletions(-) + +diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c +index b76f673acb..5cddb0f0f6 100644 +--- a/xen/arch/x86/acpi/power.c ++++ b/xen/arch/x86/acpi/power.c +@@ -246,7 +246,7 @@ static int enter_state(u32 state) + + ci = get_cpu_info(); + /* Avoid NMI/#MC using unsafe MSRs until we've reloaded microcode. 
*/ +- ci->spec_ctrl_flags &= ~SCF_IST_MASK; ++ ci->scf &= ~SCF_IST_MASK; + + ACPI_FLUSH_CPU_CACHE(); + +@@ -290,7 +290,7 @@ static int enter_state(u32 state) + panic("Missing previously available feature(s)\n"); + + /* Re-enabled default NMI/#MC use of MSRs now microcode is loaded. */ +- ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_IST_MASK); ++ ci->scf |= (default_scf & SCF_IST_MASK); + + if ( boot_cpu_has(X86_FEATURE_IBRSB) || boot_cpu_has(X86_FEATURE_IBRS) ) + { +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index aca9fa310c..228763b5e9 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -2096,10 +2096,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next) + } + } + +- /* Update the top-of-stack block with the new spec_ctrl settings. */ +- info->spec_ctrl_flags = +- (info->spec_ctrl_flags & ~SCF_DOM_MASK) | +- (nextd->arch.spec_ctrl_flags & SCF_DOM_MASK); ++ /* Update the top-of-stack block with the new speculation settings. */ ++ info->scf = ++ (info->scf & ~SCF_DOM_MASK) | ++ (nextd->arch.scf & SCF_DOM_MASK); + } + + sched_context_switched(prev, next); +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index c19e964bc6..0264e0bac2 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap) + /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ + + .macro svm_vmexit_cond_ibpb +- testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) ++ testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) + jz .L_skip_ibpb + + mov $MSR_PRED_CMD, %ecx +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index cdde76e138..4ee529c57a 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -111,7 +111,7 @@ UNLIKELY_END(realmode) + BUILD_BUG_ON(SCF_verw & ~0xff) + movzbl VCPU_vmx_launched(%rbx), %ecx + shl $31, %ecx +- movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax ++ movzbl CPUINFO_scf(%rsp), %eax + and $SCF_verw, %eax + or %eax, %ecx + +diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c +index 49d51fb524..f0fb4874b8 100644 +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -1386,7 +1386,7 @@ static int construct_vmcs(struct vcpu *v) + rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D, + VMX_MSR_GUEST_LOADONLY); + +- if ( !rc && (d->arch.spec_ctrl_flags & SCF_entry_ibpb) ) ++ if ( !rc && (d->arch.scf & SCF_entry_ibpb) ) + rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB, + VMX_MSR_HOST); + +diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h +index da5e152a10..9cc8d8e3d4 100644 +--- a/xen/arch/x86/include/asm/current.h ++++ b/xen/arch/x86/include/asm/current.h +@@ -57,7 +57,7 @@ struct cpu_info { + unsigned int shadow_spec_ctrl; + uint8_t xen_spec_ctrl; + uint8_t last_spec_ctrl; +- uint8_t spec_ctrl_flags; ++ uint8_t scf; /* SCF_* */ + + /* + * The following field controls copying of the L4 page table of 64-bit +diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h +index 5293c0cde4..f90a268b01 100644 +--- a/xen/arch/x86/include/asm/domain.h ++++ b/xen/arch/x86/include/asm/domain.h +@@ -324,7 +324,7 @@ struct arch_domain + uint32_t pci_cf8; + uint8_t cmos_idx; + +- uint8_t spec_ctrl_flags; /* See SCF_DOM_MASK */ ++ uint8_t scf; /* See SCF_DOM_MASK */ + + union { + struct pv_domain pv; +diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h +index a431fea587..8fc350abe2 100644 +--- 
a/xen/arch/x86/include/asm/spec_ctrl.h ++++ b/xen/arch/x86/include/asm/spec_ctrl.h +@@ -21,10 +21,10 @@ + #define __X86_SPEC_CTRL_H__ + + /* +- * Encoding of: +- * cpuinfo.spec_ctrl_flags +- * default_spec_ctrl_flags +- * domain.spec_ctrl_flags ++ * Encoding of Xen's speculation control flags in: ++ * cpuinfo.scf ++ * default_scf ++ * domain.scf + * + * Live settings are in the top-of-stack block, because they need to be + * accessable when XPTI is active. Some settings are fixed from boot, some +@@ -94,7 +94,7 @@ extern int8_t opt_l1d_flush; + + extern bool bsp_delay_spec_ctrl; + extern uint8_t default_xen_spec_ctrl; +-extern uint8_t default_spec_ctrl_flags; ++extern uint8_t default_scf; + + extern int8_t opt_xpti_hwdom, opt_xpti_domu; + +@@ -114,7 +114,7 @@ static inline void init_shadow_spec_ctrl_state(void) + + info->shadow_spec_ctrl = 0; + info->xen_spec_ctrl = default_xen_spec_ctrl; +- info->spec_ctrl_flags = default_spec_ctrl_flags; ++ info->scf = default_scf; + + /* + * For least latency, the VERW selector should be a writeable data +@@ -138,7 +138,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + */ + info->shadow_spec_ctrl = val; + barrier(); +- info->spec_ctrl_flags |= SCF_use_shadow; ++ info->scf |= SCF_use_shadow; + barrier(); + alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, + "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); +@@ -187,7 +187,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + * Disable shadowing before updating the MSR. There are no SMP issues + * here; only local processor ordering concerns. + */ +- info->spec_ctrl_flags &= ~SCF_use_shadow; ++ info->scf &= ~SCF_use_shadow; + barrier(); + alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, + "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index e85db1a329..8c488be048 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -51,7 +51,7 @@ + * shadowing logic. + * + * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and a use_shadow +- * boolean in the per cpu spec_ctrl_flags. The synchronous use is: ++ * boolean in the per cpu scf. The synchronous use is: + * + * 1) Store guest value in shadow_spec_ctrl + * 2) Set the use_shadow boolean +@@ -98,11 +98,11 @@ + * interrupting Xen. 
+ */ + .if \maybexen +- testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(scf)(%r14) + jz .L\@_skip + testb $3, UREGS_cs(%rsp) + .else +- testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) ++ testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) + .endif + jz .L\@_skip + +@@ -172,8 +172,8 @@ + #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) + + .macro SPEC_CTRL_COND_VERW \ +- scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ +- sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) ++ scf=STK_REL(CPUINFO_scf, CPUINFO_error_code), \ ++ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) + /* + * Requires \scf and \sel as %rsp-relative expressions + * Clobbers eflags +@@ -228,10 +228,10 @@ + testb $3, UREGS_cs(%rsp) + setnz %al + not %eax +- and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ and %al, STACK_CPUINFO_FIELD(scf)(%r14) + movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax + .else +- andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) ++ andb $~SCF_use_shadow, CPUINFO_scf(%rsp) + movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax + .endif + +@@ -250,7 +250,7 @@ + mov %eax, CPUINFO_shadow_spec_ctrl(%rsp) + + /* Set SPEC_CTRL shadowing *before* loading the guest value. */ +- orb $SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) ++ orb $SCF_use_shadow, CPUINFO_scf(%rsp) + + mov $MSR_SPEC_CTRL, %ecx + xor %edx, %edx +@@ -328,7 +328,7 @@ + * DO_SPEC_CTRL_ENTRY maybexen=1 + * but with conditionals rather than alternatives. + */ +- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx ++ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx + + test $SCF_ist_ibpb, %bl + jz .L\@_skip_ibpb +@@ -353,7 +353,7 @@ + testb $3, UREGS_cs(%rsp) + setnz %al + not %eax +- and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ and %al, STACK_CPUINFO_FIELD(scf)(%r14) + + /* Load Xen's intended value. */ + mov $MSR_SPEC_CTRL, %ecx +@@ -387,7 +387,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs + * Clobbers %rax, %rbx, %rcx, %rdx + */ +- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx ++ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx + + testb $SCF_ist_sc_msr, %bl + jz .L\@_skip_sc_msr +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 0c00ea875d..d47f156711 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -1984,7 +1984,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) + + if ( bsp_delay_spec_ctrl ) + { +- info->spec_ctrl_flags &= ~SCF_use_shadow; ++ info->scf &= ~SCF_use_shadow; + barrier(); + wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); + info->last_spec_ctrl = default_xen_spec_ctrl; +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 0a2de88593..ab81ad457b 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -69,7 +69,7 @@ static bool __initdata opt_lock_harden; + + bool __initdata bsp_delay_spec_ctrl; + uint8_t __ro_after_init default_xen_spec_ctrl; +-uint8_t __ro_after_init default_spec_ctrl_flags; ++uint8_t __ro_after_init default_scf; + + paddr_t __ro_after_init l1tf_addr_mask, __ro_after_init l1tf_safe_maddr; + bool __ro_after_init cpu_has_bug_l1tf; +@@ -1117,7 +1117,7 @@ static void __init ibpb_calculations(void) + * NMI/#MC, so can't interrupt Xen ahead of having already flushed the + * BTB. 
+ */ +- default_spec_ctrl_flags |= SCF_ist_ibpb; ++ default_scf |= SCF_ist_ibpb; + } + if ( opt_ibpb_entry_hvm ) + setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_HVM); +@@ -1618,7 +1618,7 @@ void spec_ctrl_init_domain(struct domain *d) + bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && + (d->domain_id != 0 || opt_ibpb_entry_dom0)); + +- d->arch.spec_ctrl_flags = ++ d->arch.scf = + (verw ? SCF_verw : 0) | + (ibpb ? SCF_entry_ibpb : 0) | + 0; +@@ -1723,7 +1723,7 @@ void __init init_speculation_mitigations(void) + { + if ( opt_msr_sc_pv ) + { +- default_spec_ctrl_flags |= SCF_ist_sc_msr; ++ default_scf |= SCF_ist_sc_msr; + setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); + } + +@@ -1734,7 +1734,7 @@ void __init init_speculation_mitigations(void) + * Xen's value is not restored atomically. An early NMI hitting + * the VMExit path needs to restore Xen's value for safety. + */ +- default_spec_ctrl_flags |= SCF_ist_sc_msr; ++ default_scf |= SCF_ist_sc_msr; + setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); + } + } +@@ -1869,7 +1869,7 @@ void __init init_speculation_mitigations(void) + if ( opt_rsb_pv ) + { + setup_force_cpu_cap(X86_FEATURE_SC_RSB_PV); +- default_spec_ctrl_flags |= SCF_ist_rsb; ++ default_scf |= SCF_ist_rsb; + } + + /* +@@ -1892,7 +1892,7 @@ void __init init_speculation_mitigations(void) + * possible rogue RSB speculation. + */ + if ( !cpu_has_svm ) +- default_spec_ctrl_flags |= SCF_ist_rsb; ++ default_scf |= SCF_ist_rsb; + } + + srso_calculations(hw_smt_enabled); +@@ -1905,7 +1905,7 @@ void __init init_speculation_mitigations(void) + if ( opt_eager_fpu == -1 ) + opt_eager_fpu = should_use_eager_fpu(); + +- /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ ++ /* (Re)init BSP state now that default_scf has been calculated. */ + init_shadow_spec_ctrl_state(); + + /* +@@ -2178,7 +2178,7 @@ void __init init_speculation_mitigations(void) + { + info->shadow_spec_ctrl = 0; + barrier(); +- info->spec_ctrl_flags |= SCF_use_shadow; ++ info->scf |= SCF_use_shadow; + barrier(); + } + +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 4cd5938d7b..fba82d6436 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -154,7 +154,7 @@ void __dummy__(void) + OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); + OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl); + OFFSET(CPUINFO_last_spec_ctrl, struct cpu_info, last_spec_ctrl); +- OFFSET(CPUINFO_spec_ctrl_flags, struct cpu_info, spec_ctrl_flags); ++ OFFSET(CPUINFO_scf, struct cpu_info, scf); + OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed); + OFFSET(CPUINFO_use_pv_cr3, struct cpu_info, use_pv_cr3); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index 3b2fbcd873..fab85eb733 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -164,8 +164,8 @@ ENTRY(compat_restore_all_guest) + + /* Account for ev/ec having already been popped off the stack. 
*/ + SPEC_CTRL_COND_VERW \ +- scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ +- sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) ++ scf=STK_REL(CPUINFO_scf, CPUINFO_rip), \ ++ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) + + .Lft0: iretq + _ASM_PRE_EXTABLE(.Lft0, handle_exception) +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index ef517e2945..50fc048834 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -692,7 +692,7 @@ UNLIKELY_END(exit_cr3) + /* + * When the CPU pushed this exception frame, it zero-extended eflags. + * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of +- * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, ++ * scf and ver_sel above eflags, as we can't use any GPRs, + * and we're at a random place on the stack, not in a CPUFINFO block. + * + * Account for ev/ec having already been popped off the stack. +-- +2.44.0 + diff --git a/0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch b/0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch new file mode 100644 index 00000000..431c1880 --- /dev/null +++ b/0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch @@ -0,0 +1,196 @@ +From 05f6fe7c39fe0c44807a51f6aa1d8ee1a38de197 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 22 Mar 2024 11:41:41 +0000 +Subject: [PATCH 529/542] x86/spec-ctrl: Rework conditional safety for + SPEC_CTRL_ENTRY_* +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Right now, we have a mix of safety strategies in different blocks, making the +logic fragile and hard to follow. + +Start addressing this by having a safety LFENCE at the end of the blocks, +which can be patched out if other safety criteria are met. This will allow us +to simplify the sub-blocks. For SPEC_CTRL_ENTRY_FROM_IST, simply leave an +LFENCE unconditionally at the end; the IST path is not a fast-path by any +stretch of the imagination. + +For SPEC_CTRL_ENTRY_FROM_INTR, the existing description was incorrect. The +IRET #GP path is non-fatal but can occur with the guest's choice of +MSR_SPEC_CTRL. It is safe to skip the flush/barrier-like protections when +interrupting Xen, but we must run DO_SPEC_CTRL_ENTRY irrespective. + +This will skip RSB stuffing which was previously unconditional even when +interrupting Xen. + +AFAICT, this is a missing cleanup from commit 3fffaf9c13e9 ("x86/entry: Avoid +using alternatives in NMI/#MC paths") where we split the IST entry path out of +the main INTR entry path. + +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné +(cherry picked from commit 94896de1a98c4289fe6fef9e16ef99fc6ef2efc4) +--- + xen/arch/x86/hvm/vmx/entry.S | 1 + + xen/arch/x86/include/asm/cpufeatures.h | 4 ++ + xen/arch/x86/include/asm/spec_ctrl_asm.h | 27 ++++++------ + xen/arch/x86/spec_ctrl.c | 52 ++++++++++++++++++++++++ + 4 files changed, 72 insertions(+), 12 deletions(-) + +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index 4ee529c57a..8d5b683879 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -43,6 +43,7 @@ ENTRY(vmx_asm_vmexit_handler) + wrmsr + .endm + ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM ++ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_VMX + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + /* Hardware clears MSR_DEBUGCTL on VMExit. Reinstate it if debugging Xen. 
*/ +diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h +index 7e8221fd85..6422c66b0f 100644 +--- a/xen/arch/x86/include/asm/cpufeatures.h ++++ b/xen/arch/x86/include/asm/cpufeatures.h +@@ -52,5 +52,9 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for + #define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ + #define X86_BUG_IBPB_NO_RET X86_BUG( 3) /* IBPB doesn't flush the RSB/RAS */ + ++#define X86_SPEC_NO_LFENCE_ENTRY_PV X86_BUG(16) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_PV. */ ++#define X86_SPEC_NO_LFENCE_ENTRY_INTR X86_BUG(17) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_INTR. */ ++#define X86_SPEC_NO_LFENCE_ENTRY_VMX X86_BUG(18) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_VMX. */ ++ + /* Total number of capability words, inc synth and bug words. */ + #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 8c488be048..e58e5110d9 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -273,25 +273,37 @@ + + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ + X86_FEATURE_SC_MSR_PV ++ ++ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV + .endm + + /* + * Used after an exception or maskable interrupt, hitting Xen or PV context. +- * There will either be a guest speculation context, or (barring fatal +- * exceptions) a well-formed Xen speculation context. ++ * There will either be a guest speculation context, or a well-formed Xen ++ * speculation context, with the exception of one case. IRET #GP handling may ++ * have a guest choice of MSR_SPEC_CTRL. ++ * ++ * Therefore, we can skip the flush/barrier-like protections when hitting Xen, ++ * but we must still run the mode-based protections. + */ + .macro SPEC_CTRL_ENTRY_FROM_INTR + /* + * Requires %rsp=regs, %r14=stack_end, %rdx=0 + * Clobbers %rax, %rcx, %rdx + */ ++ testb $3, UREGS_cs(%rsp) ++ jz .L\@_skip ++ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ + X86_FEATURE_IBPB_ENTRY_PV + + ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV + ++.L\@_skip: + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ + X86_FEATURE_SC_MSR_PV ++ ++ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR + .endm + + /* +@@ -360,18 +372,9 @@ + movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax + wrmsr + +- /* Opencoded UNLIKELY_START() with no condition. */ +-UNLIKELY_DISPATCH_LABEL(\@_serialise): +- .subsection 1 +- /* +- * In the case that we might need to set SPEC_CTRL.IBRS for safety, we +- * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr` +- * to speculate around the WRMSR. As a result, we need a dispatch +- * serialising instruction in the else clause. +- */ + .L\@_skip_msr_spec_ctrl: ++ + lfence +- UNLIKELY_END(\@_serialise) + .endm + + /* +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index ab81ad457b..2b22deb891 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -2154,6 +2154,58 @@ void __init init_speculation_mitigations(void) + + print_details(thunk); + ++ /* ++ * With the alternative blocks now chosen, see if we need any other ++ * adjustments for safety. ++ * ++ * We compile the LFENCE in, and patch it out if it's not needed. ++ * ++ * Notes: ++ * - SPEC_CTRL_ENTRY_FROM_SVM doesn't need an LFENCE because it has an ++ * unconditional STGI. 
++ * - SPEC_CTRL_ENTRY_FROM_IST handles its own safety, without the use of ++ * alternatives. ++ * - DO_OVERWRITE_RSB has conditional branches in it, but it's an inline ++ * sequence. It is considered safe for uarch reasons. ++ */ ++ { ++ /* ++ * SPEC_CTRL_ENTRY_FROM_PV conditional safety ++ * ++ * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an ++ * unconditional WRMSR as the last action. ++ * ++ * If we have it, or we're not using any prior conditional mitigation, ++ * then it's safe to drop the LFENCE. ++ */ ++ if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || ++ !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ) ++ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV); ++ ++ /* ++ * SPEC_CTRL_ENTRY_FROM_INTR conditional safety ++ * ++ * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an ++ * unconditional WRMSR as the last action. ++ * ++ * If we have it, or we have no protections active in the block that ++ * is skipped when interrupting guest context, then it's safe to drop ++ * the LFENCE. ++ */ ++ if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || ++ (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && ++ !boot_cpu_has(X86_FEATURE_SC_RSB_PV)) ) ++ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_INTR); ++ ++ /* ++ * SPEC_CTRL_ENTRY_FROM_VMX conditional safety ++ * ++ * Currently there are no safety actions with conditional branches, so ++ * no need for the extra safety LFENCE. ++ */ ++ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); ++ } ++ + /* + * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard + * any firmware settings. For performance reasons, when safe to do so, we +-- +2.44.0 + diff --git a/0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch b/0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch new file mode 100644 index 00000000..201947fa --- /dev/null +++ b/0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch @@ -0,0 +1,171 @@ +From 687691733f4834b7edfd52cae6339d43257a19b3 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 22 Mar 2024 15:52:06 +0000 +Subject: [PATCH 530/542] x86/entry: Arrange for %r14 to be STACK_END across + SPEC_CTRL_ENTRY_FROM_PV + +Other SPEC_CTRL_* paths already use %r14 like this, and it will allow for +simplifications. + +All instances of SPEC_CTRL_ENTRY_FROM_PV are followed by a GET_STACK_END() +invocation, so this change is only really logic and register shuffling. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 22390697bf1b4cd3024f2d10893dec3c3ec08a9c) +--- + xen/arch/x86/x86_64/compat/entry.S | 4 ++- + xen/arch/x86/x86_64/entry.S | 44 ++++++++++++++++-------------- + 2 files changed, 27 insertions(+), 21 deletions(-) + +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index fab85eb733..a32b95f7c3 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -18,6 +18,8 @@ ENTRY(entry_int82) + movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) + SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ + ++ GET_STACK_END(14) ++ + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ + +@@ -25,7 +27,7 @@ ENTRY(entry_int82) + + CR4_PV32_RESTORE + +- GET_CURRENT(bx) ++ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx + + mov %rsp, %rdi + call do_entry_int82 +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 50fc048834..78c00bdd19 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -266,21 +266,22 @@ ENTRY(lstar_enter) + movl $TRAP_syscall, EFRAME_entry_vector(%rsp) + SAVE_ALL + ++ GET_STACK_END(14) ++ + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + +- GET_STACK_END(bx) +- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx ++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + test %rcx, %rcx + jz .Llstar_cr3_okay +- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) ++ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) + mov %rcx, %cr3 + /* %r12 is still zero at this point. */ +- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) ++ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + .Llstar_cr3_okay: + sti + +- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx ++ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx + testb $TF_kernel_mode,VCPU_thread_flags(%rbx) + jz switch_to_kernel + +@@ -303,23 +304,24 @@ ENTRY(cstar_enter) + movl $TRAP_syscall, EFRAME_entry_vector(%rsp) + SAVE_ALL + ++ GET_STACK_END(14) ++ + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + +- GET_STACK_END(bx) +- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx ++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + test %rcx, %rcx + jz .Lcstar_cr3_okay +- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) ++ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) + mov %rcx, %cr3 + /* %r12 is still zero at this point. */ +- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) ++ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + .Lcstar_cr3_okay: + sti + + CR4_PV32_RESTORE + +- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx ++ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx + + #ifdef CONFIG_PV32 + movq VCPU_domain(%rbx), %rcx +@@ -344,23 +346,24 @@ GLOBAL(sysenter_eflags_saved) + movl $TRAP_syscall, EFRAME_entry_vector(%rsp) + SAVE_ALL + ++ GET_STACK_END(14) ++ + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + +- GET_STACK_END(bx) + /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */ + orl $X86_EFLAGS_IF, UREGS_eflags(%rsp) +- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx ++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + test %rcx, %rcx + jz .Lsyse_cr3_okay +- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) ++ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) + mov %rcx, %cr3 + /* %r12 is still zero at this point. */ +- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) ++ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + .Lsyse_cr3_okay: + sti + +- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx ++ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx + cmpb $0,VCPU_sysenter_disables_events(%rbx) + movq VCPU_sysenter_addr(%rbx),%rax + setne %cl +@@ -398,17 +401,18 @@ ENTRY(int80_direct_trap) + movl $0x80, EFRAME_entry_vector(%rsp) + SAVE_ALL + ++ GET_STACK_END(14) ++ + SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ + +- GET_STACK_END(bx) +- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx ++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx + test %rcx, %rcx + jz .Lint80_cr3_okay +- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) ++ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) + mov %rcx, %cr3 + /* %r12 is still zero at this point. */ +- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) ++ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + .Lint80_cr3_okay: + sti + +@@ -418,7 +422,7 @@ UNLIKELY_START(ne, msi_check) + call check_for_unexpected_msi + UNLIKELY_END(msi_check) + +- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx ++ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx + + mov VCPU_trap_ctxt(%rbx), %rsi + mov VCPU_domain(%rbx), %rax +-- +2.44.0 + diff --git a/0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch b/0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch new file mode 100644 index 00000000..2d76ef69 --- /dev/null +++ b/0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch @@ -0,0 +1,122 @@ +From d2313bd20d36a452e0b4906da4814149a18e5acf Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 22 Mar 2024 12:08:02 +0000 +Subject: [PATCH 531/542] x86/spec_ctrl: Hold SCF in %ebx across + SPEC_CTRL_ENTRY_{PV,INTR} + +... as we do in the exit paths too. This will allow simplification to the +sub-blocks. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 9607aeb6602b8ed9962404de3f5f90170ffddb66) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 +++++++--- + xen/arch/x86/x86_64/compat/entry.S | 2 +- + xen/arch/x86/x86_64/entry.S | 12 ++++++------ + 3 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index e58e5110d9..67f6963e8d 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -263,9 +263,11 @@ + */ + .macro SPEC_CTRL_ENTRY_FROM_PV + /* +- * Requires %rsp=regs/cpuinfo, %rdx=0 +- * Clobbers %rax, %rcx, %rdx ++ * Requires %rsp=regs/cpuinfo, %r14=stack_end, %rdx=0 ++ * Clobbers %rax, %rbx, %rcx, %rdx + */ ++ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx ++ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ + X86_FEATURE_IBPB_ENTRY_PV + +@@ -289,8 +291,10 @@ + .macro SPEC_CTRL_ENTRY_FROM_INTR + /* + * Requires %rsp=regs, %r14=stack_end, %rdx=0 +- * Clobbers %rax, %rcx, %rdx ++ * Clobbers %rax, %rbx, %rcx, %rdx + */ ++ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx ++ + testb $3, UREGS_cs(%rsp) + jz .L\@_skip + +diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S +index a32b95f7c3..ff462a92e0 100644 +--- a/xen/arch/x86/x86_64/compat/entry.S ++++ b/xen/arch/x86/x86_64/compat/entry.S +@@ -20,7 +20,7 @@ ENTRY(entry_int82) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + sti +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 78c00bdd19..801d241337 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -268,7 +268,7 @@ ENTRY(lstar_enter) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! 
`ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx +@@ -306,7 +306,7 @@ ENTRY(cstar_enter) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx +@@ -348,7 +348,7 @@ GLOBAL(sysenter_eflags_saved) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */ +@@ -403,7 +403,7 @@ ENTRY(int80_direct_trap) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx +@@ -713,7 +713,7 @@ ENTRY(common_interrupt) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx +@@ -747,7 +747,7 @@ GLOBAL(handle_exception) + + GET_STACK_END(14) + +- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ ++ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ + /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ + + mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx +-- +2.44.0 + diff --git a/0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch b/0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch new file mode 100644 index 00000000..0d1b8170 --- /dev/null +++ b/0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch @@ -0,0 +1,95 @@ +From b73f37b91ce05c28cb998ef4870198922fa2b17c Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 22 Mar 2024 14:33:17 +0000 +Subject: [PATCH 532/542] x86/spec-ctrl: Simplify DO_COND_IBPB +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With the prior refactoring, SPEC_CTRL_ENTRY_{PV,INTR} both load SCF into %ebx, +and handle the conditional safety including skipping if interrupting Xen. + +Therefore, we can drop the maybexen parameter and the conditional safety. + +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné +(cherry picked from commit 2378d16a931de0e62c03669169989e9437306abe) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 30 +++++++----------------- + 1 file changed, 8 insertions(+), 22 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 67f6963e8d..8d171ecca2 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -87,33 +87,21 @@ + * - SPEC_CTRL_EXIT_TO_{SVM,VMX} + */ + +-.macro DO_SPEC_CTRL_COND_IBPB maybexen:req ++.macro DO_COND_IBPB + /* +- * Requires %rsp=regs (also cpuinfo if !maybexen) +- * Requires %r14=stack_end (if maybexen), %rdx=0 +- * Clobbers %rax, %rcx, %rdx ++ * Requires %rbx=SCF, %rdx=0 ++ * Clobbers %rax, %rcx + * +- * Conditionally issue IBPB if SCF_entry_ibpb is active. 
In the maybexen +- * case, we can safely look at UREGS_cs to skip taking the hit when +- * interrupting Xen. ++ * Conditionally issue IBPB if SCF_entry_ibpb is active. + */ +- .if \maybexen +- testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(scf)(%r14) +- jz .L\@_skip +- testb $3, UREGS_cs(%rsp) +- .else +- testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) +- .endif ++ testb $SCF_entry_ibpb, %bl + jz .L\@_skip + + mov $MSR_PRED_CMD, %ecx + mov $PRED_CMD_IBPB, %eax + wrmsr +- jmp .L\@_done + + .L\@_skip: +- lfence +-.L\@_done: + .endm + + .macro DO_OVERWRITE_RSB tmp=rax xu +@@ -268,8 +256,7 @@ + */ + movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx + +- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ +- X86_FEATURE_IBPB_ENTRY_PV ++ ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV + + ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV + +@@ -298,8 +285,7 @@ + testb $3, UREGS_cs(%rsp) + jz .L\@_skip + +- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ +- X86_FEATURE_IBPB_ENTRY_PV ++ ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV + + ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV + +@@ -339,7 +325,7 @@ + * Clobbers %rax, %rbx, %rcx, %rdx + * + * This is logical merge of: +- * DO_SPEC_CTRL_COND_IBPB maybexen=0 ++ * DO_COND_IBPB + * DO_OVERWRITE_RSB + * DO_SPEC_CTRL_ENTRY maybexen=1 + * but with conditionals rather than alternatives. +-- +2.44.0 + diff --git a/0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch b/0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch new file mode 100644 index 00000000..ae1d5c15 --- /dev/null +++ b/0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch @@ -0,0 +1,183 @@ +From 046f90e1b5c72ebc609eb1629c80cf5e077da02b Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Mon, 25 Mar 2024 11:09:35 +0000 +Subject: [PATCH 533/542] x86/spec-ctrl: Detail the safety properties in + SPEC_CTRL_ENTRY_* + +The complexity is getting out of hand. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +(cherry picked from commit 40dea83b75386cb693481cf340024ce093be5c0f) +--- + xen/arch/x86/hvm/svm/entry.S | 14 ++++++ + xen/arch/x86/hvm/vmx/entry.S | 14 ++++++ + xen/arch/x86/include/asm/spec_ctrl_asm.h | 59 ++++++++++++++++++++++++ + 3 files changed, 87 insertions(+) + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index 0264e0bac2..58d8a1bffa 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -102,6 +102,11 @@ __UNLIKELY_END(nsvm_hap) + + /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ + ++ /* ++ * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular ++ * making type-confused RETs safe to use. This is not needed on Zen5 ++ * and later parts when SRSO_MSR_FIX (BP-SPEC-REDUCE) is in use. ++ */ + .macro svm_vmexit_cond_ibpb + testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) + jz .L_skip_ibpb +@@ -113,8 +118,17 @@ __UNLIKELY_END(nsvm_hap) + .endm + ALTERNATIVE "", svm_vmexit_cond_ibpb, X86_FEATURE_IBPB_ENTRY_HVM + ++ /* ++ * RSB (RAS/RAP) stuffing is to prevents RET predictions following guest ++ * entries. This is not needed on Zen4 and later, when AutoIBRS is in ++ * use. ++ */ + ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM + ++ /* ++ * Restore Xen's MSR_SPEC_CTRL setting, making indirect CALLs/JMPs ++ * safe to use. The guest's setting resides in the VMCB. 
++ */ + .macro svm_vmexit_spec_ctrl + movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax + movzbl CPUINFO_last_spec_ctrl(%rsp), %edx +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index 8d5b683879..008d76a6e1 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -34,8 +34,22 @@ ENTRY(vmx_asm_vmexit_handler) + mov %rax,VCPU_hvm_guest_cr2(%rbx) + + /* SPEC_CTRL_ENTRY_FROM_VMX Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ ++ /* ++ * RSB stuffing is to prevents RET predictions following guest ++ * entries. This is *not* sufficient to flush all RSB entries on ++ * parts enumerating eIBRS, although the following restore_spec_ctrl ++ * does covers us. ++ */ + ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM + ++ /* ++ * Restore Xen's MSR_SPEC_CTRL setting. The guest's value resides in ++ * the MSR load/save list. For Legacy IBRS, this flushes/inhibits ++ * indirect predictions and does not flush the RSB. For eIBRS, this ++ * prevents CALLs/JMPs using predictions learnt at a lower predictor ++ * mode, and it flushes the RSB. On eIBRS parts that also suffer from ++ * PBRSB, the prior RSB stuffing suffices to make the RSB safe. ++ */ + .macro restore_spec_ctrl + mov $MSR_SPEC_CTRL, %ecx + movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 8d171ecca2..9531d046d7 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -256,10 +256,32 @@ + */ + movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx + ++ /* ++ * For all safety notes, 32bit PV guest kernels run in Ring 1 and are ++ * therefore supervisor (== Xen) in the architecture. As a result, most ++ * hardware isolation techniques do not work. ++ */ ++ ++ /* ++ * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making ++ * type-confused RETs safe to use. This is not needed on Zen5 and later ++ * parts when SRSO_U/S_NO is enumerated. ++ */ + ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV + ++ /* ++ * RSB stuffing is to prevent RET predictions following guest entries. ++ * This is not needed if SMEP is active and the RSB is full-width. ++ */ + ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV + ++ /* ++ * Only used on Intel parts. Restore Xen's MSR_SPEC_CTRL setting. The ++ * guest can't change it's value behind Xen's back. For Legacy IBRS, this ++ * flushes/inhibits indirect predictions and does not flush the RSB. For ++ * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower ++ * predictor mode, and it flushes the RSB. ++ */ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ + X86_FEATURE_SC_MSR_PV + +@@ -282,6 +304,14 @@ + */ + movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx + ++ /* ++ * All safety notes the same as SPEC_CTRL_ENTRY_FROM_PV, although there is ++ * a conditional jump skipping some actions when interrupting Xen. ++ * ++ * On Intel parts, the IRET #GP path ends up here with the guest's choice ++ * of MSR_SPEC_CTRL. ++ */ ++ + testb $3, UREGS_cs(%rsp) + jz .L\@_skip + +@@ -332,6 +362,19 @@ + */ + movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx + ++ /* ++ * For all safety notes, 32bit PV guest kernels run in Ring 1 and are ++ * therefore supervisor (== Xen) in the architecture. As a result, most ++ * hardware isolation techniques do not work. ++ */ ++ ++ /* ++ * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making ++ * type-confused RETs safe to use. 
This is not needed on Zen5 and later
++ * parts when SRSO_U/S_NO is enumerated. The SVM path takes care of
++ * Host/Guest interactions prior to clearing GIF, and it's not used on the
++ * VMX path.
++ */
+ test $SCF_ist_ibpb, %bl
+ jz .L\@_skip_ibpb
+
+@@ -341,6 +384,12 @@
+
+ .L\@_skip_ibpb:
+
++ /*
++ * RSB stuffing is to prevent RET predictions following guest entries.
++ * SCF_ist_rsb is active if either PV or HVM protections are needed. The
++ * VMX path cannot guarantee to make the RSB safe ahead of taking an IST
++ * vector.
++ */
+ test $SCF_ist_rsb, %bl
+ jz .L\@_skip_rsb
+
+@@ -348,6 +397,16 @@
+
+ .L\@_skip_rsb:
+
++ /*
++ * Only used on Intel parts. Restore Xen's MSR_SPEC_CTRL setting. PV
++ * guests can't change their value behind Xen's back. HVM guests have
++ * their value stored in the MSR load/save list. For Legacy IBRS, this
++ * flushes/inhibits indirect predictions and does not flush the RSB. For
++ * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower
++ * predictor mode, and it flushes the RSB. On eIBRS parts that also
++ * suffer from PBRSB, the prior RSB stuffing suffices to make the RSB
++ * safe.
++ */
+ test $SCF_ist_sc_msr, %bl
+ jz .L\@_skip_msr_spec_ctrl
+
+--
+2.44.0
+
diff --git a/0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch b/0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch
new file mode 100644
index 00000000..21c885f4
--- /dev/null
+++ b/0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch
@@ -0,0 +1,206 @@
+From 587298a707ea6c9afd3565a1f68fd43bc21038e9 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne 
+Date: Thu, 15 Feb 2024 17:46:53 +0100
+Subject: [PATCH 534/542] x86/vmx: Add support for virtualize SPEC_CTRL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The feature is defined in the tertiary exec control, and is available starting
+from Sapphire Rapids and Alder Lake CPUs.
+
+When enabled, two extra VMCS fields are used: SPEC_CTRL mask and shadow. Bits
+set in mask are not allowed to be toggled by the guest (either set or clear)
+and the value in the shadow field is the value the guest expects to be in the
+SPEC_CTRL register.
+
+By using it the hypervisor can force the value of SPEC_CTRL bits behind the
+guest's back without having to trap all accesses to SPEC_CTRL; note that no
+bits are forced into the guest as part of this patch. It also allows getting
+rid of SPEC_CTRL in the guest MSR load list, since the value in the shadow
+field will be loaded by the hardware on vmentry. 
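+
+For illustration only (a hedged sketch; this helper is hypothetical and not
+part of the patch), the mask/shadow pair resolves roughly as follows:
+
+    /* Effective SPEC_CTRL while the guest runs: masked bits keep the
+     * hypervisor's choice, unmasked bits follow the guest's shadow value. */
+    static uint64_t effective_spec_ctrl(uint64_t mask, uint64_t host_val,
+                                        uint64_t shadow_val)
+    {
+        return (host_val & mask) | (shadow_val & ~mask);
+    }
+
+The shadow value is also what the guest reads back, which keeps guest reads
+consistent without extra interception.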
+ +Signed-off-by: Roger Pau Monné +Signed-off-by: Andrew Cooper +(cherry picked from commit 97c5b8b657e41a6645de9d40713b881234417b49) +--- + xen/arch/x86/hvm/vmx/vmcs.c | 12 +++++++- + xen/arch/x86/hvm/vmx/vmx.c | 37 ++++++++++++++++++++----- + xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 5 ++++ + xen/arch/x86/include/asm/msr.h | 7 +++-- + 4 files changed, 51 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c +index f0fb4874b8..11464c60ed 100644 +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -215,6 +215,7 @@ static void __init vmx_display_features(void) + P(cpu_has_vmx_tsc_scaling, "TSC Scaling"); + P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection"); + P(cpu_has_vmx_notify_vm_exiting, "Notify VM Exit"); ++ P(cpu_has_vmx_virt_spec_ctrl, "Virtualize SPEC_CTRL"); + #undef P + + if ( !printed ) +@@ -378,7 +379,7 @@ static int vmx_init_vmcs_config(bool bsp) + + if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS ) + { +- uint64_t opt = 0; ++ uint64_t opt = TERTIARY_EXEC_VIRT_SPEC_CTRL; + + _vmx_tertiary_exec_control = adjust_vmx_controls2( + "Tertiary Exec Control", 0, opt, +@@ -1377,6 +1378,12 @@ static int construct_vmcs(struct vcpu *v) + if ( cpu_has_vmx_tsc_scaling ) + __vmwrite(TSC_MULTIPLIER, d->arch.hvm.tsc_scaling_ratio); + ++ if ( cpu_has_vmx_virt_spec_ctrl ) ++ { ++ __vmwrite(SPEC_CTRL_MASK, 0); ++ __vmwrite(SPEC_CTRL_SHADOW, 0); ++ } ++ + /* will update HOST & GUEST_CR3 as reqd */ + paging_update_paging_modes(v); + +@@ -2087,6 +2094,9 @@ void vmcs_dump_vcpu(struct vcpu *v) + if ( v->arch.hvm.vmx.secondary_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY ) + printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS)); ++ if ( cpu_has_vmx_virt_spec_ctrl ) ++ printk("SPEC_CTRL mask = 0x%016lx shadow = 0x%016lx\n", ++ vmr(SPEC_CTRL_MASK), vmr(SPEC_CTRL_SHADOW)); + + printk("*** Host State ***\n"); + printk("RIP = 0x%016lx (%ps) RSP = 0x%016lx\n", +diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c +index 26b6e4ca61..38d6d78607 100644 +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -759,23 +759,28 @@ static void cf_check vmx_cpuid_policy_changed(struct vcpu *v) + /* + * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP + * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored. ++ * ++ * If VMX_VIRT_SPEC_CTRL is available, it is activated by default and the ++ * guest MSR_SPEC_CTRL value lives in the VMCS. Otherwise, it lives in ++ * the MSR load/save list. + */ + if ( cp->feat.ibrsb ) + { + vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); + +- rc = vmx_add_guest_msr(v, MSR_SPEC_CTRL, 0); +- if ( rc ) +- goto out; ++ if ( !cpu_has_vmx_virt_spec_ctrl ) ++ { ++ rc = vmx_add_guest_msr(v, MSR_SPEC_CTRL, 0); ++ if ( rc ) ++ goto out; ++ } + } + else + { + vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); + +- rc = vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST); +- if ( rc && rc != -ESRCH ) +- goto out; +- rc = 0; /* Tolerate -ESRCH */ ++ if ( !cpu_has_vmx_virt_spec_ctrl ) ++ vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST); + } + + /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */ +@@ -2592,6 +2597,10 @@ static uint64_t cf_check vmx_get_reg(struct vcpu *v, unsigned int reg) + switch ( reg ) + { + case MSR_SPEC_CTRL: ++ if ( cpu_has_vmx_virt_spec_ctrl ) ++ /* Guest value in VMCS - fetched below. 
*/ ++ break; ++ + rc = vmx_read_guest_msr(v, reg, &val); + if ( rc ) + { +@@ -2612,6 +2621,11 @@ static uint64_t cf_check vmx_get_reg(struct vcpu *v, unsigned int reg) + vmx_vmcs_enter(v); + switch ( reg ) + { ++ case MSR_SPEC_CTRL: ++ ASSERT(cpu_has_vmx_virt_spec_ctrl); ++ __vmread(SPEC_CTRL_SHADOW, &val); ++ break; ++ + case MSR_IA32_BNDCFGS: + __vmread(GUEST_BNDCFGS, &val); + break; +@@ -2636,6 +2650,10 @@ static void cf_check vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val) + switch ( reg ) + { + case MSR_SPEC_CTRL: ++ if ( cpu_has_vmx_virt_spec_ctrl ) ++ /* Guest value in VMCS - set below. */ ++ break; ++ + rc = vmx_write_guest_msr(v, reg, val); + if ( rc ) + { +@@ -2650,6 +2668,11 @@ static void cf_check vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val) + vmx_vmcs_enter(v); + switch ( reg ) + { ++ case MSR_SPEC_CTRL: ++ ASSERT(cpu_has_vmx_virt_spec_ctrl); ++ __vmwrite(SPEC_CTRL_SHADOW, val); ++ break; ++ + case MSR_IA32_BNDCFGS: + __vmwrite(GUEST_BNDCFGS, val); + break; +diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h +index bbb0966fc3..63074a49c0 100644 +--- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h ++++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h +@@ -281,6 +281,9 @@ extern u32 vmx_secondary_exec_control; + #define TERTIARY_EXEC_VIRT_SPEC_CTRL BIT(7, UL) + extern uint64_t vmx_tertiary_exec_control; + ++#define cpu_has_vmx_virt_spec_ctrl \ ++ (vmx_tertiary_exec_control & TERTIARY_EXEC_VIRT_SPEC_CTRL) ++ + #define VMX_EPT_EXEC_ONLY_SUPPORTED 0x00000001 + #define VMX_EPT_WALK_LENGTH_4_SUPPORTED 0x00000040 + #define VMX_EPT_MEMORY_TYPE_UC 0x00000100 +@@ -443,6 +446,8 @@ enum vmcs_field { + XSS_EXIT_BITMAP = 0x0000202c, + TSC_MULTIPLIER = 0x00002032, + TERTIARY_VM_EXEC_CONTROL = 0x00002034, ++ SPEC_CTRL_MASK = 0x0000204a, ++ SPEC_CTRL_SHADOW = 0x0000204c, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + VMCS_LINK_POINTER = 0x00002800, + GUEST_IA32_DEBUGCTL = 0x00002802, +diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h +index adda736efc..0309b7dfbe 100644 +--- a/xen/arch/x86/include/asm/msr.h ++++ b/xen/arch/x86/include/asm/msr.h +@@ -290,8 +290,11 @@ struct vcpu_msrs + * For PV guests, this holds the guest kernel value. It is accessed on + * every entry/exit path. + * +- * For VT-x guests, the guest value is held in the MSR guest load/save +- * list. ++ * For VT-x guests, one of two situations exist: ++ * ++ * - If hardware supports virtualized MSR_SPEC_CTRL, it is active by ++ * default and the guest value lives in the VMCS. ++ * - Otherwise, the guest value is held in the MSR load/save list. + * + * For SVM, the guest value lives in the VMCB, and hardware saves/restores + * the host value automatically. However, guests run with the OR of the +-- +2.44.0 + diff --git a/0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch b/0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch new file mode 100644 index 00000000..7b21004e --- /dev/null +++ b/0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch @@ -0,0 +1,145 @@ +From e32ff92de8905f35b7e0e44b53100271b5992ce2 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Tue, 26 Mar 2024 22:43:18 +0000 +Subject: [PATCH 535/542] x86/spec-ctrl: Widen the {xen,last,default}_spec_ctrl + fields + +Right now, they're all bytes, but MSR_SPEC_CTRL has been steadily gaining new +features. + +No functional change. 
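+
+For illustration (the bit positions are assumptions based on the SPEC_CTRL_*
+constants used later in this series), a byte-sized field silently truncates
+the newer controls:
+
+    #define SPEC_CTRL_IBRS      (1u <<  0)
+    #define SPEC_CTRL_BHI_DIS_S (1u << 10)  /* assumed bit position */
+
+    uint8_t      old_field = SPEC_CTRL_BHI_DIS_S; /* truncates to 0 */
+    unsigned int new_field = SPEC_CTRL_BHI_DIS_S; /* keeps bit 10 */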
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 45dac88e78e8a2d9d8738eef884fe6730faf9e67) +--- + xen/arch/x86/hvm/svm/entry.S | 10 +++++----- + xen/arch/x86/hvm/vmx/entry.S | 2 +- + xen/arch/x86/include/asm/current.h | 4 ++-- + xen/arch/x86/include/asm/spec_ctrl.h | 2 +- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 6 +++--- + xen/arch/x86/spec_ctrl.c | 2 +- + 6 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index 58d8a1bffa..8779856fb5 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -63,14 +63,14 @@ __UNLIKELY_END(nsvm_hap) + /* SPEC_CTRL_EXIT_TO_SVM Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ + .macro svm_vmentry_spec_ctrl + mov VCPU_arch_msrs(%rbx), %rax +- movzbl CPUINFO_last_spec_ctrl(%rsp), %edx ++ mov CPUINFO_last_spec_ctrl(%rsp), %edx + mov VCPUMSR_spec_ctrl_raw(%rax), %eax + cmp %edx, %eax + je 1f /* Skip write if value is correct. */ + mov $MSR_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr +- mov %al, CPUINFO_last_spec_ctrl(%rsp) ++ mov %eax, CPUINFO_last_spec_ctrl(%rsp) + 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. */ + .endm + ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM +@@ -130,14 +130,14 @@ __UNLIKELY_END(nsvm_hap) + * safe to use. The guest's setting resides in the VMCB. + */ + .macro svm_vmexit_spec_ctrl +- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax +- movzbl CPUINFO_last_spec_ctrl(%rsp), %edx ++ mov CPUINFO_xen_spec_ctrl(%rsp), %eax ++ mov CPUINFO_last_spec_ctrl(%rsp), %edx + cmp %edx, %eax + je 1f /* Skip write if value is correct. */ + mov $MSR_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr +- mov %al, CPUINFO_last_spec_ctrl(%rsp) ++ mov %eax, CPUINFO_last_spec_ctrl(%rsp) + 1: + .endm + ALTERNATIVE "", svm_vmexit_spec_ctrl, X86_FEATURE_SC_MSR_HVM +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index 008d76a6e1..9250eb1839 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -52,7 +52,7 @@ ENTRY(vmx_asm_vmexit_handler) + */ + .macro restore_spec_ctrl + mov $MSR_SPEC_CTRL, %ecx +- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax ++ mov CPUINFO_xen_spec_ctrl(%rsp), %eax + xor %edx, %edx + wrmsr + .endm +diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h +index 9cc8d8e3d4..ba82b413e2 100644 +--- a/xen/arch/x86/include/asm/current.h ++++ b/xen/arch/x86/include/asm/current.h +@@ -55,8 +55,8 @@ struct cpu_info { + + /* See asm/spec_ctrl_asm.h for usage. 
*/
+ unsigned int shadow_spec_ctrl;
+- uint8_t xen_spec_ctrl;
+- uint8_t last_spec_ctrl;
++ unsigned int xen_spec_ctrl;
++ unsigned int last_spec_ctrl;
+ uint8_t scf; /* SCF_* */
+
+ /*
+diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
+index 8fc350abe2..7048e5ee21 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl.h
++++ b/xen/arch/x86/include/asm/spec_ctrl.h
+@@ -93,7 +93,7 @@ extern int8_t opt_eager_fpu;
+ extern int8_t opt_l1d_flush;
+
+ extern bool bsp_delay_spec_ctrl;
+-extern uint8_t default_xen_spec_ctrl;
++extern unsigned int default_xen_spec_ctrl;
+ extern uint8_t default_scf;
+
+ extern int8_t opt_xpti_hwdom, opt_xpti_domu;
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index 9531d046d7..d232172159 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -217,10 +217,10 @@
+ setnz %al
+ not %eax
+ and %al, STACK_CPUINFO_FIELD(scf)(%r14)
+- movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
++ mov STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
+ .else
+ andb $~SCF_use_shadow, CPUINFO_scf(%rsp)
+- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
++ mov CPUINFO_xen_spec_ctrl(%rsp), %eax
+ .endif
+
+ wrmsr
+@@ -418,7 +418,7 @@
+
+ /* Load Xen's intended value. */
+ mov $MSR_SPEC_CTRL, %ecx
+- movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
++ mov STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
+ wrmsr
+
+ .L\@_skip_msr_spec_ctrl:
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 2b22deb891..5ccd82f161 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -68,7 +68,7 @@ static bool __initdata opt_branch_harden =
+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH);
+
+ bool __initdata bsp_delay_spec_ctrl;
+-uint8_t __ro_after_init default_xen_spec_ctrl;
++unsigned int __ro_after_init default_xen_spec_ctrl;
+ uint8_t __ro_after_init default_scf;
+
+ paddr_t __ro_after_init l1tf_addr_mask, __ro_after_init l1tf_safe_maddr;
+--
+2.44.0
+
diff --git a/0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch b/0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch
new file mode 100644
index 00000000..05913c37
--- /dev/null
+++ b/0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch
@@ -0,0 +1,111 @@
+From e2a2cfb4ddf05a76e072ea84172d6a83ba392d20 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper 
+Date: Fri, 22 Dec 2023 17:44:48 +0000
+Subject: [PATCH 536/542] x86: Use indirect calls in reset-stack infrastructure
+
+Mixing up JMP and CALL indirect targets leads to a very fun form of
+speculative type confusion. A target which is expecting to be CALLed needs a
+return address on the stack, and an indirect JMP doesn't place one there.
+
+An indirect JMP which predicts to a target intending to be CALLed can end up
+with a RET speculatively executing with a value from the JMPer's stack frame.
+
+There are several ways to get indirect JMPs in Xen.
+
+ * From tailcall optimisations. These are safe because the compiler has
+ arranged the stack to point at the callee's return address.
+
+ * From jump tables. These are unsafe, but Xen is built with -fno-jump-tables
+ to work around several compiler issues.
+
+ * From reset_stack_and_jump_ind(), which is particularly unsafe. Because of
+ the additional stack adjustment made, the value picked up off the stack is
+ regs->r15 of the next vCPU to run. 
+
+In order to mitigate this type confusion, we want to make all indirect targets
+be CALL targets, and remove the use of indirect JMP except via tailcall
+optimisation.
+
+Luckily due to XSA-348, all C target functions of reset_stack_and_jump_ind()
+are noreturn. {svm,vmx}_do_resume() exits via reset_stack_and_jump(), a
+direct JMP with entirely different prediction properties. idle_loop() is an
+infinite loop which eventually exits via reset_stack_and_jump_ind() from a new
+schedule. i.e. these paths are all fine having one extra return address on
+the stack.
+
+This leaves continue_pv_domain(), which is expecting to be a JMP target.
+Alter it to strip the return address off the stack, which is safe because
+there isn't actually a RET expecting to return to its caller.
+
+This allows us to change reset_stack_and_jump_ind() to reset_stack_and_call_ind()
+in order to mitigate the speculative type confusion.
+
+This is part of XSA-456 / CVE-2024-2201.
+
+Signed-off-by: Andrew Cooper 
+Reviewed-by: Jan Beulich 
+(cherry picked from commit 8e186f98ce0e35d1754ec9299da41ec98873b65c)
+---
+ xen/arch/x86/domain.c | 4 ++--
+ xen/arch/x86/include/asm/current.h | 4 ++--
+ xen/arch/x86/x86_64/entry.S | 8 ++++++++
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 228763b5e9..5dbd1d8a12 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -2112,12 +2112,12 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
+ /* Ensure that the vcpu has an up-to-date time base. */
+ update_vcpu_system_time(next);
+
+- reset_stack_and_jump_ind(nextd->arch.ctxt_switch->tail);
++ reset_stack_and_call_ind(nextd->arch.ctxt_switch->tail);
+ }
+
+ void continue_running(struct vcpu *same)
+ {
+- reset_stack_and_jump_ind(same->domain->arch.ctxt_switch->tail);
++ reset_stack_and_call_ind(same->domain->arch.ctxt_switch->tail);
+ }
+
+ int __sync_local_execstate(void)
+diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h
+index ba82b413e2..2f723bcf1b 100644
+--- a/xen/arch/x86/include/asm/current.h
++++ b/xen/arch/x86/include/asm/current.h
+@@ -196,10 +196,10 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
+ switch_stack_and_jump(fn, "jmp %c", "i")
+
+ /* The constraint may only specify non-call-clobbered registers. */
+-#define reset_stack_and_jump_ind(fn) \
++#define reset_stack_and_call_ind(fn) \
+ ({ \
+ (void)((fn) == (void (*)(void))NULL); \
+- switch_stack_and_jump(fn, "INDIRECT_JMP %", "b"); \
++ switch_stack_and_jump(fn, "INDIRECT_CALL %", "b"); \
+ })
+
+ /*
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 801d241337..9c256746ba 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -637,6 +637,14 @@ ENTRY(dom_crash_sync_extable)
+ #ifdef CONFIG_PV
+ ENTRY(continue_pv_domain)
+ ENDBR64
++
++ /*
++ * For speculative type confusion reasons, we're CALLed rather than
++ * JMPed to. Drop the return address. 
++ */
++ add $8, %rsp
++ ALTERNATIVE "", "mov $2, %eax; incsspd %eax", X86_FEATURE_XEN_SHSTK
++
+ call check_wakeup_from_wait
+ ret_from_intr:
+ GET_CURRENT(bx)
+--
+2.44.0
+
diff --git a/0537-x86-Drop-INDIRECT_JMP.patch b/0537-x86-Drop-INDIRECT_JMP.patch
new file mode 100644
index 00000000..9a95f42c
--- /dev/null
+++ b/0537-x86-Drop-INDIRECT_JMP.patch
@@ -0,0 +1,68 @@
+From 801e251556c374ce3e84ca776f211e00431932ef Mon Sep 17 00:00:00 2001
+From: Andrew Cooper 
+Date: Fri, 22 Dec 2023 18:01:37 +0000
+Subject: [PATCH 537/542] x86: Drop INDIRECT_JMP
+
+Indirect JMPs which are not tailcalls can lead to an unwelcome form of
+speculative type confusion, and we've removed the uses of INDIRECT_JMP to
+compensate. Remove the temptation to reintroduce new instances.
+
+This is part of XSA-456 / CVE-2024-2201.
+
+Signed-off-by: Andrew Cooper 
+Reviewed-by: Jan Beulich 
+(cherry picked from commit 0b66d7ce3c0290eaad28bdafb35200052d012b14)
+---
+ xen/arch/x86/include/asm/asm-defns.h | 18 ++++--------------
+ 1 file changed, 4 insertions(+), 14 deletions(-)
+
+diff --git a/xen/arch/x86/include/asm/asm-defns.h b/xen/arch/x86/include/asm/asm-defns.h
+index 8bd9007731..7e22fcb9c0 100644
+--- a/xen/arch/x86/include/asm/asm-defns.h
++++ b/xen/arch/x86/include/asm/asm-defns.h
+@@ -20,10 +20,9 @@
+ .byte 0x0f, 0x01, 0xdd
+ .endm
+
+-.macro INDIRECT_BRANCH insn:req arg:req
++.macro INDIRECT_CALL arg:req
+ /*
+- * Create an indirect branch. insn is one of call/jmp, arg is a single
+- * register.
++ * Create an indirect call. arg is a single register.
+ *
+ * With no compiler support, this degrades into a plain indirect call/jmp.
+ * With compiler support, dispatch to the correct __x86_indirect_thunk_*
+@@ -33,7 +32,7 @@
+ $done = 0
+ .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15
+ .ifeqs "\arg", "%r\reg"
+- \insn __x86_indirect_thunk_r\reg
++ call __x86_indirect_thunk_r\reg
+ $done = 1
+ .exitm
+ .endif
+@@ -44,19 +43,10 @@
+ .endif
+
+ .else
+- \insn *\arg
++ call *\arg
+ .endif
+ .endm
+
+-/* Convenience wrappers. */
+-.macro INDIRECT_CALL arg:req
+- INDIRECT_BRANCH call \arg
+-.endm
+-
+-.macro INDIRECT_JMP arg:req
+- INDIRECT_BRANCH jmp \arg
+-.endm
+-
+ #ifdef CONFIG_XEN_IBT
+ # define ENDBR64 endbr64
+ #else
+--
+2.44.0
+
diff --git a/0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch b/0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch
new file mode 100644
index 00000000..0324aa9b
--- /dev/null
+++ b/0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch
@@ -0,0 +1,189 @@
+From 02a424a7a8be13f6f85eb4fa2f43100b8e76f760 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper 
+Date: Sat, 6 Apr 2024 20:36:54 +0100
+Subject: [PATCH 538/542] x86/tsx: Expose RTM_ALWAYS_ABORT to guests
+
+A TSX Abort is one option to mitigate Native-BHI, but a guest kernel doesn't
+get to see this if Xen has turned RTM off using MSR_TSX_{CTRL,FORCE_ABORT}.
+
+Therefore, the meaning of RTM_ALWAYS_ABORT has been adjusted to "XBEGIN won't
+fault", and it should be exposed to guests so they can make a better decision.
+
+Expose it in the max policy for any RTM-capable system. Offer it by default
+only if RTM has been disabled.
+
+Update test-tsx to account for this new meaning. While adjusting the logic in
+test_guest_policies(), take the opportunity to use feature names (now they're
+available) to make the logic easier to follow.
+
+This is part of XSA-456 / CVE-2024-2201. 
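+
+As a guest-side illustration (cpu_has_* stands in for whatever feature-test
+helpers a guest kernel provides; it is not an API from this patch), the
+adjusted meaning allows a safe choice of the TSX-abort approach:
+
+    /* Any trace of RTM now implies that XBEGIN won't fault, so a TSX
+     * abort is usable even though transactions never commit. */
+    static bool can_use_tsx_abort(void)
+    {
+        return cpu_has_rtm || cpu_has_rtm_always_abort;
+    }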
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit c94e2105924347de0d9f32065370e802a20cc829) +--- + tools/tests/tsx/test-tsx.c | 39 ++++++++++++++------- + xen/arch/x86/cpu-policy.c | 20 +++++++++++ + xen/include/public/arch-x86/cpufeatureset.h | 2 +- + 3 files changed, 47 insertions(+), 14 deletions(-) + +diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c +index 0f4ea5f9c4..fab5c9a367 100644 +--- a/tools/tests/tsx/test-tsx.c ++++ b/tools/tests/tsx/test-tsx.c +@@ -311,25 +311,25 @@ static void test_guest_policies(const struct cpu_policy *max, + dump_tsx_details(max, "Max:"); + dump_tsx_details(def, "Def:"); + +- if ( ((max->feat.raw[0].d | def->feat.raw[0].d) & +- (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) | +- bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT) | +- bitmaskof(X86_FEATURE_SRBDS_CTRL))) || +- ((max->arch_caps.raw | def->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) ) ++ if ( max->feat.tsx_force_abort || def->feat.tsx_force_abort || ++ max->feat.srbds_ctrl || def->feat.srbds_ctrl || ++ max->arch_caps.tsx_ctrl || def->arch_caps.tsx_ctrl ) + fail(" Xen-only TSX controls offered to guest\n"); + + switch ( rtm_behaviour ) + { + case RTM_UD: +- if ( (max->feat.raw[0].b | def->feat.raw[0].b) & +- (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) +- fail(" HLE/RTM offered to guests despite not being available\n"); ++ if ( max->feat.hle || def->feat.hle || ++ max->feat.rtm || def->feat.rtm || ++ max->feat.rtm_always_abort || def->feat.rtm_always_abort ) ++ fail(" HLE/RTM/RTM_AA offered to guests despite not being available\n"); + break; + + case RTM_ABORT: +- if ( def->feat.raw[0].b & +- (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) ++ if ( def->feat.hle || def->feat.rtm ) + fail(" HLE/RTM offered to guests by default despite not being usable\n"); ++ if ( !def->feat.rtm_always_abort ) ++ fail(" RTM_AA not offered to guests by default despite being available\n"); + break; + + case RTM_OK: +@@ -340,6 +340,9 @@ static void test_guest_policies(const struct cpu_policy *max, + + if ( def->feat.hle ) + fail(" Fail: HLE offered in default policy\n"); ++ ++ if ( def->feat.rtm && def->feat.rtm_always_abort ) ++ fail(" Fail: Both RTM and RTM_AA offered in default policy\n"); + } + + static void test_def_max_policies(void) +@@ -388,14 +391,13 @@ static void test_guest(struct xen_domctl_createdomain *c) + + if ( guest_policy.policy.feat.hle || + guest_policy.policy.feat.tsx_force_abort || +- guest_policy.policy.feat.rtm_always_abort || + guest_policy.policy.feat.srbds_ctrl || + guest_policy.policy.arch_caps.tsx_ctrl ) + fail(" Unexpected features advertised\n"); + + if ( host.policy.feat.rtm ) + { +- unsigned int _7b0; ++ unsigned int _7b0, _7d0; + + /* + * If host RTM is available, all combinations of guest flags should be +@@ -403,6 +405,8 @@ static void test_guest(struct xen_domctl_createdomain *c) + */ + _7b0 = (guest_policy.policy.feat.raw[0].b ^= + (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM))); ++ _7d0 = (guest_policy.policy.feat.raw[0].d ^= ++ bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT)); + + /* Set the new policy. 
*/ + rc = xc_cpu_policy_set_domain(xch, domid, &guest_policy); +@@ -426,10 +430,17 @@ static void test_guest(struct xen_domctl_createdomain *c) + + if ( guest_policy.policy.feat.raw[0].b != _7b0 ) + { +- fail(" Expected CPUID.7[1].b 0x%08x differs from actual 0x%08x\n", ++ fail(" Expected CPUID.7[0].b 0x%08x differs from actual 0x%08x\n", + _7b0, guest_policy.policy.feat.raw[0].b); + goto out; + } ++ ++ if ( guest_policy.policy.feat.raw[0].d != _7d0 ) ++ { ++ fail(" Expected CPUID.7[0].d 0x%08x differs from actual 0x%08x\n", ++ _7d0, guest_policy.policy.feat.raw[0].d); ++ goto out; ++ } + } + + out: +@@ -514,6 +525,8 @@ static void test_tsx(void) + i, errno, strerror(errno)); + } + ++ dump_tsx_details(&host.policy, "Host:"); ++ + rc = xc_physinfo(xch, &physinfo); + if ( rc ) + return fail("Failed to obtain physinfo: %d - %s\n", +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index 24acd12ce2..e44de3cfcb 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -468,6 +468,21 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) + */ + __set_bit(X86_FEATURE_HTT, fs); + __set_bit(X86_FEATURE_CMP_LEGACY, fs); ++ ++ /* ++ * To mitigate Native-BHI, one option is to use a TSX Abort on capable ++ * systems. This is safe even if RTM has been disabled for other reasons ++ * via MSR_TSX_{CTRL,FORCE_ABORT}. However, a guest kernel doesn't get to ++ * know this type of information. ++ * ++ * Therefore the meaning of RTM_ALWAYS_ABORT has been adjusted, to instead ++ * mean "XBEGIN won't fault". This is enough for a guest kernel to make ++ * an informed choice WRT mitigating Native-BHI. ++ * ++ * If RTM-capable, we can run a VM which has seen RTM_ALWAYS_ABORT. ++ */ ++ if ( test_bit(X86_FEATURE_RTM, fs) ) ++ __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs); + } + + static void __init guest_common_default_feature_adjustments(uint32_t *fs) +@@ -540,9 +555,14 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) + * function as expected, but is technically compatible with the ISA. + * + * Do not advertise RTM to guests by default if it won't actually work. ++ * Instead, advertise RTM_ALWAYS_ABORT indicating that TSX Aborts are safe ++ * to use, e.g. for mitigating Native-BHI. + */ + if ( rtm_disabled ) ++ { + __clear_bit(X86_FEATURE_RTM, fs); ++ __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs); ++ } + } + + static void __init guest_common_feature_adjustments(uint32_t *fs) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 63c8ac8486..0004fd4bf5 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -261,7 +261,7 @@ XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ + XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ + XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */ + XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ +-XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ ++XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! 
RTM disabled (but XBEGIN won't fault) */
+ XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
+ XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */
+ XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */
+--
+2.44.0
+
diff --git a/0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch b/0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch
new file mode 100644
index 00000000..44f4b3c3
--- /dev/null
+++ b/0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch
@@ -0,0 +1,177 @@
+From 7d3ad6775d59ad292c3b6431e8eb73e10d191298 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper 
+Date: Tue, 26 Mar 2024 19:01:37 +0000
+Subject: [PATCH 539/542] x86/spec-ctrl: Support BHI_DIS_S in order to mitigate
+ BHI
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce a "bhi-dis-s" boolean to match the other options we have for
+MSR_SPEC_CTRL values. Also introduce bhi_calculations().
+
+Use BHI_DIS_S whenever possible.
+
+Guests which are levelled to be migration compatible with older CPUs can't see
+BHI_DIS_S, and Xen must fill in the difference to make the guest safe. Use
+the virt MSR_SPEC_CTRL infrastructure to force BHI_DIS_S behind the guest's
+back.
+
+This is part of XSA-456 / CVE-2024-2201.
+
+Signed-off-by: Andrew Cooper 
+Acked-by: Roger Pau Monné 
+(cherry picked from commit 62a1106415c5e8a49b45147ca84d54a58d471343)
+---
+ docs/misc/xen-command-line.pandoc | 8 +++++++-
+ xen/arch/x86/hvm/vmx/vmx.c | 17 +++++++++++++++++
+ xen/arch/x86/include/asm/spec_ctrl.h | 1 +
+ xen/arch/x86/spec_ctrl.c | 24 +++++++++++++++++++++++-
+ 4 files changed, 48 insertions(+), 2 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index e1d56407dd..0b0abf8983 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2327,7 +2327,8 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ > {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+-> unpriv-mmio,gds-mit,div-scrub}= ]`
++> unpriv-mmio,gds-mit,div-scrub,
++> bhi-dis-s}= ]`
+
+ Controls for speculative execution sidechannel mitigations. By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2408,6 +2409,11 @@ option can be used to force or prevent Xen using the feature itself. By
+ default, Xen will not use PSFD. PSFD is implied by SSBD, and SSBD is off by
+ default.
+
++On hardware supporting BHI_DIS_S (Branch History Injection Disable
++Supervisor), the `bhi-dis-s=` option can be used to force or prevent Xen using
++the feature itself. By default Xen will use BHI_DIS_S on hardware susceptible
++to Branch History Injection.
++
+ On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=`
+ option can be used to force (the default) or prevent Xen from issuing branch
+ prediction barriers on vcpu context switches. 
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 38d6d78607..e5259ed034 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -58,6 +58,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+
+ static bool_t __initdata opt_force_ept;
+@@ -783,6 +784,22 @@ static void cf_check vmx_cpuid_policy_changed(struct vcpu *v)
+ vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST);
+ }
+
++ if ( cpu_has_vmx_virt_spec_ctrl )
++ {
++ /*
++ * If we're on BHI_DIS_S capable hardware, the short loop sequence is
++ * not sufficient to mitigate Native-BHI. If the VM can't see it
++ * (i.e. it's levelled with older hardware), force it behind the
++ * guest's back for safety.
++ *
++ * Because there's not a real Host/Guest split of the MSR_SPEC_CTRL
++ * value, this only works as expected when Xen is using BHI_DIS_S too.
++ */
++ bool force_bhi_dis_s = opt_bhi_dis_s && !cp->feat.bhi_ctrl;
++
++ __vmwrite(SPEC_CTRL_MASK, force_bhi_dis_s ? SPEC_CTRL_BHI_DIS_S : 0);
++ }
++
+ /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */
+ if ( cp->feat.ibrsb || cp->extd.ibpb )
+ vmx_clear_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW);
+diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
+index 7048e5ee21..617209202b 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl.h
++++ b/xen/arch/x86/include/asm/spec_ctrl.h
+@@ -89,6 +89,7 @@ static always_inline void spec_ctrl_new_guest_context(void)
+
+ extern int8_t opt_ibpb_ctxt_switch;
+ extern bool opt_ssbd;
++extern int8_t opt_bhi_dis_s;
+ extern int8_t opt_eager_fpu;
+ extern int8_t opt_l1d_flush;
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 5ccd82f161..085e37525d 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -59,6 +59,7 @@ static int8_t __initdata opt_ibrs = -1;
+ int8_t __initdata opt_stibp = -1;
+ bool __ro_after_init opt_ssbd;
+ int8_t __initdata opt_psfd = -1;
++int8_t __ro_after_init opt_bhi_dis_s = -1;
+
+ int8_t __ro_after_init opt_ibpb_ctxt_switch = -1;
+ int8_t __ro_after_init opt_eager_fpu = -1;
+@@ -281,6 +282,8 @@ static int __init cf_check parse_spec_ctrl(const char *s)
+ opt_ssbd = val;
+ else if ( (val = parse_boolean("psfd", s, ss)) >= 0 )
+ opt_psfd = val;
++ else if ( (val = parse_boolean("bhi-dis-s", s, ss)) >= 0 )
++ opt_bhi_dis_s = val;
+
+ /* Misc settings. */
+ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
+@@ -536,7 +539,7 @@ static void __init print_details(enum ind_thunk thunk)
+ "\n");
+
+ /* Settings for Xen's protection, irrespective of guests. */
+- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
++ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s%s, Other:%s%s%s%s%s%s\n",
+ thunk != THUNK_NONE ? "BTI-Thunk: " : "",
+ thunk == THUNK_NONE ? "" :
+ thunk == THUNK_RETPOLINE ? "RETPOLINE, " :
+ thunk == THUNK_LFENCE ? "LFENCE, " :
+ thunk == THUNK_JMP ? "JMP, " : "?, ",
+ (!boot_cpu_has(X86_FEATURE_PSFD) &&
+ !boot_cpu_has(X86_FEATURE_INTEL_PSFD)) ? "" :
+ (default_xen_spec_ctrl & SPEC_CTRL_PSFD) ? " PSFD+" : " PSFD-",
++ !boot_cpu_has(X86_FEATURE_BHI_CTRL) ? "" :
++ (default_xen_spec_ctrl & SPEC_CTRL_BHI_DIS_S) ? " BHI_DIS_S+" : " BHI_DIS_S-",
+ !(caps & ARCH_CAPS_TSX_CTRL) ? "" :
+ (opt_tsx & 1) ? " TSX+" : " TSX-",
+ !cpu_has_srbds_ctrl ? 
"" : +@@ -1608,6 +1613,21 @@ static void __init gds_calculations(void) + } + } + ++/* ++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html ++ */ ++static void __init bhi_calculations(void) ++{ ++ if ( opt_bhi_dis_s == -1 ) ++ opt_bhi_dis_s = !boot_cpu_has(X86_FEATURE_BHI_NO); ++ ++ if ( !boot_cpu_has(X86_FEATURE_BHI_CTRL) ) ++ opt_bhi_dis_s = false; ++ ++ if ( opt_bhi_dis_s ) ++ default_xen_spec_ctrl |= SPEC_CTRL_BHI_DIS_S; ++} ++ + void spec_ctrl_init_domain(struct domain *d) + { + bool pv = is_pv_domain(d); +@@ -2152,6 +2172,8 @@ void __init init_speculation_mitigations(void) + + gds_calculations(); + ++ bhi_calculations(); ++ + print_details(thunk); + + /* +-- +2.44.0 + diff --git a/0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch b/0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch new file mode 100644 index 00000000..79642fd5 --- /dev/null +++ b/0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch @@ -0,0 +1,349 @@ +From 4abd50dc17ced2e1221226b7893c437acbbd0d1b Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Jun 2023 19:41:44 +0100 +Subject: [PATCH 540/542] x86/spec-ctrl: Software BHB-clearing sequences +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement clear_bhb_{tsx,loops}() as per the BHI guidance. The loops variant +is set up as the "short" sequence. + +Introduce SCF_entry_bhb and extend SPEC_CTRL_ENTRY_* with a conditional call +to selected clearing routine. + +Note that due to a limitation in the ALTERNATIVE capability, the TEST/JZ can't +be included alongside a CALL in a single alternative block. This is going to +require further work to untangle. + +The BHB sequences (if used) must be after the restoration of Xen's +MSR_SPEC_CTRL value, which must be accounted for when judging whether it is +safe to skip the safety LFENCEs. + +This is part of XSA-456 / CVE-2024-2201. + +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné +(cherry picked from commit 954c983abceee97bf5f6230b9ae164f2c49a9aa9) +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/bhb-thunk.S | 98 ++++++++++++++++++++++++ + xen/arch/x86/hvm/vmx/entry.S | 12 +++ + xen/arch/x86/include/asm/cpufeature.h | 3 + + xen/arch/x86/include/asm/cpufeatures.h | 3 + + xen/arch/x86/include/asm/spec_ctrl.h | 3 +- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 30 ++++++++ + xen/arch/x86/spec_ctrl.c | 39 ++++++---- + 8 files changed, 171 insertions(+), 18 deletions(-) + create mode 100644 xen/arch/x86/bhb-thunk.S + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index cb9d952659..6a070a8cf8 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -14,6 +14,7 @@ alternative-y := alternative.init.o + alternative-$(CONFIG_LIVEPATCH) := + obj-bin-y += $(alternative-y) + obj-y += apic.o ++obj-y += bhb-thunk.o + obj-y += bitops.o + obj-bin-y += bzimage.init.o + obj-bin-y += clear_page.o +diff --git a/xen/arch/x86/bhb-thunk.S b/xen/arch/x86/bhb-thunk.S +new file mode 100644 +index 0000000000..f52cfb9bc2 +--- /dev/null ++++ b/xen/arch/x86/bhb-thunk.S +@@ -0,0 +1,98 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * Branch History Injection clearing sequences. ++ * ++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html ++ * ++ * Copyright (c) 2023, 2024 XenServer. 
++ */
++ .file __FILE__
++
++#include
++
++ .section .text.entry, "ax", @progbits
++
++/*
++ * Clear the Branch History Buffer using a TSX Abort.
++ *
++ * Any TSX Abort has a side effect of clearing the BHB, even when TSX is
++ * disabled for e.g. TAA mitigation reasons.
++ */
++ENTRY(clear_bhb_tsx)
++ .byte 0xc7, 0xf8; .long 1f - 0f /* xbegin 1f */
++0: .byte 0xc6, 0xf8, 0 /* xabort $0 */
++ int3
++1:
++ ret
++
++ .size clear_bhb_tsx, . - clear_bhb_tsx
++ .type clear_bhb_tsx, @function
++
++/*
++ * Clear the Branch History Buffer using the software sequence.
++ *
++ * Clobbers: %eax, %ecx
++ *
++ * This executes a specific number of taken branches, sufficient to displace
++ * all prior entries in the history tracker, therefore removing prior
++ * influence on subsequent BTB lookups.
++ *
++ * Structurally, it looks like this:
++ *
++ * call 1
++ * call 2
++ * ... 5x jmp loop
++ * call 2
++ * ... 5x jmp loop
++ * ... 5x call2's deep
++ *
++ * ret
++ * ret
++ * ret
++ * ret
++ *
++ * The CALL/RETs are necessary to prevent the Loop Stream Detector from
++ * interfering. The alignment is for performance and not safety.
++ *
++ * The "short" sequence (5 and 5) is for CPUs prior to Alder Lake / Sapphire
++ * Rapids (i.e. Cores prior to Golden Cove and/or Gracemont).
++ */
++ENTRY(clear_bhb_loops)
++ mov $5, %ecx
++
++ call 1f
++ jmp 5f
++ int3
++
++ .align 64
++1: call 2f
++ ret
++ int3
++
++ .align 64
++2: mov $5, %eax
++
++3: jmp 4f
++ int3
++
++4: sub $1, %eax
++ jnz 3b
++
++ sub $1, %ecx
++ jnz 1b
++
++ ret
++5:
++ /*
++ * The Intel sequence has an LFENCE here. The purpose is to ensure
++ * that all prior branches have executed, before dispatching a
++ * subsequent indirect branch.
++ *
++ * Xen's SPEC_CTRL_ENTRY_* blocks have safety LFENCEs at the end when
++ * protections are active, which suffices for this purpose.
++ */
++
++ ret
++
++ .size clear_bhb_loops, . - clear_bhb_loops
++ .type clear_bhb_loops, @function
+diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
+index 9250eb1839..1092d1918c 100644
+--- a/xen/arch/x86/hvm/vmx/entry.S
++++ b/xen/arch/x86/hvm/vmx/entry.S
+@@ -57,6 +57,18 @@ ENTRY(vmx_asm_vmexit_handler)
+ wrmsr
+ .endm
+ ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM
++
++ /*
++ * Clear the BHB to mitigate BHI. Used on eIBRS parts, and uses RETs
++ * itself so must be after we've performed all the RET-safety we can.
++ */
++ testb $SCF_entry_bhb, CPUINFO_scf(%rsp)
++ jz .L_skip_bhb
++ ALTERNATIVE_2 "", \
++ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \
++ "call clear_bhb_tsx", X86_SPEC_BHB_TSX
++.L_skip_bhb:
++
+ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_VMX
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/
+
+diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h
+index a6b8af1296..b24d535080 100644
+--- a/xen/arch/x86/include/asm/cpufeature.h
++++ b/xen/arch/x86/include/asm/cpufeature.h
+@@ -181,6 +181,9 @@
+ #define cpu_bug_fpu_ptrs boot_cpu_has(X86_BUG_FPU_PTRS)
+ #define cpu_bug_null_seg boot_cpu_has(X86_BUG_NULL_SEG)
+
++#define cpu_has_bhb_seq (boot_cpu_has(X86_SPEC_BHB_TSX) || \
++ boot_cpu_has(X86_SPEC_BHB_LOOPS))
++
+ enum _cache_type {
+ CACHE_TYPE_NULL = 0,
+ CACHE_TYPE_DATA = 1,
+diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h
+index 6422c66b0f..bada8912e0 100644
+--- a/xen/arch/x86/include/asm/cpufeatures.h
++++ b/xen/arch/x86/include/asm/cpufeatures.h
+@@ -56,5 +56,8 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for
+ #define X86_SPEC_NO_LFENCE_ENTRY_INTR X86_BUG(17) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_INTR. */
+ #define X86_SPEC_NO_LFENCE_ENTRY_VMX X86_BUG(18) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_VMX. */
+
++#define X86_SPEC_BHB_TSX X86_BUG(19) /* Use clear_bhb_tsx for BHI mitigation. */
++#define X86_SPEC_BHB_LOOPS X86_BUG(20) /* Use clear_bhb_loops for BHI mitigation.*/
++
+ /* Total number of capability words, inc synth and bug words. */
+ #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
+diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
+index 617209202b..4439a1b243 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl.h
++++ b/xen/arch/x86/include/asm/spec_ctrl.h
+@@ -36,6 +36,7 @@
+ #define SCF_verw (1 << 3)
+ #define SCF_ist_ibpb (1 << 4)
+ #define SCF_entry_ibpb (1 << 5)
++#define SCF_entry_bhb (1 << 6)
+
+ /*
+ * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some
+@@ -54,7 +55,7 @@
+ * Some speculative protections are per-domain. These settings are merged
+ * into the top-of-stack block in the context switch path.
+ */
+-#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb)
++#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb | SCF_entry_bhb)
+
+ #ifndef __ASSEMBLY__
+
+diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+index d232172159..1d7c4f4a68 100644
+--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
+@@ -285,6 +285,17 @@
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \
+ X86_FEATURE_SC_MSR_PV
+
++ /*
++ * Clear the BHB to mitigate BHI. Used on eIBRS parts, and uses RETs
++ * itself so must be after we've performed all the RET-safety we can.
++ */
++ testb $SCF_entry_bhb, %bl
++ jz .L\@_skip_bhb
++ ALTERNATIVE_2 "", \
++ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \
++ "call clear_bhb_tsx", X86_SPEC_BHB_TSX
++.L\@_skip_bhb:
++
+ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV
+ .endm
+
+@@ -323,6 +334,13 @@
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
+ X86_FEATURE_SC_MSR_PV
+
++ testb $SCF_entry_bhb, %bl
++ jz .L\@_skip_bhb
++ ALTERNATIVE_2 "", \
++ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \
++ "call clear_bhb_tsx", X86_SPEC_BHB_TSX
++.L\@_skip_bhb:
++
+ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR
+ .endm
+
+@@ -423,6 +441,18 @@
+
+ .L\@_skip_msr_spec_ctrl:
+
++ /*
++ * Clear the BHB to mitigate BHI. Used on eIBRS parts, and uses RETs
++ * itself so must be after we've performed all the RET-safety we can. 
++ */ ++ testb $SCF_entry_bhb, %bl ++ jz .L\@_skip_bhb ++ ++ ALTERNATIVE_2 "", \ ++ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \ ++ "call clear_bhb_tsx", X86_SPEC_BHB_TSX ++.L\@_skip_bhb: ++ + lfence + .endm + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 085e37525d..546199fabe 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -2194,38 +2194,43 @@ void __init init_speculation_mitigations(void) + /* + * SPEC_CTRL_ENTRY_FROM_PV conditional safety + * +- * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an +- * unconditional WRMSR as the last action. ++ * A BHB sequence, if used, is a conditional action and last. If we ++ * have this, then we must have the LFENCE. + * +- * If we have it, or we're not using any prior conditional mitigation, +- * then it's safe to drop the LFENCE. ++ * Otherwise, DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an ++ * unconditional WRMSR. If we do have it, or we're not using any ++ * prior conditional block, then it's safe to drop the LFENCE. + */ +- if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || +- !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ) ++ if ( !cpu_has_bhb_seq && ++ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || ++ !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV)) ) + setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV); + + /* + * SPEC_CTRL_ENTRY_FROM_INTR conditional safety + * +- * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an +- * unconditional WRMSR as the last action. ++ * A BHB sequence, if used, is a conditional action and last. If we ++ * have this, then we must have the LFENCE. + * +- * If we have it, or we have no protections active in the block that +- * is skipped when interrupting guest context, then it's safe to drop +- * the LFENCE. ++ * Otherwise DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an ++ * unconditional WRMSR. If we have it, or we have no protections ++ * active in the block that is skipped when interrupting guest ++ * context, then it's safe to drop the LFENCE. + */ +- if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || +- (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && +- !boot_cpu_has(X86_FEATURE_SC_RSB_PV)) ) ++ if ( !cpu_has_bhb_seq && ++ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || ++ (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && ++ !boot_cpu_has(X86_FEATURE_SC_RSB_PV))) ) + setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_INTR); + + /* + * SPEC_CTRL_ENTRY_FROM_VMX conditional safety + * +- * Currently there are no safety actions with conditional branches, so +- * no need for the extra safety LFENCE. ++ * A BHB sequence, if used, is the only conditional action, so if we ++ * don't have it, we don't need the safety LFENCE. + */ +- setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); ++ if ( !cpu_has_bhb_seq ) ++ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); + } + + /* +-- +2.44.0 + diff --git a/0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch b/0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch new file mode 100644 index 00000000..2d0940ea --- /dev/null +++ b/0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch @@ -0,0 +1,347 @@ +From eab897caca018c126f925247e4c9fd4aac54afdb Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Thu, 8 Jun 2023 19:41:44 +0100 +Subject: [PATCH 541/542] x86/spec-ctrl: Wire up the Native-BHI software + sequences +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In the absence of BHI_DIS_S, mitigating Native-BHI requires the use of a +software sequence. 
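+
+In outline (a condensed sketch of the decisions this patch adds, not the
+literal code):
+
+    if ( !cpu_has_bug_bhi() )
+        seq = BHB_NONE;      /* nothing to mitigate */
+    else if ( opt_bhi_dis_s )
+        seq = BHB_NONE;      /* hardware control in use instead */
+    else
+        seq = BHB_SHORT;     /* software sequence by default */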
+
+Introduce a new bhb-seq= option to select between available sequences and
+bhb-entry= to control the per-PV/HVM actions like we have for other blocks.
+
+Activate the short sequence by default for PV and HVM guests on affected
+hardware if BHI_DIS_S isn't present.
+
+This is part of XSA-456 / CVE-2024-2201.
+
+Signed-off-by: Andrew Cooper 
+Acked-by: Roger Pau Monné 
+(cherry picked from commit 689ad48ce9cf4c38297cd126e7e003a1c13a3b9d)
+---
+ docs/misc/xen-command-line.pandoc | 25 ++++--
+ xen/arch/x86/spec_ctrl.c | 145 ++++++++++++++++++++++++++++--
+ 2 files changed, 157 insertions(+), 13 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 0b0abf8983..0bd0588097 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2324,8 +2324,9 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+
+ ### spec-ctrl (x86)
+ > `= List of [ , xen=, {pv,hvm}=,
+-> {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=,
+-> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
++> {msr-sc,rsb,verw,{ibpb,bhb}-entry}=|{pv,hvm}=,
++> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx,
++> {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+ > unpriv-mmio,gds-mit,div-scrub,
+ > bhi-dis-s}= ]`
+@@ -2350,10 +2351,10 @@ in place for guests to use.
+
+ Use of a positive boolean value for either of these options is invalid.
+
+-The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options
+-offer fine grained control over the primitives by Xen. These impact Xen's
+-ability to protect itself, and/or Xen's ability to virtualise support for
+-guests to use.
++The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=`, `ibpb-entry=` and `bhb-entry=`
++options offer fine grained control over the primitives by Xen. These impact
++Xen's ability to protect itself, and/or Xen's ability to virtualise support
++for guests to use.
+
+ * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests
+ respectively.
+@@ -2379,6 +2380,12 @@ guests to use.
+ Return Stack Overflow if appropriate microcode has been loaded, but for
+ performance reasons dom0 is unprotected by default. If it is necessary to
+ protect dom0 too, boot with `spec-ctrl=ibpb-entry`.
++* `bhb-entry=` offers control over whether BHB-clearing (Branch History
++ Buffer) sequences are used on entry to Xen. This is used by default on
++ hardware vulnerable to Branch History Injection, when the BHI_DIS_S control
++ is not available (see `bhi-dis-s`). The choice of scrubbing sequence can be
++ selected using the `bhb-seq=` option. If it is necessary to protect dom0
++ too, boot with `spec-ctrl=bhb-entry`.
+
+ If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be
+ used to select which of the thunks gets patched into the
+ (generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget,
+ minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget).
+
++On all hardware, `bhb-seq=` can be used to select which of the BHB-clearing
++sequences gets used. This interacts with the `bhb-entry=` and `bhi-dis-s=`
++options in order to mitigate Branch History Injection on affected hardware.
++The default sequence is `short`, with `tsx` as an alternative available on
++capable hardware that can be opted in to. 
++ + On hardware supporting IBRS (Indirect Branch Restricted Speculation), the + `ibrs=` option can be used to force or prevent Xen using the feature itself. + If Xen is not using IBRS itself, functionality is still set up so IBRS can be +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 546199fabe..b53e9c4e7a 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -45,6 +45,16 @@ static int8_t __ro_after_init opt_ibpb_entry_pv = -1; + static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; + static bool __ro_after_init opt_ibpb_entry_dom0; + ++static int8_t __ro_after_init opt_bhb_entry_pv = -1; ++static int8_t __ro_after_init opt_bhb_entry_hvm = -1; ++static bool __ro_after_init opt_bhb_entry_dom0; ++static enum bhb_thunk { ++ BHB_DEFAULT, ++ BHB_NONE, ++ BHB_TSX, ++ BHB_SHORT, ++} opt_bhb_seq __initdata; ++ + /* Cmdline controls for Xen's speculative settings. */ + static enum ind_thunk { + THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ +@@ -128,8 +138,12 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_ibpb_entry_pv = 0; + opt_ibpb_entry_hvm = 0; + opt_ibpb_entry_dom0 = false; ++ opt_bhb_entry_pv = 0; ++ opt_bhb_entry_hvm = 0; ++ opt_bhb_entry_dom0 = false; + + opt_thunk = THUNK_JMP; ++ opt_bhb_seq = BHB_NONE; + opt_ibrs = 0; + opt_ibpb_ctxt_switch = false; + opt_ssbd = false; +@@ -158,6 +172,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_rsb_pv = val; + opt_verw_pv = val; + opt_ibpb_entry_pv = val; ++ opt_bhb_entry_pv = val; + } + else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) + { +@@ -165,6 +180,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_rsb_hvm = val; + opt_verw_hvm = val; + opt_ibpb_entry_hvm = val; ++ opt_bhb_entry_hvm = val; + } + else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) + { +@@ -252,6 +268,28 @@ static int __init cf_check parse_spec_ctrl(const char *s) + break; + } + } ++ else if ( (val = parse_boolean("bhb-entry", s, ss)) != -1 ) ++ { ++ switch ( val ) ++ { ++ case 0: ++ case 1: ++ opt_bhb_entry_pv = opt_bhb_entry_hvm = ++ opt_bhb_entry_dom0 = val; ++ break; ++ ++ case -2: ++ s += strlen("bhb-entry="); ++ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) ++ opt_bhb_entry_pv = val; ++ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) ++ opt_bhb_entry_hvm = val; ++ else ++ default: ++ rc = -EINVAL; ++ break; ++ } ++ } + + /* Xen's speculative sidechannel mitigation settings. */ + else if ( !strncmp(s, "bti-thunk=", 10) ) +@@ -272,6 +310,19 @@ static int __init cf_check parse_spec_ctrl(const char *s) + else + rc = -EINVAL; + } ++ else if ( !strncmp(s, "bhb-seq=", 8) ) ++ { ++ s += strlen("bhb-seq="); ++ ++ if ( !cmdline_strcmp(s, "none") ) ++ opt_bhb_seq = BHB_NONE; ++ else if ( !cmdline_strcmp(s, "tsx") ) ++ opt_bhb_seq = BHB_TSX; ++ else if ( !cmdline_strcmp(s, "short") ) ++ opt_bhb_seq = BHB_SHORT; ++ else ++ rc = -EINVAL; ++ } + + /* Bits in MSR_SPEC_CTRL. */ + else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) +@@ -539,12 +590,16 @@ static void __init print_details(enum ind_thunk thunk) + "\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s%s, Other:%s%s%s%s%s%s\n", ++ printk(" Xen settings: %s%s%s%sSPEC_CTRL: %s%s%s%s%s%s, Other:%s%s%s%s%s%s\n", + thunk != THUNK_NONE ? "BTI-Thunk: " : "", + thunk == THUNK_NONE ? "" : + thunk == THUNK_RETPOLINE ? "RETPOLINE, " : + thunk == THUNK_LFENCE ? "LFENCE, " : + thunk == THUNK_JMP ? 
"JMP, " : "?, ", ++ opt_bhb_seq != BHB_NONE ? "BHB-Seq: " : "", ++ opt_bhb_seq == BHB_NONE ? "" : ++ opt_bhb_seq == BHB_TSX ? "TSX, " : ++ opt_bhb_seq == BHB_SHORT ? "SHORT, " : "?, ", + (!boot_cpu_has(X86_FEATURE_IBRSB) && + !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : + (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", +@@ -583,11 +638,11 @@ static void __init print_details(enum ind_thunk thunk) + * mitigation support for guests. + */ + #ifdef CONFIG_HVM +- printk(" Support for HVM VMs:%s%s%s%s%s%s%s\n", ++ printk(" Support for HVM VMs:%s%s%s%s%s%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || +- amd_virt_spec_ctrl || ++ cpu_has_bhb_seq || amd_virt_spec_ctrl || + opt_eager_fpu || opt_verw_hvm) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || +@@ -595,20 +650,23 @@ static void __init print_details(enum ind_thunk thunk) + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", + opt_eager_fpu ? " EAGER_FPU" : "", + opt_verw_hvm ? " VERW" : "", +- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); ++ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "", ++ cpu_has_bhb_seq ? " BHB-entry" : ""); + + #endif + #ifdef CONFIG_PV +- printk(" Support for PV VMs:%s%s%s%s%s%s\n", ++ printk(" Support for PV VMs:%s%s%s%s%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + boot_cpu_has(X86_FEATURE_SC_RSB_PV) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || ++ cpu_has_bhb_seq || + opt_eager_fpu || opt_verw_pv) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", + opt_eager_fpu ? " EAGER_FPU" : "", + opt_verw_pv ? " VERW" : "", +- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : ""); ++ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "", ++ cpu_has_bhb_seq ? " BHB-entry" : ""); + + printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", + opt_xpti_hwdom ? "enabled" : "disabled", +@@ -1616,16 +1674,85 @@ static void __init gds_calculations(void) + /* + * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html + */ ++static bool __init cpu_has_bug_bhi(void) ++{ ++ /* BHI is only known to affect Intel Family 6 processors at this time. */ ++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || ++ boot_cpu_data.x86 != 6 ) ++ return false; ++ ++ if ( boot_cpu_has(X86_FEATURE_BHI_NO) ) ++ return false; ++ ++ if ( cpu_has_hypervisor ) ++ return true; /* TODO: how to figure out out if we're really eIBRS levelled out? */ ++ ++ return cpu_has_eibrs; ++} ++ + static void __init bhi_calculations(void) + { ++ bool has_bhi = cpu_has_bug_bhi(); ++ ++ /* ++ * To mitigate BHI, we want to use BHI_DIS_S wherever possible, or the ++ * short sequence otherwise. Other forms are available on request. ++ * ++ * We are repsonsbile for performing default-conversion on opt_bhi_dis_s ++ * and opt_bhb_seq, irrespective of succeptibility to BHI. ++ */ ++ + if ( opt_bhi_dis_s == -1 ) +- opt_bhi_dis_s = !boot_cpu_has(X86_FEATURE_BHI_NO); ++ opt_bhi_dis_s = has_bhi; + + if ( !boot_cpu_has(X86_FEATURE_BHI_CTRL) ) + opt_bhi_dis_s = false; + + if ( opt_bhi_dis_s ) + default_xen_spec_ctrl |= SPEC_CTRL_BHI_DIS_S; ++ ++ if ( opt_bhb_seq == BHB_DEFAULT ) ++ { ++ /* ++ * If we're using BHI_DIS_S, or we're not succeptable, don't activate ++ * the thunks. 
++ */
++ if ( !has_bhi || opt_bhi_dis_s )
++ opt_bhb_seq = BHB_NONE;
++ else
++ opt_bhb_seq = BHB_SHORT;
++ }
++
++ /*
++ * We can use the TSX even if it's disabled for e.g. TAA reasons.
++ * However, fall back to the loop sequence if there is no trace of RTM at
++ * all, as XBEGIN will #UD.
++ */
++ if ( opt_bhb_seq == BHB_TSX && !cpu_has_rtm && !cpu_has_rtm_always_abort &&
++ !cpu_has_tsx_force_abort )
++ opt_bhb_seq = BHB_SHORT;
++
++ /*
++ * Only activate SCF_entry_bhb by default for guests if a sequence is in place.
++ */
++ if ( opt_bhb_entry_pv == -1 )
++ opt_bhb_entry_pv = has_bhi && opt_bhb_seq != BHB_NONE;
++ if ( opt_bhb_entry_hvm == -1 )
++ opt_bhb_entry_hvm = has_bhi && opt_bhb_seq != BHB_NONE;
++
++ switch ( opt_bhb_seq )
++ {
++ case BHB_SHORT:
++ setup_force_cpu_cap(X86_SPEC_BHB_LOOPS);
++ break;
++
++ case BHB_TSX:
++ setup_force_cpu_cap(X86_SPEC_BHB_TSX);
++ break;
++
++ default:
++ break;
++ }
+ }
+
+ void spec_ctrl_init_domain(struct domain *d)
+@@ -1638,9 +1765,13 @@ void spec_ctrl_init_domain(struct domain *d)
+ bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) &&
+ (d->domain_id != 0 || opt_ibpb_entry_dom0));
+
++ bool bhb = ((pv ? opt_bhb_entry_pv : opt_bhb_entry_hvm) &&
++ (d->domain_id != 0 || opt_bhb_entry_dom0));
++
+ d->arch.scf =
+ (verw ? SCF_verw : 0) |
+ (ibpb ? SCF_entry_ibpb : 0) |
++ (bhb ? SCF_entry_bhb : 0) |
+ 0;
+ }
+
+--
+2.44.0
+
diff --git a/0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch b/0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch
new file mode 100644
index 00000000..e1e52702
--- /dev/null
+++ b/0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch
@@ -0,0 +1,130 @@
+From 60859cc99033beb84da035fb1664ce6e9b698bc6 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper
+Date: Fri, 22 Mar 2024 19:29:34 +0000
+Subject: [PATCH 542/542] x86/spec-ctrl: Support the "long" BHB loop sequence
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Out of an abundance of caution, implement the long loop too, and allow for
+it to be opted in to.
+
+This is part of XSA-456 / CVE-2024-2201.
+
+Signed-off-by: Andrew Cooper
+Acked-by: Roger Pau Monné
+(cherry picked from commit d5887c0decbd90e798b24ed696628645b04632fb)
+---
+ docs/misc/xen-command-line.pandoc | 4 ++--
+ xen/arch/x86/bhb-thunk.S | 8 ++++++--
+ xen/arch/x86/include/asm/cpufeatures.h | 1 +
+ xen/arch/x86/spec_ctrl.c | 10 +++++++++-
+ 4 files changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 0bd0588097..fba5a8221b 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2325,7 +2325,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ ### spec-ctrl (x86)
+ > `= List of [ , xen=, {pv,hvm}=,
+ > {msr-sc,rsb,verw,{ibpb,bhb}-entry}=|{pv,hvm}=,
+-> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx,
++> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx|long,
+ > {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+ > unpriv-mmio,gds-mit,div-scrub,
+@@ -2397,7 +2397,7 @@ On all hardware, `bhb-seq=` can be used to select which of the BHB-clearing
+ sequences gets used. This interacts with the `bhb-entry=` and `bhi-dis-s=`
+ options in order to mitigate Branch History Injection on affected hardware.
+ The default sequence is `short`, with `tsx` as an alternative available
+-capable hardware that can be opted in to.
++capable hardware, and `long` that can be opted in to.
+ + On hardware supporting IBRS (Indirect Branch Restricted Speculation), the + `ibrs=` option can be used to force or prevent Xen using the feature itself. +diff --git a/xen/arch/x86/bhb-thunk.S b/xen/arch/x86/bhb-thunk.S +index f52cfb9bc2..7e866784f7 100644 +--- a/xen/arch/x86/bhb-thunk.S ++++ b/xen/arch/x86/bhb-thunk.S +@@ -56,9 +56,13 @@ ENTRY(clear_bhb_tsx) + * + * The "short" sequence (5 and 5) is for CPUs prior to Alder Lake / Sapphire + * Rapids (i.e. Cores prior to Golden Cove and/or Gracemont). ++ * ++ * The "long" sequence (12 and 7) is for Alder Lake / Sapphire Rapids ++ * (i.e. Golden Cove and/or Gracemont cores). However, such CPUs are expected ++ * to use BHI_DIS_S in preference. + */ + ENTRY(clear_bhb_loops) +- mov $5, %ecx ++ ALTERNATIVE "mov $5, %ecx", "mov $12, %ecx", X86_SPEC_BHB_LOOPS_LONG + + call 1f + jmp 5f +@@ -70,7 +74,7 @@ ENTRY(clear_bhb_loops) + int3 + + .align 64 +-2: mov $5, %eax ++2: ALTERNATIVE "mov $5, %eax", "mov $7, %eax", X86_SPEC_BHB_LOOPS_LONG + + 3: jmp 4f + int3 +diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h +index bada8912e0..ba3df174b7 100644 +--- a/xen/arch/x86/include/asm/cpufeatures.h ++++ b/xen/arch/x86/include/asm/cpufeatures.h +@@ -58,6 +58,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for + + #define X86_SPEC_BHB_TSX X86_BUG(19) /* Use clear_bhb_tsx for BHI mitigation. */ + #define X86_SPEC_BHB_LOOPS X86_BUG(20) /* Use clear_bhb_loops for BHI mitigation.*/ ++#define X86_SPEC_BHB_LOOPS_LONG X86_BUG(21) /* Upgrade clear_bhb_loops to the "long" sequence. */ + + /* Total number of capability words, inc synth and bug words. */ + #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index b53e9c4e7a..7697f9ad3f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -53,6 +53,7 @@ static enum bhb_thunk { + BHB_NONE, + BHB_TSX, + BHB_SHORT, ++ BHB_LONG, + } opt_bhb_seq __initdata; + + /* Cmdline controls for Xen's speculative settings. */ +@@ -320,6 +321,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_bhb_seq = BHB_TSX; + else if ( !cmdline_strcmp(s, "short") ) + opt_bhb_seq = BHB_SHORT; ++ else if ( !cmdline_strcmp(s, "long") ) ++ opt_bhb_seq = BHB_LONG; + else + rc = -EINVAL; + } +@@ -599,7 +602,8 @@ static void __init print_details(enum ind_thunk thunk) + opt_bhb_seq != BHB_NONE ? "BHB-Seq: " : "", + opt_bhb_seq == BHB_NONE ? "" : + opt_bhb_seq == BHB_TSX ? "TSX, " : +- opt_bhb_seq == BHB_SHORT ? "SHORT, " : "?, ", ++ opt_bhb_seq == BHB_SHORT ? "SHORT, " : ++ opt_bhb_seq == BHB_LONG ? "LONG, " : "?, ", + (!boot_cpu_has(X86_FEATURE_IBRSB) && + !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : + (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? 
"IBRS+" : "IBRS-", +@@ -1742,6 +1746,10 @@ static void __init bhi_calculations(void) + + switch ( opt_bhb_seq ) + { ++ case BHB_LONG: ++ setup_force_cpu_cap(X86_SPEC_BHB_LOOPS_LONG); ++ fallthrough; ++ + case BHB_SHORT: + setup_force_cpu_cap(X86_SPEC_BHB_LOOPS); + break; +-- +2.44.0 + diff --git a/1017-Disable-TSX-by-default.patch b/1017-Disable-TSX-by-default.patch deleted file mode 100644 index c26c3c6a..00000000 --- a/1017-Disable-TSX-by-default.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 334130846ee62dcc4082edd580dfde2544d20321 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - -Date: Wed, 16 Nov 2022 02:26:12 +0100 -Subject: [PATCH 1017/1018] Disable TSX by default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Intel is dropping TSX from several platforms instead of fixing -speculative bugs in it. Lets proactively disable it even if wasn't -dropped on a specific platform yet. - -Signed-off-by: Marek Marczykowski-Górecki ---- - xen/arch/x86/tsx.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c -index 41b6092cfe16..996c22b56be7 100644 ---- a/xen/arch/x86/tsx.c -+++ b/xen/arch/x86/tsx.c -@@ -18,7 +18,7 @@ - * This option only has any effect on systems presenting a mechanism of - * controlling TSX behaviour, and where TSX isn't force-disabled by firmware. - */ --int8_t __read_mostly opt_tsx = -1; -+int8_t __read_mostly opt_tsx = -2; - bool __read_mostly rtm_disabled; - - static int __init cf_check parse_tsx(const char *s) -@@ -197,6 +197,13 @@ void tsx_init(void) - } - } - -+ /* -+ * Check bottom bit only. Higher bits are various sentinels. Performed -+ * unconditionally so tsx=0 hides guest CPUID bits on HSX/BDX even without -+ * MSRs to enforce the restriction. -+ */ -+ rtm_disabled = !(opt_tsx & 1); -+ - /* - * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts. - * MSR_TSX_FORCE_ABORT is enumerated on TSX-enabled pre-MDS_NO Skylake -@@ -222,9 +229,6 @@ void tsx_init(void) - - rdmsr(MSR_TSX_CTRL, lo, hi); - -- /* Check bottom bit only. Higher bits are various sentinels. */ -- rtm_disabled = !(opt_tsx & 1); -- - lo &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR); - if ( rtm_disabled ) - lo |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR; -@@ -241,9 +245,6 @@ void tsx_init(void) - - rdmsr(MSR_TSX_FORCE_ABORT, lo, hi); - -- /* Check bottom bit only. Higher bits are various sentinels. 
*/ -- rtm_disabled = !(opt_tsx & 1); -- - lo &= ~(TSX_FORCE_ABORT_RTM | TSX_CPUID_CLEAR | TSX_ENABLE_RTM); - - if ( cpu_has_rtm_always_abort ) --- -2.37.3 - diff --git a/xen.spec.in b/xen.spec.in index 1f3eecc1..5ef2f006 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -139,6 +139,8 @@ Patch0346: 0346-xen-livepatch-properly-build-the-noapply-and-norever.patch Patch0347: 0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch Patch0348: 0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch Patch0349: 0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch +Patch0350: 0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch +Patch0351: 0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch # Security fixes Patch0500: 0500-xsa452-4.17-1.patch @@ -148,6 +150,43 @@ Patch0503: 0503-xsa452-4.17-4.patch Patch0504: 0504-xsa452-4.17-5.patch Patch0505: 0505-xsa452-4.17-6.patch Patch0506: 0506-xsa452-4.17-7.patch +Patch0507: 0507-xsa455-4.17.patch +# XSA-456 +Patch0508: 0508-x86-APIC-finish-genapic-conversion-to-altcall.patch +Patch0509: 0509-cpufreq-finish-conversion-to-altcall.patch +Patch0510: 0510-x86-HPET-avoid-an-indirect-call.patch +Patch0511: 0511-core-parking-use-alternative_call.patch +Patch0512: 0512-x86-MTRR-avoid-several-indirect-calls.patch +Patch0513: 0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch +Patch0514: 0514-x86-MCE-separate-BSP-only-initialization.patch +Patch0515: 0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch +Patch0516: 0516-IRQ-generalize-gs-et_irq_regs.patch +Patch0517: 0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch +Patch0518: 0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch +Patch0519: 0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch +Patch0520: 0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch +Patch0521: 0521-x86-guest-finish-conversion-to-altcall.patch +Patch0522: 0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch +Patch0523: 0523-VMX-tertiary-execution-control-infrastructure.patch +Patch0524: 0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch +Patch0525: 0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch +Patch0526: 0526-x86-alternatives-fix-.init-section-reference-in-_app.patch +Patch0527: 0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch +Patch0528: 0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch +Patch0529: 0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch +Patch0530: 0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch +Patch0531: 0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch +Patch0532: 0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch +Patch0533: 0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch +Patch0534: 0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch +Patch0535: 0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch +Patch0536: 0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch +Patch0537: 0537-x86-Drop-INDIRECT_JMP.patch +Patch0538: 0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch +Patch0539: 0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch +Patch0540: 0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch +Patch0541: 0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch +Patch0542: 0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch @@ -213,7 +252,6 @@ Patch1013: 1013-libxl-do-not-force-qdisk-backend-for-cdrom.patch 
Patch1014: 1014-Additional-support-in-ACPI-builder-to-support-SLIC-a.patch Patch1015: 1015-libxl-conditionally-allow-PCI-passthrough-on-PV-with.patch Patch1016: 1016-gnttab-disable-grant-tables-v2-by-default.patch -Patch1017: 1017-Disable-TSX-by-default.patch Patch1018: 1018-Fix-IGD-passthrough-with-linux-stubdomain.patch Patch1019: 1019-Use-Linux-s-PAT.patch From b11fc663cdd6875b5bbc19cc848d99c34b10f495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 9 Apr 2024 21:53:37 +0200 Subject: [PATCH 48/64] version 4.17.3-5 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index b8626c4c..7ed6ff82 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -4 +5 From d2e30227788c9d480bf6297d8161ed14932ee4ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 23 Apr 2024 01:16:33 +0200 Subject: [PATCH 49/64] Revert "Temporarily switch to a hash file for download verification" Go back to signatures, as builderv2 was modified to tolerate older keys too. This reverts commit 606f99ae86189ece9a9ec394de1f56529947f7d6. --- .qubesbuilder | 7 +++---- xen-4.17.3.tar.gz.sha512 | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) delete mode 100644 xen-4.17.3.tar.gz.sha512 diff --git a/.qubesbuilder b/.qubesbuilder index e87fd428..8700b019 100644 --- a/.qubesbuilder +++ b/.qubesbuilder @@ -9,7 +9,6 @@ vm: source: files: - url: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz -# signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig -# pubkeys: -# - xen.org-key.asc - sha512: xen-@VERSION@.tar.gz.sha512 + signature: https://downloads.xenproject.org/release/xen/@VERSION@/xen-@VERSION@.tar.gz.sig + pubkeys: + - xen.org-key.asc diff --git a/xen-4.17.3.tar.gz.sha512 b/xen-4.17.3.tar.gz.sha512 deleted file mode 100644 index b9f48101..00000000 --- a/xen-4.17.3.tar.gz.sha512 +++ /dev/null @@ -1 +0,0 @@ -56ead90af00b4c1aa452b5edba980a2873e4c76e8c518220a88089a2771cd76fe2478e15a41abf5d247e4bc5af8415d53614dff7eb028ced80701c1a1263f91f From bd07080ebedaeb322199d2e63909605fc3eca138 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 23 Apr 2024 15:35:53 +0200 Subject: [PATCH 50/64] Update to 4.17.4 Remove patches included upstream. 
--- ...RR_-constants-with-X86_MT_-constants.patch | 4 +- ...assignment-if-phantom-functions-cann.patch | 91 ---- ...-VT-d-Fix-else-vs-endif-misplacement.patch | 70 --- ...PU-erratum-1474-fix-to-more-affected.patch | 123 ----- 0318-CirrusCI-drop-FreeBSD-12.patch | 39 -- ...-Global-Performance-Counter-Control-.patch | 74 --- ...ix-IRQ-handling-for-EXIT_REASON_INIT.patch | 65 --- ...isallow-the-use-of-inactivity-states.patch | 126 ------ ...-lib-fdt-elf-temp.o-and-their-deps-t.patch | 70 --- ...fix-off-by-one-in-entry-check-assert.patch | 36 -- ...top-fix-sorting-bug-for-some-columns.patch | 67 --- 0325-amd-vi-fix-IVMD-memory-type-checks.patch | 53 --- ...ix-fast-singlestep-state-persistence.patch | 86 ---- ...te-on-hvmemul_map_linear_addr-s-erro.patch | 63 --- 0328-build-Replace-which-with-command-v.patch | 57 --- ...locating-memory-for-qemu-xen-in-stub.patch | 50 --- ...build-fails-when-running-kconfig-fai.patch | 59 --- 0331-x86emul-add-missing-EVEX.R-checks.patch | 50 --- ...ch-fix-norevert-test-hook-setup-typo.patch | 36 -- ...printf-format-specifier-in-no_config.patch | 38 -- ...a-union-as-register-type-for-functio.patch | 141 ------ ...NCH_HARDEN-option-to-only-be-set-whe.patch | 57 --- ...shadow-stack-in-exception-from-stub-.patch | 212 --------- ...n-arm-Fix-UBSAN-failure-in-start_xen.patch | 52 --- ...-VMX-when-their-enabling-is-prohibit.patch | 67 --- ...-Fix-UB-shift-in-compat_set_timer_op.patch | 86 ---- ...he-built-in-SPECULATIVE_HARDEN_-opti.patch | 54 --- ...IRECT_THUNK-option-to-only-be-set-wh.patch | 67 --- ...print-thunk-option-selection-if-not-.patch | 50 --- ...gister-livepatch-regions-when-loaded.patch | 159 ------- ...arch-for-symbols-in-all-loaded-paylo.patch | 149 ------ ...x-norevert-test-attempt-to-open-code.patch | 186 -------- ...operly-build-the-noapply-and-norever.patch | 43 -- ...gfault-in-device_model_spawn_outcome.patch | 39 -- ...ys-use-a-temporary-parameter-stashin.patch | 197 -------- ...llow-for-levelling-of-VERW-side-effe.patch | 102 ----- ...pu-policy-Hide-x2APIC-from-PV-guests.patch | 90 ---- ...ix-visibility-of-HTT-CMP_LEGACY-in-m.patch | 85 ---- 0500-xsa452-4.17-1.patch | 304 ------------- 0501-xsa452-4.17-2.patch | 90 ---- 0502-xsa452-4.17-3.patch | 135 ------ 0503-xsa452-4.17-4.patch | 197 -------- 0504-xsa452-4.17-5.patch | 239 ---------- 0505-xsa452-4.17-6.patch | 163 ------- 0506-xsa452-4.17-7.patch | 307 ------------- 0507-xsa455-4.17.patch | 48 -- ...finish-genapic-conversion-to-altcall.patch | 111 ----- ...cpufreq-finish-conversion-to-altcall.patch | 157 ------- 0510-x86-HPET-avoid-an-indirect-call.patch | 107 ----- 0511-core-parking-use-alternative_call.patch | 95 ---- ...86-MTRR-avoid-several-indirect-calls.patch | 366 --------------- ...irect-call-for-I-O-emulation-quirk-h.patch | 102 ----- ...MCE-separate-BSP-only-initialization.patch | 192 -------- ...some-callback-invocations-to-altcall.patch | 405 ----------------- 0516-IRQ-generalize-gs-et_irq_regs.patch | 134 ------ ...pec-ctrl-Expose-IPRED_CTRL-to-guests.patch | 78 ---- ...pec-ctrl-Expose-RRSBA_CTRL-to-guests.patch | 66 --- ...-spec-ctrl-Expose-BHI_CTRL-to-guests.patch | 65 --- ...ENDBR-zapping-from-vendor-_ctxt_swit.patch | 69 --- ...6-guest-finish-conversion-to-altcall.patch | 84 ---- ...t-vendor-hook-invocations-to-altcall.patch | 152 ------- ...ary-execution-control-infrastructure.patch | 256 ----------- ...ve-__read_mostly-data-into-__ro_afte.patch | 71 --- ...ith-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch | 123 ----- ...-fix-.init-section-reference-in-_app.patch | 43 -- 
...expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch | 42 -- ...c-ctrl-Rename-spec_ctrl_flags-to-scf.patch | 425 ------------------ ...work-conditional-safety-for-SPEC_CTR.patch | 196 -------- ...e-for-r14-to-be-STACK_END-across-SPE.patch | 171 ------- ...ld-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch | 122 ----- ...-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch | 95 ---- ...tail-the-safety-properties-in-SPEC_C.patch | 183 -------- ...Add-support-for-virtualize-SPEC_CTRL.patch | 206 --------- ...den-the-xen-last-default-_spec_ctrl-.patch | 145 ------ ...-calls-in-reset-stack-infrastructure.patch | 111 ----- 0537-x86-Drop-INDIRECT_JMP.patch | 68 --- ...sx-Expose-RTM_ALWAYS_ABORT-to-guests.patch | 189 -------- ...pport-BHI_DIS_S-in-order-to-mitigate.patch | 177 -------- ...ctrl-Software-BHB-clearing-sequences.patch | 349 -------------- ...re-up-the-Native-BHI-software-sequen.patch | 347 -------------- ...l-Support-the-long-BHB-loop-sequence.patch | 130 ------ config | 1 + rel | 2 +- version | 2 +- xen.spec.in | 81 ---- 85 files changed, 5 insertions(+), 10289 deletions(-) delete mode 100644 0315-pci-fail-device-assignment-if-phantom-functions-cann.patch delete mode 100644 0316-VT-d-Fix-else-vs-endif-misplacement.patch delete mode 100644 0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch delete mode 100644 0318-CirrusCI-drop-FreeBSD-12.patch delete mode 100644 0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch delete mode 100644 0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch delete mode 100644 0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch delete mode 100644 0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch delete mode 100644 0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch delete mode 100644 0324-tools-xentop-fix-sorting-bug-for-some-columns.patch delete mode 100644 0325-amd-vi-fix-IVMD-memory-type-checks.patch delete mode 100644 0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch delete mode 100644 0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch delete mode 100644 0328-build-Replace-which-with-command-v.patch delete mode 100644 0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch delete mode 100644 0330-build-make-sure-build-fails-when-running-kconfig-fai.patch delete mode 100644 0331-x86emul-add-missing-EVEX.R-checks.patch delete mode 100644 0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch delete mode 100644 0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch delete mode 100644 0334-x86-altcall-use-a-union-as-register-type-for-functio.patch delete mode 100644 0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch delete mode 100644 0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch delete mode 100644 0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch delete mode 100644 0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch delete mode 100644 0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch delete mode 100644 0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch delete mode 100644 0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch delete mode 100644 0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch delete mode 100644 0343-xen-livepatch-register-livepatch-regions-when-loaded.patch delete mode 100644 0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch delete mode 100644 0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch delete mode 100644 
0346-xen-livepatch-properly-build-the-noapply-and-norever.patch delete mode 100644 0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch delete mode 100644 0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch delete mode 100644 0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch delete mode 100644 0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch delete mode 100644 0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch delete mode 100644 0500-xsa452-4.17-1.patch delete mode 100644 0501-xsa452-4.17-2.patch delete mode 100644 0502-xsa452-4.17-3.patch delete mode 100644 0503-xsa452-4.17-4.patch delete mode 100644 0504-xsa452-4.17-5.patch delete mode 100644 0505-xsa452-4.17-6.patch delete mode 100644 0506-xsa452-4.17-7.patch delete mode 100644 0507-xsa455-4.17.patch delete mode 100644 0508-x86-APIC-finish-genapic-conversion-to-altcall.patch delete mode 100644 0509-cpufreq-finish-conversion-to-altcall.patch delete mode 100644 0510-x86-HPET-avoid-an-indirect-call.patch delete mode 100644 0511-core-parking-use-alternative_call.patch delete mode 100644 0512-x86-MTRR-avoid-several-indirect-calls.patch delete mode 100644 0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch delete mode 100644 0514-x86-MCE-separate-BSP-only-initialization.patch delete mode 100644 0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch delete mode 100644 0516-IRQ-generalize-gs-et_irq_regs.patch delete mode 100644 0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch delete mode 100644 0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch delete mode 100644 0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch delete mode 100644 0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch delete mode 100644 0521-x86-guest-finish-conversion-to-altcall.patch delete mode 100644 0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch delete mode 100644 0523-VMX-tertiary-execution-control-infrastructure.patch delete mode 100644 0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch delete mode 100644 0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch delete mode 100644 0526-x86-alternatives-fix-.init-section-reference-in-_app.patch delete mode 100644 0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch delete mode 100644 0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch delete mode 100644 0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch delete mode 100644 0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch delete mode 100644 0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch delete mode 100644 0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch delete mode 100644 0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch delete mode 100644 0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch delete mode 100644 0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch delete mode 100644 0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch delete mode 100644 0537-x86-Drop-INDIRECT_JMP.patch delete mode 100644 0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch delete mode 100644 0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch delete mode 100644 0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch delete mode 100644 0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch delete mode 100644 0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch diff --git a/0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch 
b/0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch
index 9082554a..6867ed05 100644
--- a/0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch
+++ b/0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch
@@ -95,8 +95,8 @@ index 4e01c8d6f9df6562b94438f265d79a0a6fca8de6..2946003b84938f3b83c98b62dfaa3ace
 }
 /* If the type is WC, check that this processor supports it */
-- if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-+ if ((type == X86_MT_WC) && !have_wrcomb()) {
+- if ((type == MTRR_TYPE_WRCOMB) && !mtrr_have_wrcomb()) {
++ if ((type == X86_MT_WC) && !mtrr_have_wrcomb()) {
 printk(KERN_WARNING
 "mtrr: your processor doesn't support write-combining\n");
 return -EOPNOTSUPP;
diff --git a/0315-pci-fail-device-assignment-if-phantom-functions-cann.patch b/0315-pci-fail-device-assignment-if-phantom-functions-cann.patch
deleted file mode 100644
index 88e78bb5..00000000
--- a/0315-pci-fail-device-assignment-if-phantom-functions-cann.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From f9e1ed51bdba31017ea17e1819eb2ade6b5c8615 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
-Date: Tue, 30 Jan 2024 14:37:39 +0100
-Subject: [PATCH 315/349] pci: fail device assignment if phantom functions
- cannot be assigned
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The current behavior is that no error is reported if (some) phantom functions
-fail to be assigned during device add or assignment, so the operation succeeds
-even if some phantom functions are not correctly setup.
-
-This can lead to devices possibly being successfully assigned to a domU while
-some of the device phantom functions are still assigned to dom0. Even when the
-device is assigned domIO before being assigned to a domU phantom functions
-might fail to be assigned to domIO, and also fail to be assigned to the domU,
-leaving them assigned to dom0.
-
-Since the device can generate requests using the IDs of those phantom
-functions, given the scenario above a device in such state would be in control
-of a domU, but still capable of generating transactions that use a context ID
-targeting dom0 owned memory.
-
-Modify device assign in order to attempt to deassign the device if phantom
-functions failed to be assigned.
-
-Note that device addition is not modified in the same way, as in that case the
-device is assigned to a trusted domain, and hence partial assign can lead to
-device malfunction but not a security issue.
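In outline, the assignment path becomes a transaction with rollback. A condensed sketch of the resulting control flow (illustrative only: assign_one() is a hypothetical stand-in for the iommu_call() invocation, and locking plus the dom_io quarantine marking are omitted):

    rc = assign_one(d, pdev->devfn);             /* main function first */

    while ( pdev->phantom_stride && !rc )        /* then each phantom fn */
        rc = assign_one(d, devfn += pdev->phantom_stride);

    if ( rc && devfn != pdev->devfn &&           /* partial failure: undo */
         deassign_device(d, seg, bus, pdev->devfn) )
    {
        /* Neither assigned nor rolled back: state unknown, so fence it. */
        pdev->broken = true;
        if ( !is_hardware_domain(d) && d != dom_io )
            domain_crash(d);
    }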
- -This is XSA-449 / CVE-2023-46839 - -Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: cb4ecb3cc17b02c2814bc817efd05f3f3ba33d1e -master date: 2024-01-30 14:28:01 +0100 ---- - xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 07d1986d33..8c62b14d19 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -1444,11 +1444,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - pdev->fault.count = 0; - -- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn, -- pci_to_dev(pdev), flag)) ) -- goto done; -+ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev), -+ flag); - -- for ( ; pdev->phantom_stride; rc = 0 ) -+ while ( pdev->phantom_stride && !rc ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) -@@ -1459,8 +1458,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - done: - if ( rc ) -- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", -- d, &PCI_SBDF(seg, bus, devfn), rc); -+ { -+ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n", -+ d, devfn != pdev->devfn ? "phantom function " : "", -+ &PCI_SBDF(seg, bus, devfn), rc); -+ -+ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) ) -+ { -+ /* -+ * Device with phantom functions that failed to both assign and -+ * rollback. Mark the device as broken and crash the target domain, -+ * as the state of the functions at this point is unknown and Xen -+ * has no way to assert consistent context assignment among them. -+ */ -+ pdev->broken = true; -+ if ( !is_hardware_domain(d) && d != dom_io ) -+ domain_crash(d); -+ } -+ } - /* The device is assigned to dom_io so mark it as quarantined */ - else if ( d == dom_io ) - pdev->quarantine = true; --- -2.44.0 - diff --git a/0316-VT-d-Fix-else-vs-endif-misplacement.patch b/0316-VT-d-Fix-else-vs-endif-misplacement.patch deleted file mode 100644 index ca037af3..00000000 --- a/0316-VT-d-Fix-else-vs-endif-misplacement.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 6b1864afc14d484cdbc9754ce3172ac3dc189846 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 30 Jan 2024 14:38:38 +0100 -Subject: [PATCH 316/349] VT-d: Fix "else" vs "#endif" misplacement - -In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This -generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body -is executed unconditionally. - -Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's -clearer to follow. This in turn involves adjusting p2m_get_pagetable() to -compile when CONFIG_HVM is disabled. - -This is XSA-450 / CVE-2023-46840. 
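The hazard generalises; a minimal illustration (hypothetical CONFIG_FOO) of why a dangling else mixed with preprocessor conditionals is fragile:

    if ( a )
        /* nothing */;
#ifdef CONFIG_FOO
    else if ( b )
        handle_b();
    else
#endif
    {
        /*
         * With CONFIG_FOO compiled out no "else" remains, so this block
         * executes unconditionally - even when a is true.
         */
        handle_default();
    }

Rewriting the condition with IS_ENABLED() keeps the whole if/else chain visible to the compiler in both configurations, which is what the fix below does.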
- -Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only") -Reported-by: Teddy Astie -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: cc6ba68edf6dcd18c3865e7d7c0f1ed822796426 -master date: 2024-01-30 14:29:15 +0100 ---- - xen/arch/x86/include/asm/p2m.h | 9 ++++++++- - xen/drivers/passthrough/vtd/iommu.c | 4 +--- - 2 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h -index cd43d8621a..4f691533d5 100644 ---- a/xen/arch/x86/include/asm/p2m.h -+++ b/xen/arch/x86/include/asm/p2m.h -@@ -447,7 +447,14 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m) - return p2m->p2m_class == p2m_alternate; - } - --#define p2m_get_pagetable(p2m) ((p2m)->phys_table) -+#ifdef CONFIG_HVM -+static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m) -+{ -+ return p2m->phys_table; -+} -+#else -+pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m); -+#endif - - /* - * Ensure any deferred p2m TLB flush has been completed on all VCPUs. -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index b4c11a6b48..908b3ba6ee 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -441,15 +441,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr, - - if ( pgd_maddr ) - /* nothing */; --#ifdef CONFIG_HVM -- else if ( iommu_use_hap_pt(d) ) -+ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) ) - { - pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d)); - - pgd_maddr = pagetable_get_paddr(pgt); - } - else --#endif - { - if ( !hd->arch.vtd.pgd_maddr ) - { --- -2.44.0 - diff --git a/0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch b/0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch deleted file mode 100644 index b0867d66..00000000 --- a/0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch +++ /dev/null @@ -1,123 +0,0 @@ -From abcc32f0634627fe21117a48bd10e792bfbdd6dc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Fri, 2 Feb 2024 08:01:09 +0100 -Subject: [PATCH 317/349] x86/amd: Extend CPU erratum #1474 fix to more - affected models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Erratum #1474 has now been extended to cover models from family 17h ranges -00-2Fh, so the errata now covers all the models released under Family -17h (Zen, Zen+ and Zen2). - -Additionally extend the workaround to Family 18h (Hygon), since it's based on -the Zen architecture and very likely affected. - -Rename all the zen2 related symbols to fam17, since the errata doesn't -exclusively affect Zen2 anymore. 
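For context, the scheduling logic being renamed (visible in the hunks below) amounts to: compute the time remaining before the ~1000-day cutoff, then either arm a one-shot timer or disable C6 immediately. Roughly (a sketch; the exact delta expression sits outside the quoted hunks, with SECONDS() used here for illustration):

    s_time_t delta = SECONDS(1000ULL * 24 * 3600) - NOW();

    if ( delta > 0 )
        set_timer(&errata_c6, NOW() + delta);   /* disable C6 later */
    else
        fam17_disable_c6(NULL);                 /* already past: do it now */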
- -Reported-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Reviewed-by: Andrew Cooper -master commit: 23db507a01a4ec5259ec0ab43d296a41b1c326ba -master date: 2023-12-21 12:19:40 +0000 ---- - xen/arch/x86/cpu/amd.c | 27 ++++++++++++++------------- - 1 file changed, 14 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 29ae97e7c0..3d85e9797d 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -54,7 +54,7 @@ bool __read_mostly amd_acpi_c1e_quirk; - bool __ro_after_init amd_legacy_ssbd; - bool __initdata amd_virt_spec_ctrl; - --static bool __read_mostly zen2_c6_disabled; -+static bool __read_mostly fam17_c6_disabled; - - static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, - unsigned int *hi) -@@ -951,24 +951,24 @@ void amd_check_zenbleed(void) - val & chickenbit ? "chickenbit" : "microcode"); - } - --static void cf_check zen2_disable_c6(void *arg) -+static void cf_check fam17_disable_c6(void *arg) - { - /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */ - const uint64_t mask = ~((1ul << 6) | (1ul << 14) | (1ul << 22)); - uint64_t val; - -- if (!zen2_c6_disabled) { -+ if (!fam17_c6_disabled) { - printk(XENLOG_WARNING - "Disabling C6 after 1000 days apparent uptime due to AMD errata 1474\n"); -- zen2_c6_disabled = true; -+ fam17_c6_disabled = true; - /* - * Prevent CPU hotplug so that started CPUs will either see -- * zen2_c6_disabled set, or will be handled by -+ * zen_c6_disabled set, or will be handled by - * smp_call_function(). - */ - while (!get_cpu_maps()) - process_pending_softirqs(); -- smp_call_function(zen2_disable_c6, NULL, 0); -+ smp_call_function(fam17_disable_c6, NULL, 0); - put_cpu_maps(); - } - -@@ -1273,8 +1273,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) - amd_check_zenbleed(); - amd_check_erratum_1485(); - -- if (zen2_c6_disabled) -- zen2_disable_c6(NULL); -+ if (fam17_c6_disabled) -+ fam17_disable_c6(NULL); - - check_syscfg_dram_mod_en(); - -@@ -1286,7 +1286,7 @@ const struct cpu_dev amd_cpu_dev = { - .c_init = init_amd, - }; - --static int __init cf_check zen2_c6_errata_check(void) -+static int __init cf_check amd_check_erratum_1474(void) - { - /* - * Errata #1474: A Core May Hang After About 1044 Days -@@ -1294,7 +1294,8 @@ static int __init cf_check zen2_c6_errata_check(void) - */ - s_time_t delta; - -- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) -+ if (cpu_has_hypervisor || -+ (boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18)) - return 0; - - /* -@@ -1309,10 +1310,10 @@ static int __init cf_check zen2_c6_errata_check(void) - if (delta > 0) { - static struct timer errata_c6; - -- init_timer(&errata_c6, zen2_disable_c6, NULL, 0); -+ init_timer(&errata_c6, fam17_disable_c6, NULL, 0); - set_timer(&errata_c6, NOW() + delta); - } else -- zen2_disable_c6(NULL); -+ fam17_disable_c6(NULL); - - return 0; - } -@@ -1320,4 +1321,4 @@ static int __init cf_check zen2_c6_errata_check(void) - * Must be executed after early_time_init() for tsc_ticks2ns() to have been - * calibrated. That prevents us doing the check in init_amd(). 
- */ --presmp_initcall(zen2_c6_errata_check); -+presmp_initcall(amd_check_erratum_1474); --- -2.44.0 - diff --git a/0318-CirrusCI-drop-FreeBSD-12.patch b/0318-CirrusCI-drop-FreeBSD-12.patch deleted file mode 100644 index 10332e0b..00000000 --- a/0318-CirrusCI-drop-FreeBSD-12.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 0ef1fb43ddd61b3c4c953e833e012ac21ad5ca0f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Fri, 2 Feb 2024 08:01:50 +0100 -Subject: [PATCH 318/349] CirrusCI: drop FreeBSD 12 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Went EOL by the end of December 2023, and the pkg repos have been shut down. - -Reported-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Acked-by: Andrew Cooper -master commit: c2ce3466472e9c9eda79f5dc98eb701bc6fdba20 -master date: 2024-01-15 12:20:11 +0100 ---- - .cirrus.yml | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/.cirrus.yml b/.cirrus.yml -index 7e0beb200d..63f3afb104 100644 ---- a/.cirrus.yml -+++ b/.cirrus.yml -@@ -14,12 +14,6 @@ freebsd_template: &FREEBSD_TEMPLATE - - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin - - gmake -j`sysctl -n hw.ncpu` clang=y - --task: -- name: 'FreeBSD 12' -- freebsd_instance: -- image_family: freebsd-12-4 -- << : *FREEBSD_TEMPLATE -- - task: - name: 'FreeBSD 13' - freebsd_instance: --- -2.44.0 - diff --git a/0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch b/0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch deleted file mode 100644 index 502e6aba..00000000 --- a/0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch +++ /dev/null @@ -1,74 +0,0 @@ -From d0ad2cc5eac1b5d3cfd14204d377ce2384f52607 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Fri, 2 Feb 2024 08:02:20 +0100 -Subject: [PATCH 319/349] x86/intel: ensure Global Performance Counter Control - is setup correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL -MSR contains per-counter enable bits that is ANDed with the enable bit in the -counter EVNTSEL MSR in order for a PMC counter to be enabled. - -So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable -bits being set by default, but at least on some Intel Sapphire and Emerald -Rapids this is no longer the case, and Xen reports: - -Testing NMI watchdog on all CPUs: 0 40 stuck - -The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0 -doesn't start counting when the enable bit in EVNTSEL0 is set, due to the -relevant enable bit in PERF_GLOBAL_CTRL not being set. - -Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the -general-purpose PMCs are enabled. Doing so brings the state of the package-BSP -PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system. 
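Condensed, the adjustment reads the MSR and sets any clear general-purpose enable bits (a sketch; cnt is the counter count from CPUID leaf 0xa, as in the hunk below):

    uint64_t global_ctrl;
    uint64_t cnt_mask = (1ULL << cnt) - 1;      /* one enable bit per PMC */

    rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
    if ( (global_ctrl & cnt_mask) != cnt_mask )
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl | cnt_mask);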
- -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich -master commit: 6bdb965178bbb3fc50cd4418d4770a7789956e2c -master date: 2024-01-17 10:40:52 +0100 ---- - xen/arch/x86/cpu/intel.c | 23 ++++++++++++++++++++++- - 1 file changed, 22 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index b40ac696e6..96723b5d44 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -528,9 +528,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c) - init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); -+ unsigned int cnt = (eax >> 8) & 0xff; -+ - /* Check for version and the number of counters */ -- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) -+ if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) { -+ uint64_t global_ctrl; -+ unsigned int cnt_mask = (1UL << cnt) - 1; -+ -+ /* -+ * On (some?) Sapphire/Emerald Rapids platforms each -+ * package-BSP starts with all the enable bits for the -+ * general-purpose PMCs cleared. Adjust so counters -+ * can be enabled from EVNTSEL. -+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); -+ if ((global_ctrl & cnt_mask) != cnt_mask) { -+ printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#" -+ PRIx64 " adjusting to %#" PRIx64 "\n", -+ smp_processor_id(), global_ctrl, -+ global_ctrl | cnt_mask); -+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, -+ global_ctrl | cnt_mask); -+ } - __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); -+ } - } - - if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) --- -2.44.0 - diff --git a/0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch b/0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch deleted file mode 100644 index a2acd839..00000000 --- a/0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch +++ /dev/null @@ -1,65 +0,0 @@ -From eca5416f9b0e179de9553900de8de660ab09199d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 2 Feb 2024 08:02:51 +0100 -Subject: [PATCH 320/349] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT - -When receiving an INIT, a prior bugfix tried to ignore the INIT and continue -onwards. - -Unfortunately it's not safe to return at that point in vmx_vmexit_handler(). -Just out of context in the first hunk is a local_irqs_enabled() which is -depended-upon by the return-to-guest path, causing the following checklock -failure in debug builds: - - (XEN) Error: INIT received - ignoring - (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1 - (XEN) Xen BUG at common/spinlock.c:132 - (XEN) ----[ Xen-4.19-unstable x86_64 debug=y Tainted: H ]---- - ... - (XEN) Xen call trace: - (XEN) [] R check_lock+0xcd/0xe1 - (XEN) [] F _spin_lock+0x1b/0x60 - (XEN) [] F pt_update_irq+0x32/0x3bb - (XEN) [] F vmx_intr_assist+0x3b/0x51d - (XEN) [] F vmx_asm_vmexit_handler+0xf7/0x210 - -Luckily, this is benign in release builds. Accidentally having IRQs disabled -when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern. - -Drop the problematic early return. In hindsight, it's wrong to skip other -normal VMExit steps. 
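Reduced to essentials, the fix swaps the early return for a break, so the exit falls through to the interrupt re-enabling that the return-to-guest path depends on (a sketch of the resulting flow):

    switch ( exit_reason )
    {
    case EXIT_REASON_INIT:
        printk(XENLOG_ERR "Error: INIT received - ignoring\n");
        break;          /* previously "return", skipping the line below */
    }

    /* Now enable interrupts so it's safe to take locks. */
    local_irq_enable();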
- -Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is received") -Reported-by: Reima ISHII -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: d1f8883aebe00f6a9632d77ab0cd5c6d02c9cbe4 -master date: 2024-01-18 20:59:06 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 072288a5ef..31f4a861c6 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -4037,7 +4037,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) - - case EXIT_REASON_INIT: - printk(XENLOG_ERR "Error: INIT received - ignoring\n"); -- return; /* Renter the guest without further processing */ -+ break; - } - - /* Now enable interrupts so it's safe to take locks. */ -@@ -4323,6 +4323,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) - break; - } - case EXIT_REASON_EXTERNAL_INTERRUPT: -+ case EXIT_REASON_INIT: - /* Already handled above. */ - break; - case EXIT_REASON_TRIPLE_FAULT: --- -2.44.0 - diff --git a/0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch b/0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch deleted file mode 100644 index f6830472..00000000 --- a/0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 7bd612727df792671e44152a8205f0cf821ad984 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 2 Feb 2024 08:03:26 +0100 -Subject: [PATCH 321/349] x86/vmx: Disallow the use of inactivity states - -Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and -enter the vCPU. Luckily for us, nested-virt is explicitly unsupported for -security bugs. - -The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the -SDM in Vol3 27.7 "Special Features of VM Entry": - - If VM entry ends with the logical processor in an inactive activity state, - the VM entry generates any special bus cycle that is normally generated when - that activity state is entered from the active state. - -Also, - - Some activity states unconditionally block certain events. - -I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a -VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than -SIPIs. - -Both of these activity states are for the TXT ACM to use, not for regular -hypervisors, and Xen doesn't support dropping the HLT intercept either. - -There are two paths in Xen which operate on ACTIVITY_STATE. - -1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork. - - As regular VMs can't use any inactivity states, this is just duplicating - the 0 from construct_vmcs(). Retain the ability to query activity_state, - but crash the domain on any attempt to set an inactivity state. - -2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[]. - - Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC, - and remove ACTIVITY_STATE from vmcs_gstate_field[]. - - In virtual_vmentry(), we should trigger a VMEntry failure for the use of - any inactivity states, but there's no support for that in the code at all - so leave a TODO for when we finally start working on nested-virt in - earnest. 
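For reference, the new VMX_MISC_ACTIVITY_MASK (0x1c0) covers bits 6-8 of MSR_IA32_VMX_MISC, which enumerate support for the HLT, SHUTDOWN and WAIT-FOR-SIPI activity states respectively. After the masking added below, an L1 hypervisor probing for them finds nothing (hypothetical probe on the guest side):

    uint64_t misc;

    rdmsrl(MSR_IA32_VMX_MISC, misc);
    if ( misc & (7ULL << 6) )       /* any inactivity state supported? */
        /* Unreachable under Xen once the mask is applied. */;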
- -Reported-by: Reima Ishii -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Tamas K Lengyel -master commit: 3643bb53a05b7c8fbac072c63bef1538f2a6d0d2 -master date: 2024-01-18 20:59:06 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 8 +++++++- - xen/arch/x86/hvm/vmx/vvmx.c | 9 +++++++-- - xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 1 + - 3 files changed, 15 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 31f4a861c6..35d391d8e5 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1499,7 +1499,13 @@ static void cf_check vmx_set_nonreg_state(struct vcpu *v, - { - vmx_vmcs_enter(v); - -- __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state); -+ if ( nrs->vmx.activity_state ) -+ { -+ printk("Attempt to set %pv activity_state %#lx\n", -+ v, nrs->vmx.activity_state); -+ domain_crash(v->domain); -+ } -+ - __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info); - __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg); - -diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c -index f8fe8d0c14..515cb5ae77 100644 ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -910,7 +910,10 @@ static const u16 vmcs_gstate_field[] = { - GUEST_LDTR_AR_BYTES, - GUEST_TR_AR_BYTES, - GUEST_INTERRUPTIBILITY_INFO, -+ /* -+ * ACTIVITY_STATE is handled specially. - GUEST_ACTIVITY_STATE, -+ */ - GUEST_SYSENTER_CS, - GUEST_PREEMPTION_TIMER, - /* natural */ -@@ -1211,6 +1214,8 @@ static void virtual_vmentry(struct cpu_user_regs *regs) - nvcpu->nv_vmentry_pending = 0; - nvcpu->nv_vmswitch_in_progress = 1; - -+ /* TODO: Fail VMentry for GUEST_ACTIVITY_STATE != 0 */ -+ - /* - * EFER handling: - * hvm_set_efer won't work if CR0.PG = 1, so we change the value -@@ -2327,8 +2332,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) - data = hvm_cr4_guest_valid_bits(d); - break; - case MSR_IA32_VMX_MISC: -- /* Do not support CR3-target feature now */ -- data = host_data & ~VMX_MISC_CR3_TARGET; -+ /* Do not support CR3-targets or activity states. */ -+ data = host_data & ~(VMX_MISC_CR3_TARGET | VMX_MISC_ACTIVITY_MASK); - break; - case MSR_IA32_VMX_EPT_VPID_CAP: - data = nept_get_ept_vpid_cap(); -diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -index 78404e42b3..0af021d5f5 100644 ---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -@@ -288,6 +288,7 @@ extern u32 vmx_secondary_exec_control; - #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL - extern u64 vmx_ept_vpid_cap; - -+#define VMX_MISC_ACTIVITY_MASK 0x000001c0 - #define VMX_MISC_PROC_TRACE 0x00004000 - #define VMX_MISC_CR3_TARGET 0x01ff0000 - #define VMX_MISC_VMWRITE_ALL 0x20000000 --- -2.44.0 - diff --git a/0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch b/0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch deleted file mode 100644 index c6dee11b..00000000 --- a/0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch +++ /dev/null @@ -1,70 +0,0 @@ -From afb85cf1e8f165abf88de9d8a6df625692a753b1 Mon Sep 17 00:00:00 2001 -From: Michal Orzel -Date: Fri, 2 Feb 2024 08:04:07 +0100 -Subject: [PATCH 322/349] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps - to $(targets) - -At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op -under the hood) results in a crash. 
This is due to a profiler trying to -access data in the .init.* sections (libfdt for Arm and libelf for x86) -that are stripped after boot. Normally, the build system compiles any -*.init.o file without COV_FLAGS. However, these two libraries are -handled differently as sections will be renamed to init after linking. - -To override COV_FLAGS to empty for these libraries, lib{fdt,elf}.o were -added to nocov-y. This worked until e321576f4047 ("xen/build: start using -if_changed") that added lib{fdt,elf}-temp.o and their deps to extra-y. -This way, even though these objects appear as prerequisites of -lib{fdt,elf}.o and the settings should propagate to them, make can also -build them as a prerequisite of __build, in which case COV_FLAGS would -still have the unwanted flags. Fix it by switching to $(targets) instead. - -Also, for libfdt, append libfdt.o to nocov-y only if CONFIG_OVERLAY_DTB -is not set. Otherwise, there is no section renaming and we should be able -to run the coverage. - -Fixes: e321576f4047 ("xen/build: start using if_changed") -Signed-off-by: Michal Orzel -Reviewed-by: Anthony PERARD -Acked-by: Jan Beulich -master commit: 79519fcfa0605bbf19d8c02b979af3a2c8afed68 -master date: 2024-01-23 12:02:44 +0100 ---- - xen/common/libelf/Makefile | 2 +- - xen/common/libfdt/Makefile | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile -index 8a4522e4e1..917d12b006 100644 ---- a/xen/common/libelf/Makefile -+++ b/xen/common/libelf/Makefile -@@ -13,4 +13,4 @@ $(obj)/libelf.o: $(obj)/libelf-temp.o FORCE - $(obj)/libelf-temp.o: $(addprefix $(obj)/,$(libelf-objs)) FORCE - $(call if_changed,ld) - --extra-y += libelf-temp.o $(libelf-objs) -+targets += libelf-temp.o $(libelf-objs) -diff --git a/xen/common/libfdt/Makefile b/xen/common/libfdt/Makefile -index 75aaefa2e3..4d14fd61ba 100644 ---- a/xen/common/libfdt/Makefile -+++ b/xen/common/libfdt/Makefile -@@ -2,9 +2,9 @@ include $(src)/Makefile.libfdt - - SECTIONS := text data $(SPECIAL_DATA_SECTIONS) - OBJCOPYFLAGS := $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s)) -+nocov-y += libfdt.o - - obj-y += libfdt.o --nocov-y += libfdt.o - - CFLAGS-y += -I$(srctree)/include/xen/libfdt/ - -@@ -14,4 +14,4 @@ $(obj)/libfdt.o: $(obj)/libfdt-temp.o FORCE - $(obj)/libfdt-temp.o: $(addprefix $(obj)/,$(LIBFDT_OBJS)) FORCE - $(call if_changed,ld) - --extra-y += libfdt-temp.o $(LIBFDT_OBJS) -+targets += libfdt-temp.o $(LIBFDT_OBJS) --- -2.44.0 - diff --git a/0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch b/0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch deleted file mode 100644 index 57dc04da..00000000 --- a/0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 091466ba55d1e2e75738f751818ace2e3ed08ccf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Fri, 2 Feb 2024 08:04:33 +0100 -Subject: [PATCH 323/349] x86/p2m-pt: fix off by one in entry check assert -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The MMIO RO rangeset overlap check is bogus: the rangeset is inclusive so the -passed end mfn should be the last mfn to be mapped (not last + 1). 
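A quick worked example of the inclusive convention, with hypothetical numbers: an order-9 (2 MiB) entry starting at mfn 0x1000 maps mfns 0x1000 through 0x11ff, so the last member - and the correct second argument to the overlap check - is:

    unsigned long start = 0x1000, order = 9;
    unsigned long last  = start + (1UL << order) - 1;    /* 0x11ff */

    /*
     * Passing start + (1UL << order) (i.e. 0x1200) widens the checked
     * range by one mfn, so the assert could fire for a perfectly valid
     * mapping whenever an MMIO-RO range begins immediately after it.
     */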
- -Fixes: 6fa1755644d0 ('amd/npt/shadow: replace assert that prevents creating 2M/1G MMIO entries') -Signed-off-by: Roger Pau Monné -Reviewed-by: George Dunlap -master commit: 610775d0dd61c1bd2f4720c755986098e6a5bafd -master date: 2024-01-25 16:09:04 +0100 ---- - xen/arch/x86/mm/p2m-pt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c -index eaba2b0fb4..f02ebae372 100644 ---- a/xen/arch/x86/mm/p2m-pt.c -+++ b/xen/arch/x86/mm/p2m-pt.c -@@ -564,7 +564,7 @@ static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old, - if ( new == p2m_mmio_direct ) - ASSERT(!mfn_eq(mfn, INVALID_MFN) && - !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), -- mfn_x(mfn) + (1ul << order))); -+ mfn_x(mfn) + (1UL << order) - 1)); - else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid || - new == p2m_mmio_dm ) - ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN)); --- -2.44.0 - diff --git a/0324-tools-xentop-fix-sorting-bug-for-some-columns.patch b/0324-tools-xentop-fix-sorting-bug-for-some-columns.patch deleted file mode 100644 index 770ebd2b..00000000 --- a/0324-tools-xentop-fix-sorting-bug-for-some-columns.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 61da71968ea44964fd1dd2e449b053c77eb83139 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Cyril=20R=C3=A9bert=20=28zithro=29?= -Date: Tue, 27 Feb 2024 14:06:53 +0100 -Subject: [PATCH 324/349] tools/xentop: fix sorting bug for some columns -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Sort doesn't work on columns VBD_OO, VBD_RD, VBD_WR and VBD_RSECT. -Fix by adjusting variables names in compare functions. -Bug fix only. No functional change. - -Fixes: 91c3e3dc91d6 ("tools/xentop: Display '-' when stats are not available.") -Signed-off-by: Cyril Rébert (zithro) -Reviewed-by: Anthony PERARD -master commit: 29f17d837421f13c0e0010802de1b2d51d2ded4a -master date: 2024-02-05 17:58:23 +0000 ---- - tools/xentop/xentop.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c -index 950e8935c4..545bd5e96d 100644 ---- a/tools/xentop/xentop.c -+++ b/tools/xentop/xentop.c -@@ -684,7 +684,7 @@ static int compare_vbd_oo(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_oo = 0, dom2_vbd_oo = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom1_vbd_oo); -- tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom2_vbd_oo); -+ tot_vbd_reqs(domain2, FIELD_VBD_OO, &dom2_vbd_oo); - - return -compare(dom1_vbd_oo, dom2_vbd_oo); - } -@@ -711,9 +711,9 @@ static int compare_vbd_rd(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_rd = 0, dom2_vbd_rd = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom1_vbd_rd); -- tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom2_vbd_rd); -+ tot_vbd_reqs(domain2, FIELD_VBD_RD, &dom2_vbd_rd); - -- return -compare(dom1_vbd_rd, dom1_vbd_rd); -+ return -compare(dom1_vbd_rd, dom2_vbd_rd); - } - - /* Prints number of total VBD READ requests statistic */ -@@ -738,7 +738,7 @@ static int compare_vbd_wr(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_wr = 0, dom2_vbd_wr = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom1_vbd_wr); -- tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom2_vbd_wr); -+ tot_vbd_reqs(domain2, FIELD_VBD_WR, &dom2_vbd_wr); - - return -compare(dom1_vbd_wr, dom2_vbd_wr); - } -@@ -765,7 +765,7 @@ static int compare_vbd_rsect(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long 
dom1_vbd_rsect = 0, dom2_vbd_rsect = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom1_vbd_rsect); -- tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom2_vbd_rsect); -+ tot_vbd_reqs(domain2, FIELD_VBD_RSECT, &dom2_vbd_rsect); - - return -compare(dom1_vbd_rsect, dom2_vbd_rsect); - } --- -2.44.0 - diff --git a/0325-amd-vi-fix-IVMD-memory-type-checks.patch b/0325-amd-vi-fix-IVMD-memory-type-checks.patch deleted file mode 100644 index f2547f59..00000000 --- a/0325-amd-vi-fix-IVMD-memory-type-checks.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 463aaf3fbf62d24e898ae0c2ba53d85ca0f94d3f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:07:12 +0100 -Subject: [PATCH 325/349] amd-vi: fix IVMD memory type checks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current code that parses the IVMD blocks is relaxed with regard to the -restriction that such unity regions should always fall into memory ranges -marked as reserved in the memory map. - -However the type checks for the IVMD addresses are inverted, and as a result -IVMD ranges falling into RAM areas are accepted. Note that having such ranges -in the first place is a firmware bug, as IVMD should always fall into reserved -ranges. - -Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved') -Reported-by: Ox -Signed-off-by: Roger Pau Monné -Tested-by: oxjo -Reviewed-by: Jan Beulich -master commit: 83afa313583019d9f159c122cecf867735d27ec5 -master date: 2024-02-06 11:56:13 +0100 ---- - xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c -index 3b577c9b39..3a7045c39b 100644 ---- a/xen/drivers/passthrough/amd/iommu_acpi.c -+++ b/xen/drivers/passthrough/amd/iommu_acpi.c -@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block) - return -EIO; - } - -- /* Types which won't be handed out are considered good enough. */ -- if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | -- RAM_TYPE_UNUSABLE)) ) -+ /* -+ * Types which aren't RAM are considered good enough. -+ * Note that a page being partially RESERVED, ACPI or UNUSABLE will -+ * force Xen into assuming the whole page as having that type in -+ * practice. -+ */ -+ if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | -+ RAM_TYPE_UNUSABLE) ) - continue; - - AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr); --- -2.44.0 - diff --git a/0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch b/0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch deleted file mode 100644 index 8dafe387..00000000 --- a/0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 415f770d23f9fcbc02436560fa6583dcd8e1343f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Petr=20Bene=C5=A1?= -Date: Tue, 27 Feb 2024 14:07:45 +0100 -Subject: [PATCH 326/349] x86/hvm: Fix fast singlestep state persistence -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch addresses an issue where the fast singlestep setting would persist -despite xc_domain_debug_control being called with XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF. -Specifically, if fast singlestep was enabled in a VMI session and that session -stopped before the MTF trap occurred, the fast singlestep setting remained -active even though MTF itself was disabled. 
This led to a situation where, upon -starting a new VMI session, the first event to trigger an EPT violation would -cause the corresponding EPT event callback to be skipped due to the lingering -fast singlestep setting. - -The fix ensures that the fast singlestep setting is properly reset when -disabling single step debugging operations. - -Signed-off-by: Petr Beneš -Reviewed-by: Tamas K Lengyel -master commit: 897def94b56175ce569673a05909d2f223e1e749 -master date: 2024-02-12 09:37:58 +0100 ---- - xen/arch/x86/hvm/hvm.c | 34 ++++++++++++++++++++++++---------- - 1 file changed, 24 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index d6c6ab8897..558dc3eddc 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -5153,26 +5153,40 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) - - int hvm_debug_op(struct vcpu *v, int32_t op) - { -- int rc; -+ int rc = 0; - - switch ( op ) - { - case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: - case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: -- rc = -EOPNOTSUPP; - if ( !cpu_has_monitor_trap_flag ) -- break; -- rc = 0; -- vcpu_pause(v); -- v->arch.hvm.single_step = -- (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON); -- vcpu_unpause(v); /* guest will latch new state */ -+ return -EOPNOTSUPP; - break; - default: -- rc = -ENOSYS; -- break; -+ return -ENOSYS; -+ } -+ -+ vcpu_pause(v); -+ -+ switch ( op ) -+ { -+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: -+ v->arch.hvm.single_step = true; -+ break; -+ -+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: -+ v->arch.hvm.single_step = false; -+ v->arch.hvm.fast_single_step.enabled = false; -+ v->arch.hvm.fast_single_step.p2midx = 0; -+ break; -+ -+ default: /* Excluded above */ -+ ASSERT_UNREACHABLE(); -+ return -ENOSYS; - } - -+ vcpu_unpause(v); /* guest will latch new state */ -+ - return rc; - } - --- -2.44.0 - diff --git a/0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch b/0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch deleted file mode 100644 index 2fc1929d..00000000 --- a/0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch +++ /dev/null @@ -1,63 +0,0 @@ -From b3ae0e6201495216b12157bd8b2382b28fdd7dae Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 27 Feb 2024 14:08:20 +0100 -Subject: [PATCH 327/349] x86/HVM: tidy state on hvmemul_map_linear_addr()'s - error path - -While in the vast majority of cases failure of the function will not -be followed by re-invocation with the same emulation context, a few -very specific insns - involving multiple independent writes, e.g. ENTER -and PUSHA - exist where this can happen. Since failure of the function -only signals to the caller that it ought to try an MMIO write instead, -such failure also cannot be assumed to result in wholesale failure of -emulation of the current insn. Instead we have to maintain internal -state such that another invocation of the function with the same -emulation context remains possible. To achieve that we need to reset MFN -slots after putting page references on the error path. - -Note that all of this affects debugging code only, in causing an -assertion to trigger (higher up in the function). There's otherwise no -misbehavior - such a "leftover" slot would simply be overwritten by new -contents in a release build. - -Also extend the related unmap() assertion, to further check for MFN 0. 
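
The effect is easiest to reproduce in a minimal standalone sketch (plain
C; map_pages(), the refcnt[] get/put and the INVALID marker are invented
for illustration, this is not the Xen code): an unwind path that drops
the references it took but leaves stale slot contents behind will trip
the debug check on the very next invocation.

#include <assert.h>
#include <stddef.h>

#define NSLOTS  2
#define INVALID 0UL            /* 0 marks an empty slot, as in the ASSERT */

static unsigned long slot[NSLOTS];     /* zero-initialised: all empty */
static int refcnt[8];

static int map_pages(const unsigned long *pfn, size_t n, size_t fail_at)
{
    size_t i;

    for ( i = 0; i < n; i++ )
    {
        assert(slot[i] == INVALID);    /* trips if residue was left behind */
        if ( i == fail_at )
            goto unwind;               /* simulated mapping failure */
        refcnt[pfn[i]]++;              /* "get" a reference */
        slot[i] = pfn[i];
    }
    return 0;

 unwind:
    while ( i-- )
    {
        refcnt[slot[i]]--;             /* "put" the reference again */
        slot[i] = INVALID;             /* the reset this patch introduces */
    }
    return -1;
}

int main(void)
{
    const unsigned long pfn[NSLOTS] = { 3, 5 };

    assert(map_pages(pfn, NSLOTS, 1) == -1);      /* fails half way */
    assert(map_pages(pfn, NSLOTS, NSLOTS) == 0);  /* retry must succeed */
    return 0;
}

Removing the slot reset in the unwind loop makes the second call assert,
mirroring the stale-slot situation described above.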
- -Fixes: 8cbd4fb0b7ea ("x86/hvm: implement hvmemul_write() using real mappings") -Reported-by: Manuel Andreas -Signed-off-by: Jan Beulich -Acked-by: Paul Durrant -master commit: e72f951df407bc3be82faac64d8733a270036ba1 -master date: 2024-02-13 09:36:14 +0100 ---- - xen/arch/x86/hvm/emulate.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c -index 275451dd36..27928dc3f3 100644 ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -697,7 +697,12 @@ static void *hvmemul_map_linear_addr( - out: - /* Drop all held references. */ - while ( mfn-- > hvmemul_ctxt->mfn ) -+ { - put_page(mfn_to_page(*mfn)); -+#ifndef NDEBUG /* Clean slot for a subsequent map()'s error checking. */ -+ *mfn = _mfn(0); -+#endif -+ } - - return err; - } -@@ -719,7 +724,7 @@ static void hvmemul_unmap_linear_addr( - - for ( i = 0; i < nr_frames; i++ ) - { -- ASSERT(mfn_valid(*mfn)); -+ ASSERT(mfn_x(*mfn) && mfn_valid(*mfn)); - paging_mark_dirty(currd, *mfn); - put_page(mfn_to_page(*mfn)); - --- -2.44.0 - diff --git a/0328-build-Replace-which-with-command-v.patch b/0328-build-Replace-which-with-command-v.patch deleted file mode 100644 index 5f0bf92d..00000000 --- a/0328-build-Replace-which-with-command-v.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 1330a5fe44ca91f98857b53fe8bbe06522d9db27 Mon Sep 17 00:00:00 2001 -From: Anthony PERARD -Date: Tue, 27 Feb 2024 14:08:50 +0100 -Subject: [PATCH 328/349] build: Replace `which` with `command -v` -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The `which` command is not standard, may not exist on the build host, -or may not behave as expected by the build system. It is recommended -to use `command -v` to find out if a command exist and have its path, -and it's part of a POSIX shell standard (at least, it seems to be -mandatory since IEEE Std 1003.1-2008, but was optional before). - -Fixes: c8a8645f1efe ("xen/build: Automatically locate a suitable python interpreter") -Fixes: 3b47bcdb6d38 ("xen/build: Use a distro version of figlet") -Signed-off-by: Anthony PERARD -Tested-by: Marek Marczykowski-Górecki -Acked-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: f93629b18b528a5ab1b1092949c5420069c7226c -master date: 2024-02-19 12:45:48 +0100 ---- - xen/Makefile | 4 ++-- - xen/build.mk | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/Makefile b/xen/Makefile -index dd0b004e1c..7ea13a6791 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -25,8 +25,8 @@ export XEN_BUILD_HOST := $(shell hostname) - endif - - # Best effort attempt to find a python interpreter, defaulting to Python 3 if --# available. Fall back to just `python` if `which` is nowhere to be found. --PYTHON_INTERPRETER := $(word 1,$(shell which python3 python python2 2>/dev/null) python) -+# available. Fall back to just `python`. 
-+PYTHON_INTERPRETER := $(word 1,$(shell command -v python3 || command -v python || command -v python2) python) - export PYTHON ?= $(PYTHON_INTERPRETER) - - export CHECKPOLICY ?= checkpolicy -diff --git a/xen/build.mk b/xen/build.mk -index 9ecb104f1e..b489f77b7c 100644 ---- a/xen/build.mk -+++ b/xen/build.mk -@@ -1,6 +1,6 @@ - quiet_cmd_banner = BANNER $@ - define cmd_banner -- if which figlet >/dev/null 2>&1 ; then \ -+ if command -v figlet >/dev/null 2>&1 ; then \ - echo " Xen $(XEN_FULLVERSION)" | figlet -f $< > $@.tmp; \ - else \ - echo " Xen $(XEN_FULLVERSION)" > $@.tmp; \ --- -2.44.0 - diff --git a/0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch b/0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch deleted file mode 100644 index db46c5a6..00000000 --- a/0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b9745280736ee526374873aa3c4142596e2ba10b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - -Date: Tue, 27 Feb 2024 14:09:19 +0100 -Subject: [PATCH 329/349] libxl: Disable relocating memory for qemu-xen in - stubdomain too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -According to comments (and experiments) qemu-xen cannot handle memory -reolcation done by hvmloader. The code was already disabled when running -qemu-xen in dom0 (see libxl__spawn_local_dm()), but it was missed when -adding qemu-xen support to stubdomain. Adjust libxl__spawn_stub_dm() to -be consistent in this regard. - -Reported-by: Neowutran -Signed-off-by: Marek Marczykowski-Górecki -Reviewed-by: Jason Andryuk -Acked-by: Anthony PERARD -master commit: 97883aa269f6745a6ded232be3a855abb1297e0d -master date: 2024-02-22 11:48:22 +0100 ---- - tools/libs/light/libxl_dm.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index 14b593110f..ed620a9d8e 100644 ---- a/tools/libs/light/libxl_dm.c -+++ b/tools/libs/light/libxl_dm.c -@@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss) - "%s", - libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios)); - } -+ /* Disable relocating memory to make the MMIO hole larger -+ * unless we're running qemu-traditional and vNUMA is not -+ * configured. 
*/ -+ libxl__xs_printf(gc, XBT_NULL, -+ libxl__sprintf(gc, "%s/hvmloader/allow-memory-relocate", -+ libxl__xs_get_dompath(gc, guest_domid)), -+ "%d", -+ guest_config->b_info.device_model_version -+ == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL && -+ !libxl__vnuma_configured(&guest_config->b_info)); - ret = xc_domain_set_target(ctx->xch, dm_domid, guest_domid); - if (ret<0) { - LOGED(ERROR, guest_domid, "setting target domain %d -> %d", --- -2.44.0 - diff --git a/0330-build-make-sure-build-fails-when-running-kconfig-fai.patch b/0330-build-make-sure-build-fails-when-running-kconfig-fai.patch deleted file mode 100644 index 8b65b919..00000000 --- a/0330-build-make-sure-build-fails-when-running-kconfig-fai.patch +++ /dev/null @@ -1,59 +0,0 @@ -From ea869977271f93945451908be9b6117ffd1fb02d Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 27 Feb 2024 14:09:37 +0100 -Subject: [PATCH 330/349] build: make sure build fails when running kconfig - fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Because of using "-include", failure to (re)build auto.conf (with -auto.conf.cmd produced as a secondary target) won't stop make from -continuing the build. Arrange for it being possible to drop the - from -Rules.mk, requiring that the include be skipped for tools-only targets. -Note that relying on the inclusion in those cases wouldn't be correct -anyway, as it might be a stale file (yet to be rebuilt) which would be -included, while during initial build, the file would be absent -altogether. - -Fixes: 8d4c17a90b0a ("xen/build: silence make warnings about missing auto.conf*") -Reported-by: Roger Pau Monné -Signed-off-by: Jan Beulich -Reviewed-by: Anthony PERARD -master commit: d34e5fa2e8db19f23081f46a3e710bb122130691 -master date: 2024-02-22 11:52:47 +0100 ---- - xen/Makefile | 1 + - xen/Rules.mk | 4 +++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/xen/Makefile b/xen/Makefile -index 7ea13a6791..bac3684a36 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -374,6 +374,7 @@ $(KCONFIG_CONFIG): tools_fixdep - # This exploits the 'multi-target pattern rule' trick. - # The syncconfig should be executed only once to make all the targets. - include/config/%.conf include/config/%.conf.cmd: $(KCONFIG_CONFIG) -+ $(Q)rm -f include/config/auto.conf - $(Q)$(MAKE) $(build)=tools/kconfig syncconfig - - ifeq ($(CONFIG_DEBUG),y) -diff --git a/xen/Rules.mk b/xen/Rules.mk -index 8af3dd7277..d759cccee3 100644 ---- a/xen/Rules.mk -+++ b/xen/Rules.mk -@@ -15,7 +15,9 @@ srcdir := $(srctree)/$(src) - PHONY := __build - __build: - ---include $(objtree)/include/config/auto.conf -+ifneq ($(firstword $(subst /, ,$(obj))),tools) -+include $(objtree)/include/config/auto.conf -+endif - - include $(XEN_ROOT)/Config.mk - include $(srctree)/scripts/Kbuild.include --- -2.44.0 - diff --git a/0331-x86emul-add-missing-EVEX.R-checks.patch b/0331-x86emul-add-missing-EVEX.R-checks.patch deleted file mode 100644 index 765e946a..00000000 --- a/0331-x86emul-add-missing-EVEX.R-checks.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 16f2e47eb1207d866f95cf694a60a7ceb8f96a36 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 27 Feb 2024 14:09:55 +0100 -Subject: [PATCH 331/349] x86emul: add missing EVEX.R' checks - -EVEX.R' is not ignored in 64-bit code when encoding a GPR or mask -register. While for mask registers suitable checks are in place (there -also covering EVEX.R), they were missing for the few cases where in -EVEX-encoded instructions ModR/M.reg encodes a GPR. 
While for VPEXTRW -the bit is replaced before an emulation stub is invoked, for -VCVT{,T}{S,D,H}2{,U}SI this actually would have led to #UD from inside -an emulation stub, in turn raising #UD to the guest, but accompanied by -log messages indicating something's wrong in Xen nevertheless. - -Fixes: 001bd91ad864 ("x86emul: support AVX512{F,BW,DQ} extract insns") -Fixes: baf4a376f550 ("x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns") -Signed-off-by: Jan Beulich -Acked-by: Andrew Cooper -master commit: cb319824bfa8d3c9ea0410cc71daaedc3e11aa2a -master date: 2024-02-22 11:54:07 +0100 ---- - xen/arch/x86/x86_emulate/x86_emulate.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 0c0336f737..995670cbc8 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -6829,7 +6829,8 @@ x86_emulate( - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */ - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */ - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */ -- generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk || -+ generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R || -+ evex.opmsk || - (ea.type != OP_REG && evex.brs)), - EXC_UD); - host_and_vcpu_must_have(avx512f); -@@ -10705,7 +10706,7 @@ x86_emulate( - goto pextr; - - case X86EMUL_OPC_EVEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ -- generate_exception_if(ea.type != OP_REG, EXC_UD); -+ generate_exception_if(ea.type != OP_REG || !evex.R, EXC_UD); - /* Convert to alternative encoding: We want to use a memory operand. */ - evex.opcx = ext_0f3a; - b = 0x15; --- -2.44.0 - diff --git a/0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch b/0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch deleted file mode 100644 index 050b5093..00000000 --- a/0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch +++ /dev/null @@ -1,36 +0,0 @@ -From f6b12792542e372f36a71ea4c2563e6dd6e4fa57 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:10:24 +0100 -Subject: [PATCH 332/349] xen/livepatch: fix norevert test hook setup typo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The test code has a typo in using LIVEPATCH_APPLY_HOOK() instead of -LIVEPATCH_REVERT_HOOK(). 
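
The mix-up compiles cleanly because both registration macros accept the
same void(void) handler, so nothing catches it until the hooks actually
run. A toy model (plain C; the TOY_*_HOOK macros and handler name are
invented, this is not the Xen livepatch API) shows the resulting
behaviour:

#include <stdio.h>

typedef void hook_fn(void);

static hook_fn *apply_hook, *revert_hook;

#define TOY_APPLY_HOOK(fn)  (apply_hook = (fn))
#define TOY_REVERT_HOOK(fn) (revert_hook = (fn))

static void revert_handler(void) { puts("    revert handler ran"); }

int main(void)
{
    TOY_APPLY_HOOK(revert_handler);    /* the typo: wrong macro, no warning */

    puts("applying payload:");
    if ( apply_hook )
        apply_hook();                  /* fires at the wrong time */

    puts("reverting payload:");
    if ( revert_hook )
        revert_hook();                 /* never fires when intended */

    return 0;
}

Swapping the macro, as the one-line hunk below does, attaches the
handler to the phase its name promises.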
- -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné -Reviewed-by: Ross Lagerwall -master commit: f0622dd4fd6ae6ddb523a45d89ed9b8f3a9a8f36 -master date: 2024-02-26 10:13:46 +0100 ---- - xen/test/livepatch/xen_action_hooks_norevert.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c -index 3e21ade6ab..c173855192 100644 ---- a/xen/test/livepatch/xen_action_hooks_norevert.c -+++ b/xen/test/livepatch/xen_action_hooks_norevert.c -@@ -120,7 +120,7 @@ static void post_revert_hook(livepatch_payload_t *payload) - printk(KERN_DEBUG "%s: Hook done.\n", __func__); - } - --LIVEPATCH_APPLY_HOOK(revert_hook); -+LIVEPATCH_REVERT_HOOK(revert_hook); - - LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook); - LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook); --- -2.44.0 - diff --git a/0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch b/0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch deleted file mode 100644 index 1812da57..00000000 --- a/0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 229e8a72ee4cde5698aaf42cc59ae57446dce60f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:10:39 +0100 -Subject: [PATCH 333/349] xen/cmdline: fix printf format specifier in - no_config_param() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -'*' sets the width field, which is the minimum number of characters to output, -but what we want in no_config_param() is the precision instead, which is '.*' -as it imposes a maximum limit on the output. - -Fixes: 68d757df8dd2 ('x86/pv: Options to disable and/or compile out 32bit PV support') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: ef101f525173cf51dc70f4c77862f6f10a8ddccf -master date: 2024-02-26 10:17:40 +0100 ---- - xen/include/xen/param.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h -index 93c3fe7cb7..e02e49635c 100644 ---- a/xen/include/xen/param.h -+++ b/xen/include/xen/param.h -@@ -191,7 +191,7 @@ static inline void no_config_param(const char *cfg, const char *param, - { - int len = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); - -- printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%*s' setting\n", -+ printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%.*s' setting\n", - cfg, param, len, s); - } - --- -2.44.0 - diff --git a/0334-x86-altcall-use-a-union-as-register-type-for-functio.patch b/0334-x86-altcall-use-a-union-as-register-type-for-functio.patch deleted file mode 100644 index c5dddc16..00000000 --- a/0334-x86-altcall-use-a-union-as-register-type-for-functio.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 1aafe054e7d1efbf8e8482a9cdd4be5753b79e2f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:11:04 +0100 -Subject: [PATCH 334/349] x86/altcall: use a union as register type for - function parameters on clang -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current code for alternative calls uses the caller parameter types as the -types for the register variables that serve as function parameters: - -uint8_t foo; -[...] 
-alternative_call(myfunc, foo); - -Would expand roughly into: - -register unint8_t a1_ asm("rdi") = foo; -register unsigned long a2_ asm("rsi"); -[...] -asm volatile ("call *%c[addr](%%rip)"...); - -However with -O2 clang will generate incorrect code, given the following -example: - -unsigned int func(uint8_t t) -{ - return t; -} - -static void bar(uint8_t b) -{ - int ret_; - register uint8_t di asm("rdi") = b; - register unsigned long si asm("rsi"); - register unsigned long dx asm("rdx"); - register unsigned long cx asm("rcx"); - register unsigned long r8 asm("r8"); - register unsigned long r9 asm("r9"); - register unsigned long r10 asm("r10"); - register unsigned long r11 asm("r11"); - - asm volatile ( "call %c[addr]" - : "+r" (di), "=r" (si), "=r" (dx), - "=r" (cx), "=r" (r8), "=r" (r9), - "=r" (r10), "=r" (r11), "=a" (ret_) - : [addr] "i" (&(func)), "g" (func) - : "memory" ); -} - -void foo(unsigned int a) -{ - bar(a); -} - -Clang generates the following assembly code: - -func: # @func - movl %edi, %eax - retq -foo: # @foo - callq func - retq - -Note the truncation of the unsigned int parameter 'a' of foo() to uint8_t when -passed into bar() is lost. clang doesn't zero extend the parameters in the -callee when required, as the psABI mandates. - -The above can be worked around by using a union when defining the register -variables, so that `di` becomes: - -register union { - uint8_t e; - unsigned long r; -} di asm("rdi") = { .e = b }; - -Which results in following code generated for `foo()`: - -foo: # @foo - movzbl %dil, %edi - callq func - retq - -So the truncation is not longer lost. Apply such workaround only when built -with clang. - -Reported-by: Matthew Grooms -Link: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=277200 -Link: https://github.com/llvm/llvm-project/issues/12579 -Link: https://github.com/llvm/llvm-project/issues/82598 -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich -master commit: 2ce562b2a413cbdb2e1128989ed1722290a27c4e -master date: 2024-02-26 10:18:01 +0100 ---- - xen/arch/x86/include/asm/alternative.h | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h -index a7a82c2c03..bcb1dc94f4 100644 ---- a/xen/arch/x86/include/asm/alternative.h -+++ b/xen/arch/x86/include/asm/alternative.h -@@ -167,9 +167,34 @@ extern void alternative_branches(void); - #define ALT_CALL_arg5 "r8" - #define ALT_CALL_arg6 "r9" - -+#ifdef CONFIG_CC_IS_CLANG -+/* -+ * Use a union with an unsigned long in order to prevent clang from -+ * skipping a possible truncation of the value. By using the union any -+ * truncation is carried before the call instruction, in turn covering -+ * for ABI-non-compliance in that the necessary clipping / extension of -+ * the value is supposed to be carried out in the callee. -+ * -+ * Note this behavior is not mandated by the standard, and hence could -+ * stop being a viable workaround, or worse, could cause a different set -+ * of code-generation issues in future clang versions. 
-+ * -+ * This has been reported upstream: -+ * https://github.com/llvm/llvm-project/issues/12579 -+ * https://github.com/llvm/llvm-project/issues/82598 -+ */ -+#define ALT_CALL_ARG(arg, n) \ -+ register union { \ -+ typeof(arg) e; \ -+ unsigned long r; \ -+ } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \ -+ .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \ -+ } -+#else - #define ALT_CALL_ARG(arg, n) \ - register typeof(arg) a ## n ## _ asm ( ALT_CALL_arg ## n ) = \ - ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) -+#endif - #define ALT_CALL_NO_ARG(n) \ - register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n ) - --- -2.44.0 - diff --git a/0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch b/0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch deleted file mode 100644 index 285c2c7e..00000000 --- a/0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 91650010815f3da0834bc9781c4359350d1162a5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:11:40 +0100 -Subject: [PATCH 335/349] x86/spec: fix BRANCH_HARDEN option to only be set - when build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current logic to handle the BRANCH_HARDEN option will report it as enabled -even when build-time disabled. Fix this by only allowing the option to be set -when support for it is built into Xen. - -Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3 -master date: 2024-02-26 16:06:42 +0100 ---- - xen/arch/x86/spec_ctrl.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 56e07d7536..661716d695 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -62,7 +62,8 @@ int8_t __initdata opt_psfd = -1; - int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; --static bool __initdata opt_branch_harden = true; -+static bool __initdata opt_branch_harden = -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -280,7 +281,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) - opt_l1d_flush = val; - else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 ) -- opt_branch_harden = val; -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ) -+ opt_branch_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s, -+ ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) --- -2.44.0 - diff --git a/0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch b/0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch deleted file mode 100644 index 133451db..00000000 --- a/0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 49f77602373b58b7bbdb40cea2b49d2f88d4003d Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 27 Feb 2024 14:12:11 +0100 -Subject: [PATCH 336/349] x86: account for shadow stack in exception-from-stub - 
recovery - -Dealing with exceptions raised from within emulation stubs involves -discarding return address (replaced by exception related information). -Such discarding of course also requires removing the corresponding entry -from the shadow stack. - -Also amend the comment in fixup_exception_return(), to further clarify -why use of ptr[1] can't be an out-of-bounds access. - -This is CVE-2023-46841 / XSA-451. - -Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible") -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -master commit: 91f5f7a9154919a765c3933521760acffeddbf28 -master date: 2024-02-27 13:49:22 +0100 ---- - xen/arch/x86/extable.c | 20 ++++++---- - xen/arch/x86/include/asm/uaccess.h | 3 +- - xen/arch/x86/traps.c | 63 +++++++++++++++++++++++++++--- - 3 files changed, 71 insertions(+), 15 deletions(-) - -diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c -index 6758ba1dca..dd9583f2a5 100644 ---- a/xen/arch/x86/extable.c -+++ b/xen/arch/x86/extable.c -@@ -86,26 +86,29 @@ search_one_extable(const struct exception_table_entry *first, - } - - unsigned long --search_exception_table(const struct cpu_user_regs *regs) -+search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra) - { - const struct virtual_region *region = find_text_region(regs->rip); - unsigned long stub = this_cpu(stubs.addr); - - if ( region && region->ex ) -+ { -+ *stub_ra = 0; - return search_one_extable(region->ex, region->ex_end, regs->rip); -+ } - - if ( regs->rip >= stub + STUB_BUF_SIZE / 2 && - regs->rip < stub + STUB_BUF_SIZE && - regs->rsp > (unsigned long)regs && - regs->rsp < (unsigned long)get_cpu_info() ) - { -- unsigned long retptr = *(unsigned long *)regs->rsp; -+ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup; - -- region = find_text_region(retptr); -- retptr = region && region->ex -- ? search_one_extable(region->ex, region->ex_end, retptr) -- : 0; -- if ( retptr ) -+ region = find_text_region(retaddr); -+ fixup = region && region->ex -+ ? search_one_extable(region->ex, region->ex_end, retaddr) -+ : 0; -+ if ( fixup ) - { - /* - * Put trap number and error code on the stack (in place of the -@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_user_regs *regs) - }; - - *(unsigned long *)regs->rsp = token.raw; -- return retptr; -+ *stub_ra = retaddr; -+ return fixup; - } - } - -diff --git a/xen/arch/x86/include/asm/uaccess.h b/xen/arch/x86/include/asm/uaccess.h -index 684fccd95c..74bb222c03 100644 ---- a/xen/arch/x86/include/asm/uaccess.h -+++ b/xen/arch/x86/include/asm/uaccess.h -@@ -421,7 +421,8 @@ union stub_exception_token { - unsigned long raw; - }; - --extern unsigned long search_exception_table(const struct cpu_user_regs *regs); -+extern unsigned long search_exception_table(const struct cpu_user_regs *regs, -+ unsigned long *stub_ra); - extern void sort_exception_tables(void); - extern void sort_exception_table(struct exception_table_entry *start, - const struct exception_table_entry *stop); -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index 06c4f3868b..7599bee361 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -856,7 +856,7 @@ void do_unhandled_trap(struct cpu_user_regs *regs) - } - - static void fixup_exception_return(struct cpu_user_regs *regs, -- unsigned long fixup) -+ unsigned long fixup, unsigned long stub_ra) - { - if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) - { -@@ -873,7 +873,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - /* - * Search for %rip. 
The shstk currently looks like this: - * -- * ... [Likely pointed to by SSP] -+ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive] -+ * ... [Pointed to by SSP for most exceptions, empty in IST cases] - * %cs [== regs->cs] - * %rip [== regs->rip] - * SSP [Likely points to 3 slots higher, above %cs] -@@ -891,7 +892,56 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - */ - if ( ptr[0] == regs->rip && ptr[1] == regs->cs ) - { -+ unsigned long primary_shstk = -+ (ssp & ~(STACK_SIZE - 1)) + -+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; -+ - wrss(fixup, ptr); -+ -+ if ( !stub_ra ) -+ goto shstk_done; -+ -+ /* -+ * Stub recovery ought to happen only when the outer context -+ * was on the main shadow stack. We need to also "pop" the -+ * stub's return address from the interrupted context's shadow -+ * stack. That is, -+ * - if we're still on the main stack, we need to move the -+ * entire stack (up to and including the exception frame) -+ * up by one slot, incrementing the original SSP in the -+ * exception frame, -+ * - if we're on an IST stack, we need to increment the -+ * original SSP. -+ */ -+ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT); -+ -+ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT ) -+ { -+ /* -+ * We're on an IST stack. First make sure the two return -+ * addresses actually match. Then increment the interrupted -+ * context's SSP. -+ */ -+ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]); -+ wrss(ptr[-1] + 8, &ptr[-1]); -+ goto shstk_done; -+ } -+ -+ /* Make sure the two return addresses actually match. */ -+ BUG_ON(stub_ra != ptr[2]); -+ -+ /* Move exception frame, updating SSP there. */ -+ wrss(ptr[1], &ptr[2]); /* %cs */ -+ wrss(ptr[0], &ptr[1]); /* %rip */ -+ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */ -+ -+ /* Move all newer entries. */ -+ while ( --ptr != _p(ssp) ) -+ wrss(ptr[-1], &ptr[0]); -+ -+ /* Finally account for our own stack having shifted up. 
*/ -+ asm volatile ( "incsspd %0" :: "r" (2) ); -+ - goto shstk_done; - } - } -@@ -912,7 +962,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - - static bool extable_fixup(struct cpu_user_regs *regs, bool print) - { -- unsigned long fixup = search_exception_table(regs); -+ unsigned long stub_ra = 0; -+ unsigned long fixup = search_exception_table(regs, &stub_ra); - - if ( unlikely(fixup == 0) ) - return false; -@@ -926,7 +977,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print) - vector_name(regs->entry_vector), regs->error_code, - _p(regs->rip), _p(regs->rip), _p(fixup)); - -- fixup_exception_return(regs, fixup); -+ fixup_exception_return(regs, fixup, stub_ra); - this_cpu(last_extable_addr) = regs->rip; - - return true; -@@ -1214,7 +1265,7 @@ void do_invalid_op(struct cpu_user_regs *regs) - void (*fn)(struct cpu_user_regs *) = bug_ptr(bug); - - fn(regs); -- fixup_exception_return(regs, (unsigned long)eip); -+ fixup_exception_return(regs, (unsigned long)eip, 0); - return; - } - -@@ -1235,7 +1286,7 @@ void do_invalid_op(struct cpu_user_regs *regs) - case BUGFRAME_warn: - printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno); - show_execution_state(regs); -- fixup_exception_return(regs, (unsigned long)eip); -+ fixup_exception_return(regs, (unsigned long)eip, 0); - return; - - case BUGFRAME_bug: --- -2.44.0 - diff --git a/0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch b/0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch deleted file mode 100644 index 91ac7a04..00000000 --- a/0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 6cbccc4071ef49a8c591ecaddfdcb1cc26d28411 Mon Sep 17 00:00:00 2001 -From: Michal Orzel -Date: Thu, 8 Feb 2024 11:43:39 +0100 -Subject: [PATCH 337/349] xen/arm: Fix UBSAN failure in start_xen() - -When running Xen on arm32, in scenario where Xen is loaded at an address -such as boot_phys_offset >= 2GB, UBSAN reports the following: - -(XEN) UBSAN: Undefined behaviour in arch/arm/setup.c:739:58 -(XEN) pointer operation underflowed 00200000 to 86800000 -(XEN) Xen WARN at common/ubsan/ubsan.c:172 -(XEN) ----[ Xen-4.19-unstable arm32 debug=y ubsan=y Not tainted ]---- -... -(XEN) Xen call trace: -(XEN) [<0031b4c0>] ubsan.c#ubsan_epilogue+0x18/0xf0 (PC) -(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 (LR) -(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 -(XEN) [<004d15a8>] start_xen+0xe0/0xbe0 -(XEN) [<0020007c>] head.o#primary_switched+0x4/0x30 - -The failure is reported for the following line: -(paddr_t)(uintptr_t)(_start + boot_phys_offset) - -This occurs because the compiler treats (ptr + size) with size bigger than -PTRDIFF_MAX as undefined behavior. To address this, switch to macro -virt_to_maddr(), given the future plans to eliminate boot_phys_offset. - -Signed-off-by: Michal Orzel -Reviewed-by: Luca Fancellu -Tested-by: Luca Fancellu -Acked-by: Julien Grall -(cherry picked from commit e11f5766503c0ff074b4e0f888bbfc931518a169) ---- - xen/arch/arm/setup.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c -index 4395640019..9ee19c2bc1 100644 ---- a/xen/arch/arm/setup.c -+++ b/xen/arch/arm/setup.c -@@ -1025,7 +1025,7 @@ void __init start_xen(unsigned long boot_phys_offset, - - /* Register Xen's load address as a boot module. 
*/ - xen_bootmodule = add_boot_module(BOOTMOD_XEN, -- (paddr_t)(uintptr_t)(_start + boot_phys_offset), -+ virt_to_maddr(_start), - (paddr_t)(uintptr_t)(_end - _start), false); - BUG_ON(!xen_bootmodule); - --- -2.44.0 - diff --git a/0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch b/0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch deleted file mode 100644 index b76fef91..00000000 --- a/0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 9c0d518eb8dc69430e6a8d767bd101dad19b846a Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 5 Mar 2024 11:56:31 +0100 -Subject: [PATCH 338/349] x86/HVM: hide SVM/VMX when their enabling is - prohibited by firmware -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -... or we fail to enable the functionality on the BSP for other reasons. -The only place where hardware announcing the feature is recorded is the -raw CPU policy/featureset. - -Inspired by https://lore.kernel.org/all/20230921114940.957141-1-pbonzini@redhat.com/. - -Signed-off-by: Jan Beulich -Acked-by: Roger Pau Monné -master commit: 0b5f149338e35a795bf609ce584640b0977f9e6c -master date: 2024-01-09 14:06:34 +0100 ---- - xen/arch/x86/hvm/svm/svm.c | 1 + - xen/arch/x86/hvm/vmx/vmcs.c | 17 +++++++++++++++++ - 2 files changed, 18 insertions(+) - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index fd32600ae3..3c17464550 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1669,6 +1669,7 @@ const struct hvm_function_table * __init start_svm(void) - - if ( _svm_cpu_up(true) ) - { -+ setup_clear_cpu_cap(X86_FEATURE_SVM); - printk("SVM: failed to initialise.\n"); - return NULL; - } -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index bcbecc6945..b5ecc51b43 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -2163,6 +2163,23 @@ int __init vmx_vmcs_init(void) - - if ( !ret ) - register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1); -+ else -+ { -+ setup_clear_cpu_cap(X86_FEATURE_VMX); -+ -+ /* -+ * _vmx_vcpu_up() may have made it past feature identification. -+ * Make sure all dependent features are off as well. -+ */ -+ vmx_basic_msr = 0; -+ vmx_pin_based_exec_control = 0; -+ vmx_cpu_based_exec_control = 0; -+ vmx_secondary_exec_control = 0; -+ vmx_vmexit_control = 0; -+ vmx_vmentry_control = 0; -+ vmx_ept_vpid_cap = 0; -+ vmx_vmfunc = 0; -+ } - - return ret; - } --- -2.44.0 - diff --git a/0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch b/0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch deleted file mode 100644 index 3ed6b9ea..00000000 --- a/0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch +++ /dev/null @@ -1,86 +0,0 @@ -From b75bee183210318150e678e14b35224d7c73edb6 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 5 Mar 2024 11:57:02 +0100 -Subject: [PATCH 339/349] xen/sched: Fix UB shift in compat_set_timer_op() - -Tamas reported this UBSAN failure from fuzzing: - - (XEN) ================================================================================ - (XEN) UBSAN: Undefined behaviour in common/sched/compat.c:48:37 - (XEN) left shift of negative value -2147425536 - (XEN) ----[ Xen-4.19-unstable x86_64 debug=y ubsan=y Not tainted ]---- - ... 
- (XEN) Xen call trace: - (XEN) [] R ubsan.c#ubsan_epilogue+0xa/0xd9 - (XEN) [] F __ubsan_handle_shift_out_of_bounds+0x11a/0x1c5 - (XEN) [] F compat_set_timer_op+0x41/0x43 - (XEN) [] F hvm_do_multicall_call+0x77f/0xa75 - (XEN) [] F arch_do_multicall_call+0xec/0xf1 - (XEN) [] F do_multicall+0x1dc/0xde3 - (XEN) [] F hvm_hypercall+0xa00/0x149a - (XEN) [] F vmx_vmexit_handler+0x1596/0x279c - (XEN) [] F vmx_asm_vmexit_handler+0xdb/0x200 - -Left-shifting any negative value is strictly undefined behaviour in C, and -the two parameters here come straight from the guest. - -The fuzzer happened to choose lo 0xf, hi 0x8000e300. - -Switch everything to be unsigned values, making the shift well defined. - -As GCC documents: - - As an extension to the C language, GCC does not use the latitude given in - C99 and C11 only to treat certain aspects of signed '<<' as undefined. - However, -fsanitize=shift (and -fsanitize=undefined) will diagnose such - cases. - -this was deemed not to need an XSA. - -Note: The unsigned -> signed conversion for do_set_timer_op()'s s_time_t -parameter is also well defined. C makes it implementation defined, and GCC -defines it as reduction modulo 2^N to be within range of the new type. - -Fixes: 2942f45e09fb ("Enable compatibility mode operation for HYPERVISOR_sched_op and HYPERVISOR_set_timer_op.") -Reported-by: Tamas K Lengyel -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -master commit: ae6d4fd876765e6d623eec67d14f5d0464be09cb -master date: 2024-02-01 19:52:44 +0000 ---- - xen/common/sched/compat.c | 4 ++-- - xen/include/hypercall-defs.c | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/common/sched/compat.c b/xen/common/sched/compat.c -index 040b4caca2..b827fdecb8 100644 ---- a/xen/common/sched/compat.c -+++ b/xen/common/sched/compat.c -@@ -39,9 +39,9 @@ static int compat_poll(struct compat_sched_poll *compat) - - #include "core.c" - --int compat_set_timer_op(u32 lo, s32 hi) -+int compat_set_timer_op(uint32_t lo, uint32_t hi) - { -- return do_set_timer_op(((s64)hi << 32) | lo); -+ return do_set_timer_op(((uint64_t)hi << 32) | lo); - } - - /* -diff --git a/xen/include/hypercall-defs.c b/xen/include/hypercall-defs.c -index 1896121074..c442dee284 100644 ---- a/xen/include/hypercall-defs.c -+++ b/xen/include/hypercall-defs.c -@@ -127,7 +127,7 @@ xenoprof_op(int op, void *arg) - - #ifdef CONFIG_COMPAT - prefix: compat --set_timer_op(uint32_t lo, int32_t hi) -+set_timer_op(uint32_t lo, uint32_t hi) - multicall(multicall_entry_compat_t *call_list, uint32_t nr_calls) - memory_op(unsigned int cmd, void *arg) - #ifdef CONFIG_IOREQ_SERVER --- -2.44.0 - diff --git a/0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch b/0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch deleted file mode 100644 index e5de8a10..00000000 --- a/0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 76ea2aab3652cc34e474de0905f0a9cd4df7d087 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 5 Mar 2024 11:57:41 +0100 -Subject: [PATCH 340/349] x86/spec: print the built-in SPECULATIVE_HARDEN_* - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Just like it's done for INDIRECT_THUNK and SHADOW_PAGING. 
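
The reporting idiom being extended is a guard covering all relevant
Kconfig options followed by one string token per #ifdef, keeping the
whole report a single printk(). In miniature (standalone C; the
CONFIG_HARDEN_* names are stand-ins, and plain #if defined() is used
instead of Xen's IS_ENABLED()):

#include <stdio.h>

#define CONFIG_HARDEN_ARRAY 1
/* CONFIG_HARDEN_BRANCH deliberately left undefined */

int main(void)
{
/* Emit the header only if at least one option is compiled in... */
#if defined(CONFIG_HARDEN_ARRAY) || defined(CONFIG_HARDEN_BRANCH)
    printf(" Compiled-in support:"
/* ...then let the preprocessor splice in one token per option. */
#ifdef CONFIG_HARDEN_ARRAY
           " HARDEN_ARRAY"
#endif
#ifdef CONFIG_HARDEN_BRANCH
           " HARDEN_BRANCH"
#endif
           "\n");
#endif
    return 0;
}

This guarantees the line only appears when there is something to
report, which is what the hunk below adds for the HARDEN_* options.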
- -Reported-by: Jan Beulich -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54 -master date: 2024-02-27 14:57:52 +0100 ---- - xen/arch/x86/spec_ctrl.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 661716d695..93f1cf3bb5 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -488,13 +488,25 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. */ -- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" - #endif - #ifdef CONFIG_SHADOW_PAGING - " SHADOW_PAGING" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY -+ " HARDEN_ARRAY" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH -+ " HARDEN_BRANCH" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS -+ " HARDEN_GUEST_ACCESS" - #endif - "\n"); - --- -2.44.0 - diff --git a/0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch b/0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch deleted file mode 100644 index 350d0344..00000000 --- a/0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 693455c3c370e535eb6cd065800ff91e147815fa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 5 Mar 2024 11:58:04 +0100 -Subject: [PATCH 341/349] x86/spec: fix INDIRECT_THUNK option to only be set - when build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Attempt to provide a more helpful error message when the user attempts to set -spec-ctrl=bti-thunk option but the support is build-time disabled. - -While there also adjust the command line documentation to mention -CONFIG_INDIRECT_THUNK instead of INDIRECT_THUNK. - -Reported-by: Andrew Cooper -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: 8441fa806a3b778867867cd0159fa1722e90397e -master date: 2024-02-27 14:58:20 +0100 ---- - docs/misc/xen-command-line.pandoc | 10 +++++----- - xen/arch/x86/spec_ctrl.c | 7 ++++++- - 2 files changed, 11 insertions(+), 6 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 05f613c71c..2006697226 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2378,11 +2378,11 @@ guests to use. - performance reasons dom0 is unprotected by default. If it is necessary to - protect dom0 too, boot with `spec-ctrl=ibpb-entry`. - --If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to --select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` --locations. The default thunk is `retpoline` (generally preferred), with the --alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and --`lfence` (an `lfence; jmp *%reg` gadget). -+If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be -+used to select which of the thunks gets patched into the -+`__x86_indirect_thunk_%reg` locations. 
The default thunk is `retpoline` -+(generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget, -+minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget). - - On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - `ibrs=` option can be used to force or prevent Xen using the feature itself. -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 93f1cf3bb5..098fa3184d 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -253,7 +253,12 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - s += 10; - -- if ( !cmdline_strcmp(s, "retpoline") ) -+ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) -+ { -+ no_config_param("INDIRECT_THUNK", "spec-ctrl", s - 10, ss); -+ rc = -EINVAL; -+ } -+ else if ( !cmdline_strcmp(s, "retpoline") ) - opt_thunk = THUNK_RETPOLINE; - else if ( !cmdline_strcmp(s, "lfence") ) - opt_thunk = THUNK_LFENCE; --- -2.44.0 - diff --git a/0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch b/0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch deleted file mode 100644 index 5789f1f9..00000000 --- a/0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 0ce25b46ab2fb53a1b58f7682ca14971453f4f2c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 5 Mar 2024 11:58:36 +0100 -Subject: [PATCH 342/349] x86/spec: do not print thunk option selection if not - built-in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since the thunk built-in enable is printed as part of the "Compiled-in -support:" line, avoid printing anything in "Xen settings:" if the thunk is -disabled at build time. - -Note the BTI-Thunk option printing is also adjusted to print a colon in the -same way the other options on the line do. - -Requested-by: Jan Beulich -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -master commit: 576528a2a742069af203e90c613c5c93e23c9755 -master date: 2024-02-27 14:58:40 +0100 ---- - xen/arch/x86/spec_ctrl.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 098fa3184d..25a18ac598 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -516,11 +516,12 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -- thunk == THUNK_NONE ? "N/A" : -- thunk == THUNK_RETPOLINE ? "RETPOLINE" : -- thunk == THUNK_LFENCE ? "LFENCE" : -- thunk == THUNK_JMP ? "JMP" : "?", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ thunk != THUNK_NONE ? "BTI-Thunk: " : "", -+ thunk == THUNK_NONE ? "" : -+ thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -+ thunk == THUNK_LFENCE ? "LFENCE, " : -+ thunk == THUNK_JMP ? "JMP, " : "?, ", - (!boot_cpu_has(X86_FEATURE_IBRSB) && - !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? 
"IBRS+" : "IBRS-", --- -2.44.0 - diff --git a/0343-xen-livepatch-register-livepatch-regions-when-loaded.patch b/0343-xen-livepatch-register-livepatch-regions-when-loaded.patch deleted file mode 100644 index f7affef0..00000000 --- a/0343-xen-livepatch-register-livepatch-regions-when-loaded.patch +++ /dev/null @@ -1,159 +0,0 @@ -From b11917de0cd261a878beaf50c18a689bde0b2f50 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 5 Mar 2024 11:59:26 +0100 -Subject: [PATCH 343/349] xen/livepatch: register livepatch regions when loaded -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently livepatch regions are registered as virtual regions only after the -livepatch has been applied. - -This can lead to issues when using the pre-apply or post-revert hooks, as at -that point the livepatch is not in the virtual regions list. If a livepatch -pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the -code to handle the bug frame won't be able to find the instruction pointer that -triggered the #UD in any of the registered virtual regions, and hence crash. - -Fix this by adding the livepatch payloads as virtual regions as soon as loaded, -and only remove them once the payload is unloaded. This requires some changes -to the virtual regions code, as the removal of the virtual regions is no longer -done in stop machine context, and hence an RCU barrier is added in order to -make sure there are no users of the virtual region after it's been removed from -the list. - -Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') -Signed-off-by: Roger Pau Monné -Reviewed-by: Ross Lagerwall -master commit: a57b4074ab39bee78b6c116277f0a9963bd8e687 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 4 ++-- - xen/common/virtual_region.c | 44 ++++++++++++++----------------------- - 2 files changed, 19 insertions(+), 29 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index c2ae84d18b..537e9f33e4 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1015,6 +1015,7 @@ static int build_symbol_table(struct payload *payload, - static void free_payload(struct payload *data) - { - ASSERT(spin_is_locked(&payload_lock)); -+ unregister_virtual_region(&data->region); - list_del(&data->list); - payload_cnt--; - payload_version++; -@@ -1114,6 +1115,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) - INIT_LIST_HEAD(&data->list); - INIT_LIST_HEAD(&data->applied_list); - -+ register_virtual_region(&data->region); - list_add_tail(&data->list, &payload_list); - payload_cnt++; - payload_version++; -@@ -1330,7 +1332,6 @@ static inline void apply_payload_tail(struct payload *data) - * The applied_list is iterated by the trap code. - */ - list_add_tail_rcu(&data->applied_list, &applied_list); -- register_virtual_region(&data->region); - - data->state = LIVEPATCH_STATE_APPLIED; - } -@@ -1376,7 +1377,6 @@ static inline void revert_payload_tail(struct payload *data) - * The applied_list is iterated by the trap code. 
- */ - list_del_rcu(&data->applied_list); -- unregister_virtual_region(&data->region); - - data->reverted = true; - data->state = LIVEPATCH_STATE_CHECKED; -diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c -index 5f89703f51..9f12c30efe 100644 ---- a/xen/common/virtual_region.c -+++ b/xen/common/virtual_region.c -@@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = { - }; - - /* -- * RCU locking. Additions are done either at startup (when there is only -- * one CPU) or when all CPUs are running without IRQs. -- * -- * Deletions are bit tricky. We do it when Live Patch (all CPUs running -- * without IRQs) or during bootup (when clearing the init). -- * -- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock -- * on deletion. -+ * RCU locking. Modifications to the list must be done in exclusive mode, and -+ * hence need to hold the spinlock. - * - * All readers of virtual_region_list MUST use list_for_each_entry_rcu. - */ -@@ -58,41 +52,36 @@ const struct virtual_region *find_text_region(unsigned long addr) - - void register_virtual_region(struct virtual_region *r) - { -- ASSERT(!local_irq_is_enabled()); -+ unsigned long flags; - -+ spin_lock_irqsave(&virtual_region_lock, flags); - list_add_tail_rcu(&r->list, &virtual_region_list); -+ spin_unlock_irqrestore(&virtual_region_lock, flags); - } - --static void remove_virtual_region(struct virtual_region *r) -+/* -+ * Suggest inline so when !CONFIG_LIVEPATCH the function is not left -+ * unreachable after init code is removed. -+ */ -+static void inline remove_virtual_region(struct virtual_region *r) - { - unsigned long flags; - - spin_lock_irqsave(&virtual_region_lock, flags); - list_del_rcu(&r->list); - spin_unlock_irqrestore(&virtual_region_lock, flags); -- /* -- * We do not need to invoke call_rcu. -- * -- * This is due to the fact that on the deletion we have made sure -- * to use spinlocks (to guard against somebody else calling -- * unregister_virtual_region) and list_deletion spiced with -- * memory barrier. -- * -- * That protects us from corrupting the list as the readers all -- * use list_for_each_entry_rcu which is safe against concurrent -- * deletions. -- */ - } - -+#ifdef CONFIG_LIVEPATCH - void unregister_virtual_region(struct virtual_region *r) - { -- /* Expected to be called from Live Patch - which has IRQs disabled. */ -- ASSERT(!local_irq_is_enabled()); -- - remove_virtual_region(r); -+ -+ /* Assert that no CPU might be using the removed region. 
*/ -+ rcu_barrier(); - } - --#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86) -+#ifdef CONFIG_X86 - void relax_virtual_region_perms(void) - { - const struct virtual_region *region; -@@ -116,7 +105,8 @@ void tighten_virtual_region_perms(void) - PAGE_HYPERVISOR_RX); - rcu_read_unlock(&rcu_virtual_region_lock); - } --#endif -+#endif /* CONFIG_X86 */ -+#endif /* CONFIG_LIVEPATCH */ - - void __init unregister_init_virtual_region(void) - { --- -2.44.0 - diff --git a/0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch b/0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch deleted file mode 100644 index d14413ab..00000000 --- a/0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch +++ /dev/null @@ -1,149 +0,0 @@ -From c54cf903b06fb1933fad053cc547580c92c856ea Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 5 Mar 2024 11:59:35 +0100 -Subject: [PATCH 344/349] xen/livepatch: search for symbols in all loaded - payloads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When checking if an address belongs to a patch, or when resolving a symbol, -take into account all loaded livepatch payloads, even if not applied. - -This is required in order for the pre-apply and post-revert hooks to work -properly, or else Xen won't detect the instruction pointer belonging to those -hooks as being part of the currently active text. - -Move the RCU handling to be used for payload_list instead of applied_list, as -now the calls from trap code will iterate over the payload_list. - -Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') -Signed-off-by: Roger Pau Monné -Reviewed-by: Ross Lagerwall -master commit: d2daa40fb3ddb8f83e238e57854bd878924cde90 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 49 +++++++++++++++--------------------------- - 1 file changed, 17 insertions(+), 32 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index 537e9f33e4..a129ab9973 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -36,13 +36,14 @@ - * caller in schedule_work. - */ - static DEFINE_SPINLOCK(payload_lock); --static LIST_HEAD(payload_list); -- - /* -- * Patches which have been applied. Need RCU in case we crash (and then -- * traps code would iterate via applied_list) when adding entries on the list. -+ * Need RCU in case we crash (and then traps code would iterate via -+ * payload_list) when adding entries on the list. - */ --static DEFINE_RCU_READ_LOCK(rcu_applied_lock); -+static DEFINE_RCU_READ_LOCK(rcu_payload_lock); -+static LIST_HEAD(payload_list); -+ -+/* Patches which have been applied. Only modified from stop machine context. */ - static LIST_HEAD(applied_list); - - static unsigned int payload_cnt; -@@ -111,12 +112,8 @@ bool_t is_patch(const void *ptr) - const struct payload *data; - bool_t r = 0; - -- /* -- * Only RCU locking since this list is only ever changed during apply -- * or revert context. And in case it dies there we need an safe list. 
-- */ -- rcu_read_lock(&rcu_applied_lock); -- list_for_each_entry_rcu ( data, &applied_list, applied_list ) -+ rcu_read_lock(&rcu_payload_lock); -+ list_for_each_entry_rcu ( data, &payload_list, list ) - { - if ( (ptr >= data->rw_addr && - ptr < (data->rw_addr + data->rw_size)) || -@@ -130,7 +127,7 @@ bool_t is_patch(const void *ptr) - } - - } -- rcu_read_unlock(&rcu_applied_lock); -+ rcu_read_unlock(&rcu_payload_lock); - - return r; - } -@@ -166,12 +163,8 @@ static const char *cf_check livepatch_symbols_lookup( - const void *va = (const void *)addr; - const char *n = NULL; - -- /* -- * Only RCU locking since this list is only ever changed during apply -- * or revert context. And in case it dies there we need an safe list. -- */ -- rcu_read_lock(&rcu_applied_lock); -- list_for_each_entry_rcu ( data, &applied_list, applied_list ) -+ rcu_read_lock(&rcu_payload_lock); -+ list_for_each_entry_rcu ( data, &payload_list, list ) - { - if ( va < data->text_addr || - va >= (data->text_addr + data->text_size) ) -@@ -200,7 +193,7 @@ static const char *cf_check livepatch_symbols_lookup( - n = data->symtab[best].name; - break; - } -- rcu_read_unlock(&rcu_applied_lock); -+ rcu_read_unlock(&rcu_payload_lock); - - return n; - } -@@ -1016,7 +1009,8 @@ static void free_payload(struct payload *data) - { - ASSERT(spin_is_locked(&payload_lock)); - unregister_virtual_region(&data->region); -- list_del(&data->list); -+ list_del_rcu(&data->list); -+ rcu_barrier(); - payload_cnt--; - payload_version++; - free_payload_data(data); -@@ -1116,7 +1110,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) - INIT_LIST_HEAD(&data->applied_list); - - register_virtual_region(&data->region); -- list_add_tail(&data->list, &payload_list); -+ list_add_tail_rcu(&data->list, &payload_list); - payload_cnt++; - payload_version++; - } -@@ -1327,11 +1321,7 @@ static int apply_payload(struct payload *data) - - static inline void apply_payload_tail(struct payload *data) - { -- /* -- * We need RCU variant (which has barriers) in case we crash here. -- * The applied_list is iterated by the trap code. -- */ -- list_add_tail_rcu(&data->applied_list, &applied_list); -+ list_add_tail(&data->applied_list, &applied_list); - - data->state = LIVEPATCH_STATE_APPLIED; - } -@@ -1371,12 +1361,7 @@ static int revert_payload(struct payload *data) - - static inline void revert_payload_tail(struct payload *data) - { -- -- /* -- * We need RCU variant (which has barriers) in case we crash here. -- * The applied_list is iterated by the trap code. -- */ -- list_del_rcu(&data->applied_list); -+ list_del(&data->applied_list); - - data->reverted = true; - data->state = LIVEPATCH_STATE_CHECKED; --- -2.44.0 - diff --git a/0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch b/0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch deleted file mode 100644 index feeb4329..00000000 --- a/0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 5564323f643715f9d364df88e0eb9c7d6fd2c22b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 5 Mar 2024 11:59:43 +0100 -Subject: [PATCH 345/349] xen/livepatch: fix norevert test attempt to open-code - revert -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The purpose of the norevert test is to install a dummy handler that replaces -the internal Xen revert code, and then perform the revert in the post-revert -hook. 
For that purpose the usage of the previous common_livepatch_revert() is -not enough, as that just reverts specific functions, but not the whole state of -the payload. - -Remove both common_livepatch_{apply,revert}() and instead expose -revert_payload{,_tail}() in order to perform the patch revert from the -post-revert hook. - -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné -Reviewed-by: Ross Lagerwall -master commit: cdae267ce10d04d71d1687b5701ff2911a96b6dc -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 41 +++++++++++++++++-- - xen/include/xen/livepatch.h | 32 ++------------- - .../livepatch/xen_action_hooks_norevert.c | 22 +++------- - 3 files changed, 46 insertions(+), 49 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index a129ab9973..a5068a2217 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1310,7 +1310,22 @@ static int apply_payload(struct payload *data) - ASSERT(!local_irq_is_enabled()); - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_apply(&data->funcs[i], &data->fstate[i]); -+ { -+ const struct livepatch_func *func = &data->funcs[i]; -+ struct livepatch_fstate *state = &data->fstate[i]; -+ -+ /* If the action has been already executed on this function, do nothing. */ -+ if ( state->applied == LIVEPATCH_FUNC_APPLIED ) -+ { -+ printk(XENLOG_WARNING LIVEPATCH -+ "%s: %s has been already applied before\n", -+ __func__, func->name); -+ continue; -+ } -+ -+ arch_livepatch_apply(func, state); -+ state->applied = LIVEPATCH_FUNC_APPLIED; -+ } - - arch_livepatch_revive(); - -@@ -1326,7 +1341,7 @@ static inline void apply_payload_tail(struct payload *data) - data->state = LIVEPATCH_STATE_APPLIED; - } - --static int revert_payload(struct payload *data) -+int revert_payload(struct payload *data) - { - unsigned int i; - int rc; -@@ -1341,7 +1356,25 @@ static int revert_payload(struct payload *data) - } - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_revert(&data->funcs[i], &data->fstate[i]); -+ { -+ const struct livepatch_func *func = &data->funcs[i]; -+ struct livepatch_fstate *state = &data->fstate[i]; -+ -+ /* -+ * If the apply action hasn't been executed on this function, do -+ * nothing. -+ */ -+ if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) -+ { -+ printk(XENLOG_WARNING LIVEPATCH -+ "%s: %s has not been applied before\n", -+ __func__, func->name); -+ continue; -+ } -+ -+ arch_livepatch_revert(func, state); -+ state->applied = LIVEPATCH_FUNC_NOT_APPLIED; -+ } - - /* - * Since we are running with IRQs disabled and the hooks may call common -@@ -1359,7 +1392,7 @@ static int revert_payload(struct payload *data) - return 0; - } - --static inline void revert_payload_tail(struct payload *data) -+void revert_payload_tail(struct payload *data) - { - list_del(&data->applied_list); - -diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h -index 537d3d58b6..c9ee58fd37 100644 ---- a/xen/include/xen/livepatch.h -+++ b/xen/include/xen/livepatch.h -@@ -136,35 +136,11 @@ void arch_livepatch_post_action(void); - void arch_livepatch_mask(void); - void arch_livepatch_unmask(void); - --static inline void common_livepatch_apply(const struct livepatch_func *func, -- struct livepatch_fstate *state) --{ -- /* If the action has been already executed on this function, do nothing. 
 */
-- if ( state->applied == LIVEPATCH_FUNC_APPLIED )
-- {
-- printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n",
-- __func__, func->name);
-- return;
-- }
--
-- arch_livepatch_apply(func, state);
-- state->applied = LIVEPATCH_FUNC_APPLIED;
--}
-+/* Only for testing purposes. */
-+struct payload;
-+int revert_payload(struct payload *data);
-+void revert_payload_tail(struct payload *data);
-
--static inline void common_livepatch_revert(const struct livepatch_func *func,
-- struct livepatch_fstate *state)
--{
-- /* If the apply action hasn't been executed on this function, do nothing. */
-- if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED )
-- {
-- printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n",
-- __func__, func->name);
-- return;
-- }
--
-- arch_livepatch_revert(func, state);
-- state->applied = LIVEPATCH_FUNC_NOT_APPLIED;
--}
- #else
-
- /*
-diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c
-index c173855192..c5fbab1746 100644
---- a/xen/test/livepatch/xen_action_hooks_norevert.c
-+++ b/xen/test/livepatch/xen_action_hooks_norevert.c
-@@ -96,26 +96,14 @@ static int revert_hook(livepatch_payload_t *payload)
-
- static void post_revert_hook(livepatch_payload_t *payload)
- {
-- int i;
-+ unsigned long flags;
-
- printk(KERN_DEBUG "%s: Hook starting.\n", __func__);
-
-- for (i = 0; i < payload->nfuncs; i++)
-- {
-- const struct livepatch_func *func = &payload->funcs[i];
-- struct livepatch_fstate *fstate = &payload->fstate[i];
--
-- BUG_ON(revert_cnt != 1);
-- BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED);
--
-- /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */
-- arch_livepatch_quiesce();
-- common_livepatch_revert(payload);
-- arch_livepatch_revive();
-- BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED);
--
-- printk(KERN_DEBUG "%s: post reverted: %s\n", __func__, func->name);
-- }
-+ local_irq_save(flags);
-+ BUG_ON(revert_payload(payload));
-+ revert_payload_tail(payload);
-+ local_irq_restore(flags);
-
- printk(KERN_DEBUG "%s: Hook done.\n", __func__);
- }
---
-2.44.0
-
diff --git a/0346-xen-livepatch-properly-build-the-noapply-and-norever.patch b/0346-xen-livepatch-properly-build-the-noapply-and-norever.patch
deleted file mode 100644
index 1063767e..00000000
--- a/0346-xen-livepatch-properly-build-the-noapply-and-norever.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From a59106b27609b6ae2873bd6755949b1258290872 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
-Date: Tue, 5 Mar 2024 11:59:51 +0100
-Subject: [PATCH 346/349] xen/livepatch: properly build the noapply and
- norevert tests
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-It seems the build variables for those tests were copy-pasted from
-xen_action_hooks_marker-objs and not adjusted to use the correct source files.
- -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné -Reviewed-by: Ross Lagerwall -master commit: e579677095782c7dec792597ba8b037b7d716b32 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/test/livepatch/Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile -index c258ab0b59..d987a8367f 100644 ---- a/xen/test/livepatch/Makefile -+++ b/xen/test/livepatch/Makefile -@@ -118,12 +118,12 @@ xen_action_hooks_marker-objs := xen_action_hooks_marker.o xen_hello_world_func.o - $(obj)/xen_action_hooks_noapply.o: $(obj)/config.h - - extra-y += xen_action_hooks_noapply.livepatch --xen_action_hooks_noapply-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o -+xen_action_hooks_noapply-objs := xen_action_hooks_noapply.o xen_hello_world_func.o note.o xen_note.o - - $(obj)/xen_action_hooks_norevert.o: $(obj)/config.h - - extra-y += xen_action_hooks_norevert.livepatch --xen_action_hooks_norevert-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o -+xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o - - EXPECT_BYTES_COUNT := 8 - CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g') --- -2.44.0 - diff --git a/0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch b/0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch deleted file mode 100644 index 6f85b4f3..00000000 --- a/0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch +++ /dev/null @@ -1,39 +0,0 @@ -From c4ee68eda9937743527fff41f4ede0f6a3228080 Mon Sep 17 00:00:00 2001 -From: Jason Andryuk -Date: Tue, 5 Mar 2024 12:00:30 +0100 -Subject: [PATCH 347/349] libxl: Fix segfault in device_model_spawn_outcome - -libxl__spawn_qdisk_backend() explicitly sets guest_config to NULL when -starting QEMU (the usual launch through libxl__spawn_local_dm() has a -guest_config though). - -Bail early on a NULL guest_config/d_config. This skips the QMP queries -for chardevs and VNC, but this xenpv QEMU instance isn't expected to -provide those - only qdisk (or 9pfs backends after an upcoming change). - -Signed-off-by: Jason Andryuk -Acked-by: Anthony PERARD -master commit: d4f3d35f043f6ef29393166b0dd131c8102cf255 -master date: 2024-02-29 08:18:38 +0100 ---- - tools/libs/light/libxl_dm.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index ed620a9d8e..29b43ed20a 100644 ---- a/tools/libs/light/libxl_dm.c -+++ b/tools/libs/light/libxl_dm.c -@@ -3172,8 +3172,8 @@ static void device_model_spawn_outcome(libxl__egc *egc, - - /* Check if spawn failed */ - if (rc) goto out; -- -- if (d_config->b_info.device_model_version -+ /* d_config is NULL for xl devd/libxl__spawn_qemu_xenpv_backend(). 
 */
-+ if (d_config && d_config->b_info.device_model_version
- == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
- rc = libxl__ev_time_register_rel(ao, &dmss->timeout,
- devise_model_postconfig_timeout,
---
-2.44.0
-
diff --git a/0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch b/0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch
deleted file mode 100644
index 0b7bda09..00000000
--- a/0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch
+++ /dev/null
@@ -1,197 +0,0 @@
-From 2f49d9f89c14519d4cb1e06ab8370cf4ba50fab7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
-Date: Tue, 5 Mar 2024 12:00:47 +0100
-Subject: [PATCH 348/349] x86/altcall: always use a temporary parameter
- stashing variable
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The usage in ALT_CALL_ARG() on clang of:
-
-register union {
- typeof(arg) e;
- const unsigned long r;
-} ...
-
-When `arg` is the first argument to alternative_{,v}call() and
-const_vlapic_vcpu() is used results in clang 3.5.0 complaining with:
-
-arch/x86/hvm/vlapic.c:141:47: error: non-const static data member must be initialized out of line
- alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) )
-
-Work around this by pulling `arg1` into a local variable, like it's done for
-further arguments (arg2, arg3...)
-
-Originally arg1 wasn't pulled into a variable because, for the a1_ register
-local variable, the possible clobbering as a result of operators on other
-variables doesn't matter:
-
-https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
-
-Note clang version 3.8.1 seems to already be fixed and doesn't require the
-workaround, but since it's harmless do it uniformly everywhere.
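The stashing pattern itself is easy to demonstrate outside of Xen. Below is a
minimal GNU C reduction, not the Xen macros themselves; my_call1() and
add_one() are invented names for illustration:

#include <stdio.h>

/*
 * Evaluate the macro argument exactly once into a typeof() local before
 * use, mirroring what the Xen macros already did for arg2 onwards. The
 * argument expression therefore never appears inside a register variable
 * declaration, which is the construct clang 3.5.0 rejected above.
 */
#define my_call1(func, arg) ({ \
    typeof(arg) v1_ = (arg);   \
    (func)(v1_);               \
})

static int add_one(int x) { return x + 1; }

int main(void)
{
    printf("%d\n", my_call1(add_one, 41)); /* prints 42 */
    return 0;
}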
- -Reported-by: Andrew Cooper -Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang') -Signed-off-by: Roger Pau Monné -Acked-by: Jan Beulich -master commit: c20850540ad6a32f4fc17bde9b01c92b0df18bf0 -master date: 2024-02-29 08:21:49 +0100 ---- - xen/arch/x86/include/asm/alternative.h | 36 +++++++++++++++++--------- - 1 file changed, 24 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h -index bcb1dc94f4..fa04481316 100644 ---- a/xen/arch/x86/include/asm/alternative.h -+++ b/xen/arch/x86/include/asm/alternative.h -@@ -253,21 +253,24 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall1(func, arg) ({ \ -- ALT_CALL_ARG(arg, 1); \ -+ typeof(arg) v1_ = (arg); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_NO_ARG2; \ - (void)sizeof(func(arg)); \ - (void)alternative_callN(1, int, func); \ - }) - - #define alternative_call1(func, arg) ({ \ -- ALT_CALL_ARG(arg, 1); \ -+ typeof(arg) v1_ = (arg); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_NO_ARG2; \ - alternative_callN(1, typeof(func(arg)), func); \ - }) - - #define alternative_vcall2(func, arg1, arg2) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_NO_ARG3; \ - (void)sizeof(func(arg1, arg2)); \ -@@ -275,17 +278,19 @@ extern void alternative_branches(void); - }) - - #define alternative_call2(func, arg1, arg2) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_NO_ARG3; \ - alternative_callN(2, typeof(func(arg1, arg2)), func); \ - }) - - #define alternative_vcall3(func, arg1, arg2, arg3) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_NO_ARG4; \ -@@ -294,9 +299,10 @@ extern void alternative_branches(void); - }) - - #define alternative_call3(func, arg1, arg2, arg3) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_NO_ARG4; \ -@@ -305,10 +311,11 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -318,10 +325,11 @@ extern void alternative_branches(void); - }) - - #define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -332,11 +340,12 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - 
ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4);
-@@ -347,11 +356,12 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -363,12 +373,13 @@ extern void alternative_branches(void);
- })
-
- #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
- typeof(arg6) v6_ = (arg6); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
-@@ -379,12 +390,13 @@ extern void alternative_branches(void);
- })
-
- #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
-+ typeof(arg1) v1_ = (arg1); \
- typeof(arg2) v2_ = (arg2); \
- typeof(arg3) v3_ = (arg3); \
- typeof(arg4) v4_ = (arg4); \
- typeof(arg5) v5_ = (arg5); \
- typeof(arg6) v6_ = (arg6); \
-- ALT_CALL_ARG(arg1, 1); \
-+ ALT_CALL_ARG(v1_, 1); \
- ALT_CALL_ARG(v2_, 2); \
- ALT_CALL_ARG(v3_, 3); \
- ALT_CALL_ARG(v4_, 4); \
---
-2.44.0
-
diff --git a/0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch b/0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
deleted file mode 100644
index e233ca51..00000000
--- a/0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From 54dacb5c02cba4676879ed077765734326b78e39 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Tue, 5 Mar 2024 12:01:22 +0100
-Subject: [PATCH 349/349] x86/cpu-policy: Allow for levelling of VERW side
- effects
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by
-having them unconditionally set in max, with the host values reflected in
-default. Annotate the bits as having special properties.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Roger Pau Monné
-master commit: de17162cafd27f2865a3102a2ec0f386a02ed03d
-master date: 2024-03-01 20:14:19 +0000
----
- xen/arch/x86/cpu-policy.c | 24 +++++++++++++++++++++
- xen/arch/x86/include/asm/cpufeature.h | 1 +
- xen/include/public/arch-x86/cpufeatureset.h | 4 ++--
- 3 files changed, 27 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
-index f0f2c8a1c0..7b875a7221 100644
---- a/xen/arch/x86/cpu-policy.c
-+++ b/xen/arch/x86/cpu-policy.c
-@@ -435,6 +435,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
- __set_bit(X86_FEATURE_RSBA, fs);
- __set_bit(X86_FEATURE_RRSBA, fs);
-
-+ /*
-+ * These bits indicate that the VERW instruction may have gained
-+ * scrubbing side effects. With pooling, they mean "you might migrate
-+ * somewhere where scrubbing is necessary", and may need exposing on
-+ * unaffected hardware. This is fine, because the VERW instruction
-+ * has been around since the 286.
-+ */
-+ __set_bit(X86_FEATURE_MD_CLEAR, fs);
-+ __set_bit(X86_FEATURE_FB_CLEAR, fs);
-+
- /*
- * The Gather Data Sampling microcode mitigation (August 2023) has an
- * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
-@@ -469,6 +479,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) - __clear_bit(X86_FEATURE_RDRAND, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. The max policy has them set for migration -+ * reasons, so reset the default policy back to the host values in -+ * case we're unaffected. -+ */ -+ __clear_bit(X86_FEATURE_MD_CLEAR, fs); -+ if ( cpu_has_md_clear ) -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ -+ __clear_bit(X86_FEATURE_FB_CLEAR, fs); -+ if ( cpu_has_fb_clear ) -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 9ef7756593..ec824e8954 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -136,6 +136,7 @@ - #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) - #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT) - #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL) -+#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 94d211df2f..aec1407613 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -260,7 +260,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single - XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ - XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ - XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */ --XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ -+XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -@@ -321,7 +321,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing - XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ - XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ - XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ --XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ -+XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */ - XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ - XEN_CPUFEATURE(RRSBA, 16*32+19) /*! 
Restricted RSB Alternative */ - XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ --- -2.44.0 - diff --git a/0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch b/0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch deleted file mode 100644 index 1ea5fc11..00000000 --- a/0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch +++ /dev/null @@ -1,90 +0,0 @@ -From bb27e11c56963e170d1f6d2fbddbc956f7164121 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 2 Apr 2024 16:17:25 +0200 -Subject: [PATCH] x86/cpu-policy: Hide x2APIC from PV guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -PV guests can't write to MSR_APIC_BASE (in order to set EXTD), nor can they -access any of the x2APIC MSR range. Therefore they mustn't see the x2APIC -CPUID bit saying that they can. - -Right now, the host x2APIC flag filters into PV guests, meaning that PV guests -generally see x2APIC except on Zen1-and-older AMD systems. - -Linux works around this by explicitly hiding the bit itself, and filtering -EXTD out of MSR_APIC_BASE reads. NetBSD behaves more in the spirit of PV -guests, and entirely ignores the APIC when built as a PV guest. - -Change the annotation from !A to !S. This has a consequence of stripping it -out of both PV featuremasks. However, as existing guests may have seen the -bit, set it back into the PV Max policy; a VM which saw the bit and is alive -enough to migrate will have ignored it one way or another. - -Hiding x2APIC does change the contents of leaf 0xb, but as the information is -nonsense to begin with, this is likely an improvement on the status quo. - -Xen's blind assumption that APIC_ID = vCPU_ID * 2 isn't interlinked with the -host's topology structure, where a PV guest may see real host values, and the -APIC_IDs are useless without an MADT to start with. Dom0 is the only PV VM to -get an MADT but it's the host one, meaning the two sets of APIC_IDs are from -different address spaces. - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné -master commit: 5420aa165dfa5fe95dd84bb71cb96c15459935b1 -master date: 2024-03-01 20:14:19 +0000 ---- - xen/arch/x86/cpu-policy.c | 11 +++++++++-- - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - 2 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 96c2cee1a8..ed64d56294 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -559,6 +559,14 @@ static void __init calculate_pv_max_policy(void) - for ( i = 0; i < ARRAY_SIZE(fs); ++i ) - fs[i] &= pv_max_featuremask[i]; - -+ /* -+ * Xen at the time of writing (Feb 2024, 4.19 dev cycle) used to leak the -+ * host x2APIC capability into PV guests, but never supported the guest -+ * trying to turn x2APIC mode on. Tolerate an incoming VM which saw the -+ * x2APIC CPUID bit and is alive enough to migrate. -+ */ -+ __set_bit(X86_FEATURE_X2APIC, fs); -+ - /* - * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional - * availability, or admin choice), hide the feature. -@@ -837,11 +845,10 @@ void recalculate_cpuid_policy(struct domain *d) - } - - /* -- * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits -+ * Allow the toolstack to set HTT and CMP_LEGACY. These bits - * affect how to interpret topology information in other cpuid leaves. 
- */ - __set_bit(X86_FEATURE_HTT, max_fs); -- __set_bit(X86_FEATURE_X2APIC, max_fs); - __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); - - /* -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 113e6cadc1..bc971f3c6f 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -123,7 +123,7 @@ XEN_CPUFEATURE(PCID, 1*32+17) /*H Process Context ID */ - XEN_CPUFEATURE(DCA, 1*32+18) /* Direct Cache Access */ - XEN_CPUFEATURE(SSE4_1, 1*32+19) /*A Streaming SIMD Extensions 4.1 */ - XEN_CPUFEATURE(SSE4_2, 1*32+20) /*A Streaming SIMD Extensions 4.2 */ --XEN_CPUFEATURE(X2APIC, 1*32+21) /*!A Extended xAPIC */ -+XEN_CPUFEATURE(X2APIC, 1*32+21) /*!S Extended xAPIC */ - XEN_CPUFEATURE(MOVBE, 1*32+22) /*A movbe instruction */ - XEN_CPUFEATURE(POPCNT, 1*32+23) /*A POPCNT instruction */ - XEN_CPUFEATURE(TSC_DEADLINE, 1*32+24) /*S TSC Deadline Timer */ --- -2.44.0 - diff --git a/0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch b/0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch deleted file mode 100644 index 6f3b7de2..00000000 --- a/0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 70ad9c5fdeac4814050080c87e06d44292ecf868 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 2 Apr 2024 16:18:05 +0200 -Subject: [PATCH] x86/cpu-policy: Fix visibility of HTT/CMP_LEGACY in max - policies -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The block in recalculate_cpuid_policy() predates the proper split between -default and max policies, and was a "slightly max for a toolstack which knows -about it" capability. It didn't get transformed properly in Xen 4.14. - -Because Xen will accept a VM with HTT/CMP_LEGACY seen, they should be visible -in the max polices. Keep the default policy matching host settings. - -This manifested as an incorrectly-rejected migration across XenServer's Xen -4.13 -> 4.17 upgrade, as Xapi is slowly growing the logic to check a VM -against the target max policy. - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné -master commit: e2d8a652251660c3252d92b442e1a9c5d6e6a1e9 -master date: 2024-03-01 20:14:19 +0000 ---- - xen/arch/x86/cpu-policy.c | 29 ++++++++++++++++++++++------- - 1 file changed, 22 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index ed64d56294..24acd12ce2 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -458,6 +458,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - raw_cpu_policy.feat.clwb ) - __set_bit(X86_FEATURE_CLWB, fs); - } -+ -+ /* -+ * Topology information inside the guest is entirely at the toolstack's -+ * discretion, and bears no relationship to the host we're running on. -+ * -+ * HTT identifies p->basic.lppp as valid -+ * CMP_LEGACY identifies p->extd.nc as valid -+ */ -+ __set_bit(X86_FEATURE_HTT, fs); -+ __set_bit(X86_FEATURE_CMP_LEGACY, fs); - } - - static void __init guest_common_default_feature_adjustments(uint32_t *fs) -@@ -512,6 +522,18 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - __clear_bit(X86_FEATURE_CLWB, fs); - } - -+ /* -+ * Topology information is at the toolstack's discretion so these are -+ * unconditionally set in max, but pick a default which matches the host. 
-+ */ -+ __clear_bit(X86_FEATURE_HTT, fs); -+ if ( cpu_has_htt ) -+ __set_bit(X86_FEATURE_HTT, fs); -+ -+ __clear_bit(X86_FEATURE_CMP_LEGACY, fs); -+ if ( cpu_has_cmp_legacy ) -+ __set_bit(X86_FEATURE_CMP_LEGACY, fs); -+ - /* - * On certain hardware, speculative or errata workarounds can result in - * TSX being placed in "force-abort" mode, where it doesn't actually -@@ -844,13 +866,6 @@ void recalculate_cpuid_policy(struct domain *d) - } - } - -- /* -- * Allow the toolstack to set HTT and CMP_LEGACY. These bits -- * affect how to interpret topology information in other cpuid leaves. -- */ -- __set_bit(X86_FEATURE_HTT, max_fs); -- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); -- - /* - * 32bit PV domains can't use any Long Mode features, and cannot use - * SYSCALL on non-AMD hardware. --- -2.44.0 - diff --git a/0500-xsa452-4.17-1.patch b/0500-xsa452-4.17-1.patch deleted file mode 100644 index cdec10eb..00000000 --- a/0500-xsa452-4.17-1.patch +++ /dev/null @@ -1,304 +0,0 @@ -From: Andrew Cooper -Subject: x86/entry: Introduce EFRAME_* constants - -restore_all_guest() does a lot of manipulation of the stack after popping the -GPRs, and uses raw %rsp displacements to do so. Also, almost all entrypaths -use raw %rsp displacements prior to pushing GPRs. - -Provide better mnemonics, to aid readability and reduce the chance of errors -when editing. - -No functional change. The resulting binary is identical. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035) - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 287dac101ad4..31fa63b77fd1 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -51,6 +51,23 @@ void __dummy__(void) - OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es); - BLANK(); - -+ /* -+ * EFRAME_* is for the entry/exit logic where %rsp is pointing at -+ * UREGS_error_code and GPRs are still/already guest values. -+ */ -+#define OFFSET_EF(sym, mem) \ -+ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -+ offsetof(struct cpu_user_regs, error_code)) -+ -+ OFFSET_EF(EFRAME_entry_vector, entry_vector); -+ OFFSET_EF(EFRAME_rip, rip); -+ OFFSET_EF(EFRAME_cs, cs); -+ OFFSET_EF(EFRAME_eflags, eflags); -+ OFFSET_EF(EFRAME_rsp, rsp); -+ BLANK(); -+ -+#undef OFFSET_EF -+ - OFFSET(VCPU_processor, struct vcpu, processor); - OFFSET(VCPU_domain, struct vcpu, domain); - OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 253bb1688c4f..7c211314d885 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -15,7 +15,7 @@ ENTRY(entry_int82) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $HYPERCALL_VECTOR, 4(%rsp) -+ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. 
*/ - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 585b0c955191..412cbeb3eca4 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -190,15 +190,15 @@ restore_all_guest: - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL -- testw $TRAP_syscall,4(%rsp) -+ testw $TRAP_syscall, EFRAME_entry_vector(%rsp) - jz iret_exit_to_guest - -- movq 24(%rsp),%r11 # RFLAGS -+ mov EFRAME_eflags(%rsp), %r11 - andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11 - orq $X86_EFLAGS_IF,%r11 - - /* Don't use SYSRET path if the return address is not canonical. */ -- movq 8(%rsp),%rcx -+ mov EFRAME_rip(%rsp), %rcx - sarq $47,%rcx - incl %ecx - cmpl $1,%ecx -@@ -213,20 +213,20 @@ restore_all_guest: - ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK - #endif - -- movq 8(%rsp), %rcx # RIP -- cmpw $FLAT_USER_CS32,16(%rsp)# CS -- movq 32(%rsp),%rsp # RSP -+ mov EFRAME_rip(%rsp), %rcx -+ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) -+ mov EFRAME_rsp(%rsp), %rsp - je 1f - sysretq - 1: sysretl - - ALIGN - .Lrestore_rcx_iret_exit_to_guest: -- movq 8(%rsp), %rcx # RIP -+ mov EFRAME_rip(%rsp), %rcx - /* No special register assumptions. */ - iret_exit_to_guest: -- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp) -- orl $X86_EFLAGS_IF,24(%rsp) -+ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) -+ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -257,7 +257,7 @@ ENTRY(lstar_enter) - pushq $FLAT_KERNEL_CS64 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -294,7 +294,7 @@ ENTRY(cstar_enter) - pushq $FLAT_USER_CS32 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved) - pushq $3 /* ring 3 null cs */ - pushq $0 /* null rip */ - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $0x80, 4(%rsp) -+ movl $0x80, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -649,7 +649,7 @@ ret_from_intr: - .section .init.text, "ax", @progbits - ENTRY(early_page_fault) - ENDBR64 -- movl $TRAP_page_fault, 4(%rsp) -+ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp) - SAVE_ALL - movq %rsp, %rdi - call do_early_page_fault -@@ -716,7 +716,7 @@ ENTRY(common_interrupt) - - ENTRY(page_fault) - ENDBR64 -- movl $TRAP_page_fault,4(%rsp) -+ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp) - /* No special register assumptions. 
*/ - GLOBAL(handle_exception) - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP -@@ -892,90 +892,90 @@ FATAL_exception_with_ints_disabled: - ENTRY(divide_error) - ENDBR64 - pushq $0 -- movl $TRAP_divide_error,4(%rsp) -+ movl $TRAP_divide_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(coprocessor_error) - ENDBR64 - pushq $0 -- movl $TRAP_copro_error,4(%rsp) -+ movl $TRAP_copro_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(simd_coprocessor_error) - ENDBR64 - pushq $0 -- movl $TRAP_simd_error,4(%rsp) -+ movl $TRAP_simd_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(device_not_available) - ENDBR64 - pushq $0 -- movl $TRAP_no_device,4(%rsp) -+ movl $TRAP_no_device, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(debug) - ENDBR64 - pushq $0 -- movl $TRAP_debug,4(%rsp) -+ movl $TRAP_debug, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - ENTRY(int3) - ENDBR64 - pushq $0 -- movl $TRAP_int3,4(%rsp) -+ movl $TRAP_int3, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(overflow) - ENDBR64 - pushq $0 -- movl $TRAP_overflow,4(%rsp) -+ movl $TRAP_overflow, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(bounds) - ENDBR64 - pushq $0 -- movl $TRAP_bounds,4(%rsp) -+ movl $TRAP_bounds, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(invalid_op) - ENDBR64 - pushq $0 -- movl $TRAP_invalid_op,4(%rsp) -+ movl $TRAP_invalid_op, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(invalid_TSS) - ENDBR64 -- movl $TRAP_invalid_tss,4(%rsp) -+ movl $TRAP_invalid_tss, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(segment_not_present) - ENDBR64 -- movl $TRAP_no_segment,4(%rsp) -+ movl $TRAP_no_segment, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(stack_segment) - ENDBR64 -- movl $TRAP_stack_error,4(%rsp) -+ movl $TRAP_stack_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(general_protection) - ENDBR64 -- movl $TRAP_gp_fault,4(%rsp) -+ movl $TRAP_gp_fault, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(alignment_check) - ENDBR64 -- movl $TRAP_alignment_check,4(%rsp) -+ movl $TRAP_alignment_check, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_CP) - ENDBR64 -- movl $X86_EXC_CP, 4(%rsp) -+ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(double_fault) - ENDBR64 -- movl $TRAP_double_fault,4(%rsp) -+ movl $TRAP_double_fault, EFRAME_entry_vector(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -1001,7 +1001,7 @@ ENTRY(double_fault) - ENTRY(nmi) - ENDBR64 - pushq $0 -- movl $TRAP_nmi,4(%rsp) -+ movl $TRAP_nmi, EFRAME_entry_vector(%rsp) - handle_ist_exception: - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -1134,7 +1134,7 @@ handle_ist_exception: - ENTRY(machine_check) - ENDBR64 - pushq $0 -- movl $TRAP_machine_check,4(%rsp) -+ movl $TRAP_machine_check, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - /* No op trap handler. Required for kexec crash path. */ -@@ -1171,7 +1171,7 @@ autogen_stubs: /* Automatically generated stubs. */ - 1: - ENDBR64 - pushq $0 -- movb $vec,4(%rsp) -+ movb $vec, EFRAME_entry_vector(%rsp) - jmp common_interrupt - - entrypoint 1b -@@ -1185,7 +1185,7 @@ autogen_stubs: /* Automatically generated stubs. */ - test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */ - jz 2f /* size is 8 bytes. 
Check whether the processor gave us an */ - pushq $0 /* error code, and insert an empty one if not. */ --2: movb $vec,4(%rsp) -+2: movb $vec, EFRAME_entry_vector(%rsp) - jmp handle_exception - - entrypoint 1b diff --git a/0501-xsa452-4.17-2.patch b/0501-xsa452-4.17-2.patch deleted file mode 100644 index 45353971..00000000 --- a/0501-xsa452-4.17-2.patch +++ /dev/null @@ -1,90 +0,0 @@ -From: Andrew Cooper -Subject: x86: Resync intel-family.h from Linux - -From v6.8-rc6 - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8) - -diff --git a/xen/arch/x86/include/asm/intel-family.h b/xen/arch/x86/include/asm/intel-family.h -index ffc49151befe..b65e9c46b922 100644 ---- a/xen/arch/x86/include/asm/intel-family.h -+++ b/xen/arch/x86/include/asm/intel-family.h -@@ -26,6 +26,9 @@ - * _G - parts with extra graphics on - * _X - regular server parts - * _D - micro server parts -+ * _N,_P - other mobile parts -+ * _H - premium mobile parts -+ * _S - other client parts - * - * Historical OPTDIFFs: - * -@@ -37,6 +40,9 @@ - * their own names :-( - */ - -+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ -+#define INTEL_FAM6_ANY X86_MODEL_ANY -+ - #define INTEL_FAM6_CORE_YONAH 0x0E - - #define INTEL_FAM6_CORE2_MEROM 0x0F -@@ -93,8 +99,6 @@ - #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ - #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ - --#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -- - #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ - - #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ -@@ -102,12 +106,31 @@ - - #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ - -+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF -+ -+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD -+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE -+ -+/* "Hybrid" Processors (P-Core/E-Core) */ -+ -+#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -+ - #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ - #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ - --#define INTEL_FAM6_RAPTORLAKE 0xB7 -+#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ -+#define INTEL_FAM6_RAPTORLAKE_P 0xBA -+#define INTEL_FAM6_RAPTORLAKE_S 0xBF -+ -+#define INTEL_FAM6_METEORLAKE 0xAC -+#define INTEL_FAM6_METEORLAKE_L 0xAA -+ -+#define INTEL_FAM6_ARROWLAKE_H 0xC5 -+#define INTEL_FAM6_ARROWLAKE 0xC6 -+ -+#define INTEL_FAM6_LUNARLAKE_M 0xBD - --/* "Small Core" Processors (Atom) */ -+/* "Small Core" Processors (Atom/E-Core) */ - - #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ - #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ -@@ -134,6 +157,13 @@ - #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ - #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ - -+#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -+ -+#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ -+#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ -+ -+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ -+ - /* Xeon Phi */ - - #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ diff --git a/0502-xsa452-4.17-3.patch b/0502-xsa452-4.17-3.patch deleted file mode 100644 index 0a39333e..00000000 --- a/0502-xsa452-4.17-3.patch +++ /dev/null @@ -1,135 +0,0 @@ -From: Andrew Cooper -Subject: x86/vmx: Perform VERW flushing later in the VMExit path - -Broken out of the following patch because this change is subtle enough on its -own. 
See it for the rationale of why we're moving VERW.
-
-As for how, extend the trick already used to hold one condition in
-flags (RESUME vs LAUNCH) through the POPing of GPRs.
-
-Move the MOV CR earlier. Intel specify flags to be undefined across it.
-
-Encode the two conditions we want using SF and PF. See the code comment for
-exactly how.
-
-Leave a comment to explain the lack of any content around
-SPEC_CTRL_EXIT_TO_VMX, but leave the block in place. Sod's law says if we
-delete it, we'll need to reintroduce it.
-
-This is part of XSA-452 / CVE-2023-28746.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f)
-
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
-index 5f5de45a1309..cdde76e13892 100644
---- a/xen/arch/x86/hvm/vmx/entry.S
-+++ b/xen/arch/x86/hvm/vmx/entry.S
-@@ -87,17 +87,39 @@ UNLIKELY_END(realmode)
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
- /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */
-- DO_SPEC_CTRL_COND_VERW
-+ /*
-+ * All speculation safety work happens to be elsewhere. VERW is after
-+ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left
-+ * to the MSR load list.
-+ */
-
- mov VCPU_hvm_guest_cr2(%rbx),%rax
-+ mov %rax, %cr2
-+
-+ /*
-+ * We need to perform two conditional actions (VERW, and Resume vs
-+ * Launch) after popping GPRs. With some cunning, we can encode both
-+ * of these in eflags together.
-+ *
-+ * Parity is only calculated over the bottom byte of the answer, while
-+ * Sign is simply the top bit.
-+ *
-+ * Therefore, the final OR instruction ends up producing:
-+ * SF = VCPU_vmx_launched
-+ * PF = !SCF_verw
-+ */
-+ BUILD_BUG_ON(SCF_verw & ~0xff)
-+ movzbl VCPU_vmx_launched(%rbx), %ecx
-+ shl $31, %ecx
-+ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax
-+ and $SCF_verw, %eax
-+ or %eax, %ecx
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
-- mov %rax,%cr2
-- cmpb $0,VCPU_vmx_launched(%rbx)
- pop %rbx
- pop %r11
- pop %r10
-@@ -108,7 +130,13 @@ UNLIKELY_END(realmode)
- pop %rdx
- pop %rsi
- pop %rdi
-- je .Lvmx_launch
-+
-+ jpe .L_skip_verw
-+ /* VERW clobbers ZF, but preserves all others, including SF. */
-+ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp)
-+.L_skip_verw:
-+
-+ jns .Lvmx_launch
-
- /*.Lvmx_resume:*/
- VMRESUME
-diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h
-index d9431180cfba..abc6822b08c8 100644
---- a/xen/arch/x86/include/asm/asm_defns.h
-+++ b/xen/arch/x86/include/asm/asm_defns.h
-@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp");
-
- #ifdef __ASSEMBLY__
-
-+.macro BUILD_BUG_ON condstr, cond:vararg
-+ .if \cond
-+ .error "Condition \"\condstr\" not satisfied"
-+ .endif
-+.endm
-+/* preprocessor macro to make error message more user friendly */
-+#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond
-+
- #ifdef HAVE_AS_QUOTED_SYM
- #define SUBSECTION_LBL(tag) \
- .ifndef .L.tag; \
-diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-index f4b8b9d9561c..ca9cb0f5dd1d 100644
---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h
-@@ -164,6 +164,13 @@
- #endif
- .endm
-
-+/*
-+ * Helper to improve the readibility of stack dispacements with %rsp in
-+ * unusual positions. Both @field and @top_of_stack should be constants from
-+ * the same object. @top_of_stack should be where %rsp is currently pointing.
-+ */ -+#define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) -+ - .macro DO_SPEC_CTRL_COND_VERW - /* - * Requires %rsp=cpuinfo -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 31fa63b77fd1..a4e94d693024 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -135,6 +135,7 @@ void __dummy__(void) - #endif - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); -+ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); diff --git a/0503-xsa452-4.17-4.patch b/0503-xsa452-4.17-4.patch deleted file mode 100644 index fa9bb127..00000000 --- a/0503-xsa452-4.17-4.patch +++ /dev/null @@ -1,197 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Perform VERW flushing later in exit paths - -On parts vulnerable to RFDS, VERW's side effects are extended to scrub all -non-architectural entries in various Physical Register Files. To remove all -of Xen's values, the VERW must be after popping the GPRs. - -Rework SPEC_CTRL_COND_VERW to default to an CPUINFO_error_code %rsp position, -but with overrides for other contexts. Identify that it clobbers eflags; this -is particularly relevant for the SYSRET path. - -For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a -shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to -issue the VERW. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index ca9cb0f5dd1d..97a97b2b82c9 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -171,16 +171,23 @@ - */ - #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) - --.macro DO_SPEC_CTRL_COND_VERW -+.macro SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) - /* -- * Requires %rsp=cpuinfo -+ * Requires \scf and \sel as %rsp-relative expressions -+ * Clobbers eflags -+ * -+ * VERW needs to run after guest GPRs have been restored, where only %rsp is -+ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code. -+ * Contexts where this is not true must provide an alternative \scf and \sel. - * - * Issue a VERW for its flushing side effect, if indicated. This is a Spectre - * v1 gadget, but the IRET/VMEntry is serialising. 
- */ -- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ testb $SCF_verw, \scf(%rsp) - jz .L\@_verw_skip -- verw CPUINFO_verw_sel(%rsp) -+ verw \sel(%rsp) - .L\@_verw_skip: - .endm - -@@ -298,8 +305,6 @@ - */ - ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV - -- DO_SPEC_CTRL_COND_VERW -- - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - .endm - -@@ -379,7 +384,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %r12=ist_exit, %r14=stack_end -+ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs - * Clobbers %rax, %rbx, %rcx, %rdx - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -@@ -407,11 +412,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - test %r12, %r12 - jz .L\@_skip_ist_exit - -- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ -- testb $SCF_verw, %bl -- jz .L\@_skip_verw -- verw STACK_CPUINFO_FIELD(verw_sel)(%r14) --.L\@_skip_verw: -+ /* -+ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The -+ * VERW logic needs to run after guest GPRs have been restored; i.e. where -+ * we cannot use %r12 or %r14 for the purposes they have here. -+ * -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * Therefore it is safe for the VERW logic to look at the stashed SCF -+ * outside of the ist_exit condition. Also, this stashing won't influence -+ * any other restore_all_guest() paths. -+ */ -+ or $(__HYPERVISOR_DS32 << 16), %ebx -+ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */ - - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index a4e94d693024..4cd5938d7b9d 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -55,14 +55,22 @@ void __dummy__(void) - * EFRAME_* is for the entry/exit logic where %rsp is pointing at - * UREGS_error_code and GPRs are still/already guest values. - */ --#define OFFSET_EF(sym, mem) \ -+#define OFFSET_EF(sym, mem, ...) \ - DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -- offsetof(struct cpu_user_regs, error_code)) -+ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__) - - OFFSET_EF(EFRAME_entry_vector, entry_vector); - OFFSET_EF(EFRAME_rip, rip); - OFFSET_EF(EFRAME_cs, cs); - OFFSET_EF(EFRAME_eflags, eflags); -+ -+ /* -+ * These aren't real fields. They're spare space, used by the IST -+ * exit-to-xen path. -+ */ -+ OFFSET_EF(EFRAME_shadow_scf, eflags, +4); -+ OFFSET_EF(EFRAME_shadow_sel, eflags, +6); -+ - OFFSET_EF(EFRAME_rsp, rsp); - BLANK(); - -@@ -136,6 +144,7 @@ void __dummy__(void) - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); - OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); -+ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 7c211314d885..3b2fbcd8733a 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest) - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL adj=8 compat=1 -+ -+ /* Account for ev/ec having already been popped off the stack. 
*/ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) -+ - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 412cbeb3eca4..ef517e2945b0 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -214,6 +214,9 @@ restore_all_guest: - #endif - - mov EFRAME_rip(%rsp), %rcx -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) - mov EFRAME_rsp(%rsp), %rsp - je 1f -@@ -227,6 +230,9 @@ restore_all_guest: - iret_exit_to_guest: - andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) - orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */ - - RESTORE_ALL adj=8 -+ -+ /* -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of -+ * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, -+ * and we're at a random place on the stack, not in a CPUFINFO block. -+ * -+ * Account for ev/ec having already been popped off the stack. -+ */ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \ -+ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip) -+ - iretq - - ENTRY(common_interrupt) diff --git a/0504-xsa452-4.17-5.patch b/0504-xsa452-4.17-5.patch deleted file mode 100644 index 0230b338..00000000 --- a/0504-xsa452-4.17-5.patch +++ /dev/null @@ -1,239 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Rename VERW related options - -VERW is going to be used for a 3rd purpose, and the existing nomenclature -didn't survive the Stale MMIO issues terribly well. - -Rename the command line option from `md-clear=` to `verw=`. This is more -consistent with other options which tend to be named based on what they're -doing, not which feature enumeration they use behind the scenes. Retain -`md-clear=` as a deprecated alias. - -Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio}, -which has a side effect of making spec_ctrl_init_domain() rather clearer to -follow. - -No functional change. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 2006697226de..d909ec94fe7c 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2324,7 +2324,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ , xen=, {pv,hvm}=, --> {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, -+> {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio,gds-mit,div-scrub}= ]` -@@ -2349,7 +2349,7 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. 
- --The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options - offer fine grained control over the primitives by Xen. These impact Xen's - ability to protect itself, and/or Xen's ability to virtualise support for - guests to use. -@@ -2366,11 +2366,12 @@ guests to use. - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. - * `rsb=` offers control over whether to overwrite the Return Stack Buffer / - Return Address Stack on entry to Xen and on idle. --* `md-clear=` offers control over whether to use VERW to flush -- microarchitectural buffers on idle and exit from Xen. *Note: For -- compatibility with development versions of this fix, `mds=` is also accepted -- on Xen 4.12 and earlier as an alias. Consult vendor documentation in -- preference to here.* -+* `verw=` offers control over whether to use VERW for its scrubbing side -+ effects at appropriate privilege transitions. The exact side effects are -+ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as -+ a deprecated alias. For compatibility with development versions of XSA-297, -+ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor -+ documentation in preference to here.* - * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction - Barrier) is used on entry to Xen. This is used by default on hardware - vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 25a18ac598fa..e12ec9930cf7 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -37,8 +37,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __ro_after_init opt_md_clear_pv = -1; --static int8_t __ro_after_init opt_md_clear_hvm = -1; -+static int8_t __ro_after_init opt_verw_pv = -1; -+static int8_t __ro_after_init opt_verw_hvm = -1; - - static int8_t __ro_after_init opt_ibpb_entry_pv = -1; - static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; -@@ -78,7 +78,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. 
- - static int8_t __initdata opt_srb_lock = -1; - static bool __initdata opt_unpriv_mmio; --static bool __ro_after_init opt_fb_clear_mmio; -+static bool __ro_after_init opt_verw_mmio; - static int8_t __initdata opt_gds_mit = -1; - static int8_t __initdata opt_div_scrub = -1; - -@@ -120,8 +120,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - disable_common: - opt_rsb_pv = false; - opt_rsb_hvm = false; -- opt_md_clear_pv = 0; -- opt_md_clear_hvm = 0; -+ opt_verw_pv = 0; -+ opt_verw_hvm = 0; - opt_ibpb_entry_pv = 0; - opt_ibpb_entry_hvm = 0; - opt_ibpb_entry_dom0 = false; -@@ -152,14 +152,14 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - opt_msr_sc_pv = val; - opt_rsb_pv = val; -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - opt_ibpb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { - opt_msr_sc_hvm = val; - opt_rsb_hvm = val; -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - opt_ibpb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) -@@ -204,21 +204,22 @@ static int __init cf_check parse_spec_ctrl(const char *s) - break; - } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) -+ else if ( (val = parse_boolean("verw", s, ss)) != -1 || -+ (val = parse_boolean("md-clear", s, ss)) != -1 ) - { - switch ( val ) - { - case 0: - case 1: -- opt_md_clear_pv = opt_md_clear_hvm = val; -+ opt_verw_pv = opt_verw_hvm = val; - break; - - case -2: -- s += strlen("md-clear="); -+ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear="); - if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - else - default: - rc = -EINVAL; -@@ -540,8 +541,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm || -- opt_fb_clear_mmio ? " VERW" : "", -+ opt_verw_pv || opt_verw_hvm || -+ opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - -@@ -562,13 +563,13 @@ static void __init print_details(enum ind_thunk thunk) - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || - amd_virt_spec_ctrl || -- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None", -+ opt_eager_fpu || opt_verw_hvm) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - amd_virt_spec_ctrl) ? " MSR_VIRT_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_hvm ? " MD_CLEAR" : "", -+ opt_verw_hvm ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); - - #endif -@@ -577,11 +578,11 @@ static void __init print_details(enum ind_thunk thunk) - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || -- opt_eager_fpu || opt_md_clear_pv) ? "" : " None", -+ opt_eager_fpu || opt_verw_pv) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_pv ? " MD_CLEAR" : "", -+ opt_verw_pv ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? 
" IBPB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", -@@ -1514,8 +1515,8 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) || -+ (opt_verw_mmio && is_iommu_enabled(d))); - - bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && - (d->domain_id != 0 || opt_ibpb_entry_dom0)); -@@ -1878,19 +1879,20 @@ void __init init_speculation_mitigations(void) - * the return-to-guest path. - */ - if ( opt_unpriv_mmio ) -- opt_fb_clear_mmio = cpu_has_fb_clear; -+ opt_verw_mmio = cpu_has_fb_clear; - - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. - * This will only be a token effort for MLPDS/MFBDS when HT is enabled, - * but it is somewhat better than nothing. - */ -- if ( opt_md_clear_pv == -1 ) -- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -- if ( opt_md_clear_hvm == -1 ) -- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -+ if ( opt_verw_pv == -1 ) -+ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); -+ -+ if ( opt_verw_hvm == -1 ) -+ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); - - /* - * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -@@ -1903,12 +1905,12 @@ void __init init_speculation_mitigations(void) - * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify -- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * opt_verw_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) -+ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; -+ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT diff --git a/0505-xsa452-4.17-6.patch b/0505-xsa452-4.17-6.patch deleted file mode 100644 index bbe617eb..00000000 --- a/0505-xsa452-4.17-6.patch +++ /dev/null @@ -1,163 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: VERW-handling adjustments - -... before we add yet more complexity to this logic. Mostly expanded -comments, but with three minor changes. - -1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and - future ones. - -2) We only ever need SC_VERW_IDLE when SMT is active. If SMT isn't active, - then there's no re-partition of pipeline resources based on thread-idleness - to worry about. - -3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as - it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush - is the relevant decision of whether to use L1D_FLUSH based on - susceptibility and user preference. - - Rewrite the logic so it can be followed, and incorporate the fact that when - FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution. - -This is part of XSA-452 / CVE-2023-28746. 
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index e12ec9930cf7..adb6bc74e8e6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1531,7 +1531,7 @@ void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; - bool has_spec_ctrl, ibrs = false, hw_smt_enabled; -- bool cpu_has_bug_taa, retpoline_safe; -+ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe; - - hw_smt_enabled = check_smt_enabled(); - -@@ -1867,50 +1867,97 @@ void __init init_speculation_mitigations(void) - "enabled. Please assess your configuration and choose an\n" - "explicit 'smt=' setting. See XSA-273.\n"); - -+ /* -+ * A brief summary of VERW-related changes. -+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html -+ * -+ * Relevant ucodes: -+ * -+ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side -+ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR -+ * exists architecturally, even when the side effects have been removed. -+ * -+ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to -+ * mitigate L1TF have the same side effect, so no need to do both. -+ * -+ * Various Atoms suffer from Store-buffer sampling only. Store buffers -+ * are statically partitioned between non-idle threads, so scrubbing is -+ * wanted when going idle too. -+ * -+ * Load ports and Fill buffers are competitively shared between threads. -+ * SMT must be disabled for VERW scrubbing to be fully effective. -+ * -+ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled -+ * MDS_NO parts. -+ * -+ * - February 2022, for Client TSX de-feature. Removed VERW side effects -+ * from Client CPUs only. -+ * -+ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing -+ * on all MMIO-affected parts which didn't already have it for MDS -+ * reasons, enumerating FB_CLEAR on those parts only. -+ * -+ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing -+ * side effects as VERW and cannot be used in its place. -+ */ - mds_calculations(); - - /* -- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -- * reintroduced the VERW fill buffer flushing side effect because of a -- * susceptibility to FBSDP. -+ * Parts which enumerate FB_CLEAR are those with now-updated microcode -+ * which weren't susceptible to the original MFBDS (and therefore didn't -+ * have Fill Buffer scrubbing side effects to begin with, or were Client -+ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had -+ * the scrubbing reintroduced because of a susceptibility to FBSDP. - * - * If unprivileged guests have (or will have) MMIO mappings, we can - * mitigate cross-domain leakage of fill buffer data by issuing VERW on -- * the return-to-guest path. -+ * the return-to-guest path. This is only a token effort if SMT is -+ * active. - */ - if ( opt_unpriv_mmio ) - opt_verw_mmio = cpu_has_fb_clear; - - /* -- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. 
-- * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
-- * but it is somewhat better than nothing.
-+ * MD_CLEAR is enumerated architecturally forevermore, even after the
-+ * scrubbing side effects have been removed.  Create ourselves a version
-+ * which expresses whether we think MD_CLEAR is having any useful side
-+ * effect.
-+ */
-+ cpu_has_useful_md_clear = (cpu_has_md_clear &&
-+ (cpu_has_bug_mds || cpu_has_bug_msbds_only));
-+
-+ /*
-+ * By default, use VERW scrubbing on applicable hardware, if we think it's
-+ * going to have an effect. This will only be a token effort for
-+ * MLPDS/MFBDS when SMT is enabled.
- */
- if ( opt_verw_pv == -1 )
-- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- cpu_has_md_clear);
-+ opt_verw_pv = cpu_has_useful_md_clear;
- 
- if ( opt_verw_hvm == -1 )
-- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
-- cpu_has_md_clear);
-+ opt_verw_hvm = cpu_has_useful_md_clear;
- 
- /*
-- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
-- * either the PV or HVM MDS defences are used, or if we may give MMIO
-- * access to untrusted guests.
-- *
-- * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with
-- * equivalent semantics to avoid needing to perform both flushes on the
-- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for
-- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
-- *
-- * After calculating the appropriate idle setting, simplify
-- * opt_verw_hvm to mean just "should we VERW on the way into HVM
-- * guests", so spec_ctrl_init_domain() can calculate suitable settings.
-+ * If SMT is active, and we're protecting against MDS or MMIO stale data,
-+ * we need to scrub before going idle as well as on return to guest.
-+ * Various pipeline resources are repartitioned amongst non-idle threads.
- */
-- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
-+ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
-+ opt_verw_mmio) && hw_smt_enabled )
- setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
-- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
-+
-+ /*
-+ * After calculating the appropriate idle setting, simplify opt_verw_hvm
-+ * to mean just "should we VERW on the way into HVM guests", so
-+ * spec_ctrl_init_domain() can calculate suitable settings.
-+ *
-+ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
-+ * only *_CLEAR we can see.
-+ */
-+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
-+ opt_verw_hvm = false;
- 
- /*
- * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
diff --git a/0506-xsa452-4.17-7.patch b/0506-xsa452-4.17-7.patch
deleted file mode 100644
index 26ba4ebd..00000000
--- a/0506-xsa452-4.17-7.patch
+++ /dev/null
@@ -1,307 +0,0 @@
-From: Andrew Cooper 
-Subject: x86/spec-ctrl: Mitigation Register File Data Sampling
-
-RFDS affects Atom cores, also branded E-cores, between the Goldmont and
-Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid
-client systems which have a mix of Gracemont and other types of cores.
-
-Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side
-effects, and RFDS_NO to indicate that the system is unaffected. Plenty of
-unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we
-synthesise it.
Alder Lake and Raptor Lake Xeon-E's are unaffected due to -their platform configuration, and we must use the Hybrid CPUID bit to -distinguish them from their non-Xeon counterparts. - -Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so -set it in the max policies and reflect the host setting in default. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index aefc140d6651..5ceea8be073b 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -172,7 +172,7 @@ static const char *const str_7d0[32] = - [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl", - [10] = "md-clear", [11] = "rtm-always-abort", - /* 12 */ [13] = "tsx-force-abort", -- [14] = "serialize", -+ [14] = "serialize", [15] = "hybrid", - [16] = "tsxldtrk", - [18] = "pconfig", - [20] = "cet-ibt", -@@ -237,7 +237,8 @@ static const char *const str_m10Al[32] = - [20] = "bhi-no", [21] = "xapic-status", - /* 22 */ [23] = "ovrclk-status", - [24] = "pbrsb-no", [25] = "gds-ctrl", -- [26] = "gds-no", -+ [26] = "gds-no", [27] = "rfds-no", -+ [28] = "rfds-clear", - }; - - static const char *const str_m10Ah[32] = -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 7b875a722142..96c2cee1a857 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -444,6 +444,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - */ - __set_bit(X86_FEATURE_MD_CLEAR, fs); - __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); - - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an -@@ -493,6 +494,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - if ( cpu_has_fb_clear ) - __set_bit(X86_FEATURE_FB_CLEAR, fs); - -+ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs); -+ if ( cpu_has_rfds_clear ) -+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index ec824e895498..a6b8af12964c 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -140,6 +140,7 @@ - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -+#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID) - #define cpu_has_avx512_fp16 boot_cpu_has(X86_FEATURE_AVX512_FP16) - #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS) - -@@ -161,6 +162,8 @@ - #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) - #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL) - #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) -+#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) -+#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) - - /* Synthesized. 
*/
- #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
-index 6abf7bc34a4f..9b5f67711f0c 100644
---- a/xen/arch/x86/include/asm/msr-index.h
-+++ b/xen/arch/x86/include/asm/msr-index.h
-@@ -88,6 +88,8 @@
- #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24)
- #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25)
- #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26)
-+#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27)
-+#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28)
- 
- #define MSR_FLUSH_CMD 0x0000010b
- #define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index adb6bc74e8e6..1ee81e2dfe79 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -24,6 +24,7 @@
- 
- #include 
- #include 
-+#include <asm/intel-family.h>
- #include 
- #include 
- #include 
-@@ -447,7 +448,7 @@ static void __init print_details(enum ind_thunk thunk)
- * Hardware read-only information, stating immunity to certain issues, or
- * suggestions of which mitigation to use.
- */
-- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
- (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "",
- (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
-@@ -463,6 +464,7 @@ static void __init print_details(enum ind_thunk thunk)
- (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "",
- (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "",
- (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "",
-+ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "",
- (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "",
-@@ -473,7 +475,7 @@ static void __init print_details(enum ind_thunk thunk)
- (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : "");
- 
- /* Hardware features which need driving to mitigate issues. */
-- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ||
- (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS)) ||
-@@ -491,6 +493,7 @@ static void __init print_details(enum ind_thunk thunk)
- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "",
- (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "",
- (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "",
-+ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "",
- (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : "");
- 
- /* Compiled-in support which pertains to mitigations. */
-@@ -1359,6 +1362,83 @@ static __init void mds_calculations(void)
- }
- }
- 
-+/*
-+ * Register File Data Sampling affects Atom cores from the Goldmont to
-+ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to
-+ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still
-+ * in support.
-+ *
-+ * Alder Lake and Raptor Lake client CPUs have a mix of P cores
-+ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont,
-+ * vulnerable), and both enumerate RFDS_CLEAR.
-+ *
-+ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by
-+ * platform configuration, and enumerate RFDS_NO.
-+ *
-+ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when
-+ * safe to do so.
-+ *
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
-+ */
-+static void __init rfds_calculations(void)
-+{
-+ /* RFDS is only known to affect Intel Family 6 processors at this time. */
-+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-+ boot_cpu_data.x86 != 6 )
-+ return;
-+
-+ /*
-+ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable
-+ * microcode, or an RFDS-aware hypervisor is levelling us in a pool.
-+ */
-+ if ( cpu_has_rfds_no || cpu_has_rfds_clear )
-+ return;
-+
-+ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */
-+ if ( cpu_has_hypervisor )
-+ return;
-+
-+ /*
-+ * Not all CPUs are expected to get a microcode update enumerating one of
-+ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode.
-+ */
-+ switch ( boot_cpu_data.x86_model )
-+ {
-+ case INTEL_FAM6_ALDERLAKE:
-+ case INTEL_FAM6_RAPTORLAKE:
-+ /*
-+ * Alder Lake and Raptor Lake might be a client SKU (with the
-+ * Gracemont cores active, and therefore vulnerable) or might be a
-+ * server SKU (with the Gracemont cores disabled, and therefore not
-+ * vulnerable).
-+ *
-+ * See if the CPU identifies as hybrid to distinguish the two cases.
-+ */
-+ if ( !cpu_has_hybrid )
-+ break;
-+ fallthrough;
-+ case INTEL_FAM6_ALDERLAKE_L:
-+ case INTEL_FAM6_RAPTORLAKE_P:
-+ case INTEL_FAM6_RAPTORLAKE_S:
-+
-+ case INTEL_FAM6_ATOM_GOLDMONT: /* Apollo Lake */
-+ case INTEL_FAM6_ATOM_GOLDMONT_D: /* Denverton */
-+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */
-+ case INTEL_FAM6_ATOM_TREMONT_D: /* Snow Ridge / Parker Ridge */
-+ case INTEL_FAM6_ATOM_TREMONT: /* Elkhart Lake */
-+ case INTEL_FAM6_ATOM_TREMONT_L: /* Jasper Lake */
-+ case INTEL_FAM6_ATOM_GRACEMONT: /* Alder Lake N */
-+ return;
-+ }
-+
-+ /*
-+ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO,
-+ * perhaps because of its age or because of out-of-date microcode.
-+ * Synthesise it.
-+ */
-+ setup_force_cpu_cap(X86_FEATURE_RFDS_NO);
-+}
-+
- static bool __init cpu_has_gds(void)
- {
- /*
-@@ -1872,6 +1952,7 @@ void __init init_speculation_mitigations(void)
- *
- * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
- * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
-+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
- *
- * Relevant ucodes:
- *
-@@ -1901,8 +1982,12 @@ void __init init_speculation_mitigations(void)
- *
- * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
- * side effects as VERW and cannot be used in its place.
-+ *
-+ * - March 2024, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now
-+ * scrubs non-architectural entries from certain register files.
- */
- mds_calculations();
-+ rfds_calculations();
- 
- /*
- * Parts which enumerate FB_CLEAR are those with now-updated microcode
-@@ -1934,15 +2019,19 @@ void __init init_speculation_mitigations(void)
- * MLPDS/MFBDS when SMT is enabled.
- */ - if ( opt_verw_pv == -1 ) -- opt_verw_pv = cpu_has_useful_md_clear; -+ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear; - - if ( opt_verw_hvm == -1 ) -- opt_verw_hvm = cpu_has_useful_md_clear; -+ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear; - - /* - * If SMT is active, and we're protecting against MDS or MMIO stale data, - * we need to scrub before going idle as well as on return to guest. - * Various pipeline resources are repartitioned amongst non-idle threads. -+ * -+ * We don't need to scrub on idle for RFDS. There are no affected cores -+ * which support SMT, despite there being affected cores in hybrid systems -+ * which have SMT elsewhere in the platform. - */ - if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || - opt_verw_mmio) && hw_smt_enabled ) -@@ -1956,7 +2045,8 @@ void __init init_speculation_mitigations(void) - * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the - * only *_CLEAR we can see. - */ -- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) -+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear && -+ !cpu_has_rfds_clear ) - opt_verw_hvm = false; - - /* -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index aec1407613c3..113e6cadc17d 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -264,6 +264,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -+XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ - XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */ - XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */ - XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */ -@@ -330,6 +331,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ - XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ - XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */ - XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */ -+XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ -+XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */ - - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ - diff --git a/0507-xsa455-4.17.patch b/0507-xsa455-4.17.patch deleted file mode 100644 index c1deea56..00000000 --- a/0507-xsa455-4.17.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 19887194865cff7d87650c323d5c6b185dfe3ddc Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 26 Mar 2024 22:47:25 +0000 -Subject: [PATCH] x86/spec-ctrl: Fix BTC/SRSO mitigations - -We were looking for SCF_entry_ibpb in the wrong variable in the top-of-stack -block, and xen_spec_ctrl won't have had bit 5 set because Xen doesn't -understand SPEC_CTRL_RRSBA_DIS_U yet. - -This is XSA-455 / CVE-2024-31142. 
- -Fixes: 53a570b28569 ("x86/spec-ctrl: Support IBPB-on-entry") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich ---- - xen/arch/x86/hvm/svm/entry.S | 2 +- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 934f12cf5c..c19e964bc6 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap) - /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ - - .macro svm_vmexit_cond_ibpb -- testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) - jz .L_skip_ibpb - - mov $MSR_PRED_CMD, %ecx -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 97a97b2b82..e85db1a329 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -102,7 +102,7 @@ - jz .L\@_skip - testb $3, UREGS_cs(%rsp) - .else -- testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) - .endif - jz .L\@_skip - --- -2.44.0 - diff --git a/0508-x86-APIC-finish-genapic-conversion-to-altcall.patch b/0508-x86-APIC-finish-genapic-conversion-to-altcall.patch deleted file mode 100644 index 645ec55f..00000000 --- a/0508-x86-APIC-finish-genapic-conversion-to-altcall.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 88d5e21e165351feef0f17157005dece78275cea Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 17 Jan 2024 10:41:52 +0100 -Subject: [PATCH 508/542] x86/APIC: finish genapic conversion to altcall - -While .probe() doesn't need fiddling with for being run only very early, -init_apic_ldr() wants converting too despite not being on a frequently -executed path: This way all pre-filled struct genapic instances can -become __initconst_cf_clobber, thus allowing to eliminate 15 more ENDBR -during the 2nd phase of alternatives patching. - -While fiddling with section annotations here, also move "genapic" itself -to .data.ro_after_init. - -Signed-off-by: Jan Beulich -Acked-by: Andrew Cooper -(cherry picked from commit b1cc53753cba4c3253f2e1093a3a6a9a828314bf) ---- - xen/arch/x86/genapic/bigsmp.c | 2 +- - xen/arch/x86/genapic/default.c | 2 +- - xen/arch/x86/genapic/probe.c | 2 +- - xen/arch/x86/genapic/x2apic.c | 6 +++--- - xen/arch/x86/include/asm/mach-generic/mach_apic.h | 2 +- - 5 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/genapic/bigsmp.c b/xen/arch/x86/genapic/bigsmp.c -index 2000383ab0..7219ec53b0 100644 ---- a/xen/arch/x86/genapic/bigsmp.c -+++ b/xen/arch/x86/genapic/bigsmp.c -@@ -41,7 +41,7 @@ static int __init cf_check probe_bigsmp(void) - return def_to_bigsmp; - } - --const struct genapic __initconstrel apic_bigsmp = { -+const struct genapic __initconst_cf_clobber apic_bigsmp = { - APIC_INIT("bigsmp", probe_bigsmp), - GENAPIC_PHYS - }; -diff --git a/xen/arch/x86/genapic/default.c b/xen/arch/x86/genapic/default.c -index 2c63c1f917..a968836a18 100644 ---- a/xen/arch/x86/genapic/default.c -+++ b/xen/arch/x86/genapic/default.c -@@ -14,7 +14,7 @@ - #include - - /* should be called last. 
*/ --const struct genapic __initconstrel apic_default = { -+const struct genapic __initconst_cf_clobber apic_default = { - APIC_INIT("default", NULL), - GENAPIC_FLAT - }; -diff --git a/xen/arch/x86/genapic/probe.c b/xen/arch/x86/genapic/probe.c -index ad57912f50..10ceeae4d2 100644 ---- a/xen/arch/x86/genapic/probe.c -+++ b/xen/arch/x86/genapic/probe.c -@@ -16,7 +16,7 @@ - #include - #include - --struct genapic __read_mostly genapic; -+struct genapic __ro_after_init genapic; - - static const struct genapic *const __initconstrel apic_probe[] = { - &apic_bigsmp, -diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c -index c64038adaa..eba09d7719 100644 ---- a/xen/arch/x86/genapic/x2apic.c -+++ b/xen/arch/x86/genapic/x2apic.c -@@ -169,7 +169,7 @@ static void cf_check send_IPI_mask_x2apic_cluster( - local_irq_restore(flags); - } - --static const struct genapic __initconstrel apic_x2apic_phys = { -+static const struct genapic __initconst_cf_clobber apic_x2apic_phys = { - APIC_INIT("x2apic_phys", NULL), - .int_delivery_mode = dest_Fixed, - .int_dest_mode = 0 /* physical delivery */, -@@ -180,7 +180,7 @@ static const struct genapic __initconstrel apic_x2apic_phys = { - .send_IPI_self = send_IPI_self_x2apic - }; - --static const struct genapic __initconstrel apic_x2apic_cluster = { -+static const struct genapic __initconst_cf_clobber apic_x2apic_cluster = { - APIC_INIT("x2apic_cluster", NULL), - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = 1 /* logical delivery */, -@@ -198,7 +198,7 @@ static const struct genapic __initconstrel apic_x2apic_cluster = { - * IPIs to be more efficiently delivered by not having to perform an ICR write - * for each target CPU. - */ --static const struct genapic __initconstrel apic_x2apic_mixed = { -+static const struct genapic __initconst_cf_clobber apic_x2apic_mixed = { - APIC_INIT("x2apic_mixed", NULL), - - /* -diff --git a/xen/arch/x86/include/asm/mach-generic/mach_apic.h b/xen/arch/x86/include/asm/mach-generic/mach_apic.h -index b6f6361c60..d9e02f0bc4 100644 ---- a/xen/arch/x86/include/asm/mach-generic/mach_apic.h -+++ b/xen/arch/x86/include/asm/mach-generic/mach_apic.h -@@ -13,7 +13,7 @@ - #define INT_DELIVERY_MODE (genapic.int_delivery_mode) - #define INT_DEST_MODE (genapic.int_dest_mode) - #define TARGET_CPUS ((const typeof(cpu_online_map) *)&cpu_online_map) --#define init_apic_ldr (genapic.init_apic_ldr) -+#define init_apic_ldr() alternative_vcall(genapic.init_apic_ldr) - #define cpu_mask_to_apicid(mask) ({ \ - /* \ - * There are a number of places where the address of a local variable \ --- -2.44.0 - diff --git a/0509-cpufreq-finish-conversion-to-altcall.patch b/0509-cpufreq-finish-conversion-to-altcall.patch deleted file mode 100644 index ac0bb34d..00000000 --- a/0509-cpufreq-finish-conversion-to-altcall.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 653560e02d40c480d08032f3cf1e450db79f5d71 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 17 Jan 2024 10:42:27 +0100 -Subject: [PATCH 509/542] cpufreq: finish conversion to altcall - -Even functions used on infrequently executed paths want converting: This -way all pre-filled struct cpufreq_driver instances can become -__initconst_cf_clobber, thus allowing to eliminate another 15 ENDBR -during the 2nd phase of alternatives patching. - -For acpi-cpufreq's optionally populated .get hook make sure alternatives -patching can actually see the pointer. See also the code comment. 
- -Signed-off-by: Jan Beulich -Acked-by: Andrew Cooper -(cherry picked from commit 467ae515caee491e9b6ae1da8b9b98d094955822) ---- - xen/arch/x86/acpi/cpufreq/cpufreq.c | 17 ++++++++++++++++- - xen/arch/x86/acpi/cpufreq/powernow.c | 3 ++- - xen/drivers/acpi/pmstat.c | 3 ++- - xen/drivers/cpufreq/cpufreq.c | 6 +++--- - xen/drivers/cpufreq/utility.c | 6 +++--- - 5 files changed, 26 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c -index c27cbb2304..5786943cfb 100644 ---- a/xen/arch/x86/acpi/cpufreq/cpufreq.c -+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c -@@ -622,12 +622,14 @@ static int cf_check acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) - return 0; - } - --static const struct cpufreq_driver __initconstrel acpi_cpufreq_driver = { -+static const struct cpufreq_driver __initconst_cf_clobber -+acpi_cpufreq_driver = { - .name = "acpi-cpufreq", - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, -+ .get = get_cur_freq_on_cpu, - }; - - static int __init cf_check cpufreq_driver_init(void) -@@ -653,6 +655,19 @@ static int __init cf_check cpufreq_driver_init(void) - } - presmp_initcall(cpufreq_driver_init); - -+static int __init cf_check cpufreq_driver_late_init(void) -+{ -+ /* -+ * While acpi_cpufreq_driver wants to unconditionally have all hooks -+ * populated for __initconst_cf_clobber to have as much of an effect as -+ * possible, zap the .get hook here (but not in cpufreq_driver_init()), -+ * until acpi_cpufreq_cpu_init() knows whether it's wanted / needed. -+ */ -+ cpufreq_driver.get = NULL; -+ return 0; -+} -+__initcall(cpufreq_driver_late_init); -+ - int cpufreq_cpu_init(unsigned int cpuid) - { - int ret; -diff --git a/xen/arch/x86/acpi/cpufreq/powernow.c b/xen/arch/x86/acpi/cpufreq/powernow.c -index d4c7dcd5d9..497bf24470 100644 ---- a/xen/arch/x86/acpi/cpufreq/powernow.c -+++ b/xen/arch/x86/acpi/cpufreq/powernow.c -@@ -317,7 +317,8 @@ static int cf_check powernow_cpufreq_cpu_exit(struct cpufreq_policy *policy) - return 0; - } - --static const struct cpufreq_driver __initconstrel powernow_cpufreq_driver = { -+static const struct cpufreq_driver __initconst_cf_clobber -+powernow_cpufreq_driver = { - .name = "powernow", - .verify = powernow_cpufreq_verify, - .target = powernow_cpufreq_target, -diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c -index 1bae635101..0c51c220a7 100644 ---- a/xen/drivers/acpi/pmstat.c -+++ b/xen/drivers/acpi/pmstat.c -@@ -255,7 +255,8 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op) - return ret; - - op->u.get_para.cpuinfo_cur_freq = -- cpufreq_driver.get ? cpufreq_driver.get(op->cpuid) : policy->cur; -+ cpufreq_driver.get ? 
alternative_call(cpufreq_driver.get, op->cpuid) -+ : policy->cur; - op->u.get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq; - op->u.get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq; - op->u.get_para.scaling_cur_freq = policy->cur; -diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c -index a94520ee57..daa399bbec 100644 ---- a/xen/drivers/cpufreq/cpufreq.c -+++ b/xen/drivers/cpufreq/cpufreq.c -@@ -240,7 +240,7 @@ int cpufreq_add_cpu(unsigned int cpu) - policy->cpu = cpu; - per_cpu(cpufreq_cpu_policy, cpu) = policy; - -- ret = cpufreq_driver.init(policy); -+ ret = alternative_call(cpufreq_driver.init, policy); - if (ret) { - free_cpumask_var(policy->cpus); - xfree(policy); -@@ -299,7 +299,7 @@ err1: - cpumask_clear_cpu(cpu, cpufreq_dom->map); - - if (cpumask_empty(policy->cpus)) { -- cpufreq_driver.exit(policy); -+ alternative_call(cpufreq_driver.exit, policy); - free_cpumask_var(policy->cpus); - xfree(policy); - } -@@ -363,7 +363,7 @@ int cpufreq_del_cpu(unsigned int cpu) - cpumask_clear_cpu(cpu, cpufreq_dom->map); - - if (cpumask_empty(policy->cpus)) { -- cpufreq_driver.exit(policy); -+ alternative_call(cpufreq_driver.exit, policy); - free_cpumask_var(policy->cpus); - xfree(policy); - } -diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c -index 9eb7ecedcd..ec7072078c 100644 ---- a/xen/drivers/cpufreq/utility.c -+++ b/xen/drivers/cpufreq/utility.c -@@ -412,7 +412,7 @@ int cpufreq_update_turbo(int cpuid, int new_state) - policy->turbo = new_state; - if (cpufreq_driver.update) - { -- ret = cpufreq_driver.update(cpuid, policy); -+ ret = alternative_call(cpufreq_driver.update, cpuid, policy); - if (ret) - policy->turbo = curr_state; - } -@@ -448,7 +448,7 @@ int __cpufreq_set_policy(struct cpufreq_policy *data, - return -EINVAL; - - /* verify the cpu speed can be set within this limit */ -- ret = cpufreq_driver.verify(policy); -+ ret = alternative_call(cpufreq_driver.verify, policy); - if (ret) - return ret; - -@@ -456,7 +456,7 @@ int __cpufreq_set_policy(struct cpufreq_policy *data, - data->max = policy->max; - data->limits = policy->limits; - if (cpufreq_driver.setpolicy) -- return cpufreq_driver.setpolicy(data); -+ return alternative_call(cpufreq_driver.setpolicy, data); - - if (policy->governor != data->governor) { - /* save old, working values */ --- -2.44.0 - diff --git a/0510-x86-HPET-avoid-an-indirect-call.patch b/0510-x86-HPET-avoid-an-indirect-call.patch deleted file mode 100644 index 0d8c07af..00000000 --- a/0510-x86-HPET-avoid-an-indirect-call.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 06c81ea90c18c71725f51dfff79d4c4396b53d6c Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 17 Jan 2024 10:43:02 +0100 -Subject: [PATCH 510/542] x86/HPET: avoid an indirect call - -When this code was written, indirect branches still weren't considered -much of a problem (besides being a little slower). Instead of a function -pointer, pass a boolean to _disable_pit_irq(), thus allowing to -eliminate two ENDBR (one of them in .text). 
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 730d2637a8e5b98dc8e4e366179b4cedc496b3ad) ---- - xen/arch/x86/hpet.c | 4 ++-- - xen/arch/x86/include/asm/hpet.h | 4 ++-- - xen/arch/x86/time.c | 12 ++++++------ - 3 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c -index bc164dd82c..50d788cb6e 100644 ---- a/xen/arch/x86/hpet.c -+++ b/xen/arch/x86/hpet.c -@@ -563,7 +563,7 @@ static void cf_check handle_rtc_once(uint8_t index, uint8_t value) - } - } - --void __init cf_check hpet_broadcast_init(void) -+void __init hpet_broadcast_init(void) - { - u64 hpet_rate = hpet_setup(); - u32 hpet_id, cfg; -@@ -634,7 +634,7 @@ void __init cf_check hpet_broadcast_init(void) - hpet_events->flags = HPET_EVT_LEGACY; - } - --void cf_check hpet_broadcast_resume(void) -+void hpet_broadcast_resume(void) - { - u32 cfg; - unsigned int i, n; -diff --git a/xen/arch/x86/include/asm/hpet.h b/xen/arch/x86/include/asm/hpet.h -index 9919f74730..f343fe4740 100644 ---- a/xen/arch/x86/include/asm/hpet.h -+++ b/xen/arch/x86/include/asm/hpet.h -@@ -89,8 +89,8 @@ void hpet_disable_legacy_replacement_mode(void); - * Temporarily use an HPET event counter for timer interrupt handling, - * rather than using the LAPIC timer. Used for Cx state entry. - */ --void cf_check hpet_broadcast_init(void); --void cf_check hpet_broadcast_resume(void); -+void hpet_broadcast_init(void); -+void hpet_broadcast_resume(void); - void cf_check hpet_broadcast_enter(void); - void cf_check hpet_broadcast_exit(void); - int hpet_broadcast_is_available(void); -diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c -index b664ae4c83..4d1766284f 100644 ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -2288,7 +2288,7 @@ void __init early_time_init(void) - } - - /* keep pit enabled for pit_broadcast working while cpuidle enabled */ --static int _disable_pit_irq(void(*hpet_broadcast_setup)(void)) -+static int _disable_pit_irq(bool init) - { - int ret = 1; - -@@ -2303,13 +2303,13 @@ static int _disable_pit_irq(void(*hpet_broadcast_setup)(void)) - */ - if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) ) - { -- hpet_broadcast_setup(); -+ init ? hpet_broadcast_init() : hpet_broadcast_resume(); - if ( !hpet_broadcast_is_available() ) - { - if ( xen_cpuidle > 0 ) - { -- printk("%ps() failed, turning to PIT broadcast\n", -- hpet_broadcast_setup); -+ printk("hpet_broadcast_%s() failed, turning to PIT broadcast\n", -+ init ? "init" : "resume"); - return -1; - } - ret = 0; -@@ -2326,7 +2326,7 @@ static int _disable_pit_irq(void(*hpet_broadcast_setup)(void)) - - static int __init cf_check disable_pit_irq(void) - { -- if ( !_disable_pit_irq(hpet_broadcast_init) ) -+ if ( !_disable_pit_irq(true) ) - { - xen_cpuidle = 0; - printk("CPUIDLE: disabled due to no HPET. 
" -@@ -2387,7 +2387,7 @@ int time_resume(void) - - resume_platform_timer(); - -- if ( !_disable_pit_irq(hpet_broadcast_resume) ) -+ if ( !_disable_pit_irq(false) ) - BUG(); - - init_percpu_time(); --- -2.44.0 - diff --git a/0511-core-parking-use-alternative_call.patch b/0511-core-parking-use-alternative_call.patch deleted file mode 100644 index 2b39c920..00000000 --- a/0511-core-parking-use-alternative_call.patch +++ /dev/null @@ -1,95 +0,0 @@ -From f4efbcf156fb408dcedf1fc88d3be595ec722ad0 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 22 Jan 2024 13:38:24 +0100 -Subject: [PATCH 511/542] core-parking: use alternative_call() - -This way we can arrange for core_parking_{performance,power}()'s ENDBR -to also be zapped. - -For the decision to be taken before the 2nd alternative patching pass, -the initcall needs to become a pre-SMP one, though. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 1bc07ebcac3b1bb2a378732bc0f9a19940e76faf) ---- - xen/common/core_parking.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/xen/common/core_parking.c b/xen/common/core_parking.c -index c4f01291c0..a970ffeab8 100644 ---- a/xen/common/core_parking.c -+++ b/xen/common/core_parking.c -@@ -30,10 +30,11 @@ static DEFINE_SPINLOCK(accounting_lock); - static uint32_t cur_idle_nums; - static unsigned int core_parking_cpunum[NR_CPUS] = {[0 ... NR_CPUS-1] = -1}; - --static const struct cp_policy { -+struct cp_policy { - char name[30]; - unsigned int (*next)(unsigned int event); --} *__read_mostly core_parking_policy; -+}; -+static struct cp_policy __ro_after_init core_parking_policy; - - static enum core_parking_controller { - POWER_FIRST, -@@ -175,12 +176,13 @@ long cf_check core_parking_helper(void *data) - unsigned int cpu; - int ret = 0; - -- if ( !core_parking_policy ) -+ if ( !core_parking_policy.next ) - return -EINVAL; - - while ( cur_idle_nums < idle_nums ) - { -- cpu = core_parking_policy->next(CORE_PARKING_INCREMENT); -+ cpu = alternative_call(core_parking_policy.next, -+ CORE_PARKING_INCREMENT); - ret = cpu_down(cpu); - if ( ret ) - return ret; -@@ -193,7 +195,8 @@ long cf_check core_parking_helper(void *data) - - while ( cur_idle_nums > idle_nums ) - { -- cpu = core_parking_policy->next(CORE_PARKING_DECREMENT); -+ cpu = alternative_call(core_parking_policy.next, -+ CORE_PARKING_DECREMENT); - ret = cpu_up(cpu); - if ( ret ) - return ret; -@@ -239,12 +242,12 @@ uint32_t get_cur_idle_nums(void) - return cur_idle_nums; - } - --static const struct cp_policy power_first = { -+static const struct cp_policy __initconst_cf_clobber power_first = { - .name = "power", - .next = core_parking_power, - }; - --static const struct cp_policy performance_first = { -+static const struct cp_policy __initconst_cf_clobber performance_first = { - .name = "performance", - .next = core_parking_performance, - }; -@@ -254,7 +257,7 @@ static int __init register_core_parking_policy(const struct cp_policy *policy) - if ( !policy || !policy->next ) - return -EINVAL; - -- core_parking_policy = policy; -+ core_parking_policy = *policy; - return 0; - } - -@@ -269,4 +272,4 @@ static int __init cf_check core_parking_init(void) - - return ret; - } --__initcall(core_parking_init); -+presmp_initcall(core_parking_init); --- -2.44.0 - diff --git a/0512-x86-MTRR-avoid-several-indirect-calls.patch b/0512-x86-MTRR-avoid-several-indirect-calls.patch deleted file mode 100644 index 9958c052..00000000 --- a/0512-x86-MTRR-avoid-several-indirect-calls.patch +++ 
/dev/null
@@ -1,366 +0,0 @@
-From aed8192f578fb02111f57eca0868c2262ada1341 Mon Sep 17 00:00:00 2001
-From: Jan Beulich 
-Date: Mon, 22 Jan 2024 13:39:23 +0100
-Subject: [PATCH 512/542] x86/MTRR: avoid several indirect calls
-
-The use of (supposedly) vendor-specific hooks is a relic from the days
-when Xen was still possible to build as 32-bit binary. There's no
-expectation that a new need for such an abstraction would arise. Convert
-mtrr_if to a mere boolean and all prior calls through it to direct ones,
-thus allowing to eliminate 6 ENDBR from .text.
-
-Signed-off-by: Jan Beulich 
-Reviewed-by: Andrew Cooper 
-(cherry picked from commit e9e0eb30d4d6565b411499ca826718b4b9acab68)
---
- xen/arch/x86/cpu/mtrr/generic.c | 26 ++++--------
- xen/arch/x86/cpu/mtrr/main.c | 66 +++++++++++--------------------
- xen/arch/x86/cpu/mtrr/mtrr.h | 37 +++++------------
- xen/arch/x86/platform_hypercall.c | 2 +-
- 4 files changed, 40 insertions(+), 91 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
-index 47aaf76226..837d3250f1 100644
---- a/xen/arch/x86/cpu/mtrr/generic.c
-+++ b/xen/arch/x86/cpu/mtrr/generic.c
-@@ -287,7 +287,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
- }
- }
- 
--int cf_check generic_get_free_region(
-+int mtrr_get_free_region(
- unsigned long base, unsigned long size, int replace_reg)
- /* [SUMMARY] Get a free MTRR.
- The starting (base) address of the region.
-@@ -303,14 +303,14 @@ int cf_check generic_get_free_region(
- if (replace_reg >= 0 && replace_reg < max)
- return replace_reg;
- for (i = 0; i < max; ++i) {
-- mtrr_if->get(i, &lbase, &lsize, &ltype);
-+ mtrr_get(i, &lbase, &lsize, &ltype);
- if (lsize == 0)
- return i;
- }
- return -ENOSPC;
- }
- 
--static void cf_check generic_get_mtrr(
-+void mtrr_get(
- unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type)
- {
- uint64_t _mask, _base;
-@@ -500,7 +500,7 @@ static void post_set(bool pge)
- spin_unlock(&set_atomicity_lock);
- }
- 
--static void cf_check generic_set_all(void)
-+void mtrr_set_all(void)
- {
- unsigned long mask, count;
- unsigned long flags;
-@@ -523,7 +523,7 @@ static void cf_check generic_set_all(void)
- }
- }
- 
--static void cf_check generic_set_mtrr(
-+void mtrr_set(
- unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
- /* [SUMMARY] Set variable MTRR register on the local CPU.
- The register to set.
-@@ -567,7 +567,7 @@ static void cf_check generic_set_mtrr(
- local_irq_restore(flags);
- }
- 
--int cf_check generic_validate_add_page(
-+int mtrr_validate_add_page(
- unsigned long base, unsigned long size, unsigned int type)
- {
- unsigned long lbase, last;
-@@ -586,21 +586,9 @@ int cf_check generic_validate_add_page(
- }
- 
- 
--static int cf_check generic_have_wrcomb(void)
-+bool mtrr_have_wrcomb(void)
- {
- unsigned long config;
- rdmsrl(MSR_MTRRcap, config);
- return (config & (1ULL << 10));
- }
--
--/* generic structure...
-- */ --const struct mtrr_ops generic_mtrr_ops = { -- .use_intel_if = true, -- .set_all = generic_set_all, -- .get = generic_get_mtrr, -- .get_free_region = generic_get_free_region, -- .set = generic_set_mtrr, -- .validate_add_page = generic_validate_add_page, -- .have_wrcomb = generic_have_wrcomb, --}; -diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c -index 4e01c8d6f9..dee59ea168 100644 ---- a/xen/arch/x86/cpu/mtrr/main.c -+++ b/xen/arch/x86/cpu/mtrr/main.c -@@ -57,7 +57,7 @@ static DEFINE_MUTEX(mtrr_mutex); - u64 __read_mostly size_or_mask; - u64 __read_mostly size_and_mask; - --const struct mtrr_ops *__read_mostly mtrr_if = NULL; -+static bool __ro_after_init mtrr_if; - - static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); -@@ -78,23 +78,12 @@ static const char *mtrr_attrib_to_str(int x) - return (x <= 6) ? mtrr_strings[x] : "?"; - } - --/* Returns non-zero if we have the write-combining memory type */ --static int have_wrcomb(void) --{ -- return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); --} -- - /* This function returns the number of variable MTRRs */ - static void __init set_num_var_ranges(void) - { -- unsigned long config = 0; -- -- if (use_intel()) { -- rdmsrl(MSR_MTRRcap, config); -- } else if (is_cpu(AMD)) -- config = 2; -- else if (is_cpu(CENTAUR)) -- config = 8; -+ unsigned long config; -+ -+ rdmsrl(MSR_MTRRcap, config); - num_var_ranges = MASK_EXTR(config, MTRRcap_VCNT); - } - -@@ -149,10 +138,10 @@ static void cf_check ipi_handler(void *info) - if (data->smp_reg == ~0U) /* update all mtrr registers */ - /* At the cpu hot-add time this will reinitialize mtrr - * registres on the existing cpus. It is ok. */ -- mtrr_if->set_all(); -+ mtrr_set_all(); - else /* single mtrr register update */ -- mtrr_if->set(data->smp_reg, data->smp_base, -- data->smp_size, data->smp_type); -+ mtrr_set(data->smp_reg, data->smp_base, -+ data->smp_size, data->smp_type); - - atomic_dec(&data->count); - while(atomic_read(&data->gate)) -@@ -198,10 +187,9 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { - * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait - * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. Once it's set, each -- * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it -- * differently, so we call mtrr_if->set() callback and let them take care of it. -- * When they're done, they again decrement data->count and wait for data.gate to -- * be reset. -+ * CPU goes through the transition of updating MTRRs. -+ * When mtrr_set() is done, they again decrement data->count and wait for -+ * data.gate to be reset. - * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. - * Everyone then enables interrupts and we all continue on. - * -@@ -251,9 +239,9 @@ static void set_mtrr(unsigned int reg, unsigned long base, - if (reg == ~0U) /* update all mtrr registers */ - /* at boot or resume time, this will reinitialize the mtrrs on - * the bp. It is ok. 
*/
-- mtrr_if->set_all();
-+ mtrr_set_all();
- else /* update the single mtrr register */
-- mtrr_if->set(reg,base,size,type);
-+ mtrr_set(reg, base, size, type);
- 
- /* wait for the others */
- while (atomic_read(&data.count))
-@@ -319,7 +307,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
- if (!mtrr_if)
- return -ENXIO;
- 
-- if ((error = mtrr_if->validate_add_page(base,size,type)))
-+ if ((error = mtrr_validate_add_page(base, size, type)))
- return error;
- 
- if (type >= MTRR_NUM_TYPES) {
-@@ -328,7 +316,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
- }
- 
- /* If the type is WC, check that this processor supports it */
-- if ((type == X86_MT_WC) && !have_wrcomb()) {
-+ if ((type == X86_MT_WC) && mtrr_have_wrcomb()) {
- printk(KERN_WARNING
- "mtrr: your processor doesn't support write-combining\n");
- return -EOPNOTSUPP;
-@@ -350,7 +338,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
- /* Search for existing MTRR */
- mutex_lock(&mtrr_mutex);
- for (i = 0; i < num_var_ranges; ++i) {
-- mtrr_if->get(i, &lbase, &lsize, &ltype);
-+ mtrr_get(i, &lbase, &lsize, &ltype);
- if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
- continue;
- /* At this point we know there is some kind of overlap/enclosure */
-@@ -385,7 +373,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
- goto out;
- }
- /* Search for an empty MTRR */
-- i = mtrr_if->get_free_region(base, size, replace);
-+ i = mtrr_get_free_region(base, size, replace);
- if (i >= 0) {
- set_mtrr(i, base, size, type);
- if (likely(replace < 0))
-@@ -494,7 +482,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
- if (reg < 0) {
- /* Search for existing MTRR */
- for (i = 0; i < max; ++i) {
-- mtrr_if->get(i, &lbase, &lsize, &ltype);
-+ mtrr_get(i, &lbase, &lsize, &ltype);
- if (lbase == base && lsize == size) {
- reg = i;
- break;
-@@ -510,7 +498,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
- printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
- goto out;
- }
-- mtrr_if->get(reg, &lbase, &lsize, &ltype);
-+ mtrr_get(reg, &lbase, &lsize, &ltype);
- if (lsize < 1) {
- printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
- goto out;
-@@ -568,7 +556,7 @@ struct mtrr_value {
- void __init mtrr_bp_init(void)
- {
- if (cpu_has_mtrr) {
-- mtrr_if = &generic_mtrr_ops;
-+ mtrr_if = true;
- size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
- size_and_mask = ~size_or_mask & 0xfffff00000ULL;
- }
-@@ -576,14 +564,13 @@ void __init mtrr_bp_init(void)
- if (mtrr_if) {
- set_num_var_ranges();
- init_table();
-- if (use_intel())
-- get_mtrr_state();
-+ get_mtrr_state();
- }
- }
- 
- void mtrr_ap_init(void)
- {
-- if (!mtrr_if || !use_intel() || hold_mtrr_updates_on_aps)
-+ if (!mtrr_if || hold_mtrr_updates_on_aps)
- return;
- /*
- * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed,
-@@ -612,32 +599,25 @@ void mtrr_save_state(void)
- 
- void mtrr_aps_sync_begin(void)
- {
-- if (!use_intel())
-- return;
- hold_mtrr_updates_on_aps = 1;
- }
- 
- void mtrr_aps_sync_end(void)
- {
-- if (!use_intel())
-- return;
- set_mtrr(~0U, 0, 0, 0);
- hold_mtrr_updates_on_aps = 0;
- }
- 
- void mtrr_bp_restore(void)
- {
-- if (!use_intel())
-- return;
-- mtrr_if->set_all();
-+ mtrr_set_all();
- }
- 
- static int __init cf_check mtrr_init_finialize(void)
- {
- if (!mtrr_if)
- return 0;
-- if (use_intel())
-- mtrr_state_warn();
-+ mtrr_state_warn();
- return 0;
- }
- __initcall(mtrr_init_finialize);
-diff --git a/xen/arch/x86/cpu/mtrr/mtrr.h
b/xen/arch/x86/cpu/mtrr/mtrr.h -index c7fd44daab..a9741e0cb0 100644 ---- a/xen/arch/x86/cpu/mtrr/mtrr.h -+++ b/xen/arch/x86/cpu/mtrr/mtrr.h -@@ -6,40 +6,21 @@ - #define MTRR_CHANGE_MASK_VARIABLE 0x02 - #define MTRR_CHANGE_MASK_DEFTYPE 0x04 - -- --struct mtrr_ops { -- u32 vendor; -- bool use_intel_if; --// void (*init)(void); -- void (*set)(unsigned int reg, unsigned long base, -- unsigned long size, mtrr_type type); -- void (*set_all)(void); -- -- void (*get)(unsigned int reg, unsigned long *base, -- unsigned long *size, mtrr_type * type); -- int (*get_free_region)(unsigned long base, unsigned long size, -- int replace_reg); -- int (*validate_add_page)(unsigned long base, unsigned long size, -- unsigned int type); -- int (*have_wrcomb)(void); --}; -- --int cf_check generic_get_free_region( -+void mtrr_get( -+ unsigned int reg, unsigned long *base, unsigned long *size, -+ mtrr_type *type); -+void mtrr_set( -+ unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -+void mtrr_set_all(void); -+int mtrr_get_free_region( - unsigned long base, unsigned long size, int replace_reg); --int cf_check generic_validate_add_page( -+int mtrr_validate_add_page( - unsigned long base, unsigned long size, unsigned int type); -- --extern const struct mtrr_ops generic_mtrr_ops; -+bool mtrr_have_wrcomb(void); - - void get_mtrr_state(void); - --extern void set_mtrr_ops(const struct mtrr_ops *); -- - extern u64 size_or_mask, size_and_mask; --extern const struct mtrr_ops *mtrr_if; -- --#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) --#define use_intel() (mtrr_if && mtrr_if->use_intel_if) - - extern unsigned int num_var_ranges; - -diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c -index e7deee2268..27a799161a 100644 ---- a/xen/arch/x86/platform_hypercall.c -+++ b/xen/arch/x86/platform_hypercall.c -@@ -299,7 +299,7 @@ ret_t do_platform_op( - ret = -EINVAL; - if ( op->u.read_memtype.reg < num_var_ranges ) - { -- mtrr_if->get(op->u.read_memtype.reg, &mfn, &nr_mfns, &type); -+ mtrr_get(op->u.read_memtype.reg, &mfn, &nr_mfns, &type); - op->u.read_memtype.mfn = mfn; - op->u.read_memtype.nr_mfns = nr_mfns; - op->u.read_memtype.type = type; --- -2.44.0 - diff --git a/0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch b/0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch deleted file mode 100644 index 85b96418..00000000 --- a/0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 796959c8bd04672cb93ccbb1bc2f79e35209e30b Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 22 Jan 2024 13:40:00 +0100 -Subject: [PATCH 513/542] x86/PV: avoid indirect call for I/O emulation quirk - hook - -This way ioemul_handle_proliant_quirk() won't need ENDBR anymore. - -While touching this code, also -- arrange for it to not be built at all when !PV, -- add "const" to the last function parameter and bring the definition - in sync with the declaration (for Misra). 
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 1212af3e8c4d3a1350046d4fe0ca3b97b51e67de) ---- - xen/arch/x86/Makefile | 2 +- - xen/arch/x86/include/asm/io.h | 10 +++++++--- - xen/arch/x86/ioport_emulate.c | 9 ++++----- - xen/arch/x86/pv/emul-priv-op.c | 2 +- - 4 files changed, 13 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index f213a6b56a..cb9d952659 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -43,7 +43,7 @@ obj-$(CONFIG_LIVEPATCH) += alternative.o livepatch.o - obj-y += msi.o - obj-y += msr.o - obj-$(CONFIG_INDIRECT_THUNK) += indirect-thunk.o --obj-y += ioport_emulate.o -+obj-$(CONFIG_PV) += ioport_emulate.o - obj-y += irq.o - obj-$(CONFIG_KEXEC) += machine_kexec.o - obj-y += mm.o x86_64/mm.o -diff --git a/xen/arch/x86/include/asm/io.h b/xen/arch/x86/include/asm/io.h -index 92b784a861..9b19d2d389 100644 ---- a/xen/arch/x86/include/asm/io.h -+++ b/xen/arch/x86/include/asm/io.h -@@ -47,10 +47,14 @@ __OUT(b,"b",char) - __OUT(w,"w",short) - __OUT(l,,int) - --/* Function pointer used to handle platform specific I/O port emulation. */ -+/* -+ * Boolean indicator and function used to handle platform specific I/O port -+ * emulation. -+ */ - #define IOEMUL_QUIRK_STUB_BYTES 9 -+extern bool ioemul_handle_quirk; - struct cpu_user_regs; --extern unsigned int (*ioemul_handle_quirk)( -- u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs); -+unsigned int ioemul_handle_proliant_quirk( -+ uint8_t opcode, char *io_emul_stub, const struct cpu_user_regs *regs); - - #endif -diff --git a/xen/arch/x86/ioport_emulate.c b/xen/arch/x86/ioport_emulate.c -index 6caeb3d470..0c1e389bc8 100644 ---- a/xen/arch/x86/ioport_emulate.c -+++ b/xen/arch/x86/ioport_emulate.c -@@ -8,11 +8,10 @@ - #include - #include - --unsigned int (*__read_mostly ioemul_handle_quirk)( -- uint8_t opcode, char *io_emul_stub, struct cpu_user_regs *regs); -+bool __ro_after_init ioemul_handle_quirk; - --static unsigned int cf_check ioemul_handle_proliant_quirk( -- u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs) -+unsigned int ioemul_handle_proliant_quirk( -+ uint8_t opcode, char *io_emul_stub, const struct cpu_user_regs *regs) - { - static const char stub[] = { - 0x9c, /* pushf */ -@@ -103,7 +102,7 @@ static const struct dmi_system_id __initconstrel ioport_quirks_tbl[] = { - static int __init cf_check ioport_quirks_init(void) - { - if ( dmi_check_system(ioport_quirks_tbl) ) -- ioemul_handle_quirk = ioemul_handle_proliant_quirk; -+ ioemul_handle_quirk = true; - - return 0; - } -diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c -index 2c94beb10e..e429dfa4f0 100644 ---- a/xen/arch/x86/pv/emul-priv-op.c -+++ b/xen/arch/x86/pv/emul-priv-op.c -@@ -124,7 +124,7 @@ static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode, - /* Some platforms might need to quirk the stub for specific inputs. 
*/ - if ( unlikely(ioemul_handle_quirk) ) - { -- quirk_bytes = ioemul_handle_quirk(opcode, p, ctxt->ctxt.regs); -+ quirk_bytes = ioemul_handle_proliant_quirk(opcode, p, ctxt->ctxt.regs); - p += quirk_bytes; - } - --- -2.44.0 - diff --git a/0514-x86-MCE-separate-BSP-only-initialization.patch b/0514-x86-MCE-separate-BSP-only-initialization.patch deleted file mode 100644 index 01ae23e8..00000000 --- a/0514-x86-MCE-separate-BSP-only-initialization.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 2268aacc4324f6010058c2e3bbb214a280dc8078 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 22 Jan 2024 13:40:32 +0100 -Subject: [PATCH 514/542] x86/MCE: separate BSP-only initialization - -Several function pointers are registered over and over again, when -setting them once on the BSP suffices. Arrange for this in the vendor -init functions and mark involved registration functions __init. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 9f58616ddb1cc1870399de2202fafc7bf0d61694) ---- - xen/arch/x86/cpu/mcheck/mcaction.c | 2 +- - xen/arch/x86/cpu/mcheck/mce.c | 15 ++++++--------- - xen/arch/x86/cpu/mcheck/mce.h | 2 +- - xen/arch/x86/cpu/mcheck/mce_amd.c | 20 +++++++++++++------- - xen/arch/x86/cpu/mcheck/mce_intel.c | 10 +++++++--- - 5 files changed, 28 insertions(+), 21 deletions(-) - -diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c b/xen/arch/x86/cpu/mcheck/mcaction.c -index f4f265c1bc..695fb61d7d 100644 ---- a/xen/arch/x86/cpu/mcheck/mcaction.c -+++ b/xen/arch/x86/cpu/mcheck/mcaction.c -@@ -29,7 +29,7 @@ mci_action_add_pageoffline(int bank, struct mc_info *mi, - - mce_check_addr_t mc_check_addr = NULL; - --void mce_register_addrcheck(mce_check_addr_t cbfunc) -+void __init mce_register_addrcheck(mce_check_addr_t cbfunc) - { - mc_check_addr = cbfunc; - } -diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c -index f68e31b643..0b164e2027 100644 ---- a/xen/arch/x86/cpu/mcheck/mce.c -+++ b/xen/arch/x86/cpu/mcheck/mce.c -@@ -84,7 +84,7 @@ static void cf_check unexpected_machine_check(const struct cpu_user_regs *regs) - - static x86_mce_vector_t _machine_check_vector = unexpected_machine_check; - --void x86_mce_vector_register(x86_mce_vector_t hdlr) -+void __init x86_mce_vector_register(x86_mce_vector_t hdlr) - { - _machine_check_vector = hdlr; - } -@@ -107,7 +107,7 @@ void do_machine_check(const struct cpu_user_regs *regs) - */ - static x86_mce_callback_t mc_callback_bank_extended = NULL; - --void x86_mce_callback_register(x86_mce_callback_t cbfunc) -+void __init x86_mce_callback_register(x86_mce_callback_t cbfunc) - { - mc_callback_bank_extended = cbfunc; - } -@@ -118,7 +118,7 @@ void x86_mce_callback_register(x86_mce_callback_t cbfunc) - */ - static mce_recoverable_t mc_recoverable_scan = NULL; - --void mce_recoverable_register(mce_recoverable_t cbfunc) -+void __init mce_recoverable_register(mce_recoverable_t cbfunc) - { - mc_recoverable_scan = cbfunc; - } -@@ -182,7 +182,7 @@ static void mcabank_clear(int banknum) - */ - static mce_need_clearbank_t mc_need_clearbank_scan = NULL; - --void mce_need_clearbank_register(mce_need_clearbank_t cbfunc) -+void __init mce_need_clearbank_register(mce_need_clearbank_t cbfunc) - { - mc_need_clearbank_scan = cbfunc; - } -@@ -799,7 +799,7 @@ void mcheck_init(struct cpuinfo_x86 *c, bool bsp) - { - case X86_VENDOR_AMD: - case X86_VENDOR_HYGON: -- inited = amd_mcheck_init(c); -+ inited = amd_mcheck_init(c, bsp); - break; - - case X86_VENDOR_INTEL: -@@ -1913,11 +1913,8 @@ static void cf_check mce_softirq(void) - * 
will help to collect and log those MCE errors. - * Round2: Do all MCE processing logic as normal. - */ --void mce_handler_init(void) -+void __init mce_handler_init(void) - { -- if ( smp_processor_id() != 0 ) -- return; -- - /* callback register, do we really need so many callback? */ - /* mce handler data initialization */ - spin_lock_init(&mce_logout_lock); -diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h -index bea08bdc74..10ed059f7c 100644 ---- a/xen/arch/x86/cpu/mcheck/mce.h -+++ b/xen/arch/x86/cpu/mcheck/mce.h -@@ -44,7 +44,7 @@ extern uint8_t cmci_apic_vector; - extern bool lmce_support; - - /* Init functions */ --enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c); -+enum mcheck_type amd_mcheck_init(const struct cpuinfo_x86 *c, bool bsp); - enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp); - - void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); -diff --git a/xen/arch/x86/cpu/mcheck/mce_amd.c b/xen/arch/x86/cpu/mcheck/mce_amd.c -index d7ae8919df..cf80e1a275 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_amd.c -+++ b/xen/arch/x86/cpu/mcheck/mce_amd.c -@@ -284,7 +284,7 @@ int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - } - - enum mcheck_type --amd_mcheck_init(struct cpuinfo_x86 *ci) -+amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) - { - uint32_t i; - enum mcequirk_amd_flags quirkflag = 0; -@@ -294,9 +294,12 @@ amd_mcheck_init(struct cpuinfo_x86 *ci) - - /* Assume that machine check support is available. - * The minimum provided support is at least the K8. */ -- mce_handler_init(); -- x86_mce_vector_register(mcheck_cmn_handler); -- mce_need_clearbank_register(amd_need_clearbank_scan); -+ if ( bsp ) -+ { -+ mce_handler_init(); -+ x86_mce_vector_register(mcheck_cmn_handler); -+ mce_need_clearbank_register(amd_need_clearbank_scan); -+ } - - for ( i = 0; i < this_cpu(nr_mce_banks); i++ ) - { -@@ -336,9 +339,12 @@ amd_mcheck_init(struct cpuinfo_x86 *ci) - ppin_msr = MSR_AMD_PPIN; - } - -- x86_mce_callback_register(amd_f10_handler); -- mce_recoverable_register(mc_amd_recoverable_scan); -- mce_register_addrcheck(mc_amd_addrcheck); -+ if ( bsp ) -+ { -+ x86_mce_callback_register(amd_f10_handler); -+ mce_recoverable_register(mc_amd_recoverable_scan); -+ mce_register_addrcheck(mc_amd_addrcheck); -+ } - - return ci->x86_vendor == X86_VENDOR_HYGON ? 
- mcheck_hygon : mcheck_amd_famXX; -diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c -index ce7678f242..837a8c6d0c 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_intel.c -+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c -@@ -814,7 +814,7 @@ static void intel_mce_post_reset(void) - return; - } - --static void intel_init_mce(void) -+static void intel_init_mce(bool bsp) - { - uint64_t msr_content; - int i; -@@ -840,6 +840,9 @@ static void intel_init_mce(void) - if ( firstbank ) /* if cmci enabled, firstbank = 0 */ - wrmsrl(MSR_IA32_MC0_STATUS, 0x0ULL); - -+ if ( !bsp ) -+ return; -+ - x86_mce_vector_register(mcheck_cmn_handler); - mce_recoverable_register(intel_recoverable_scan); - mce_need_clearbank_register(intel_need_clearbank_scan); -@@ -979,9 +982,10 @@ enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp) - - intel_init_mca(c); - -- mce_handler_init(); -+ if ( bsp ) -+ mce_handler_init(); - -- intel_init_mce(); -+ intel_init_mce(bsp); - - intel_init_cmci(c); - --- -2.44.0 - diff --git a/0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch b/0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch deleted file mode 100644 index b849cf22..00000000 --- a/0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch +++ /dev/null @@ -1,405 +0,0 @@ -From 90275d1cbfa3cbb2380028753349bcb6bc6f0717 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 22 Jan 2024 13:41:07 +0100 -Subject: [PATCH 515/542] x86/MCE: switch some callback invocations to altcall - -While not performance critical, these hook invocations still would -better be converted: This way all pre-filled (and newly introduced) -struct mce_callback instances can become __initconst_cf_clobber, thus -allowing to eliminate another 9 ENDBR during the 2nd phase of -alternatives patching. - -While this means registering callbacks a little earlier, doing so is -perhaps even advantageous, for having pointers be non-NULL earlier on. -Only one set of callbacks would only ever be registered anyway, and -neither of the respective initialization function can (subsequently) -fail. 
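-
- [The shape of this change, reduced to a compilable sketch with hypothetical
- names; in Xen the invocations additionally go through
- alternative_call()/alternative_vcall() so the second patching phase can
- turn them into direct calls:
-
-     #include <stdbool.h>
-     #include <stdint.h>
-     #include <stdio.h>
-
-     struct callbacks {
-         void (*handler)(uint64_t status);
-         bool (*recoverable)(uint64_t status);
-     };
-
-     /* The one writable instance, assigned exactly once during BSP bring-up. */
-     static struct callbacks cbs;
-
-     static void vendor_handler(uint64_t s)
-     {
-         printf("handling status %#llx\n", (unsigned long long)s);
-     }
-
-     static bool vendor_recoverable(uint64_t s)
-     {
-         return !(s >> 63);              /* toy criterion */
-     }
-
-     /* Const per-vendor table, analogous to the __initconst_cf_clobber ones. */
-     static const struct callbacks vendor_cbs = {
-         .handler     = vendor_handler,
-         .recoverable = vendor_recoverable,
-     };
-
-     static void handler_init(const struct callbacks *cb)
-     {
-         cbs = *cb;                      /* one copy replaces N register_*() calls */
-     }
-
-     int main(void)
-     {
-         handler_init(&vendor_cbs);
-         if (cbs.recoverable(0x42))
-             cbs.handler(0x42);
-         return 0;
-     }]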
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 85ba4d050f9f3c4286164f21660ae88435b7e83c) ---- - xen/arch/x86/cpu/mcheck/mcaction.c | 10 +--- - xen/arch/x86/cpu/mcheck/mcaction.h | 5 -- - xen/arch/x86/cpu/mcheck/mce.c | 71 ++++++++-------------------- - xen/arch/x86/cpu/mcheck/mce.h | 72 +++++++++++++++-------------- - xen/arch/x86/cpu/mcheck/mce_amd.c | 26 ++++++----- - xen/arch/x86/cpu/mcheck/mce_intel.c | 14 +++--- - 6 files changed, 80 insertions(+), 118 deletions(-) - -diff --git a/xen/arch/x86/cpu/mcheck/mcaction.c b/xen/arch/x86/cpu/mcheck/mcaction.c -index 695fb61d7d..bf7a0de965 100644 ---- a/xen/arch/x86/cpu/mcheck/mcaction.c -+++ b/xen/arch/x86/cpu/mcheck/mcaction.c -@@ -27,13 +27,6 @@ mci_action_add_pageoffline(int bank, struct mc_info *mi, - return rec; - } - --mce_check_addr_t mc_check_addr = NULL; -- --void __init mce_register_addrcheck(mce_check_addr_t cbfunc) --{ -- mc_check_addr = cbfunc; --} -- - void - mc_memerr_dhandler(struct mca_binfo *binfo, - enum mce_result *result, -@@ -48,7 +41,8 @@ mc_memerr_dhandler(struct mca_binfo *binfo, - int vmce_vcpuid; - unsigned int mc_vcpuid; - -- if ( !mc_check_addr(bank->mc_status, bank->mc_misc, MC_ADDR_PHYSICAL) ) -+ if ( !alternative_call(mce_callbacks.check_addr, bank->mc_status, -+ bank->mc_misc, MC_ADDR_PHYSICAL) ) - { - dprintk(XENLOG_WARNING, - "No physical address provided for memory error\n"); -diff --git a/xen/arch/x86/cpu/mcheck/mcaction.h b/xen/arch/x86/cpu/mcheck/mcaction.h -index 5cbe558fb0..6c79498cd2 100644 ---- a/xen/arch/x86/cpu/mcheck/mcaction.h -+++ b/xen/arch/x86/cpu/mcheck/mcaction.h -@@ -12,9 +12,4 @@ mc_memerr_dhandler(struct mca_binfo *binfo, - #define MC_ADDR_PHYSICAL 0 - #define MC_ADDR_VIRTUAL 1 - --typedef bool (*mce_check_addr_t)(uint64_t status, uint64_t misc, int addr_type); --extern void mce_register_addrcheck(mce_check_addr_t); -- --extern mce_check_addr_t mc_check_addr; -- - #endif -diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c -index 0b164e2027..5b7b85a0b5 100644 ---- a/xen/arch/x86/cpu/mcheck/mce.c -+++ b/xen/arch/x86/cpu/mcheck/mce.c -@@ -82,47 +82,21 @@ static void cf_check unexpected_machine_check(const struct cpu_user_regs *regs) - fatal_trap(regs, 1); - } - --static x86_mce_vector_t _machine_check_vector = unexpected_machine_check; -- --void __init x86_mce_vector_register(x86_mce_vector_t hdlr) --{ -- _machine_check_vector = hdlr; --} -+struct mce_callbacks __ro_after_init mce_callbacks = { -+ .handler = unexpected_machine_check, -+}; -+static const typeof(mce_callbacks.handler) __initconst_cf_clobber __used -+ default_handler = unexpected_machine_check; - - /* Call the installed machine check handler for this CPU setup. */ - - void do_machine_check(const struct cpu_user_regs *regs) - { - mce_enter(); -- _machine_check_vector(regs); -+ alternative_vcall(mce_callbacks.handler, regs); - mce_exit(); - } - --/* -- * Init machine check callback handler -- * It is used to collect additional information provided by newer -- * CPU families/models without the need to duplicate the whole handler. -- * This avoids having many handlers doing almost nearly the same and each -- * with its own tweaks ands bugs. 
-- */ --static x86_mce_callback_t mc_callback_bank_extended = NULL; -- --void __init x86_mce_callback_register(x86_mce_callback_t cbfunc) --{ -- mc_callback_bank_extended = cbfunc; --} -- --/* -- * Machine check recoverable judgement callback handler -- * It is used to judge whether an UC error is recoverable by software -- */ --static mce_recoverable_t mc_recoverable_scan = NULL; -- --void __init mce_recoverable_register(mce_recoverable_t cbfunc) --{ -- mc_recoverable_scan = cbfunc; --} -- - struct mca_banks *mcabanks_alloc(unsigned int nr_mce_banks) - { - struct mca_banks *mb; -@@ -174,19 +148,6 @@ static void mcabank_clear(int banknum) - mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL); - } - --/* -- * Judging whether to Clear Machine Check error bank callback handler -- * According to Intel latest MCA OS Recovery Writer's Guide, -- * whether the error MCA bank needs to be cleared is decided by the mca_source -- * and MCi_status bit value. -- */ --static mce_need_clearbank_t mc_need_clearbank_scan = NULL; -- --void __init mce_need_clearbank_register(mce_need_clearbank_t cbfunc) --{ -- mc_need_clearbank_scan = cbfunc; --} -- - /* - * mce_logout_lock should only be used in the trap handler, - * while MCIP has not been cleared yet in the global status -@@ -227,7 +188,8 @@ static void mca_init_bank(enum mca_source who, struct mc_info *mi, int bank) - - if ( (mib->mc_status & MCi_STATUS_MISCV) && - (mib->mc_status & MCi_STATUS_ADDRV) && -- (mc_check_addr(mib->mc_status, mib->mc_misc, MC_ADDR_PHYSICAL)) && -+ alternative_call(mce_callbacks.check_addr, mib->mc_status, -+ mib->mc_misc, MC_ADDR_PHYSICAL) && - (who == MCA_POLLER || who == MCA_CMCI_HANDLER) && - (mfn_valid(_mfn(paddr_to_pfn(mib->mc_addr)))) ) - { -@@ -327,7 +289,7 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, - * If no mc_recovery_scan callback handler registered, - * this error is not recoverable - */ -- recover = mc_recoverable_scan ? 1 : 0; -+ recover = mce_callbacks.recoverable_scan; - - for ( i = 0; i < this_cpu(nr_mce_banks); i++ ) - { -@@ -344,8 +306,9 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, - * decide whether to clear bank by MCi_STATUS bit value such as - * OVER/UC/EN/PCC/S/AR - */ -- if ( mc_need_clearbank_scan ) -- need_clear = mc_need_clearbank_scan(who, status); -+ if ( mce_callbacks.need_clearbank_scan ) -+ need_clear = alternative_call(mce_callbacks.need_clearbank_scan, -+ who, status); - - /* - * If this is the first bank with valid MCA DATA, then -@@ -381,12 +344,12 @@ mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask, - - if ( recover && uc ) - /* uc = true, recover = true, we need not panic. */ -- recover = mc_recoverable_scan(status); -+ recover = alternative_call(mce_callbacks.recoverable_scan, status); - - mca_init_bank(who, mci, i); - -- if ( mc_callback_bank_extended ) -- mc_callback_bank_extended(mci, i, status); -+ if ( mce_callbacks.info_collect ) -+ alternative_vcall(mce_callbacks.info_collect, mci, i, status); - - /* By default, need_clear = true */ - if ( who != MCA_MCE_SCAN && need_clear ) -@@ -1913,9 +1876,11 @@ static void cf_check mce_softirq(void) - * will help to collect and log those MCE errors. - * Round2: Do all MCE processing logic as normal. - */ --void __init mce_handler_init(void) -+void __init mce_handler_init(const struct mce_callbacks *cb) - { - /* callback register, do we really need so many callback? 
*/ -+ mce_callbacks = *cb; -+ - /* mce handler data initialization */ - spin_lock_init(&mce_logout_lock); - open_softirq(MACHINE_CHECK_SOFTIRQ, mce_softirq); -diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h -index 10ed059f7c..6bd25d4101 100644 ---- a/xen/arch/x86/cpu/mcheck/mce.h -+++ b/xen/arch/x86/cpu/mcheck/mce.h -@@ -62,20 +62,12 @@ void noreturn mc_panic(char *s); - void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *, - uint32_t *, uint32_t *, uint32_t *, uint32_t *); - --/* Register a handler for machine check exceptions. */ --typedef void (*x86_mce_vector_t)(const struct cpu_user_regs *regs); --extern void x86_mce_vector_register(x86_mce_vector_t); -- - /* - * Common generic MCE handler that implementations may nominate - * via x86_mce_vector_register. - */ - void cf_check mcheck_cmn_handler(const struct cpu_user_regs *regs); - --/* Register a handler for judging whether mce is recoverable. */ --typedef bool (*mce_recoverable_t)(uint64_t status); --extern void mce_recoverable_register(mce_recoverable_t); -- - /* Read an MSR, checking for an interposed value first */ - extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t, - uint64_t *); -@@ -134,30 +126,6 @@ extern void mcheck_mca_clearbanks(struct mca_banks *); - extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *, - struct mca_summary *, struct mca_banks *); - --/* -- * Register callbacks to be made during bank telemetry logout. -- * Those callbacks are only available to those machine check handlers -- * that call to the common mcheck_cmn_handler or who use the common -- * telemetry logout function mcheck_mca_logout in error polling. -- */ -- --/* Register a handler for judging whether the bank need to be cleared */ --typedef bool (*mce_need_clearbank_t)(enum mca_source who, u64 status); --extern void mce_need_clearbank_register(mce_need_clearbank_t); -- --/* -- * Register a callback to collect additional information (typically non- -- * architectural) provided by newer CPU families/models without the need -- * to duplicate the whole handler resulting in various handlers each with -- * its own tweaks and bugs. The callback receives an struct mc_info pointer -- * which it can use with x86_mcinfo_reserve to add additional telemetry, -- * the current MCA bank number we are reading telemetry from, and the -- * MCi_STATUS value for that bank. -- */ --typedef struct mcinfo_extended *(*x86_mce_callback_t) -- (struct mc_info *, uint16_t, uint64_t); --extern void x86_mce_callback_register(x86_mce_callback_t); -- - void *x86_mcinfo_reserve(struct mc_info *mi, - unsigned int size, unsigned int type); - void x86_mcinfo_dump(struct mc_info *mi); -@@ -198,8 +166,44 @@ static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr) - return 0; - } - --/* MC softirq */ --void mce_handler_init(void); -+struct mce_callbacks { -+ void (*handler)(const struct cpu_user_regs *regs); -+ bool (*check_addr)(uint64_t status, uint64_t misc, int addr_type); -+ -+ /* Handler for judging whether mce is recoverable. */ -+ bool (*recoverable_scan)(uint64_t status); -+ -+ /* -+ * Callbacks to be made during bank telemetry logout. -+ * They are only available to those machine check handlers -+ * that call to the common mcheck_cmn_handler or who use the common -+ * telemetry logout function mcheck_mca_logout in error polling. -+ */ -+ -+ /* -+ * Judging whether to Clear Machine Check error bank callback handler. 
-+ * According to Intel latest MCA OS Recovery Writer's Guide, whether -+ * the error MCA bank needs to be cleared is decided by the mca_source -+ * and MCi_status bit value. -+ */ -+ bool (*need_clearbank_scan)(enum mca_source who, u64 status); -+ -+ /* -+ * Callback to collect additional information (typically non- -+ * architectural) provided by newer CPU families/models without the need -+ * to duplicate the whole handler resulting in various handlers each with -+ * its own tweaks and bugs. The callback receives an struct mc_info pointer -+ * which it can use with x86_mcinfo_reserve to add additional telemetry, -+ * the current MCA bank number we are reading telemetry from, and the -+ * MCi_STATUS value for that bank. -+ */ -+ struct mcinfo_extended *(*info_collect) -+ (struct mc_info *mi, uint16_t bank, uint64_t status); -+}; -+ -+extern struct mce_callbacks mce_callbacks; -+ -+void mce_handler_init(const struct mce_callbacks *cb); - - extern const struct mca_error_handler *mce_dhandlers; - extern const struct mca_error_handler *mce_uhandlers; -diff --git a/xen/arch/x86/cpu/mcheck/mce_amd.c b/xen/arch/x86/cpu/mcheck/mce_amd.c -index cf80e1a275..f401f54fab 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_amd.c -+++ b/xen/arch/x86/cpu/mcheck/mce_amd.c -@@ -283,6 +283,19 @@ int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - return 1; - } - -+static const struct mce_callbacks __initconst_cf_clobber k8_callbacks = { -+ .handler = mcheck_cmn_handler, -+ .need_clearbank_scan = amd_need_clearbank_scan, -+}; -+ -+static const struct mce_callbacks __initconst_cf_clobber k10_callbacks = { -+ .handler = mcheck_cmn_handler, -+ .check_addr = mc_amd_addrcheck, -+ .recoverable_scan = mc_amd_recoverable_scan, -+ .need_clearbank_scan = amd_need_clearbank_scan, -+ .info_collect = amd_f10_handler, -+}; -+ - enum mcheck_type - amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) - { -@@ -295,11 +308,7 @@ amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) - /* Assume that machine check support is available. - * The minimum provided support is at least the K8. */ - if ( bsp ) -- { -- mce_handler_init(); -- x86_mce_vector_register(mcheck_cmn_handler); -- mce_need_clearbank_register(amd_need_clearbank_scan); -- } -+ mce_handler_init(ci->x86 == 0xf ? &k8_callbacks : &k10_callbacks); - - for ( i = 0; i < this_cpu(nr_mce_banks); i++ ) - { -@@ -339,13 +348,6 @@ amd_mcheck_init(const struct cpuinfo_x86 *ci, bool bsp) - ppin_msr = MSR_AMD_PPIN; - } - -- if ( bsp ) -- { -- x86_mce_callback_register(amd_f10_handler); -- mce_recoverable_register(mc_amd_recoverable_scan); -- mce_register_addrcheck(mc_amd_addrcheck); -- } -- - return ci->x86_vendor == X86_VENDOR_HYGON ? - mcheck_hygon : mcheck_amd_famXX; - } -diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c -index 837a8c6d0c..dc7e1e61a6 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_intel.c -+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c -@@ -843,11 +843,6 @@ static void intel_init_mce(bool bsp) - if ( !bsp ) - return; - -- x86_mce_vector_register(mcheck_cmn_handler); -- mce_recoverable_register(intel_recoverable_scan); -- mce_need_clearbank_register(intel_need_clearbank_scan); -- mce_register_addrcheck(intel_checkaddr); -- - mce_dhandlers = intel_mce_dhandlers; - mce_dhandler_num = ARRAY_SIZE(intel_mce_dhandlers); - mce_uhandlers = intel_mce_uhandlers; -@@ -957,6 +952,13 @@ static int cf_check cpu_callback( - return !rc ? 
NOTIFY_DONE : notifier_from_errno(rc); - } - -+static const struct mce_callbacks __initconst_cf_clobber intel_callbacks = { -+ .handler = mcheck_cmn_handler, -+ .check_addr = intel_checkaddr, -+ .recoverable_scan = intel_recoverable_scan, -+ .need_clearbank_scan = intel_need_clearbank_scan, -+}; -+ - static struct notifier_block cpu_nfb = { - .notifier_call = cpu_callback - }; -@@ -983,7 +985,7 @@ enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp) - intel_init_mca(c); - - if ( bsp ) -- mce_handler_init(); -+ mce_handler_init(&intel_callbacks); - - intel_init_mce(bsp); - --- -2.44.0 - diff --git a/0516-IRQ-generalize-gs-et_irq_regs.patch b/0516-IRQ-generalize-gs-et_irq_regs.patch deleted file mode 100644 index c0d724c6..00000000 --- a/0516-IRQ-generalize-gs-et_irq_regs.patch +++ /dev/null @@ -1,134 +0,0 @@ -From e356ac136aac6cddf26f0287112813a9344a8aed Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Tue, 23 Jan 2024 12:03:23 +0100 -Subject: [PATCH 516/542] IRQ: generalize [gs]et_irq_regs() - -Move functions (and their data) to common code, and invoke the functions -on Arm as well. This is in preparation of dropping the register -parameters from handler functions. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Reviewed-by: Julien Grall -(cherry picked from commit f67bddf3bccd99a5fee968c3b3f288db6a57d3be) ---- - xen/arch/arm/irq.c | 2 ++ - xen/arch/x86/include/asm/irq.h | 21 --------------------- - xen/arch/x86/irq.c | 2 -- - xen/common/irq.c | 2 ++ - xen/include/xen/irq.h | 21 +++++++++++++++++++++ - 5 files changed, 25 insertions(+), 23 deletions(-) - -diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c -index fd0c15fffd..8649c636a3 100644 ---- a/xen/arch/arm/irq.c -+++ b/xen/arch/arm/irq.c -@@ -229,6 +229,7 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq) - { - struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; -+ struct cpu_user_regs *old_regs = set_irq_regs(regs); - - perfc_incr(irqs); - -@@ -296,6 +297,7 @@ out: - out_no_end: - spin_unlock(&desc->lock); - irq_exit(); -+ set_irq_regs(old_regs); - } - - void release_irq(unsigned int irq, const void *dev_id) -diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h -index 823d627fd0..26850e5077 100644 ---- a/xen/arch/x86/include/asm/irq.h -+++ b/xen/arch/x86/include/asm/irq.h -@@ -70,27 +70,6 @@ extern bool opt_noirqbalance; - - extern int opt_irq_vector_map; - --/* -- * Per-cpu current frame pointer - the location of the last exception frame on -- * the stack -- */ --DECLARE_PER_CPU(struct cpu_user_regs *, __irq_regs); -- --static inline struct cpu_user_regs *get_irq_regs(void) --{ -- return this_cpu(__irq_regs); --} -- --static inline struct cpu_user_regs *set_irq_regs(struct cpu_user_regs *new_regs) --{ -- struct cpu_user_regs *old_regs, **pp_regs = &this_cpu(__irq_regs); -- -- old_regs = *pp_regs; -- *pp_regs = new_regs; -- return old_regs; --} -- -- - #define platform_legacy_irq(irq) ((irq) < 16) - - void cf_check event_check_interrupt(struct cpu_user_regs *regs); -diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c -index 51b4837cd3..abd6f577dd 100644 ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -53,8 +53,6 @@ static DEFINE_SPINLOCK(vector_lock); - - DEFINE_PER_CPU(vector_irq_t, vector_irq); - --DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs); -- - static LIST_HEAD(irq_ratelimit_list); - static DEFINE_SPINLOCK(irq_ratelimit_lock); - static struct timer irq_ratelimit_timer; -diff --git a/xen/common/irq.c b/xen/common/irq.c 
-index 727cf8bd22..236cf171e2 100644 ---- a/xen/common/irq.c -+++ b/xen/common/irq.c -@@ -1,6 +1,8 @@ - #include - #include - -+DEFINE_PER_CPU(struct cpu_user_regs *, irq_regs); -+ - int init_one_irq_desc(struct irq_desc *desc) - { - int err; -diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h -index 300625e56d..c93ef31a9c 100644 ---- a/xen/include/xen/irq.h -+++ b/xen/include/xen/irq.h -@@ -130,6 +130,27 @@ void cf_check irq_actor_none(struct irq_desc *); - #define irq_disable_none irq_actor_none - #define irq_enable_none irq_actor_none - -+/* -+ * Per-cpu interrupted context register state - the inner-most interrupt frame -+ * on the stack. -+ */ -+DECLARE_PER_CPU(struct cpu_user_regs *, irq_regs); -+ -+static inline struct cpu_user_regs *get_irq_regs(void) -+{ -+ return this_cpu(irq_regs); -+} -+ -+static inline struct cpu_user_regs *set_irq_regs(struct cpu_user_regs *new_regs) -+{ -+ struct cpu_user_regs *old_regs, **pp_regs = &this_cpu(irq_regs); -+ -+ old_regs = *pp_regs; -+ *pp_regs = new_regs; -+ -+ return old_regs; -+} -+ - struct domain; - struct vcpu; - --- -2.44.0 - diff --git a/0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch b/0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch deleted file mode 100644 index 3418ab38..00000000 --- a/0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 68468e5b0ff7f82518de6de8b74187f86a9b6e22 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 30 Jan 2024 10:13:58 +0100 -Subject: [PATCH 517/542] x86/spec-ctrl: Expose IPRED_CTRL to guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The CPUID feature bit signals the presence of the IPRED_DIS_{U,S} controls in -SPEC_CTRL MSR, first available in Intel AlderLake and Sapphire Rapids CPUs. - -Xen already knows how to context switch MSR_SPEC_CTRL properly between guest -and hypervisor context. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 4dd6760706848de30f7c8b5f83462b9bcb070c91) ---- - xen/arch/x86/msr.c | 6 ++++-- - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - xen/tools/gen-cpuid.py | 3 ++- - 3 files changed, 7 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index f7192c1ddf..ac01553598 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -316,8 +316,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - - /* - * Caller to confirm that MSR_SPEC_CTRL is available. Intel and AMD have -- * separate CPUID features for this functionality, but only set will be -- * active. -+ * separate CPUID features for some of this functionality, but only one -+ * vendors-worth will be active on a single host. - */ - uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) - { -@@ -331,6 +331,8 @@ uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) - return (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | - (ssbd ? SPEC_CTRL_SSBD : 0) | - (psfd ? SPEC_CTRL_PSFD : 0) | -+ (cp->feat.ipred_ctrl -+ ? 
(SPEC_CTRL_IPRED_DIS_U | SPEC_CTRL_IPRED_DIS_S) : 0) | - 0); - } - -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index bc971f3c6f..8708b934a0 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -295,7 +295,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory - - /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ - XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ --XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /* MSR_SPEC_CTRL.IPRED_DIS_* */ -+XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ - XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /* MSR_SPEC_CTRL.RRSBA_DIS_* */ - XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */ - XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 636ff44c8b..9d1e47cfcd 100755 ---- a/xen/tools/gen-cpuid.py -+++ b/xen/tools/gen-cpuid.py -@@ -318,7 +318,8 @@ def crunch_numbers(state): - # IBRSB/IBRS, and we pass this MSR directly to guests. Treating them - # as dependent features simplifies Xen's logic, and prevents the guest - # from seeing implausible configurations. -- IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS], -+ IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS, -+ IPRED_CTRL], - IBRS: [AMD_STIBP, AMD_SSBD, PSFD, - IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE], - IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE], --- -2.44.0 - diff --git a/0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch b/0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch deleted file mode 100644 index d938e285..00000000 --- a/0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch +++ /dev/null @@ -1,66 +0,0 @@ -From b7559a0488ac286d92a09fc00614207b32abb72d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 30 Jan 2024 10:13:59 +0100 -Subject: [PATCH 518/542] x86/spec-ctrl: Expose RRSBA_CTRL to guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The CPUID feature bit signals the presence of the RRSBA_DIS_{U,S} controls in -SPEC_CTRL MSR, first available in Intel AlderLake and Sapphire Rapids CPUs. - -Xen already knows how to context switch MSR_SPEC_CTRL properly between guest -and hypervisor context. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 478e4787fa64b621061177a7843c452e9a19916d) ---- - xen/arch/x86/msr.c | 2 ++ - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - xen/tools/gen-cpuid.py | 2 +- - 3 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index ac01553598..615314f1e1 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -333,6 +333,8 @@ uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) - (psfd ? SPEC_CTRL_PSFD : 0) | - (cp->feat.ipred_ctrl - ? (SPEC_CTRL_IPRED_DIS_U | SPEC_CTRL_IPRED_DIS_S) : 0) | -+ (cp->feat.rrsba_ctrl -+ ? 
(SPEC_CTRL_RRSBA_DIS_U | SPEC_CTRL_RRSBA_DIS_S) : 0) | - 0); - } - -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 8708b934a0..0e1581cdac 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -296,7 +296,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory - /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ - XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ - XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ --XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /* MSR_SPEC_CTRL.RRSBA_DIS_* */ -+XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*A MSR_SPEC_CTRL.RRSBA_DIS_* */ - XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */ - XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ - -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 9d1e47cfcd..09acb9764c 100755 ---- a/xen/tools/gen-cpuid.py -+++ b/xen/tools/gen-cpuid.py -@@ -319,7 +319,7 @@ def crunch_numbers(state): - # as dependent features simplifies Xen's logic, and prevents the guest - # from seeing implausible configurations. - IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS, -- IPRED_CTRL], -+ IPRED_CTRL, RRSBA_CTRL], - IBRS: [AMD_STIBP, AMD_SSBD, PSFD, - IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE], - IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE], --- -2.44.0 - diff --git a/0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch b/0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch deleted file mode 100644 index 54480584..00000000 --- a/0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 363745e52dc758bdfb2fd42d32f12276c80ed447 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 30 Jan 2024 10:14:00 +0100 -Subject: [PATCH 519/542] x86/spec-ctrl: Expose BHI_CTRL to guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The CPUID feature bit signals the presence of the BHI_DIS_S control in -SPEC_CTRL MSR, first available in Intel AlderLake and Sapphire Rapids CPUs - -Xen already knows how to context switch MSR_SPEC_CTRL properly between guest -and hypervisor context. - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 583f1d0950529f3517b1741c2b21a028a82ba831) ---- - xen/arch/x86/msr.c | 1 + - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - xen/tools/gen-cpuid.py | 2 +- - 3 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index 615314f1e1..51e6744e8f 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -335,6 +335,7 @@ uint64_t msr_spec_ctrl_valid_bits(const struct cpu_policy *cp) - ? (SPEC_CTRL_IPRED_DIS_U | SPEC_CTRL_IPRED_DIS_S) : 0) | - (cp->feat.rrsba_ctrl - ? (SPEC_CTRL_RRSBA_DIS_U | SPEC_CTRL_RRSBA_DIS_S) : 0) | -+ (cp->feat.bhi_ctrl ? 
SPEC_CTRL_BHI_DIS_S : 0) | - 0); - } - -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 0e1581cdac..51f238683c 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -297,7 +297,7 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory - XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ - XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ - XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*A MSR_SPEC_CTRL.RRSBA_DIS_* */ --XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /* MSR_SPEC_CTRL.BHI_DIS_S */ -+XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /*A MSR_SPEC_CTRL.BHI_DIS_S */ - XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ - - /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ -diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py -index 09acb9764c..a7c2ba3e5d 100755 ---- a/xen/tools/gen-cpuid.py -+++ b/xen/tools/gen-cpuid.py -@@ -319,7 +319,7 @@ def crunch_numbers(state): - # as dependent features simplifies Xen's logic, and prevents the guest - # from seeing implausible configurations. - IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS, -- IPRED_CTRL, RRSBA_CTRL], -+ IPRED_CTRL, RRSBA_CTRL, BHI_CTRL], - IBRS: [AMD_STIBP, AMD_SSBD, PSFD, - IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE], - IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE], --- -2.44.0 - diff --git a/0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch b/0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch deleted file mode 100644 index fe10f3c7..00000000 --- a/0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch +++ /dev/null @@ -1,69 +0,0 @@ -From f2947a0da348eafc72e166dea14983f6d7c8300e Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 5 Feb 2024 10:44:46 +0100 -Subject: [PATCH 520/542] x86: arrange for ENDBR zapping from - _ctxt_switch_masking() - -While altcall is already used for them, the functions want announcing in -.init.rodata.cf_clobber, even if the resulting static variables aren't -otherwise used. - -While doing this also move ctxt_switch_masking to .data.ro_after_init. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -(cherry picked from commit 044168fa3a65b6542bda5c21e373742de1bd5980) ---- - xen/arch/x86/cpu/amd.c | 5 +++++ - xen/arch/x86/cpu/common.c | 2 +- - xen/arch/x86/cpu/intel.c | 5 +++++ - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 3d85e9797d..d5e9ad7598 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -258,6 +258,11 @@ static void cf_check amd_ctxt_switch_masking(const struct vcpu *next) - #undef LAZY - } - -+#ifdef CONFIG_XEN_IBT /* Announce the function to ENDBR clobbering logic. */ -+static const typeof(ctxt_switch_masking) __initconst_cf_clobber __used csm = -+ amd_ctxt_switch_masking; -+#endif -+ - /* - * Mask the features and extended features returned by CPUID. 
Parameters are - * set from the boot line via two methods: -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 54ea7fa831..60e472da26 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -121,7 +121,7 @@ static const struct cpu_dev default_cpu = { - static const struct cpu_dev *this_cpu = &default_cpu; - - static DEFINE_PER_CPU(uint64_t, msr_misc_features); --void (* __read_mostly ctxt_switch_masking)(const struct vcpu *next); -+void (* __ro_after_init ctxt_switch_masking)(const struct vcpu *next); - - bool __init probe_cpuid_faulting(void) - { -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index 96723b5d44..532e845f66 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -220,6 +220,11 @@ static void cf_check intel_ctxt_switch_masking(const struct vcpu *next) - #undef LAZY - } - -+#ifdef CONFIG_XEN_IBT /* Announce the function to ENDBR clobbering logic. */ -+static const typeof(ctxt_switch_masking) __initconst_cf_clobber __used csm = -+ intel_ctxt_switch_masking; -+#endif -+ - /* - * opt_cpuid_mask_ecx/edx: cpuid.1[ecx, edx] feature mask. - * For example, E8400[Intel Core 2 Duo Processor series] ecx = 0x0008E3FD, --- -2.44.0 - diff --git a/0521-x86-guest-finish-conversion-to-altcall.patch b/0521-x86-guest-finish-conversion-to-altcall.patch deleted file mode 100644 index 6b654b8f..00000000 --- a/0521-x86-guest-finish-conversion-to-altcall.patch +++ /dev/null @@ -1,84 +0,0 @@ -From d11255f909e4b77ae1d1aa7e928cdfca5964a574 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 5 Feb 2024 10:45:31 +0100 -Subject: [PATCH 521/542] x86/guest: finish conversion to altcall - -While .setup() and .e820_fixup() don't need fiddling with for being run -only very early, both .ap_setup() and .resume() want converting too: -This way both pre-filled struct hypervisor_ops instances can become -__initconst_cf_clobber, thus allowing to eliminate up to 5 more ENDBR -(configuration dependent) during the 2nd phase of alternatives patching. - -While fiddling with section annotations here, also move "ops" itself to -.data.ro_after_init. 
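-
- [For the optional hooks, the call sites keep their NULL checks and only the
- invocation itself gets wrapped. A minimal sketch with made-up names; the
- alternative_call()/alternative_vcall() wrapping is Xen-specific and elided
- here:
-
-     #include <stdio.h>
-
-     struct hypervisor_ops {
-         const char *name;
-         int  (*ap_setup)(void);
-         void (*resume)(void);           /* optional: may stay NULL */
-     };
-
-     static int demo_ap_setup(void)
-     {
-         return 0;
-     }
-
-     static const struct hypervisor_ops probed_ops = {
-         .name     = "demo",
-         .ap_setup = demo_ap_setup,
-         /* .resume deliberately left NULL */
-     };
-
-     static struct hypervisor_ops ops;   /* filled once at probe time */
-
-     int main(void)
-     {
-         ops = probed_ops;
-         if (ops.ap_setup)
-             printf("%s ap_setup -> %d\n", ops.name, ops.ap_setup());
-         if (ops.resume)                 /* absent hook: call simply skipped */
-             ops.resume();
-         return 0;
-     }]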
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Paul Durrant -(cherry picked from commit e931edccc53c9dd6e9a505ad0ff3a03d985669bc) ---- - xen/arch/x86/guest/hyperv/hyperv.c | 2 +- - xen/arch/x86/guest/hypervisor.c | 6 +++--- - xen/arch/x86/guest/xen/xen.c | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/guest/hyperv/hyperv.c b/xen/arch/x86/guest/hyperv/hyperv.c -index b101ba3080..5c58a0c457 100644 ---- a/xen/arch/x86/guest/hyperv/hyperv.c -+++ b/xen/arch/x86/guest/hyperv/hyperv.c -@@ -219,7 +219,7 @@ static int cf_check flush_tlb( - return hyperv_flush_tlb(mask, va, flags); - } - --static const struct hypervisor_ops __initconstrel ops = { -+static const struct hypervisor_ops __initconst_cf_clobber ops = { - .name = "Hyper-V", - .setup = setup, - .ap_setup = ap_setup, -diff --git a/xen/arch/x86/guest/hypervisor.c b/xen/arch/x86/guest/hypervisor.c -index 366af1d650..c3e10c3586 100644 ---- a/xen/arch/x86/guest/hypervisor.c -+++ b/xen/arch/x86/guest/hypervisor.c -@@ -25,7 +25,7 @@ - #include - #include - --static struct hypervisor_ops __read_mostly ops; -+static struct hypervisor_ops __ro_after_init ops; - - const char *__init hypervisor_probe(void) - { -@@ -61,7 +61,7 @@ void __init hypervisor_setup(void) - int hypervisor_ap_setup(void) - { - if ( ops.ap_setup ) -- return ops.ap_setup(); -+ return alternative_call(ops.ap_setup); - - return 0; - } -@@ -69,7 +69,7 @@ int hypervisor_ap_setup(void) - void hypervisor_resume(void) - { - if ( ops.resume ) -- ops.resume(); -+ alternative_vcall(ops.resume); - } - - void __init hypervisor_e820_fixup(struct e820map *e820) -diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c -index 9c2defaa66..c4cb16df38 100644 ---- a/xen/arch/x86/guest/xen/xen.c -+++ b/xen/arch/x86/guest/xen/xen.c -@@ -330,7 +330,7 @@ static int cf_check flush_tlb( - return xen_hypercall_hvm_op(HVMOP_flush_tlbs, NULL); - } - --static const struct hypervisor_ops __initconstrel ops = { -+static const struct hypervisor_ops __initconst_cf_clobber ops = { - .name = "Xen", - .setup = setup, - .ap_setup = ap_setup, --- -2.44.0 - diff --git a/0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch b/0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch deleted file mode 100644 index 7f668768..00000000 --- a/0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 6b899fe735d040356ead7170e0fe10f6668624d0 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Mon, 5 Feb 2024 10:48:11 +0100 -Subject: [PATCH 522/542] x86/CPU: convert vendor hook invocations to altcall - -While not performance critical, these hook invocations still want -converting: This way all pre-filled struct cpu_dev instances can become -__initconst_cf_clobber, thus allowing to eliminate further 8 ENDBR -during the 2nd phase of alternatives patching (besides moving previously -resident data to .init.*). - -Since all use sites need touching anyway, take the opportunity and also -address a Misra C:2012 Rule 5.5 violation: Rename the this_cpu static -variable. 
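-
- [Structurally the patch moves from a long-lived pointer into one of several
- vendor tables to a single by-value copy taken during early boot, which is
- what lets the source tables become __initconst_cf_clobber. A compilable
- sketch with invented vendors:
-
-     #include <stdio.h>
-
-     struct cpu_dev {
-         void (*c_early_init)(void);
-         void (*c_init)(void);
-     };
-
-     static void vendor_a_init(void)
-     {
-         puts("vendor A init");
-     }
-
-     static const struct cpu_dev vendor_a = { .c_init = vendor_a_init };
-     static const struct cpu_dev fallback;      /* all hooks NULL */
-
-     static struct cpu_dev actual_cpu;          /* the single writable copy */
-
-     int main(void)
-     {
-         int is_vendor_a = 1;                   /* stand-in for CPUID vendor lookup */
-
-         actual_cpu = is_vendor_a ? vendor_a : fallback;
-         if (actual_cpu.c_early_init)           /* Xen: alternative_vcall(...) */
-             actual_cpu.c_early_init();
-         if (actual_cpu.c_init)
-             actual_cpu.c_init();
-         return 0;
-     }]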
- -Signed-off-by: Jan Beulich -Acked-by: Andrew Cooper -(cherry picked from commit 660f8a75013c947fbe5358a640032a1f9f1eece5) ---- - xen/arch/x86/cpu/amd.c | 2 +- - xen/arch/x86/cpu/centaur.c | 2 +- - xen/arch/x86/cpu/common.c | 24 ++++++++++++------------ - xen/arch/x86/cpu/hygon.c | 2 +- - xen/arch/x86/cpu/intel.c | 2 +- - xen/arch/x86/cpu/shanghai.c | 2 +- - 6 files changed, 17 insertions(+), 17 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index d5e9ad7598..2838725bab 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -1286,7 +1286,7 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) - amd_log_freq(c); - } - --const struct cpu_dev amd_cpu_dev = { -+const struct cpu_dev __initconst_cf_clobber amd_cpu_dev = { - .c_early_init = early_init_amd, - .c_init = init_amd, - }; -diff --git a/xen/arch/x86/cpu/centaur.c b/xen/arch/x86/cpu/centaur.c -index eac49d78db..750168d1e8 100644 ---- a/xen/arch/x86/cpu/centaur.c -+++ b/xen/arch/x86/cpu/centaur.c -@@ -54,6 +54,6 @@ static void cf_check init_centaur(struct cpuinfo_x86 *c) - init_c3(c); - } - --const struct cpu_dev centaur_cpu_dev = { -+const struct cpu_dev __initconst_cf_clobber centaur_cpu_dev = { - .c_init = init_centaur, - }; -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 60e472da26..88855f5773 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -115,10 +115,10 @@ static void cf_check default_init(struct cpuinfo_x86 * c) - __clear_bit(X86_FEATURE_SEP, c->x86_capability); - } - --static const struct cpu_dev default_cpu = { -+static const struct cpu_dev __initconst_cf_clobber __used default_cpu = { - .c_init = default_init, - }; --static const struct cpu_dev *this_cpu = &default_cpu; -+static struct cpu_dev __ro_after_init actual_cpu; - - static DEFINE_PER_CPU(uint64_t, msr_misc_features); - void (* __ro_after_init ctxt_switch_masking)(const struct vcpu *next); -@@ -343,12 +343,13 @@ void __init early_cpu_init(void) - - c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); - switch (c->x86_vendor) { -- case X86_VENDOR_INTEL: this_cpu = &intel_cpu_dev; break; -- case X86_VENDOR_AMD: this_cpu = &amd_cpu_dev; break; -- case X86_VENDOR_CENTAUR: this_cpu = ¢aur_cpu_dev; break; -- case X86_VENDOR_SHANGHAI: this_cpu = &shanghai_cpu_dev; break; -- case X86_VENDOR_HYGON: this_cpu = &hygon_cpu_dev; break; -+ case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break; -+ case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break; -+ case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break; -+ case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break; -+ case X86_VENDOR_HYGON: actual_cpu = hygon_cpu_dev; break; - default: -+ actual_cpu = default_cpu; - printk(XENLOG_ERR - "Unrecognised or unsupported CPU vendor '%.12s'\n", - c->x86_vendor_id); -@@ -434,8 +435,8 @@ static void generic_identify(struct cpuinfo_x86 *c) - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); - c->phys_proc_id = c->apicid; - -- if (this_cpu->c_early_init) -- this_cpu->c_early_init(c); -+ if (actual_cpu.c_early_init) -+ alternative_vcall(actual_cpu.c_early_init, c); - - /* c_early_init() may have adjusted cpuid levels/features. Reread. */ - c->cpuid_level = cpuid_eax(0); -@@ -540,9 +541,8 @@ void identify_cpu(struct cpuinfo_x86 *c) - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! 
- */ -- if (this_cpu->c_init) -- this_cpu->c_init(c); -- -+ if (actual_cpu.c_init) -+ alternative_vcall(actual_cpu.c_init, c); - - if (c == &boot_cpu_data && !opt_pku) - setup_clear_cpu_cap(X86_FEATURE_PKU); -diff --git a/xen/arch/x86/cpu/hygon.c b/xen/arch/x86/cpu/hygon.c -index 361eb6fd41..0c7c97ebb7 100644 ---- a/xen/arch/x86/cpu/hygon.c -+++ b/xen/arch/x86/cpu/hygon.c -@@ -88,7 +88,7 @@ static void cf_check init_hygon(struct cpuinfo_x86 *c) - amd_log_freq(c); - } - --const struct cpu_dev hygon_cpu_dev = { -+const struct cpu_dev __initconst_cf_clobber hygon_cpu_dev = { - .c_early_init = early_init_amd, - .c_init = init_hygon, - }; -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index 532e845f66..2d439e0bd2 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -598,7 +598,7 @@ static void cf_check init_intel(struct cpuinfo_x86 *c) - setup_clear_cpu_cap(X86_FEATURE_CLWB); - } - --const struct cpu_dev intel_cpu_dev = { -+const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = { - .c_early_init = early_init_intel, - .c_init = init_intel, - }; -diff --git a/xen/arch/x86/cpu/shanghai.c b/xen/arch/x86/cpu/shanghai.c -index 95ae544f8c..910f2c32f3 100644 ---- a/xen/arch/x86/cpu/shanghai.c -+++ b/xen/arch/x86/cpu/shanghai.c -@@ -15,6 +15,6 @@ static void cf_check init_shanghai(struct cpuinfo_x86 *c) - init_intel_cacheinfo(c); - } - --const struct cpu_dev shanghai_cpu_dev = { -+const struct cpu_dev __initconst_cf_clobber shanghai_cpu_dev = { - .c_init = init_shanghai, - }; --- -2.44.0 - diff --git a/0523-VMX-tertiary-execution-control-infrastructure.patch b/0523-VMX-tertiary-execution-control-infrastructure.patch deleted file mode 100644 index e8d0b62c..00000000 --- a/0523-VMX-tertiary-execution-control-infrastructure.patch +++ /dev/null @@ -1,256 +0,0 @@ -From 91c2a92231af71a50557c65e32e2f838ae3aed14 Mon Sep 17 00:00:00 2001 -From: Jan Beulich -Date: Wed, 7 Feb 2024 13:46:11 +0100 -Subject: [PATCH 523/542] VMX: tertiary execution control infrastructure -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This is a prereq to enabling e.g. the MSRLIST feature. - -Note that the PROCBASED_CTLS3 MSR is different from other VMX feature -reporting MSRs, in that all 64 bits report allowed 1-settings. - -vVMX code is left alone, though, for the time being. 
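-
- [Since PROCBASED_CTLS3 reports allowed-1 settings in all 64 bits, the
- adjustment boils down to an AND with the MSR value plus a check that every
- required bit survived. The core of the new adjust_vmx_controls2() as a
- standalone sketch, using toy values rather than real control bits:
-
-     #include <stdint.h>
-     #include <stdio.h>
-
-     static uint64_t adjust_controls2(uint64_t min, uint64_t opt,
-                                      uint64_t msr_val, int *mismatch)
-     {
-         uint64_t ctl = (min | opt) & msr_val;  /* bit clear in MSR => stays 0 */
-
-         if (min & ~ctl)                        /* a required bit is unavailable */
-             *mismatch = 1;
-         return ctl;
-     }
-
-     int main(void)
-     {
-         int mismatch = 0;
-         uint64_t caps = 0x1f;                  /* pretend bits 0-4 are offered */
-         uint64_t ctl  = adjust_controls2(0, UINT64_C(1) << 4, caps, &mismatch);
-
-         printf("ctl=%#llx mismatch=%d\n", (unsigned long long)ctl, mismatch);
-         return 0;
-     }
-
- Unlike the 32-bit control MSRs there is no must-be-one half to honour,
- which is why this helper is simpler than adjust_vmx_controls().]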
- -Signed-off-by: Jan Beulich -Reviewed-by: Roger Pau Monné -(cherry picked from commit 878159bf259bfbd7a40312829f1ea0ce1f6645e2) ---- - xen/arch/x86/hvm/vmx/vmcs.c | 57 ++++++++++++++++++++++--- - xen/arch/x86/hvm/vmx/vmx.c | 6 +++ - xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 13 ++++++ - xen/arch/x86/include/asm/hvm/vmx/vmx.h | 1 + - xen/arch/x86/include/asm/msr-index.h | 1 + - 5 files changed, 72 insertions(+), 6 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index b5ecc51b43..49d51fb524 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -176,6 +176,7 @@ static int cf_check parse_ept_param_runtime(const char *s) - u32 vmx_pin_based_exec_control __read_mostly; - u32 vmx_cpu_based_exec_control __read_mostly; - u32 vmx_secondary_exec_control __read_mostly; -+uint64_t vmx_tertiary_exec_control __read_mostly; - u32 vmx_vmexit_control __read_mostly; - u32 vmx_vmentry_control __read_mostly; - u64 vmx_ept_vpid_cap __read_mostly; -@@ -241,10 +242,32 @@ static u32 adjust_vmx_controls( - return ctl; - } - --static bool_t cap_check(const char *name, u32 expected, u32 saw) -+static uint64_t adjust_vmx_controls2( -+ const char *name, uint64_t ctl_min, uint64_t ctl_opt, unsigned int msr, -+ bool *mismatch) -+{ -+ uint64_t vmx_msr, ctl = ctl_min | ctl_opt; -+ -+ rdmsrl(msr, vmx_msr); -+ -+ ctl &= vmx_msr; /* bit == 0 ==> must be zero */ -+ -+ /* Ensure minimum (required) set of control bits are supported. */ -+ if ( ctl_min & ~ctl ) -+ { -+ *mismatch = true; -+ printk("VMX: CPU%u has insufficient %s (%#lx; requires %#lx)\n", -+ smp_processor_id(), name, ctl, ctl_min); -+ } -+ -+ return ctl; -+} -+ -+static bool cap_check( -+ const char *name, unsigned long expected, unsigned long saw) - { - if ( saw != expected ) -- printk("VMX %s: saw %#x expected %#x\n", name, saw, expected); -+ printk("VMX %s: saw %#lx expected %#lx\n", name, saw, expected); - return saw != expected; - } - -@@ -254,6 +277,7 @@ static int vmx_init_vmcs_config(bool bsp) - u32 _vmx_pin_based_exec_control; - u32 _vmx_cpu_based_exec_control; - u32 _vmx_secondary_exec_control = 0; -+ uint64_t _vmx_tertiary_exec_control = 0; - u64 _vmx_ept_vpid_cap = 0; - u64 _vmx_misc_cap = 0; - u32 _vmx_vmexit_control; -@@ -287,7 +311,8 @@ static int vmx_init_vmcs_config(bool bsp) - opt = (CPU_BASED_ACTIVATE_MSR_BITMAP | - CPU_BASED_TPR_SHADOW | - CPU_BASED_MONITOR_TRAP_FLAG | -- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); -+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | -+ CPU_BASED_ACTIVATE_TERTIARY_CONTROLS); - _vmx_cpu_based_exec_control = adjust_vmx_controls( - "CPU-Based Exec Control", min, opt, - MSR_IA32_VMX_PROCBASED_CTLS, &mismatch); -@@ -351,6 +376,15 @@ static int vmx_init_vmcs_config(bool bsp) - MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch); - } - -+ if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS ) -+ { -+ uint64_t opt = 0; -+ -+ _vmx_tertiary_exec_control = adjust_vmx_controls2( -+ "Tertiary Exec Control", 0, opt, -+ MSR_IA32_VMX_PROCBASED_CTLS3, &mismatch); -+ } -+ - /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID available */ - if ( _vmx_secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_ENABLE_VPID) ) -@@ -481,6 +515,7 @@ static int vmx_init_vmcs_config(bool bsp) - vmx_pin_based_exec_control = _vmx_pin_based_exec_control; - vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control; - vmx_secondary_exec_control = _vmx_secondary_exec_control; -+ vmx_tertiary_exec_control = _vmx_tertiary_exec_control; - vmx_ept_vpid_cap = 
_vmx_ept_vpid_cap; - vmx_vmexit_control = _vmx_vmexit_control; - vmx_vmentry_control = _vmx_vmentry_control; -@@ -516,6 +551,9 @@ static int vmx_init_vmcs_config(bool bsp) - mismatch |= cap_check( - "Secondary Exec Control", - vmx_secondary_exec_control, _vmx_secondary_exec_control); -+ mismatch |= cap_check( -+ "Tertiary Exec Control", -+ vmx_tertiary_exec_control, _vmx_tertiary_exec_control); - mismatch |= cap_check( - "VMExit Control", - vmx_vmexit_control, _vmx_vmexit_control); -@@ -1092,6 +1130,7 @@ static int construct_vmcs(struct vcpu *v) - v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING; - - v->arch.hvm.vmx.secondary_exec_control = vmx_secondary_exec_control; -+ v->arch.hvm.vmx.tertiary_exec_control = vmx_tertiary_exec_control; - - /* - * Disable features which we don't want active by default: -@@ -1146,6 +1185,10 @@ static int construct_vmcs(struct vcpu *v) - __vmwrite(SECONDARY_VM_EXEC_CONTROL, - v->arch.hvm.vmx.secondary_exec_control); - -+ if ( cpu_has_vmx_tertiary_exec_control ) -+ __vmwrite(TERTIARY_VM_EXEC_CONTROL, -+ v->arch.hvm.vmx.tertiary_exec_control); -+ - /* MSR access bitmap. */ - if ( cpu_has_vmx_msr_bitmap ) - { -@@ -2069,10 +2112,12 @@ void vmcs_dump_vcpu(struct vcpu *v) - vmr(HOST_PERF_GLOBAL_CTRL)); - - printk("*** Control State ***\n"); -- printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", -+ printk("PinBased=%08x CPUBased=%08x\n", - vmr32(PIN_BASED_VM_EXEC_CONTROL), -- vmr32(CPU_BASED_VM_EXEC_CONTROL), -- vmr32(SECONDARY_VM_EXEC_CONTROL)); -+ vmr32(CPU_BASED_VM_EXEC_CONTROL)); -+ printk("SecondaryExec=%08x TertiaryExec=%016lx\n", -+ vmr32(SECONDARY_VM_EXEC_CONTROL), -+ vmr(TERTIARY_VM_EXEC_CONTROL)); - printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); - printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", - vmr32(EXCEPTION_BITMAP), -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index fed362bc32..26b6e4ca61 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -725,6 +725,12 @@ void vmx_update_secondary_exec_control(struct vcpu *v) - v->arch.hvm.vmx.secondary_exec_control); - } - -+void vmx_update_tertiary_exec_control(const struct vcpu *v) -+{ -+ __vmwrite(TERTIARY_VM_EXEC_CONTROL, -+ v->arch.hvm.vmx.tertiary_exec_control); -+} -+ - void vmx_update_exception_bitmap(struct vcpu *v) - { - u32 bitmap = unlikely(v->arch.hvm.vmx.vmx_realmode) -diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -index 0af021d5f5..bbb0966fc3 100644 ---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -@@ -125,6 +125,7 @@ struct vmx_vcpu { - /* Cache of cpu execution control. 
*/ - u32 exec_control; - u32 secondary_exec_control; -+ uint64_t tertiary_exec_control; - u32 exception_bitmap; - - uint64_t shadow_gs; -@@ -207,6 +208,7 @@ void vmx_vmcs_reload(struct vcpu *v); - #define CPU_BASED_RDTSC_EXITING 0x00001000 - #define CPU_BASED_CR3_LOAD_EXITING 0x00008000 - #define CPU_BASED_CR3_STORE_EXITING 0x00010000 -+#define CPU_BASED_ACTIVATE_TERTIARY_CONTROLS 0x00020000 - #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 - #define CPU_BASED_CR8_STORE_EXITING 0x00100000 - #define CPU_BASED_TPR_SHADOW 0x00200000 -@@ -271,6 +273,14 @@ extern u32 vmx_vmentry_control; - #define SECONDARY_EXEC_NOTIFY_VM_EXITING 0x80000000 - extern u32 vmx_secondary_exec_control; - -+#define TERTIARY_EXEC_LOADIWKEY_EXITING BIT(0, UL) -+#define TERTIARY_EXEC_ENABLE_HLAT BIT(1, UL) -+#define TERTIARY_EXEC_EPT_PAGING_WRITE BIT(2, UL) -+#define TERTIARY_EXEC_GUEST_PAGING_VERIFY BIT(3, UL) -+#define TERTIARY_EXEC_IPI_VIRT BIT(4, UL) -+#define TERTIARY_EXEC_VIRT_SPEC_CTRL BIT(7, UL) -+extern uint64_t vmx_tertiary_exec_control; -+ - #define VMX_EPT_EXEC_ONLY_SUPPORTED 0x00000001 - #define VMX_EPT_WALK_LENGTH_4_SUPPORTED 0x00000040 - #define VMX_EPT_MEMORY_TYPE_UC 0x00000100 -@@ -307,6 +317,8 @@ extern u64 vmx_ept_vpid_cap; - (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP) - #define cpu_has_vmx_secondary_exec_control \ - (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) -+#define cpu_has_vmx_tertiary_exec_control \ -+ (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) - #define cpu_has_vmx_ept \ - (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) - #define cpu_has_vmx_dt_exiting \ -@@ -430,6 +442,7 @@ enum vmcs_field { - VIRT_EXCEPTION_INFO = 0x0000202a, - XSS_EXIT_BITMAP = 0x0000202c, - TSC_MULTIPLIER = 0x00002032, -+ TERTIARY_VM_EXEC_CONTROL = 0x00002034, - GUEST_PHYSICAL_ADDRESS = 0x00002400, - VMCS_LINK_POINTER = 0x00002800, - GUEST_IA32_DEBUGCTL = 0x00002802, -diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmx.h b/xen/arch/x86/include/asm/hvm/vmx/vmx.h -index 8e1e42ac47..4ff19488ea 100644 ---- a/xen/arch/x86/include/asm/hvm/vmx/vmx.h -+++ b/xen/arch/x86/include/asm/hvm/vmx/vmx.h -@@ -102,6 +102,7 @@ void vmx_update_debug_state(struct vcpu *v); - void vmx_update_exception_bitmap(struct vcpu *v); - void vmx_update_cpu_exec_control(struct vcpu *v); - void vmx_update_secondary_exec_control(struct vcpu *v); -+void vmx_update_tertiary_exec_control(const struct vcpu *v); - - #define POSTED_INTR_ON 0 - #define POSTED_INTR_SN 1 -diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h -index 9b5f67711f..521079191a 100644 ---- a/xen/arch/x86/include/asm/msr-index.h -+++ b/xen/arch/x86/include/asm/msr-index.h -@@ -327,6 +327,7 @@ - #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x48f - #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x490 - #define MSR_IA32_VMX_VMFUNC 0x491 -+#define MSR_IA32_VMX_PROCBASED_CTLS3 0x492 - - /* K7/K8 MSRs. Not complete. See the architecture manual for a more - complete list. 
*/ --- -2.44.0 - diff --git a/0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch b/0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch deleted file mode 100644 index ef92b0bf..00000000 --- a/0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch +++ /dev/null @@ -1,71 +0,0 @@ -From a0bd738f9cd158306e046c7a6f3726128219e4eb Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 28 Mar 2024 12:38:32 +0000 -Subject: [PATCH 524/542] x86/spec-ctrl: Move __read_mostly data into - __ro_after_init - -These variables predate the introduction of __ro_after_init, but all qualify. -Update them to be consistent with the rest of the file. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 7a09966e7b2823b70f6d56d0cf66c11124f4a3c1) ---- - xen/arch/x86/spec_ctrl.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index ac21af2c5c..0a2de88593 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -61,17 +61,17 @@ bool __ro_after_init opt_ssbd; - int8_t __initdata opt_psfd = -1; - - int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; --int8_t __read_mostly opt_eager_fpu = -1; --int8_t __read_mostly opt_l1d_flush = -1; -+int8_t __ro_after_init opt_eager_fpu = -1; -+int8_t __ro_after_init opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); - - bool __initdata bsp_delay_spec_ctrl; --uint8_t __read_mostly default_xen_spec_ctrl; --uint8_t __read_mostly default_spec_ctrl_flags; -+uint8_t __ro_after_init default_xen_spec_ctrl; -+uint8_t __ro_after_init default_spec_ctrl_flags; - --paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr; --bool __read_mostly cpu_has_bug_l1tf; -+paddr_t __ro_after_init l1tf_addr_mask, __ro_after_init l1tf_safe_maddr; -+bool __ro_after_init cpu_has_bug_l1tf; - static unsigned int __initdata l1d_maxphysaddr; - - static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ -@@ -328,8 +328,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - } - custom_param("spec-ctrl", parse_spec_ctrl); - --int8_t __read_mostly opt_xpti_hwdom = -1; --int8_t __read_mostly opt_xpti_domu = -1; -+int8_t __ro_after_init opt_xpti_hwdom = -1; -+int8_t __ro_after_init opt_xpti_domu = -1; - - static __init void xpti_init_default(void) - { -@@ -393,8 +393,8 @@ static int __init cf_check parse_xpti(const char *s) - } - custom_param("xpti", parse_xpti); - --int8_t __read_mostly opt_pv_l1tf_hwdom = -1; --int8_t __read_mostly opt_pv_l1tf_domu = -1; -+int8_t __ro_after_init opt_pv_l1tf_hwdom = -1; -+int8_t __ro_after_init opt_pv_l1tf_domu = -1; - - static int __init cf_check parse_pv_l1tf(const char *s) - { --- -2.44.0 - diff --git a/0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch b/0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch deleted file mode 100644 index a7f82222..00000000 --- a/0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 81ebc5abe77223783da0ae567408d8addebd83a7 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Wed, 3 Apr 2024 17:43:42 +0100 -Subject: [PATCH 525/542] x86/tsx: Cope with RTM_ALWAYS_ABORT vs RTM mismatch -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -It turns out there is something wonky on some but not all CPUs with -MSR_TSX_FORCE_ABORT. 
The presence of RTM_ALWAYS_ABORT causes Xen to think -it's safe to offer HLE/RTM to guests, but in this case, XBEGIN instructions -genuinely #UD. - -Spot this case and try to back out as cleanly as we can. - -Signed-off-by: Andrew Cooper -Tested-by: Marek Marczykowski-Górecki -Acked-by: Jan Beulich -(cherry picked from commit b33f191e3ca99458fdcea1cb5a29dfa4965d1604) ---- - xen/arch/x86/tsx.c | 55 +++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 45 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c -index 80c6f4cedd..a019400c96 100644 ---- a/xen/arch/x86/tsx.c -+++ b/xen/arch/x86/tsx.c -@@ -1,5 +1,6 @@ - #include - #include -+#include - #include - - /* -@@ -9,6 +10,7 @@ - * -1 => Default, altered to 0/1 (if unspecified) by: - * - TAA heuristics/settings for speculative safety - * - "TSX vs PCR3" select for TSX memory ordering safety -+ * -2 => Implicit tsx=0 (from RTM_ALWAYS_ABORT vs RTM mismatch) - * -3 => Implicit tsx=1 (feed-through from spec-ctrl=0) - * - * This is arranged such that the bottom bit encodes whether TSX is actually -@@ -122,11 +124,50 @@ void tsx_init(void) - - if ( cpu_has_tsx_force_abort ) - { -+ uint64_t val; -+ - /* -- * On an early TSX-enable Skylake part subject to the memory -+ * On an early TSX-enabled Skylake part subject to the memory - * ordering erratum, with at least the March 2019 microcode. - */ - -+ rdmsrl(MSR_TSX_FORCE_ABORT, val); -+ -+ /* -+ * At the time of writing (April 2024), it was discovered that -+ * some parts (e.g. CoffeeLake 8th Gen, 06-9e-0a, ucode 0xf6) -+ * advertise RTM_ALWAYS_ABORT, but XBEGIN instructions #UD. Other -+ * similar parts (e.g. KabyLake Xeon-E3, 06-9e-09, ucode 0xf8) -+ * operate as expected. -+ * -+ * In this case: -+ * - RTM_ALWAYS_ABORT and MSR_TSX_FORCE_ABORT are enumerated. -+ * - XBEGIN instructions genuinely #UD. -+ * - MSR_TSX_FORCE_ABORT appears to be write-discard and fails to -+ * hold its value. -+ * - HLE and RTM are not enumerated, despite -+ * MSR_TSX_FORCE_ABORT.TSX_CPUID_CLEAR being clear. -+ * -+ * Spot RTM being unavailable without CLEAR_CPUID being set, and -+ * treat it as if no TSX is available at all. This will prevent -+ * Xen from thinking it's safe to offer HLE/RTM to VMs. -+ */ -+ if ( val == 0 && cpu_has_rtm_always_abort && !cpu_has_rtm ) -+ { -+ printk(XENLOG_ERR -+ "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RTM_ALWAYS_ABORT vs RTM mismatch\n", -+ boot_cpu_data.x86, boot_cpu_data.x86_model, -+ boot_cpu_data.x86_mask, this_cpu(cpu_sig).rev); -+ -+ setup_clear_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT); -+ setup_clear_cpu_cap(X86_FEATURE_TSX_FORCE_ABORT); -+ -+ if ( opt_tsx < 0 ) -+ opt_tsx = -2; -+ -+ goto done_probe; -+ } -+ - /* - * Probe for the June 2021 microcode which de-features TSX on - * client parts. (Note - this is a subset of parts impacted by -@@ -136,15 +177,8 @@ void tsx_init(void) - * read as zero if TSX_FORCE_ABORT.ENABLE_RTM has been set before - * we run. - */ -- if ( !has_rtm_always_abort ) -- { -- uint64_t val; -- -- rdmsrl(MSR_TSX_FORCE_ABORT, val); -- -- if ( val & TSX_ENABLE_RTM ) -- has_rtm_always_abort = true; -- } -+ if ( val & TSX_ENABLE_RTM ) -+ has_rtm_always_abort = true; - - /* - * If no explicit tsx= option is provided, pick a default. -@@ -199,6 +233,7 @@ void tsx_init(void) - setup_force_cpu_cap(X86_FEATURE_RTM); - } - } -+ done_probe: - - /* - * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts. 
--- -2.44.0 - diff --git a/0526-x86-alternatives-fix-.init-section-reference-in-_app.patch b/0526-x86-alternatives-fix-.init-section-reference-in-_app.patch deleted file mode 100644 index 00915b45..00000000 --- a/0526-x86-alternatives-fix-.init-section-reference-in-_app.patch +++ /dev/null @@ -1,43 +0,0 @@ -From e60fc805d8a2ee2822dc96715bca44ebed135a8c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 9 Apr 2024 14:50:46 +0200 -Subject: [PATCH 526/542] x86/alternatives: fix .init section reference in - _apply_alternatives() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The code in _apply_alternatives() will unconditionally attempt to read -__initdata_cf_clobber_{start,end} when called as part of applying alternatives -to a livepatch payload when Xen is using IBT. - -That leads to a page-fault as __initdata_cf_clobber_{start,end} living in -.init section will have been unmapped by the time a livepatch gets loaded. - -Fix by adding a check that limits the clobbering of endbr64 instructions to -boot time only. - -Fixes: 37ed5da851b8 ('x86/altcall: Optimise away endbr64 instruction where possible') -Signed-off-by: Roger Pau Monné -Reviewed-by: Andrew Cooper -(cherry picked from commit 4be1fef1e6572c2be0bd378902ffb62a6e73faeb) ---- - xen/arch/x86/alternative.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c -index 1d59dffc46..8356414be7 100644 ---- a/xen/arch/x86/alternative.c -+++ b/xen/arch/x86/alternative.c -@@ -338,7 +338,7 @@ static void init_or_livepatch _apply_alternatives(struct alt_instr *start, - * Clobber endbr64 instructions now that altcall has finished optimising - * all indirect branches to direct ones. - */ -- if ( force && cpu_has_xen_ibt ) -+ if ( force && cpu_has_xen_ibt && system_state < SYS_STATE_active ) - { - void *const *val; - unsigned int clobbered = 0; --- -2.44.0 - diff --git a/0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch b/0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch deleted file mode 100644 index 3dae6b67..00000000 --- a/0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch +++ /dev/null @@ -1,42 +0,0 @@ -From d2b179ba6e308769f1b37637d1c746c3dbf55cc0 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 9 Apr 2024 15:03:05 +0100 -Subject: [PATCH 527/542] x86/cpuid: Don't expose {IPRED,RRSBA,BHI}_CTRL to PV - guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -All of these are prediction-mode (i.e. CPL) based. They don't operate as -advertised in PV context. 
- -Fixes: 4dd676070684 ("x86/spec-ctrl: Expose IPRED_CTRL to guests") -Fixes: 478e4787fa64 ("x86/spec-ctrl: Expose RRSBA_CTRL to guests") -Fixes: 583f1d095052 ("x86/spec-ctrl: Expose BHI_CTRL to guests") -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 4b3da946ad7e3452761478ae683da842e7ff20d6) ---- - xen/include/public/arch-x86/cpufeatureset.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 51f238683c..63c8ac8486 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -295,9 +295,9 @@ XEN_CPUFEATURE(INTEL_PPIN, 12*32+ 0) /* Protected Processor Inventory - - /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */ - XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */ --XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*A MSR_SPEC_CTRL.IPRED_DIS_* */ --XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*A MSR_SPEC_CTRL.RRSBA_DIS_* */ --XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /*A MSR_SPEC_CTRL.BHI_DIS_S */ -+XEN_CPUFEATURE(IPRED_CTRL, 13*32+ 1) /*S MSR_SPEC_CTRL.IPRED_DIS_* */ -+XEN_CPUFEATURE(RRSBA_CTRL, 13*32+ 2) /*S MSR_SPEC_CTRL.RRSBA_DIS_* */ -+XEN_CPUFEATURE(BHI_CTRL, 13*32+ 4) /*S MSR_SPEC_CTRL.BHI_DIS_S */ - XEN_CPUFEATURE(MCDT_NO, 13*32+ 5) /*A MCDT_NO */ - - /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */ --- -2.44.0 - diff --git a/0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch b/0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch deleted file mode 100644 index 99073fca..00000000 --- a/0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch +++ /dev/null @@ -1,425 +0,0 @@ -From 164c69bcee159b6f5c9f58d49fd3b715be75146f Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 28 Mar 2024 11:57:25 +0000 -Subject: [PATCH 528/542] x86/spec-ctrl: Rename spec_ctrl_flags to scf - -XSA-455 was ultimately caused by having fields with too-similar names. - -Both {xen,last}_spec_ctrl are fields containing an architectural MSR_SPEC_CTRL -value. The spec_ctrl_flags field contains Xen-internal flags. - -To more-obviously distinguish the two, rename spec_ctrl_flags to scf, which is -also the prefix of the constants used by the fields. - -No functional change. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit c62673c4334b3372ebd4292a7ac8185357e7ea27) ---- - xen/arch/x86/acpi/power.c | 4 ++-- - xen/arch/x86/domain.c | 8 ++++---- - xen/arch/x86/hvm/svm/entry.S | 2 +- - xen/arch/x86/hvm/vmx/entry.S | 2 +- - xen/arch/x86/hvm/vmx/vmcs.c | 2 +- - xen/arch/x86/include/asm/current.h | 2 +- - xen/arch/x86/include/asm/domain.h | 2 +- - xen/arch/x86/include/asm/spec_ctrl.h | 16 ++++++++-------- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 22 +++++++++++----------- - xen/arch/x86/setup.c | 2 +- - xen/arch/x86/spec_ctrl.c | 18 +++++++++--------- - xen/arch/x86/x86_64/asm-offsets.c | 2 +- - xen/arch/x86/x86_64/compat/entry.S | 4 ++-- - xen/arch/x86/x86_64/entry.S | 2 +- - 14 files changed, 44 insertions(+), 44 deletions(-) - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index b76f673acb..5cddb0f0f6 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -246,7 +246,7 @@ static int enter_state(u32 state) - - ci = get_cpu_info(); - /* Avoid NMI/#MC using unsafe MSRs until we've reloaded microcode. 
*/ -- ci->spec_ctrl_flags &= ~SCF_IST_MASK; -+ ci->scf &= ~SCF_IST_MASK; - - ACPI_FLUSH_CPU_CACHE(); - -@@ -290,7 +290,7 @@ static int enter_state(u32 state) - panic("Missing previously available feature(s)\n"); - - /* Re-enabled default NMI/#MC use of MSRs now microcode is loaded. */ -- ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_IST_MASK); -+ ci->scf |= (default_scf & SCF_IST_MASK); - - if ( boot_cpu_has(X86_FEATURE_IBRSB) || boot_cpu_has(X86_FEATURE_IBRS) ) - { -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index aca9fa310c..228763b5e9 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2096,10 +2096,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - } - } - -- /* Update the top-of-stack block with the new spec_ctrl settings. */ -- info->spec_ctrl_flags = -- (info->spec_ctrl_flags & ~SCF_DOM_MASK) | -- (nextd->arch.spec_ctrl_flags & SCF_DOM_MASK); -+ /* Update the top-of-stack block with the new speculation settings. */ -+ info->scf = -+ (info->scf & ~SCF_DOM_MASK) | -+ (nextd->arch.scf & SCF_DOM_MASK); - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index c19e964bc6..0264e0bac2 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -103,7 +103,7 @@ __UNLIKELY_END(nsvm_hap) - /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ - - .macro svm_vmexit_cond_ibpb -- testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) -+ testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) - jz .L_skip_ibpb - - mov $MSR_PRED_CMD, %ecx -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index cdde76e138..4ee529c57a 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -111,7 +111,7 @@ UNLIKELY_END(realmode) - BUILD_BUG_ON(SCF_verw & ~0xff) - movzbl VCPU_vmx_launched(%rbx), %ecx - shl $31, %ecx -- movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax -+ movzbl CPUINFO_scf(%rsp), %eax - and $SCF_verw, %eax - or %eax, %ecx - -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index 49d51fb524..f0fb4874b8 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -1386,7 +1386,7 @@ static int construct_vmcs(struct vcpu *v) - rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D, - VMX_MSR_GUEST_LOADONLY); - -- if ( !rc && (d->arch.spec_ctrl_flags & SCF_entry_ibpb) ) -+ if ( !rc && (d->arch.scf & SCF_entry_ibpb) ) - rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB, - VMX_MSR_HOST); - -diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h -index da5e152a10..9cc8d8e3d4 100644 ---- a/xen/arch/x86/include/asm/current.h -+++ b/xen/arch/x86/include/asm/current.h -@@ -57,7 +57,7 @@ struct cpu_info { - unsigned int shadow_spec_ctrl; - uint8_t xen_spec_ctrl; - uint8_t last_spec_ctrl; -- uint8_t spec_ctrl_flags; -+ uint8_t scf; /* SCF_* */ - - /* - * The following field controls copying of the L4 page table of 64-bit -diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h -index 5293c0cde4..f90a268b01 100644 ---- a/xen/arch/x86/include/asm/domain.h -+++ b/xen/arch/x86/include/asm/domain.h -@@ -324,7 +324,7 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -- uint8_t spec_ctrl_flags; /* See SCF_DOM_MASK */ -+ uint8_t scf; /* See SCF_DOM_MASK */ - - union { - struct pv_domain pv; -diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h -index a431fea587..8fc350abe2 100644 ---- 
a/xen/arch/x86/include/asm/spec_ctrl.h -+++ b/xen/arch/x86/include/asm/spec_ctrl.h -@@ -21,10 +21,10 @@ - #define __X86_SPEC_CTRL_H__ - - /* -- * Encoding of: -- * cpuinfo.spec_ctrl_flags -- * default_spec_ctrl_flags -- * domain.spec_ctrl_flags -+ * Encoding of Xen's speculation control flags in: -+ * cpuinfo.scf -+ * default_scf -+ * domain.scf - * - * Live settings are in the top-of-stack block, because they need to be - * accessable when XPTI is active. Some settings are fixed from boot, some -@@ -94,7 +94,7 @@ extern int8_t opt_l1d_flush; - - extern bool bsp_delay_spec_ctrl; - extern uint8_t default_xen_spec_ctrl; --extern uint8_t default_spec_ctrl_flags; -+extern uint8_t default_scf; - - extern int8_t opt_xpti_hwdom, opt_xpti_domu; - -@@ -114,7 +114,7 @@ static inline void init_shadow_spec_ctrl_state(void) - - info->shadow_spec_ctrl = 0; - info->xen_spec_ctrl = default_xen_spec_ctrl; -- info->spec_ctrl_flags = default_spec_ctrl_flags; -+ info->scf = default_scf; - - /* - * For least latency, the VERW selector should be a writeable data -@@ -138,7 +138,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) - */ - info->shadow_spec_ctrl = val; - barrier(); -- info->spec_ctrl_flags |= SCF_use_shadow; -+ info->scf |= SCF_use_shadow; - barrier(); - alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, - "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); -@@ -187,7 +187,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) - * Disable shadowing before updating the MSR. There are no SMP issues - * here; only local processor ordering concerns. - */ -- info->spec_ctrl_flags &= ~SCF_use_shadow; -+ info->scf &= ~SCF_use_shadow; - barrier(); - alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, - "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index e85db1a329..8c488be048 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -51,7 +51,7 @@ - * shadowing logic. - * - * Factor 2 is harder. We maintain a shadow_spec_ctrl value, and a use_shadow -- * boolean in the per cpu spec_ctrl_flags. The synchronous use is: -+ * boolean in the per cpu scf. The synchronous use is: - * - * 1) Store guest value in shadow_spec_ctrl - * 2) Set the use_shadow boolean -@@ -98,11 +98,11 @@ - * interrupting Xen. 
- */ - .if \maybexen -- testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(scf)(%r14) - jz .L\@_skip - testb $3, UREGS_cs(%rsp) - .else -- testb $SCF_entry_ibpb, CPUINFO_spec_ctrl_flags(%rsp) -+ testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) - .endif - jz .L\@_skip - -@@ -172,8 +172,8 @@ - #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) - - .macro SPEC_CTRL_COND_VERW \ -- scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ -- sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) -+ scf=STK_REL(CPUINFO_scf, CPUINFO_error_code), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) - /* - * Requires \scf and \sel as %rsp-relative expressions - * Clobbers eflags -@@ -228,10 +228,10 @@ - testb $3, UREGS_cs(%rsp) - setnz %al - not %eax -- and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ and %al, STACK_CPUINFO_FIELD(scf)(%r14) - movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax - .else -- andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) -+ andb $~SCF_use_shadow, CPUINFO_scf(%rsp) - movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax - .endif - -@@ -250,7 +250,7 @@ - mov %eax, CPUINFO_shadow_spec_ctrl(%rsp) - - /* Set SPEC_CTRL shadowing *before* loading the guest value. */ -- orb $SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp) -+ orb $SCF_use_shadow, CPUINFO_scf(%rsp) - - mov $MSR_SPEC_CTRL, %ecx - xor %edx, %edx -@@ -328,7 +328,7 @@ - * DO_SPEC_CTRL_ENTRY maybexen=1 - * but with conditionals rather than alternatives. - */ -- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -+ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx - - test $SCF_ist_ibpb, %bl - jz .L\@_skip_ibpb -@@ -353,7 +353,7 @@ - testb $3, UREGS_cs(%rsp) - setnz %al - not %eax -- and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ and %al, STACK_CPUINFO_FIELD(scf)(%r14) - - /* Load Xen's intended value. */ - mov $MSR_SPEC_CTRL, %ecx -@@ -387,7 +387,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs - * Clobbers %rax, %rbx, %rcx, %rdx - */ -- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -+ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx - - testb $SCF_ist_sc_msr, %bl - jz .L\@_skip_sc_msr -diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c -index 0c00ea875d..d47f156711 100644 ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -1984,7 +1984,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) - - if ( bsp_delay_spec_ctrl ) - { -- info->spec_ctrl_flags &= ~SCF_use_shadow; -+ info->scf &= ~SCF_use_shadow; - barrier(); - wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl); - info->last_spec_ctrl = default_xen_spec_ctrl; -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 0a2de88593..ab81ad457b 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -69,7 +69,7 @@ static bool __initdata opt_lock_harden; - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __ro_after_init default_xen_spec_ctrl; --uint8_t __ro_after_init default_spec_ctrl_flags; -+uint8_t __ro_after_init default_scf; - - paddr_t __ro_after_init l1tf_addr_mask, __ro_after_init l1tf_safe_maddr; - bool __ro_after_init cpu_has_bug_l1tf; -@@ -1117,7 +1117,7 @@ static void __init ibpb_calculations(void) - * NMI/#MC, so can't interrupt Xen ahead of having already flushed the - * BTB. 
- */ -- default_spec_ctrl_flags |= SCF_ist_ibpb; -+ default_scf |= SCF_ist_ibpb; - } - if ( opt_ibpb_entry_hvm ) - setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_HVM); -@@ -1618,7 +1618,7 @@ void spec_ctrl_init_domain(struct domain *d) - bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && - (d->domain_id != 0 || opt_ibpb_entry_dom0)); - -- d->arch.spec_ctrl_flags = -+ d->arch.scf = - (verw ? SCF_verw : 0) | - (ibpb ? SCF_entry_ibpb : 0) | - 0; -@@ -1723,7 +1723,7 @@ void __init init_speculation_mitigations(void) - { - if ( opt_msr_sc_pv ) - { -- default_spec_ctrl_flags |= SCF_ist_sc_msr; -+ default_scf |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); - } - -@@ -1734,7 +1734,7 @@ void __init init_speculation_mitigations(void) - * Xen's value is not restored atomically. An early NMI hitting - * the VMExit path needs to restore Xen's value for safety. - */ -- default_spec_ctrl_flags |= SCF_ist_sc_msr; -+ default_scf |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - } -@@ -1869,7 +1869,7 @@ void __init init_speculation_mitigations(void) - if ( opt_rsb_pv ) - { - setup_force_cpu_cap(X86_FEATURE_SC_RSB_PV); -- default_spec_ctrl_flags |= SCF_ist_rsb; -+ default_scf |= SCF_ist_rsb; - } - - /* -@@ -1892,7 +1892,7 @@ void __init init_speculation_mitigations(void) - * possible rogue RSB speculation. - */ - if ( !cpu_has_svm ) -- default_spec_ctrl_flags |= SCF_ist_rsb; -+ default_scf |= SCF_ist_rsb; - } - - srso_calculations(hw_smt_enabled); -@@ -1905,7 +1905,7 @@ void __init init_speculation_mitigations(void) - if ( opt_eager_fpu == -1 ) - opt_eager_fpu = should_use_eager_fpu(); - -- /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ -+ /* (Re)init BSP state now that default_scf has been calculated. */ - init_shadow_spec_ctrl_state(); - - /* -@@ -2178,7 +2178,7 @@ void __init init_speculation_mitigations(void) - { - info->shadow_spec_ctrl = 0; - barrier(); -- info->spec_ctrl_flags |= SCF_use_shadow; -+ info->scf |= SCF_use_shadow; - barrier(); - } - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 4cd5938d7b..fba82d6436 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -154,7 +154,7 @@ void __dummy__(void) - OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl); - OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl); - OFFSET(CPUINFO_last_spec_ctrl, struct cpu_info, last_spec_ctrl); -- OFFSET(CPUINFO_spec_ctrl_flags, struct cpu_info, spec_ctrl_flags); -+ OFFSET(CPUINFO_scf, struct cpu_info, scf); - OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed); - OFFSET(CPUINFO_use_pv_cr3, struct cpu_info, use_pv_cr3); - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 3b2fbcd873..fab85eb733 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -164,8 +164,8 @@ ENTRY(compat_restore_all_guest) - - /* Account for ev/ec having already been popped off the stack. 
*/ - SPEC_CTRL_COND_VERW \ -- scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ -- sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) -+ scf=STK_REL(CPUINFO_scf, CPUINFO_rip), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) - - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index ef517e2945..50fc048834 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -692,7 +692,7 @@ UNLIKELY_END(exit_cr3) - /* - * When the CPU pushed this exception frame, it zero-extended eflags. - * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of -- * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, -+ * scf and ver_sel above eflags, as we can't use any GPRs, - * and we're at a random place on the stack, not in a CPUFINFO block. - * - * Account for ev/ec having already been popped off the stack. --- -2.44.0 - diff --git a/0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch b/0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch deleted file mode 100644 index 431c1880..00000000 --- a/0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 05f6fe7c39fe0c44807a51f6aa1d8ee1a38de197 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 22 Mar 2024 11:41:41 +0000 -Subject: [PATCH 529/542] x86/spec-ctrl: Rework conditional safety for - SPEC_CTRL_ENTRY_* -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Right now, we have a mix of safety strategies in different blocks, making the -logic fragile and hard to follow. - -Start addressing this by having a safety LFENCE at the end of the blocks, -which can be patched out if other safety criteria are met. This will allow us -to simplify the sub-blocks. For SPEC_CTRL_ENTRY_FROM_IST, simply leave an -LFENCE unconditionally at the end; the IST path is not a fast-path by any -stretch of the imagination. - -For SPEC_CTRL_ENTRY_FROM_INTR, the existing description was incorrect. The -IRET #GP path is non-fatal but can occur with the guest's choice of -MSR_SPEC_CTRL. It is safe to skip the flush/barrier-like protections when -interrupting Xen, but we must run DO_SPEC_CTRL_ENTRY irrespective. - -This will skip RSB stuffing which was previously unconditional even when -interrupting Xen. - -AFAICT, this is a missing cleanup from commit 3fffaf9c13e9 ("x86/entry: Avoid -using alternatives in NMI/#MC paths") where we split the IST entry path out of -the main INTR entry path. - -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 94896de1a98c4289fe6fef9e16ef99fc6ef2efc4) ---- - xen/arch/x86/hvm/vmx/entry.S | 1 + - xen/arch/x86/include/asm/cpufeatures.h | 4 ++ - xen/arch/x86/include/asm/spec_ctrl_asm.h | 27 ++++++------ - xen/arch/x86/spec_ctrl.c | 52 ++++++++++++++++++++++++ - 4 files changed, 72 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 4ee529c57a..8d5b683879 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -43,6 +43,7 @@ ENTRY(vmx_asm_vmexit_handler) - wrmsr - .endm - ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM -+ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_VMX - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - /* Hardware clears MSR_DEBUGCTL on VMExit. Reinstate it if debugging Xen. 
*/ -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index 7e8221fd85..6422c66b0f 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -52,5 +52,9 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for - #define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ - #define X86_BUG_IBPB_NO_RET X86_BUG( 3) /* IBPB doesn't flush the RSB/RAS */ - -+#define X86_SPEC_NO_LFENCE_ENTRY_PV X86_BUG(16) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_PV. */ -+#define X86_SPEC_NO_LFENCE_ENTRY_INTR X86_BUG(17) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_INTR. */ -+#define X86_SPEC_NO_LFENCE_ENTRY_VMX X86_BUG(18) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_VMX. */ -+ - /* Total number of capability words, inc synth and bug words. */ - #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 8c488be048..e58e5110d9 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -273,25 +273,37 @@ - - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV -+ -+ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV - .endm - - /* - * Used after an exception or maskable interrupt, hitting Xen or PV context. -- * There will either be a guest speculation context, or (barring fatal -- * exceptions) a well-formed Xen speculation context. -+ * There will either be a guest speculation context, or a well-formed Xen -+ * speculation context, with the exception of one case. IRET #GP handling may -+ * have a guest choice of MSR_SPEC_CTRL. -+ * -+ * Therefore, we can skip the flush/barrier-like protections when hitting Xen, -+ * but we must still run the mode-based protections. - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR - /* - * Requires %rsp=regs, %r14=stack_end, %rdx=0 - * Clobbers %rax, %rcx, %rdx - */ -+ testb $3, UREGS_cs(%rsp) -+ jz .L\@_skip -+ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ - X86_FEATURE_IBPB_ENTRY_PV - - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV - -+.L\@_skip: - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV -+ -+ ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR - .endm - - /* -@@ -360,18 +372,9 @@ - movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax - wrmsr - -- /* Opencoded UNLIKELY_START() with no condition. */ --UNLIKELY_DISPATCH_LABEL(\@_serialise): -- .subsection 1 -- /* -- * In the case that we might need to set SPEC_CTRL.IBRS for safety, we -- * need to ensure that an attacker can't poison the `jz .L\@_skip_wrmsr` -- * to speculate around the WRMSR. As a result, we need a dispatch -- * serialising instruction in the else clause. -- */ - .L\@_skip_msr_spec_ctrl: -+ - lfence -- UNLIKELY_END(\@_serialise) - .endm - - /* -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index ab81ad457b..2b22deb891 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -2154,6 +2154,58 @@ void __init init_speculation_mitigations(void) - - print_details(thunk); - -+ /* -+ * With the alternative blocks now chosen, see if we need any other -+ * adjustments for safety. -+ * -+ * We compile the LFENCE in, and patch it out if it's not needed. -+ * -+ * Notes: -+ * - SPEC_CTRL_ENTRY_FROM_SVM doesn't need an LFENCE because it has an -+ * unconditional STGI. 
-+ * - SPEC_CTRL_ENTRY_FROM_IST handles its own safety, without the use of -+ * alternatives. -+ * - DO_OVERWRITE_RSB has conditional branches in it, but it's an inline -+ * sequence. It is considered safe for uarch reasons. -+ */ -+ { -+ /* -+ * SPEC_CTRL_ENTRY_FROM_PV conditional safety -+ * -+ * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an -+ * unconditional WRMSR as the last action. -+ * -+ * If we have it, or we're not using any prior conditional mitigation, -+ * then it's safe to drop the LFENCE. -+ */ -+ if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || -+ !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ) -+ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV); -+ -+ /* -+ * SPEC_CTRL_ENTRY_FROM_INTR conditional safety -+ * -+ * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an -+ * unconditional WRMSR as the last action. -+ * -+ * If we have it, or we have no protections active in the block that -+ * is skipped when interrupting guest context, then it's safe to drop -+ * the LFENCE. -+ */ -+ if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || -+ (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && -+ !boot_cpu_has(X86_FEATURE_SC_RSB_PV)) ) -+ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_INTR); -+ -+ /* -+ * SPEC_CTRL_ENTRY_FROM_VMX conditional safety -+ * -+ * Currently there are no safety actions with conditional branches, so -+ * no need for the extra safety LFENCE. -+ */ -+ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); -+ } -+ - /* - * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard - * any firmware settings. For performance reasons, when safe to do so, we --- -2.44.0 - diff --git a/0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch b/0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch deleted file mode 100644 index 201947fa..00000000 --- a/0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 687691733f4834b7edfd52cae6339d43257a19b3 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 22 Mar 2024 15:52:06 +0000 -Subject: [PATCH 530/542] x86/entry: Arrange for %r14 to be STACK_END across - SPEC_CTRL_ENTRY_FROM_PV - -Other SPEC_CTRL_* paths already use %r14 like this, and it will allow for -simplifications. - -All instances of SPEC_CTRL_ENTRY_FROM_PV are followed by a GET_STACK_END() -invocation, so this change is only really logic and register shuffling. - -No functional change. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 22390697bf1b4cd3024f2d10893dec3c3ec08a9c) ---- - xen/arch/x86/x86_64/compat/entry.S | 4 ++- - xen/arch/x86/x86_64/entry.S | 44 ++++++++++++++++-------------- - 2 files changed, 27 insertions(+), 21 deletions(-) - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index fab85eb733..a32b95f7c3 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -18,6 +18,8 @@ ENTRY(entry_int82) - movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ - -+ GET_STACK_END(14) -+ - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - -@@ -25,7 +27,7 @@ ENTRY(entry_int82) - - CR4_PV32_RESTORE - -- GET_CURRENT(bx) -+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx - - mov %rsp, %rdi - call do_entry_int82 -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 50fc048834..78c00bdd19 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -266,21 +266,22 @@ ENTRY(lstar_enter) - movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - -+ GET_STACK_END(14) -+ - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -- GET_STACK_END(bx) -- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - test %rcx, %rcx - jz .Llstar_cr3_okay -- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) -+ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) - mov %rcx, %cr3 - /* %r12 is still zero at this point. */ -- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - .Llstar_cr3_okay: - sti - -- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx -+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx - testb $TF_kernel_mode,VCPU_thread_flags(%rbx) - jz switch_to_kernel - -@@ -303,23 +304,24 @@ ENTRY(cstar_enter) - movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - -+ GET_STACK_END(14) -+ - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -- GET_STACK_END(bx) -- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - test %rcx, %rcx - jz .Lcstar_cr3_okay -- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) -+ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) - mov %rcx, %cr3 - /* %r12 is still zero at this point. */ -- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - .Lcstar_cr3_okay: - sti - - CR4_PV32_RESTORE - -- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx -+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx - - #ifdef CONFIG_PV32 - movq VCPU_domain(%rbx), %rcx -@@ -344,23 +346,24 @@ GLOBAL(sysenter_eflags_saved) - movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - -+ GET_STACK_END(14) -+ - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -- GET_STACK_END(bx) - /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */ - orl $X86_EFLAGS_IF, UREGS_eflags(%rsp) -- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - test %rcx, %rcx - jz .Lsyse_cr3_okay -- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) -+ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) - mov %rcx, %cr3 - /* %r12 is still zero at this point. */ -- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - .Lsyse_cr3_okay: - sti - -- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx -+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx - cmpb $0,VCPU_sysenter_disables_events(%rbx) - movq VCPU_sysenter_addr(%rbx),%rax - setne %cl -@@ -398,17 +401,18 @@ ENTRY(int80_direct_trap) - movl $0x80, EFRAME_entry_vector(%rsp) - SAVE_ALL - -+ GET_STACK_END(14) -+ - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - -- GET_STACK_END(bx) -- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx -+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx - test %rcx, %rcx - jz .Lint80_cr3_okay -- movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) -+ movb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) - mov %rcx, %cr3 - /* %r12 is still zero at this point. */ -- mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) -+ mov %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14) - .Lint80_cr3_okay: - sti - -@@ -418,7 +422,7 @@ UNLIKELY_START(ne, msi_check) - call check_for_unexpected_msi - UNLIKELY_END(msi_check) - -- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx -+ movq STACK_CPUINFO_FIELD(current_vcpu)(%r14), %rbx - - mov VCPU_trap_ctxt(%rbx), %rsi - mov VCPU_domain(%rbx), %rax --- -2.44.0 - diff --git a/0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch b/0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch deleted file mode 100644 index 2d76ef69..00000000 --- a/0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch +++ /dev/null @@ -1,122 +0,0 @@ -From d2313bd20d36a452e0b4906da4814149a18e5acf Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 22 Mar 2024 12:08:02 +0000 -Subject: [PATCH 531/542] x86/spec_ctrl: Hold SCF in %ebx across - SPEC_CTRL_ENTRY_{PV,INTR} - -... as we do in the exit paths too. This will allow simplification to the -sub-blocks. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 9607aeb6602b8ed9962404de3f5f90170ffddb66) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 +++++++--- - xen/arch/x86/x86_64/compat/entry.S | 2 +- - xen/arch/x86/x86_64/entry.S | 12 ++++++------ - 3 files changed, 14 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index e58e5110d9..67f6963e8d 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -263,9 +263,11 @@ - */ - .macro SPEC_CTRL_ENTRY_FROM_PV - /* -- * Requires %rsp=regs/cpuinfo, %rdx=0 -- * Clobbers %rax, %rcx, %rdx -+ * Requires %rsp=regs/cpuinfo, %r14=stack_end, %rdx=0 -+ * Clobbers %rax, %rbx, %rcx, %rdx - */ -+ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx -+ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ - X86_FEATURE_IBPB_ENTRY_PV - -@@ -289,8 +291,10 @@ - .macro SPEC_CTRL_ENTRY_FROM_INTR - /* - * Requires %rsp=regs, %r14=stack_end, %rdx=0 -- * Clobbers %rax, %rcx, %rdx -+ * Clobbers %rax, %rbx, %rcx, %rdx - */ -+ movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx -+ - testb $3, UREGS_cs(%rsp) - jz .L\@_skip - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index a32b95f7c3..ff462a92e0 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -20,7 +20,7 @@ ENTRY(entry_int82) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - sti -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 78c00bdd19..801d241337 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -268,7 +268,7 @@ ENTRY(lstar_enter) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! 
`ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -306,7 +306,7 @@ ENTRY(cstar_enter) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -348,7 +348,7 @@ GLOBAL(sysenter_eflags_saved) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - /* PUSHF above has saved EFLAGS.IF clear (the caller had it set). */ -@@ -403,7 +403,7 @@ ENTRY(int80_direct_trap) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -713,7 +713,7 @@ ENTRY(common_interrupt) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -747,7 +747,7 @@ GLOBAL(handle_exception) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx --- -2.44.0 - diff --git a/0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch b/0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch deleted file mode 100644 index 0d1b8170..00000000 --- a/0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch +++ /dev/null @@ -1,95 +0,0 @@ -From b73f37b91ce05c28cb998ef4870198922fa2b17c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 22 Mar 2024 14:33:17 +0000 -Subject: [PATCH 532/542] x86/spec-ctrl: Simplify DO_COND_IBPB -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -With the prior refactoring, SPEC_CTRL_ENTRY_{PV,INTR} both load SCF into %ebx, -and handle the conditional safety including skipping if interrupting Xen. - -Therefore, we can drop the maybexen parameter and the conditional safety. - -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 2378d16a931de0e62c03669169989e9437306abe) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 30 +++++++----------------- - 1 file changed, 8 insertions(+), 22 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 67f6963e8d..8d171ecca2 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -87,33 +87,21 @@ - * - SPEC_CTRL_EXIT_TO_{SVM,VMX} - */ - --.macro DO_SPEC_CTRL_COND_IBPB maybexen:req -+.macro DO_COND_IBPB - /* -- * Requires %rsp=regs (also cpuinfo if !maybexen) -- * Requires %r14=stack_end (if maybexen), %rdx=0 -- * Clobbers %rax, %rcx, %rdx -+ * Requires %rbx=SCF, %rdx=0 -+ * Clobbers %rax, %rcx - * -- * Conditionally issue IBPB if SCF_entry_ibpb is active. 
In the maybexen -- * case, we can safely look at UREGS_cs to skip taking the hit when -- * interrupting Xen. -+ * Conditionally issue IBPB if SCF_entry_ibpb is active. - */ -- .if \maybexen -- testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(scf)(%r14) -- jz .L\@_skip -- testb $3, UREGS_cs(%rsp) -- .else -- testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) -- .endif -+ testb $SCF_entry_ibpb, %bl - jz .L\@_skip - - mov $MSR_PRED_CMD, %ecx - mov $PRED_CMD_IBPB, %eax - wrmsr -- jmp .L\@_done - - .L\@_skip: -- lfence --.L\@_done: - .endm - - .macro DO_OVERWRITE_RSB tmp=rax xu -@@ -268,8 +256,7 @@ - */ - movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx - -- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ -- X86_FEATURE_IBPB_ENTRY_PV -+ ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV - - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV - -@@ -298,8 +285,7 @@ - testb $3, UREGS_cs(%rsp) - jz .L\@_skip - -- ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ -- X86_FEATURE_IBPB_ENTRY_PV -+ ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV - - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV - -@@ -339,7 +325,7 @@ - * Clobbers %rax, %rbx, %rcx, %rdx - * - * This is logical merge of: -- * DO_SPEC_CTRL_COND_IBPB maybexen=0 -+ * DO_COND_IBPB - * DO_OVERWRITE_RSB - * DO_SPEC_CTRL_ENTRY maybexen=1 - * but with conditionals rather than alternatives. --- -2.44.0 - diff --git a/0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch b/0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch deleted file mode 100644 index ae1d5c15..00000000 --- a/0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch +++ /dev/null @@ -1,183 +0,0 @@ -From 046f90e1b5c72ebc609eb1629c80cf5e077da02b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Mon, 25 Mar 2024 11:09:35 +0000 -Subject: [PATCH 533/542] x86/spec-ctrl: Detail the safety properties in - SPEC_CTRL_ENTRY_* - -The complexity is getting out of hand. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 40dea83b75386cb693481cf340024ce093be5c0f) ---- - xen/arch/x86/hvm/svm/entry.S | 14 ++++++ - xen/arch/x86/hvm/vmx/entry.S | 14 ++++++ - xen/arch/x86/include/asm/spec_ctrl_asm.h | 59 ++++++++++++++++++++++++ - 3 files changed, 87 insertions(+) - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 0264e0bac2..58d8a1bffa 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -102,6 +102,11 @@ __UNLIKELY_END(nsvm_hap) - - /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ - -+ /* -+ * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular -+ * making type-confused RETs safe to use. This is not needed on Zen5 -+ * and later parts when SRSO_MSR_FIX (BP-SPEC-REDUCE) is in use. -+ */ - .macro svm_vmexit_cond_ibpb - testb $SCF_entry_ibpb, CPUINFO_scf(%rsp) - jz .L_skip_ibpb -@@ -113,8 +118,17 @@ __UNLIKELY_END(nsvm_hap) - .endm - ALTERNATIVE "", svm_vmexit_cond_ibpb, X86_FEATURE_IBPB_ENTRY_HVM - -+ /* -+ * RSB (RAS/RAP) stuffing is to prevents RET predictions following guest -+ * entries. This is not needed on Zen4 and later, when AutoIBRS is in -+ * use. -+ */ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM - -+ /* -+ * Restore Xen's MSR_SPEC_CTRL setting, making indirect CALLs/JMPs -+ * safe to use. The guest's setting resides in the VMCB. 
-+ */ - .macro svm_vmexit_spec_ctrl - movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax - movzbl CPUINFO_last_spec_ctrl(%rsp), %edx -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 8d5b683879..008d76a6e1 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -34,8 +34,22 @@ ENTRY(vmx_asm_vmexit_handler) - mov %rax,VCPU_hvm_guest_cr2(%rbx) - - /* SPEC_CTRL_ENTRY_FROM_VMX Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ -+ /* -+ * RSB stuffing is to prevents RET predictions following guest -+ * entries. This is *not* sufficient to flush all RSB entries on -+ * parts enumerating eIBRS, although the following restore_spec_ctrl -+ * does covers us. -+ */ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM - -+ /* -+ * Restore Xen's MSR_SPEC_CTRL setting. The guest's value resides in -+ * the MSR load/save list. For Legacy IBRS, this flushes/inhibits -+ * indirect predictions and does not flush the RSB. For eIBRS, this -+ * prevents CALLs/JMPs using predictions learnt at a lower predictor -+ * mode, and it flushes the RSB. On eIBRS parts that also suffer from -+ * PBRSB, the prior RSB stuffing suffices to make the RSB safe. -+ */ - .macro restore_spec_ctrl - mov $MSR_SPEC_CTRL, %ecx - movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 8d171ecca2..9531d046d7 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -256,10 +256,32 @@ - */ - movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx - -+ /* -+ * For all safety notes, 32bit PV guest kernels run in Ring 1 and are -+ * therefore supervisor (== Xen) in the architecture. As a result, most -+ * hardware isolation techniques do not work. -+ */ -+ -+ /* -+ * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making -+ * type-confused RETs safe to use. This is not needed on Zen5 and later -+ * parts when SRSO_U/S_NO is enumerated. -+ */ - ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV - -+ /* -+ * RSB stuffing is to prevent RET predictions following guest entries. -+ * This is not needed if SMEP is active and the RSB is full-width. -+ */ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV - -+ /* -+ * Only used on Intel parts. Restore Xen's MSR_SPEC_CTRL setting. The -+ * guest can't change it's value behind Xen's back. For Legacy IBRS, this -+ * flushes/inhibits indirect predictions and does not flush the RSB. For -+ * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower -+ * predictor mode, and it flushes the RSB. -+ */ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV - -@@ -282,6 +304,14 @@ - */ - movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx - -+ /* -+ * All safety notes the same as SPEC_CTRL_ENTRY_FROM_PV, although there is -+ * a conditional jump skipping some actions when interrupting Xen. -+ * -+ * On Intel parts, the IRET #GP path ends up here with the guest's choice -+ * of MSR_SPEC_CTRL. -+ */ -+ - testb $3, UREGS_cs(%rsp) - jz .L\@_skip - -@@ -332,6 +362,19 @@ - */ - movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx - -+ /* -+ * For all safety notes, 32bit PV guest kernels run in Ring 1 and are -+ * therefore supervisor (== Xen) in the architecture. As a result, most -+ * hardware isolation techniques do not work. -+ */ -+ -+ /* -+ * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making -+ * type-confused RETs safe to use. 
This is not needed on Zen5 and later -+ * parts when SRSO_U/S_NO is enumerated. The SVM path takes care of -+ * Host/Guest interactions prior to clearing GIF, and it's not used on the -+ * VMX path. -+ */ - test $SCF_ist_ibpb, %bl - jz .L\@_skip_ibpb - -@@ -341,6 +384,12 @@ - - .L\@_skip_ibpb: - -+ /* -+ * RSB stuffing is to prevent RET predictions following guest entries. -+ * SCF_ist_rsb is active if either PV or HVM protections are needed. The -+ * VMX path cannot guarantee to make the RSB safe ahead of taking an IST -+ * vector. -+ */ - test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - -@@ -348,6 +397,16 @@ - - .L\@_skip_rsb: - -+ /* -+ * Only used on Intel parts. Restore Xen's MSR_SPEC_CTRL setting. PV -+ * guests can't change their value behind Xen's back. HVM guests have -+ * their value stored in the MSR load/save list. For Legacy IBRS, this -+ * flushes/inhibits indirect predictions and does not flush the RSB. For -+ * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower -+ * predictor mode, and it flushes the RSB. On eIBRS parts that also -+ * suffer from PBRSB, the prior RSB stuffing suffices to make the RSB -+ * safe. -+ */ - test $SCF_ist_sc_msr, %bl - jz .L\@_skip_msr_spec_ctrl - --- -2.44.0 - diff --git a/0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch b/0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch deleted file mode 100644 index 21c885f4..00000000 --- a/0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch +++ /dev/null @@ -1,206 +0,0 @@ -From 587298a707ea6c9afd3565a1f68fd43bc21038e9 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Thu, 15 Feb 2024 17:46:53 +0100 -Subject: [PATCH 534/542] x86/vmx: Add support for virtualize SPEC_CTRL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The feature is defined in the tertiary exec control, and is available starting -from Sapphire Rapids and Alder Lake CPUs. - -When enabled, two extra VMCS fields are used: SPEC_CTRL mask and shadow. Bits -set in mask are not allowed to be toggled by the guest (either set or clear) -and the value in the shadow field is the value the guest expects to be in the -SPEC_CTRL register. - -By using it the hypervisor can force the value of SPEC_CTRL bits behind the -guest back without having to trap all accesses to SPEC_CTRL, note that no bits -are forced into the guest as part of this patch. It also allows getting rid of -SPEC_CTRL in the guest MSR load list, since the value in the shadow field will -be loaded by the hardware on vmentry. 
- -Signed-off-by: Roger Pau Monné -Signed-off-by: Andrew Cooper -(cherry picked from commit 97c5b8b657e41a6645de9d40713b881234417b49) ---- - xen/arch/x86/hvm/vmx/vmcs.c | 12 +++++++- - xen/arch/x86/hvm/vmx/vmx.c | 37 ++++++++++++++++++++----- - xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 5 ++++ - xen/arch/x86/include/asm/msr.h | 7 +++-- - 4 files changed, 51 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index f0fb4874b8..11464c60ed 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -215,6 +215,7 @@ static void __init vmx_display_features(void) - P(cpu_has_vmx_tsc_scaling, "TSC Scaling"); - P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection"); - P(cpu_has_vmx_notify_vm_exiting, "Notify VM Exit"); -+ P(cpu_has_vmx_virt_spec_ctrl, "Virtualize SPEC_CTRL"); - #undef P - - if ( !printed ) -@@ -378,7 +379,7 @@ static int vmx_init_vmcs_config(bool bsp) - - if ( _vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS ) - { -- uint64_t opt = 0; -+ uint64_t opt = TERTIARY_EXEC_VIRT_SPEC_CTRL; - - _vmx_tertiary_exec_control = adjust_vmx_controls2( - "Tertiary Exec Control", 0, opt, -@@ -1377,6 +1378,12 @@ static int construct_vmcs(struct vcpu *v) - if ( cpu_has_vmx_tsc_scaling ) - __vmwrite(TSC_MULTIPLIER, d->arch.hvm.tsc_scaling_ratio); - -+ if ( cpu_has_vmx_virt_spec_ctrl ) -+ { -+ __vmwrite(SPEC_CTRL_MASK, 0); -+ __vmwrite(SPEC_CTRL_SHADOW, 0); -+ } -+ - /* will update HOST & GUEST_CR3 as reqd */ - paging_update_paging_modes(v); - -@@ -2087,6 +2094,9 @@ void vmcs_dump_vcpu(struct vcpu *v) - if ( v->arch.hvm.vmx.secondary_exec_control & - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY ) - printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS)); -+ if ( cpu_has_vmx_virt_spec_ctrl ) -+ printk("SPEC_CTRL mask = 0x%016lx shadow = 0x%016lx\n", -+ vmr(SPEC_CTRL_MASK), vmr(SPEC_CTRL_SHADOW)); - - printk("*** Host State ***\n"); - printk("RIP = 0x%016lx (%ps) RSP = 0x%016lx\n", -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 26b6e4ca61..38d6d78607 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -759,23 +759,28 @@ static void cf_check vmx_cpuid_policy_changed(struct vcpu *v) - /* - * We can safely pass MSR_SPEC_CTRL through to the guest, even if STIBP - * isn't enumerated in hardware, as SPEC_CTRL_STIBP is ignored. -+ * -+ * If VMX_VIRT_SPEC_CTRL is available, it is activated by default and the -+ * guest MSR_SPEC_CTRL value lives in the VMCS. Otherwise, it lives in -+ * the MSR load/save list. - */ - if ( cp->feat.ibrsb ) - { - vmx_clear_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); - -- rc = vmx_add_guest_msr(v, MSR_SPEC_CTRL, 0); -- if ( rc ) -- goto out; -+ if ( !cpu_has_vmx_virt_spec_ctrl ) -+ { -+ rc = vmx_add_guest_msr(v, MSR_SPEC_CTRL, 0); -+ if ( rc ) -+ goto out; -+ } - } - else - { - vmx_set_msr_intercept(v, MSR_SPEC_CTRL, VMX_MSR_RW); - -- rc = vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST); -- if ( rc && rc != -ESRCH ) -- goto out; -- rc = 0; /* Tolerate -ESRCH */ -+ if ( !cpu_has_vmx_virt_spec_ctrl ) -+ vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST); - } - - /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */ -@@ -2592,6 +2597,10 @@ static uint64_t cf_check vmx_get_reg(struct vcpu *v, unsigned int reg) - switch ( reg ) - { - case MSR_SPEC_CTRL: -+ if ( cpu_has_vmx_virt_spec_ctrl ) -+ /* Guest value in VMCS - fetched below. 
*/ -+ break; -+ - rc = vmx_read_guest_msr(v, reg, &val); - if ( rc ) - { -@@ -2612,6 +2621,11 @@ static uint64_t cf_check vmx_get_reg(struct vcpu *v, unsigned int reg) - vmx_vmcs_enter(v); - switch ( reg ) - { -+ case MSR_SPEC_CTRL: -+ ASSERT(cpu_has_vmx_virt_spec_ctrl); -+ __vmread(SPEC_CTRL_SHADOW, &val); -+ break; -+ - case MSR_IA32_BNDCFGS: - __vmread(GUEST_BNDCFGS, &val); - break; -@@ -2636,6 +2650,10 @@ static void cf_check vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val) - switch ( reg ) - { - case MSR_SPEC_CTRL: -+ if ( cpu_has_vmx_virt_spec_ctrl ) -+ /* Guest value in VMCS - set below. */ -+ break; -+ - rc = vmx_write_guest_msr(v, reg, val); - if ( rc ) - { -@@ -2650,6 +2668,11 @@ static void cf_check vmx_set_reg(struct vcpu *v, unsigned int reg, uint64_t val) - vmx_vmcs_enter(v); - switch ( reg ) - { -+ case MSR_SPEC_CTRL: -+ ASSERT(cpu_has_vmx_virt_spec_ctrl); -+ __vmwrite(SPEC_CTRL_SHADOW, val); -+ break; -+ - case MSR_IA32_BNDCFGS: - __vmwrite(GUEST_BNDCFGS, val); - break; -diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -index bbb0966fc3..63074a49c0 100644 ---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -@@ -281,6 +281,9 @@ extern u32 vmx_secondary_exec_control; - #define TERTIARY_EXEC_VIRT_SPEC_CTRL BIT(7, UL) - extern uint64_t vmx_tertiary_exec_control; - -+#define cpu_has_vmx_virt_spec_ctrl \ -+ (vmx_tertiary_exec_control & TERTIARY_EXEC_VIRT_SPEC_CTRL) -+ - #define VMX_EPT_EXEC_ONLY_SUPPORTED 0x00000001 - #define VMX_EPT_WALK_LENGTH_4_SUPPORTED 0x00000040 - #define VMX_EPT_MEMORY_TYPE_UC 0x00000100 -@@ -443,6 +446,8 @@ enum vmcs_field { - XSS_EXIT_BITMAP = 0x0000202c, - TSC_MULTIPLIER = 0x00002032, - TERTIARY_VM_EXEC_CONTROL = 0x00002034, -+ SPEC_CTRL_MASK = 0x0000204a, -+ SPEC_CTRL_SHADOW = 0x0000204c, - GUEST_PHYSICAL_ADDRESS = 0x00002400, - VMCS_LINK_POINTER = 0x00002800, - GUEST_IA32_DEBUGCTL = 0x00002802, -diff --git a/xen/arch/x86/include/asm/msr.h b/xen/arch/x86/include/asm/msr.h -index adda736efc..0309b7dfbe 100644 ---- a/xen/arch/x86/include/asm/msr.h -+++ b/xen/arch/x86/include/asm/msr.h -@@ -290,8 +290,11 @@ struct vcpu_msrs - * For PV guests, this holds the guest kernel value. It is accessed on - * every entry/exit path. - * -- * For VT-x guests, the guest value is held in the MSR guest load/save -- * list. -+ * For VT-x guests, one of two situations exist: -+ * -+ * - If hardware supports virtualized MSR_SPEC_CTRL, it is active by -+ * default and the guest value lives in the VMCS. -+ * - Otherwise, the guest value is held in the MSR load/save list. - * - * For SVM, the guest value lives in the VMCB, and hardware saves/restores - * the host value automatically. However, guests run with the OR of the --- -2.44.0 - diff --git a/0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch b/0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch deleted file mode 100644 index 7b21004e..00000000 --- a/0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch +++ /dev/null @@ -1,145 +0,0 @@ -From e32ff92de8905f35b7e0e44b53100271b5992ce2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 26 Mar 2024 22:43:18 +0000 -Subject: [PATCH 535/542] x86/spec-ctrl: Widen the {xen,last,default}_spec_ctrl - fields - -Right now, they're all bytes, but MSR_SPEC_CTRL has been steadily gaining new -features. - -No functional change. 
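-
-A hypothetical sketch of why the width matters (not from the commit; bit
-positions per MSR_SPEC_CTRL, where IBRS is bit 0 and BHI_DIS_S is bit 10):
-
-    #include <stdint.h>
-
-    #define SPEC_CTRL_IBRS      (1u <<  0)
-    #define SPEC_CTRL_BHI_DIS_S (1u << 10)   /* no longer fits in a byte */
-
-    uint8_t      narrow = SPEC_CTRL_IBRS | SPEC_CTRL_BHI_DIS_S; /* truncates to 0x001 */
-    unsigned int wide   = SPEC_CTRL_IBRS | SPEC_CTRL_BHI_DIS_S; /* keeps 0x401 */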
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 45dac88e78e8a2d9d8738eef884fe6730faf9e67) ---- - xen/arch/x86/hvm/svm/entry.S | 10 +++++----- - xen/arch/x86/hvm/vmx/entry.S | 2 +- - xen/arch/x86/include/asm/current.h | 4 ++-- - xen/arch/x86/include/asm/spec_ctrl.h | 2 +- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 6 +++--- - xen/arch/x86/spec_ctrl.c | 2 +- - 6 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 58d8a1bffa..8779856fb5 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -63,14 +63,14 @@ __UNLIKELY_END(nsvm_hap) - /* SPEC_CTRL_EXIT_TO_SVM Req: b=curr %rsp=regs/cpuinfo, Clob: acd */ - .macro svm_vmentry_spec_ctrl - mov VCPU_arch_msrs(%rbx), %rax -- movzbl CPUINFO_last_spec_ctrl(%rsp), %edx -+ mov CPUINFO_last_spec_ctrl(%rsp), %edx - mov VCPUMSR_spec_ctrl_raw(%rax), %eax - cmp %edx, %eax - je 1f /* Skip write if value is correct. */ - mov $MSR_SPEC_CTRL, %ecx - xor %edx, %edx - wrmsr -- mov %al, CPUINFO_last_spec_ctrl(%rsp) -+ mov %eax, CPUINFO_last_spec_ctrl(%rsp) - 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. */ - .endm - ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM -@@ -130,14 +130,14 @@ __UNLIKELY_END(nsvm_hap) - * safe to use. The guest's setting resides in the VMCB. - */ - .macro svm_vmexit_spec_ctrl -- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax -- movzbl CPUINFO_last_spec_ctrl(%rsp), %edx -+ mov CPUINFO_xen_spec_ctrl(%rsp), %eax -+ mov CPUINFO_last_spec_ctrl(%rsp), %edx - cmp %edx, %eax - je 1f /* Skip write if value is correct. */ - mov $MSR_SPEC_CTRL, %ecx - xor %edx, %edx - wrmsr -- mov %al, CPUINFO_last_spec_ctrl(%rsp) -+ mov %eax, CPUINFO_last_spec_ctrl(%rsp) - 1: - .endm - ALTERNATIVE "", svm_vmexit_spec_ctrl, X86_FEATURE_SC_MSR_HVM -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 008d76a6e1..9250eb1839 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -52,7 +52,7 @@ ENTRY(vmx_asm_vmexit_handler) - */ - .macro restore_spec_ctrl - mov $MSR_SPEC_CTRL, %ecx -- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax -+ mov CPUINFO_xen_spec_ctrl(%rsp), %eax - xor %edx, %edx - wrmsr - .endm -diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h -index 9cc8d8e3d4..ba82b413e2 100644 ---- a/xen/arch/x86/include/asm/current.h -+++ b/xen/arch/x86/include/asm/current.h -@@ -55,8 +55,8 @@ struct cpu_info { - - /* See asm/spec_ctrl_asm.h for usage. 
*/ - unsigned int shadow_spec_ctrl; -- uint8_t xen_spec_ctrl; -- uint8_t last_spec_ctrl; -+ unsigned int xen_spec_ctrl; -+ unsigned int last_spec_ctrl; - uint8_t scf; /* SCF_* */ - - /* -diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h -index 8fc350abe2..7048e5ee21 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl.h -+++ b/xen/arch/x86/include/asm/spec_ctrl.h -@@ -93,7 +93,7 @@ extern int8_t opt_eager_fpu; - extern int8_t opt_l1d_flush; - - extern bool bsp_delay_spec_ctrl; --extern uint8_t default_xen_spec_ctrl; -+extern unsigned int default_xen_spec_ctrl; - extern uint8_t default_scf; - - extern int8_t opt_xpti_hwdom, opt_xpti_domu; -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 9531d046d7..d232172159 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -217,10 +217,10 @@ - setnz %al - not %eax - and %al, STACK_CPUINFO_FIELD(scf)(%r14) -- movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax -+ mov STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax - .else - andb $~SCF_use_shadow, CPUINFO_scf(%rsp) -- movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax -+ mov CPUINFO_xen_spec_ctrl(%rsp), %eax - .endif - - wrmsr -@@ -418,7 +418,7 @@ - - /* Load Xen's intended value. */ - mov $MSR_SPEC_CTRL, %ecx -- movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax -+ mov STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax - wrmsr - - .L\@_skip_msr_spec_ctrl: -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 2b22deb891..5ccd82f161 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -68,7 +68,7 @@ static bool __initdata opt_branch_harden = - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); - - bool __initdata bsp_delay_spec_ctrl; --uint8_t __ro_after_init default_xen_spec_ctrl; -+unsigned int __ro_after_init default_xen_spec_ctrl; - uint8_t __ro_after_init default_scf; - - paddr_t __ro_after_init l1tf_addr_mask, __ro_after_init l1tf_safe_maddr; --- -2.44.0 - diff --git a/0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch b/0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch deleted file mode 100644 index 05913c37..00000000 --- a/0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch +++ /dev/null @@ -1,111 +0,0 @@ -From e2a2cfb4ddf05a76e072ea84172d6a83ba392d20 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 22 Dec 2023 17:44:48 +0000 -Subject: [PATCH 536/542] x86: Use indirect calls in reset-stack infrastructure - -Mixing up JMP and CALL indirect targets leads a very fun form of speculative -type confusion. A target which is expecting to be called CALLed needs a -return address on the stack, and an indirect JMP doesn't place one there. - -An indirect JMP which predicts to a target intending to be CALLed can end up -with a RET speculatively executing with a value from the JMPers stack frame. - -There are several ways get indirect JMPs in Xen. - - * From tailcall optimisations. These are safe because the compiler has - arranged the stack to point at the callee's return address. - - * From jump tables. These are unsafe, but Xen is built with -fno-jump-tables - to work around several compiler issues. - - * From reset_stack_and_jump_ind(), which is particularly unsafe. Because of - the additional stack adjustment made, the value picked up off the stack is - regs->r15 of the next vCPU to run. 
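-
-A minimal C-level sketch of the hazard (an invented example, not code from
-the commit):
-
-    void target(void)                /* compiled to end in RET */
-    {
-    }
-
-    void dispatch(void (*fn)(void))
-    {
-        fn();    /* indirect CALL: pushes a return address; RET is safe */
-    }
-
-    /*
-     * If dispatch() were emitted as a tailcall ("jmp *%rax" instead of
-     * "call *%rax"), nothing pushes a return address, so target()'s RET
-     * consumes whatever the caller left on the stack -- speculatively,
-     * attacker-influenceable data such as the regs->r15 case above.
-     */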
- -In order to mitigate this type confusion, we want to make all indirect targets -be CALL targets, and remove the use of indirect JMP except via tailcall -optimisation. - -Luckily due to XSA-348, all C target functions of reset_stack_and_jump_ind() -are noreturn. {svm,vmx}_do_resume() exits via reset_stack_and_jump(); a -direct JMP with entirely different prediction properties. idle_loop() is an -infinite loop which eventually exits via reset_stack_and_jump_ind() from a new -schedule. i.e. These paths are all fine having one extra return address on -the stack. - -This leaves continue_pv_domain(), which is expecting to be a JMP target. -Alter it to strip the return address off the stack, which is safe because -there isn't actually a RET expecting to return to its caller. - -This allows us change reset_stack_and_jump_ind() to reset_stack_and_call_ind() -in order to mitigate the speculative type confusion. - -This is part of XSA-456 / CVE-2024-2201. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 8e186f98ce0e35d1754ec9299da41ec98873b65c) ---- - xen/arch/x86/domain.c | 4 ++-- - xen/arch/x86/include/asm/current.h | 4 ++-- - xen/arch/x86/x86_64/entry.S | 8 ++++++++ - 3 files changed, 12 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 228763b5e9..5dbd1d8a12 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2112,12 +2112,12 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - /* Ensure that the vcpu has an up-to-date time base. */ - update_vcpu_system_time(next); - -- reset_stack_and_jump_ind(nextd->arch.ctxt_switch->tail); -+ reset_stack_and_call_ind(nextd->arch.ctxt_switch->tail); - } - - void continue_running(struct vcpu *same) - { -- reset_stack_and_jump_ind(same->domain->arch.ctxt_switch->tail); -+ reset_stack_and_call_ind(same->domain->arch.ctxt_switch->tail); - } - - int __sync_local_execstate(void) -diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h -index ba82b413e2..2f723bcf1b 100644 ---- a/xen/arch/x86/include/asm/current.h -+++ b/xen/arch/x86/include/asm/current.h -@@ -196,10 +196,10 @@ unsigned long get_stack_dump_bottom (unsigned long sp); - switch_stack_and_jump(fn, "jmp %c", "i") - - /* The constraint may only specify non-call-clobbered registers. */ --#define reset_stack_and_jump_ind(fn) \ -+#define reset_stack_and_call_ind(fn) \ - ({ \ - (void)((fn) == (void (*)(void))NULL); \ -- switch_stack_and_jump(fn, "INDIRECT_JMP %", "b"); \ -+ switch_stack_and_jump(fn, "INDIRECT_CALL %", "b"); \ - }) - - /* -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 801d241337..9c256746ba 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -637,6 +637,14 @@ ENTRY(dom_crash_sync_extable) - #ifdef CONFIG_PV - ENTRY(continue_pv_domain) - ENDBR64 -+ -+ /* -+ * For speculative type confusion reasons, we're CALLed rather than -+ * JMPed to. Drop the return address. 
-+ */ -+ add $8, %rsp -+ ALTERNATIVE "", "mov $2, %eax; incsspd %eax", X86_FEATURE_XEN_SHSTK -+ - call check_wakeup_from_wait - ret_from_intr: - GET_CURRENT(bx) --- -2.44.0 - diff --git a/0537-x86-Drop-INDIRECT_JMP.patch b/0537-x86-Drop-INDIRECT_JMP.patch deleted file mode 100644 index 9a95f42c..00000000 --- a/0537-x86-Drop-INDIRECT_JMP.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 801e251556c374ce3e84ca776f211e00431932ef Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Fri, 22 Dec 2023 18:01:37 +0000 -Subject: [PATCH 537/542] x86: Drop INDIRECT_JMP - -Indirect JMPs which are not tailcalls can lead to an unwelcome form of -speculative type confusion, and we've removed the uses of INDIRECT_JMP to -compensate. Remove the temptation to reintroduce new instances. - -This is part of XSA-456 / CVE-2024-2201. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 0b66d7ce3c0290eaad28bdafb35200052d012b14) ---- - xen/arch/x86/include/asm/asm-defns.h | 18 ++++-------------- - 1 file changed, 4 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/x86/include/asm/asm-defns.h b/xen/arch/x86/include/asm/asm-defns.h -index 8bd9007731..7e22fcb9c0 100644 ---- a/xen/arch/x86/include/asm/asm-defns.h -+++ b/xen/arch/x86/include/asm/asm-defns.h -@@ -20,10 +20,9 @@ - .byte 0x0f, 0x01, 0xdd - .endm - --.macro INDIRECT_BRANCH insn:req arg:req -+.macro INDIRECT_CALL arg:req - /* -- * Create an indirect branch. insn is one of call/jmp, arg is a single -- * register. -+ * Create an indirect call. arg is a single register. - * - * With no compiler support, this degrades into a plain indirect call/jmp. - * With compiler support, dispatch to the correct __x86_indirect_thunk_* -@@ -33,7 +32,7 @@ - $done = 0 - .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15 - .ifeqs "\arg", "%r\reg" -- \insn __x86_indirect_thunk_r\reg -+ call __x86_indirect_thunk_r\reg - $done = 1 - .exitm - .endif -@@ -44,19 +43,10 @@ - .endif - - .else -- \insn *\arg -+ call *\arg - .endif - .endm - --/* Convenience wrappers. */ --.macro INDIRECT_CALL arg:req -- INDIRECT_BRANCH call \arg --.endm -- --.macro INDIRECT_JMP arg:req -- INDIRECT_BRANCH jmp \arg --.endm -- - #ifdef CONFIG_XEN_IBT - # define ENDBR64 endbr64 - #else --- -2.44.0 - diff --git a/0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch b/0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch deleted file mode 100644 index 0324aa9b..00000000 --- a/0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 02a424a7a8be13f6f85eb4fa2f43100b8e76f760 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Sat, 6 Apr 2024 20:36:54 +0100 -Subject: [PATCH 538/542] x86/tsx: Expose RTM_ALWAYS_ABORT to guests - -A TSX Abort is one option mitigate Native-BHI, but a guest kernel doesn't get -to see this if Xen has turned RTM off using MSR_TSX_{CTRL,FORCE_ABORT}. - -Therefore, the meaning of RTM_ALWAYS_ABORT has been adjusted to "XBEGIN won't -fault", and it should be exposed to guests so they can make a better decision. - -Expose it in the max policy for any RTM-capable system. Offer it by default -only if RTM has been disabled. - -Update test-tsx to account for this new meaning. While adjusting the logic in -test_guest_policies(), take the opportunity to use feature names (now they're -available) to make the logic easier to follow. - -This is part of XSA-456 / CVE-2024-2201. 
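-
-Hypothetical guest-side logic under this adjusted meaning (the helper and
-variable names here are illustrative, not from this patch):
-
-    /*
-     * "XBEGIN won't fault" suffices to pick a TSX-abort based BHB clear,
-     * even though no RTM transaction will ever complete successfully.
-     */
-    if ( cpu_has_rtm || cpu_has_rtm_always_abort )
-        bhb_clear = bhb_clear_via_tsx_abort;
-    else
-        bhb_clear = bhb_clear_via_loop_sequence;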
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit c94e2105924347de0d9f32065370e802a20cc829) ---- - tools/tests/tsx/test-tsx.c | 39 ++++++++++++++------- - xen/arch/x86/cpu-policy.c | 20 +++++++++++ - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - 3 files changed, 47 insertions(+), 14 deletions(-) - -diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c -index 0f4ea5f9c4..fab5c9a367 100644 ---- a/tools/tests/tsx/test-tsx.c -+++ b/tools/tests/tsx/test-tsx.c -@@ -311,25 +311,25 @@ static void test_guest_policies(const struct cpu_policy *max, - dump_tsx_details(max, "Max:"); - dump_tsx_details(def, "Def:"); - -- if ( ((max->feat.raw[0].d | def->feat.raw[0].d) & -- (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) | -- bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT) | -- bitmaskof(X86_FEATURE_SRBDS_CTRL))) || -- ((max->arch_caps.raw | def->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) ) -+ if ( max->feat.tsx_force_abort || def->feat.tsx_force_abort || -+ max->feat.srbds_ctrl || def->feat.srbds_ctrl || -+ max->arch_caps.tsx_ctrl || def->arch_caps.tsx_ctrl ) - fail(" Xen-only TSX controls offered to guest\n"); - - switch ( rtm_behaviour ) - { - case RTM_UD: -- if ( (max->feat.raw[0].b | def->feat.raw[0].b) & -- (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) -- fail(" HLE/RTM offered to guests despite not being available\n"); -+ if ( max->feat.hle || def->feat.hle || -+ max->feat.rtm || def->feat.rtm || -+ max->feat.rtm_always_abort || def->feat.rtm_always_abort ) -+ fail(" HLE/RTM/RTM_AA offered to guests despite not being available\n"); - break; - - case RTM_ABORT: -- if ( def->feat.raw[0].b & -- (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) ) -+ if ( def->feat.hle || def->feat.rtm ) - fail(" HLE/RTM offered to guests by default despite not being usable\n"); -+ if ( !def->feat.rtm_always_abort ) -+ fail(" RTM_AA not offered to guests by default despite being available\n"); - break; - - case RTM_OK: -@@ -340,6 +340,9 @@ static void test_guest_policies(const struct cpu_policy *max, - - if ( def->feat.hle ) - fail(" Fail: HLE offered in default policy\n"); -+ -+ if ( def->feat.rtm && def->feat.rtm_always_abort ) -+ fail(" Fail: Both RTM and RTM_AA offered in default policy\n"); - } - - static void test_def_max_policies(void) -@@ -388,14 +391,13 @@ static void test_guest(struct xen_domctl_createdomain *c) - - if ( guest_policy.policy.feat.hle || - guest_policy.policy.feat.tsx_force_abort || -- guest_policy.policy.feat.rtm_always_abort || - guest_policy.policy.feat.srbds_ctrl || - guest_policy.policy.arch_caps.tsx_ctrl ) - fail(" Unexpected features advertised\n"); - - if ( host.policy.feat.rtm ) - { -- unsigned int _7b0; -+ unsigned int _7b0, _7d0; - - /* - * If host RTM is available, all combinations of guest flags should be -@@ -403,6 +405,8 @@ static void test_guest(struct xen_domctl_createdomain *c) - */ - _7b0 = (guest_policy.policy.feat.raw[0].b ^= - (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM))); -+ _7d0 = (guest_policy.policy.feat.raw[0].d ^= -+ bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT)); - - /* Set the new policy. 
*/ - rc = xc_cpu_policy_set_domain(xch, domid, &guest_policy); -@@ -426,10 +430,17 @@ static void test_guest(struct xen_domctl_createdomain *c) - - if ( guest_policy.policy.feat.raw[0].b != _7b0 ) - { -- fail(" Expected CPUID.7[1].b 0x%08x differs from actual 0x%08x\n", -+ fail(" Expected CPUID.7[0].b 0x%08x differs from actual 0x%08x\n", - _7b0, guest_policy.policy.feat.raw[0].b); - goto out; - } -+ -+ if ( guest_policy.policy.feat.raw[0].d != _7d0 ) -+ { -+ fail(" Expected CPUID.7[0].d 0x%08x differs from actual 0x%08x\n", -+ _7d0, guest_policy.policy.feat.raw[0].d); -+ goto out; -+ } - } - - out: -@@ -514,6 +525,8 @@ static void test_tsx(void) - i, errno, strerror(errno)); - } - -+ dump_tsx_details(&host.policy, "Host:"); -+ - rc = xc_physinfo(xch, &physinfo); - if ( rc ) - return fail("Failed to obtain physinfo: %d - %s\n", -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 24acd12ce2..e44de3cfcb 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -468,6 +468,21 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - */ - __set_bit(X86_FEATURE_HTT, fs); - __set_bit(X86_FEATURE_CMP_LEGACY, fs); -+ -+ /* -+ * To mitigate Native-BHI, one option is to use a TSX Abort on capable -+ * systems. This is safe even if RTM has been disabled for other reasons -+ * via MSR_TSX_{CTRL,FORCE_ABORT}. However, a guest kernel doesn't get to -+ * know this type of information. -+ * -+ * Therefore the meaning of RTM_ALWAYS_ABORT has been adjusted, to instead -+ * mean "XBEGIN won't fault". This is enough for a guest kernel to make -+ * an informed choice WRT mitigating Native-BHI. -+ * -+ * If RTM-capable, we can run a VM which has seen RTM_ALWAYS_ABORT. -+ */ -+ if ( test_bit(X86_FEATURE_RTM, fs) ) -+ __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs); - } - - static void __init guest_common_default_feature_adjustments(uint32_t *fs) -@@ -540,9 +555,14 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - * function as expected, but is technically compatible with the ISA. - * - * Do not advertise RTM to guests by default if it won't actually work. -+ * Instead, advertise RTM_ALWAYS_ABORT indicating that TSX Aborts are safe -+ * to use, e.g. for mitigating Native-BHI. - */ - if ( rtm_disabled ) -+ { - __clear_bit(X86_FEATURE_RTM, fs); -+ __set_bit(X86_FEATURE_RTM_ALWAYS_ABORT, fs); -+ } - } - - static void __init guest_common_feature_adjustments(uint32_t *fs) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 63c8ac8486..0004fd4bf5 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -261,7 +261,7 @@ XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ - XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ - XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */ - XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ --XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ -+XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! 
RTM disabled (but XBEGIN wont fault) */ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ - XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ --- -2.44.0 - diff --git a/0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch b/0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch deleted file mode 100644 index 44f4b3c3..00000000 --- a/0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 7d3ad6775d59ad292c3b6431e8eb73e10d191298 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 26 Mar 2024 19:01:37 +0000 -Subject: [PATCH 539/542] x86/spec-ctrl: Support BHI_DIS_S in order to mitigate - BHI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce a "bhi-dis-s" boolean to match the other options we have for -MSR_SPEC_CTRL values. Also introduce bhi_calculations(). - -Use BHI_DIS_S whenever possible. - -Guests which are levelled to be migration compatible with older CPUs can't see -BHI_DIS_S, and Xen must fill in the difference to make the guest safe. Use -the virt MSR_SPEC_CTRL infrastructure to force BHI_DIS_S behind the guest's -back. - -This is part of XSA-456 / CVE-2024-2201. - -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 62a1106415c5e8a49b45147ca84d54a58d471343) ---- - docs/misc/xen-command-line.pandoc | 8 +++++++- - xen/arch/x86/hvm/vmx/vmx.c | 17 +++++++++++++++++ - xen/arch/x86/include/asm/spec_ctrl.h | 1 + - xen/arch/x86/spec_ctrl.c | 24 +++++++++++++++++++++++- - 4 files changed, 48 insertions(+), 2 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index e1d56407dd..0b0abf8983 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2327,7 +2327,8 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - > {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, --> unpriv-mmio,gds-mit,div-scrub}= ]` -+> unpriv-mmio,gds-mit,div-scrub, -+> bhi-dis-s}= ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2408,6 +2409,11 @@ option can be used to force or prevent Xen using the feature itself. By - default, Xen will not use PSFD. PSFD is implied by SSBD, and SSBD is off by - default. - -+On hardware supporting BHI_DIS_S (Branch History Injection Disable -+Supervisor), the `bhi-dis-s=` option can be used to force or prevent Xen using -+the feature itself. By default Xen will use BHI_DIS_S on hardware susceptible -+to Branch History Injection. -+ - On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` - option can be used to force (the default) or prevent Xen from issuing branch - prediction barriers on vcpu context switches. 
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 38d6d78607..e5259ed034 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -58,6 +58,7 @@
- #include 
- #include 
- #include 
-+#include 
- #include 
- 
- static bool_t __initdata opt_force_ept;
-@@ -783,6 +784,22 @@ static void cf_check vmx_cpuid_policy_changed(struct vcpu *v)
- vmx_del_msr(v, MSR_SPEC_CTRL, VMX_MSR_GUEST);
- }
- 
-+ if ( cpu_has_vmx_virt_spec_ctrl )
-+ {
-+ /*
-+ * If we're on BHI_DIS_S capable hardware, the short loop sequence is
-+ * not sufficient to mitigate Native-BHI. If the VM can't see it
-+ * (i.e. it's levelled with older hardware), force it behind the
-+ * guest's back for safety.
-+ *
-+ * Because there's not a real Host/Guest split of the MSR_SPEC_CTRL
-+ * value, this only works as expected when Xen is using BHI_DIS_S too.
-+ */
-+ bool force_bhi_dis_s = opt_bhi_dis_s && !cp->feat.bhi_ctrl;
-+
-+ __vmwrite(SPEC_CTRL_MASK, force_bhi_dis_s ? SPEC_CTRL_BHI_DIS_S : 0);
-+ }
-+
- /* MSR_PRED_CMD is safe to pass through if the guest knows about it. */
- if ( cp->feat.ibrsb || cp->extd.ibpb )
- vmx_clear_msr_intercept(v, MSR_PRED_CMD, VMX_MSR_RW);
-diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h
-index 7048e5ee21..617209202b 100644
---- a/xen/arch/x86/include/asm/spec_ctrl.h
-+++ b/xen/arch/x86/include/asm/spec_ctrl.h
-@@ -89,6 +89,7 @@ static always_inline void spec_ctrl_new_guest_context(void)
- 
- extern int8_t opt_ibpb_ctxt_switch;
- extern bool opt_ssbd;
-+extern int8_t opt_bhi_dis_s;
- extern int8_t opt_eager_fpu;
- extern int8_t opt_l1d_flush;
- 
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 5ccd82f161..085e37525d 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -59,6 +59,7 @@ static int8_t __initdata opt_ibrs = -1;
- int8_t __initdata opt_stibp = -1;
- bool __ro_after_init opt_ssbd;
- int8_t __initdata opt_psfd = -1;
-+int8_t __ro_after_init opt_bhi_dis_s = -1;
- 
- int8_t __ro_after_init opt_ibpb_ctxt_switch = -1;
- int8_t __ro_after_init opt_eager_fpu = -1;
-@@ -281,6 +282,8 @@ static int __init cf_check parse_spec_ctrl(const char *s)
- opt_ssbd = val;
- else if ( (val = parse_boolean("psfd", s, ss)) >= 0 )
- opt_psfd = val;
-+ else if ( (val = parse_boolean("bhi-dis-s", s, ss)) >= 0 )
-+ opt_bhi_dis_s = val;
- 
- /* Misc settings. */
- else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
-@@ -536,7 +539,7 @@ static void __init print_details(enum ind_thunk thunk)
- "\n");
- 
- /* Settings for Xen's protection, irrespective of guests. */
-- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
-+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s%s, Other:%s%s%s%s%s%s\n",
- thunk != THUNK_NONE ? "BTI-Thunk: " : "",
- thunk == THUNK_NONE ? "" :
- thunk == THUNK_RETPOLINE ? "RETPOLINE, " :
- thunk == THUNK_LFENCE ? "LFENCE, " :
- thunk == THUNK_JMP ? "JMP, " : "?, ",
-@@ -554,6 +557,8 @@ static void __init print_details(enum ind_thunk thunk)
- (!boot_cpu_has(X86_FEATURE_PSFD) &&
- !boot_cpu_has(X86_FEATURE_INTEL_PSFD)) ? "" :
- (default_xen_spec_ctrl & SPEC_CTRL_PSFD) ? " PSFD+" : " PSFD-",
-+ !boot_cpu_has(X86_FEATURE_BHI_CTRL) ? "" :
-+ (default_xen_spec_ctrl & SPEC_CTRL_BHI_DIS_S) ? " BHI_DIS_S+" : " BHI_DIS_S-",
- !(caps & ARCH_CAPS_TSX_CTRL) ? "" :
- (opt_tsx & 1) ? " TSX+" : " TSX-",
- !cpu_has_srbds_ctrl ? 
"" : -@@ -1608,6 +1613,21 @@ static void __init gds_calculations(void) - } - } - -+/* -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html -+ */ -+static void __init bhi_calculations(void) -+{ -+ if ( opt_bhi_dis_s == -1 ) -+ opt_bhi_dis_s = !boot_cpu_has(X86_FEATURE_BHI_NO); -+ -+ if ( !boot_cpu_has(X86_FEATURE_BHI_CTRL) ) -+ opt_bhi_dis_s = false; -+ -+ if ( opt_bhi_dis_s ) -+ default_xen_spec_ctrl |= SPEC_CTRL_BHI_DIS_S; -+} -+ - void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); -@@ -2152,6 +2172,8 @@ void __init init_speculation_mitigations(void) - - gds_calculations(); - -+ bhi_calculations(); -+ - print_details(thunk); - - /* --- -2.44.0 - diff --git a/0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch b/0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch deleted file mode 100644 index 79642fd5..00000000 --- a/0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch +++ /dev/null @@ -1,349 +0,0 @@ -From 4abd50dc17ced2e1221226b7893c437acbbd0d1b Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Jun 2023 19:41:44 +0100 -Subject: [PATCH 540/542] x86/spec-ctrl: Software BHB-clearing sequences -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Implement clear_bhb_{tsx,loops}() as per the BHI guidance. The loops variant -is set up as the "short" sequence. - -Introduce SCF_entry_bhb and extend SPEC_CTRL_ENTRY_* with a conditional call -to selected clearing routine. - -Note that due to a limitation in the ALTERNATIVE capability, the TEST/JZ can't -be included alongside a CALL in a single alternative block. This is going to -require further work to untangle. - -The BHB sequences (if used) must be after the restoration of Xen's -MSR_SPEC_CTRL value, which must be accounted for when judging whether it is -safe to skip the safety LFENCEs. - -This is part of XSA-456 / CVE-2024-2201. - -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 954c983abceee97bf5f6230b9ae164f2c49a9aa9) ---- - xen/arch/x86/Makefile | 1 + - xen/arch/x86/bhb-thunk.S | 98 ++++++++++++++++++++++++ - xen/arch/x86/hvm/vmx/entry.S | 12 +++ - xen/arch/x86/include/asm/cpufeature.h | 3 + - xen/arch/x86/include/asm/cpufeatures.h | 3 + - xen/arch/x86/include/asm/spec_ctrl.h | 3 +- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 30 ++++++++ - xen/arch/x86/spec_ctrl.c | 39 ++++++---- - 8 files changed, 171 insertions(+), 18 deletions(-) - create mode 100644 xen/arch/x86/bhb-thunk.S - -diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile -index cb9d952659..6a070a8cf8 100644 ---- a/xen/arch/x86/Makefile -+++ b/xen/arch/x86/Makefile -@@ -14,6 +14,7 @@ alternative-y := alternative.init.o - alternative-$(CONFIG_LIVEPATCH) := - obj-bin-y += $(alternative-y) - obj-y += apic.o -+obj-y += bhb-thunk.o - obj-y += bitops.o - obj-bin-y += bzimage.init.o - obj-bin-y += clear_page.o -diff --git a/xen/arch/x86/bhb-thunk.S b/xen/arch/x86/bhb-thunk.S -new file mode 100644 -index 0000000000..f52cfb9bc2 ---- /dev/null -+++ b/xen/arch/x86/bhb-thunk.S -@@ -0,0 +1,98 @@ -+/* SPDX-License-Identifier: GPL-2.0-or-later */ -+/* -+ * Branch History Injection clearing sequences. -+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html -+ * -+ * Copyright (c) 2023, 2024 XenServer. 
-+ */ -+ .file __FILE__ -+ -+#include -+ -+ .section .text.entry, "ax", @progbits -+ -+/* -+ * Clear the Branch History Buffer using a TSX Abort. -+ * -+ * Any TSX Abort has a side effect of clearing the BHB, even when TSX is -+ * disabled for e.g. TAA mitigation reasons. -+ */ -+ENTRY(clear_bhb_tsx) -+ .byte 0xc7, 0xf8; .long 1f - 0f /* xbegin 1f */ -+0: .byte 0xc6, 0xf8, 0 /* xabort $0 */ -+ int3 -+1: -+ ret -+ -+ .size clear_bhb_tsx, . - clear_bhb_tsx -+ .type clear_bhb_tsx, @function -+ -+/* -+ * Clear the Branch History Buffer using the software sequence. -+ * -+ * Clobbers: %eax, %ecx -+ * -+ * This executes a specific number of taken branches, sufficient to displace -+ * all prior entries in the history tracker, therefore removing prior -+ * influence on subsequent BTB lookups. -+ * -+ * Structurally, it looks like this: -+ * -+ * call 1 -+ * call 2 -+ * ... 5x jmp loop -+ * call 2 -+ * ... 5x jmp loop -+ * ... 5x call2's deep -+ * -+ * ret -+ * ret -+ * ret -+ * ret -+ * -+ * The CALL/RETs are necessary to prevent the Loop Stream Detector from -+ * interfering. The alignment is for performance and not safety. -+ * -+ * The "short" sequence (5 and 5) is for CPUs prior to Alder Lake / Sapphire -+ * Rapids (i.e. Cores prior to Golden Cove and/or Gracemont). -+ */ -+ENTRY(clear_bhb_loops) -+ mov $5, %ecx -+ -+ call 1f -+ jmp 5f -+ int3 -+ -+ .align 64 -+1: call 2f -+ ret -+ int3 -+ -+ .align 64 -+2: mov $5, %eax -+ -+3: jmp 4f -+ int3 -+ -+4: sub $1, %eax -+ jnz 3b -+ -+ sub $1, %ecx -+ jnz 1b -+ -+ ret -+5: -+ /* -+ * The Intel sequence has an LFENCE here. The purpose is to ensure -+ * that all prior branches have executed, before dispatching a -+ * subsequent indirect branch. -+ * -+ * Xen's SPEC_CTRL_ENTRY_* blocks have safety LFENCEs at the end when -+ * protections are active, which suffices for this purpose. -+ */ -+ -+ ret -+ -+ .size clear_bhb_loops, . - clear_bhb_loops -+ .type clear_bhb_loops, @function -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 9250eb1839..1092d1918c 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -57,6 +57,18 @@ ENTRY(vmx_asm_vmexit_handler) - wrmsr - .endm - ALTERNATIVE "", restore_spec_ctrl, X86_FEATURE_SC_MSR_HVM -+ -+ /* -+ * Clear the BHB to mitigate BHI. Used on eIBRS parts, and uses RETs -+ * itself so must be after we've perfomed all the RET-safety we can. -+ */ -+ testb $SCF_entry_bhb, CPUINFO_scf(%rsp) -+ jz .L_skip_bhb -+ ALTERNATIVE_2 "", \ -+ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \ -+ "call clear_bhb_tsx", X86_SPEC_BHB_TSX -+.L_skip_bhb: -+ - ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_VMX - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index a6b8af1296..b24d535080 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -181,6 +181,9 @@ - #define cpu_bug_fpu_ptrs boot_cpu_has(X86_BUG_FPU_PTRS) - #define cpu_bug_null_seg boot_cpu_has(X86_BUG_NULL_SEG) - -+#define cpu_has_bhb_seq (boot_cpu_has(X86_SPEC_BHB_TSX) || \ -+ boot_cpu_has(X86_SPEC_BHB_LOOPS)) -+ - enum _cache_type { - CACHE_TYPE_NULL = 0, - CACHE_TYPE_DATA = 1, -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index 6422c66b0f..bada8912e0 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -56,5 +56,8 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for - #define X86_SPEC_NO_LFENCE_ENTRY_INTR X86_BUG(17) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_INTR. */ - #define X86_SPEC_NO_LFENCE_ENTRY_VMX X86_BUG(18) /* (No) safety LFENCE for SPEC_CTRL_ENTRY_VMX. */ - -+#define X86_SPEC_BHB_TSX X86_BUG(19) /* Use clear_bhb_tsx for BHI mitigation. */ -+#define X86_SPEC_BHB_LOOPS X86_BUG(20) /* Use clear_bhb_loops for BHI mitigation.*/ -+ - /* Total number of capability words, inc synth and bug words. */ - #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ -diff --git a/xen/arch/x86/include/asm/spec_ctrl.h b/xen/arch/x86/include/asm/spec_ctrl.h -index 617209202b..4439a1b243 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl.h -+++ b/xen/arch/x86/include/asm/spec_ctrl.h -@@ -36,6 +36,7 @@ - #define SCF_verw (1 << 3) - #define SCF_ist_ibpb (1 << 4) - #define SCF_entry_ibpb (1 << 5) -+#define SCF_entry_bhb (1 << 6) - - /* - * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -@@ -54,7 +55,7 @@ - * Some speculative protections are per-domain. These settings are merged - * into the top-of-stack block in the context switch path. - */ --#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb) -+#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb | SCF_entry_bhb) - - #ifndef __ASSEMBLY__ - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index d232172159..1d7c4f4a68 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -285,6 +285,17 @@ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV - -+ /* -+ * Clear the BHB to mitigate BHI. Used on eIBRS parts, and uses RETs -+ * itself so must be after we've perfomed all the RET-safety we can. -+ */ -+ testb $SCF_entry_bhb, %bl -+ jz .L\@_skip_bhb -+ ALTERNATIVE_2 "", \ -+ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \ -+ "call clear_bhb_tsx", X86_SPEC_BHB_TSX -+.L\@_skip_bhb: -+ - ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV - .endm - -@@ -323,6 +334,13 @@ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV - -+ testb $SCF_entry_bhb, %bl -+ jz .L\@_skip_bhb -+ ALTERNATIVE_2 "", \ -+ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \ -+ "call clear_bhb_tsx", X86_SPEC_BHB_TSX -+.L\@_skip_bhb: -+ - ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR - .endm - -@@ -423,6 +441,18 @@ - - .L\@_skip_msr_spec_ctrl: - -+ /* -+ * Clear the BHB to mitigate BHI. Used on eIBRS parts, and uses RETs -+ * itself so must be after we've perfomed all the RET-safety we can. 
-+ */ -+ testb $SCF_entry_bhb, %bl -+ jz .L\@_skip_bhb -+ -+ ALTERNATIVE_2 "", \ -+ "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \ -+ "call clear_bhb_tsx", X86_SPEC_BHB_TSX -+.L\@_skip_bhb: -+ - lfence - .endm - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 085e37525d..546199fabe 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -2194,38 +2194,43 @@ void __init init_speculation_mitigations(void) - /* - * SPEC_CTRL_ENTRY_FROM_PV conditional safety - * -- * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an -- * unconditional WRMSR as the last action. -+ * A BHB sequence, if used, is a conditional action and last. If we -+ * have this, then we must have the LFENCE. - * -- * If we have it, or we're not using any prior conditional mitigation, -- * then it's safe to drop the LFENCE. -+ * Otherwise, DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an -+ * unconditional WRMSR. If we do have it, or we're not using any -+ * prior conditional block, then it's safe to drop the LFENCE. - */ -- if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || -- !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ) -+ if ( !cpu_has_bhb_seq && -+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || -+ !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV)) ) - setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV); - - /* - * SPEC_CTRL_ENTRY_FROM_INTR conditional safety - * -- * DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an -- * unconditional WRMSR as the last action. -+ * A BHB sequence, if used, is a conditional action and last. If we -+ * have this, then we must have the LFENCE. - * -- * If we have it, or we have no protections active in the block that -- * is skipped when interrupting guest context, then it's safe to drop -- * the LFENCE. -+ * Otherwise DO_SPEC_CTRL_ENTRY (X86_FEATURE_SC_MSR_PV if used) is an -+ * unconditional WRMSR. If we have it, or we have no protections -+ * active in the block that is skipped when interrupting guest -+ * context, then it's safe to drop the LFENCE. - */ -- if ( boot_cpu_has(X86_FEATURE_SC_MSR_PV) || -- (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && -- !boot_cpu_has(X86_FEATURE_SC_RSB_PV)) ) -+ if ( !cpu_has_bhb_seq && -+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || -+ (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && -+ !boot_cpu_has(X86_FEATURE_SC_RSB_PV))) ) - setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_INTR); - - /* - * SPEC_CTRL_ENTRY_FROM_VMX conditional safety - * -- * Currently there are no safety actions with conditional branches, so -- * no need for the extra safety LFENCE. -+ * A BHB sequence, if used, is the only conditional action, so if we -+ * don't have it, we don't need the safety LFENCE. - */ -- setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); -+ if ( !cpu_has_bhb_seq ) -+ setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); - } - - /* --- -2.44.0 - diff --git a/0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch b/0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch deleted file mode 100644 index 2d0940ea..00000000 --- a/0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch +++ /dev/null @@ -1,347 +0,0 @@ -From eab897caca018c126f925247e4c9fd4aac54afdb Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 8 Jun 2023 19:41:44 +0100 -Subject: [PATCH 541/542] x86/spec-ctrl: Wire up the Native-BHI software - sequences -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In the absence of BHI_DIS_S, mitigating Native-BHI requires the use of a -software sequence. 
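-
-As an illustrative boot setting (not text from the commit), a host can
-force the software sequence for all guests, dom0 included, using the
-options introduced below:
-
-    spec-ctrl=bhb-entry,bhb-seq=short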
- -Introduce a new bhb-seq= option to select between avaialble sequences and -bhb-entry= to control the per-PV/HVM actions like we have for other blocks. - -Activate the short sequence by default for PV and HVM guests on affected -hardware if BHI_DIS_S isn't present. - -This is part of XSA-456 / CVE-2024-2201. - -Signed-off-by: Andrew Cooper -Acked-by: Roger Pau Monné -(cherry picked from commit 689ad48ce9cf4c38297cd126e7e003a1c13a3b9d) ---- - docs/misc/xen-command-line.pandoc | 25 ++++-- - xen/arch/x86/spec_ctrl.c | 145 ++++++++++++++++++++++++++++-- - 2 files changed, 157 insertions(+), 13 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 0b0abf8983..0bd0588097 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2324,8 +2324,9 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ , xen=, {pv,hvm}=, --> {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=, --> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, -+> {msr-sc,rsb,verw,{ibpb,bhb}-entry}=|{pv,hvm}=, -+> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx, -+> {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio,gds-mit,div-scrub, - > bhi-dis-s}= ]` -@@ -2350,10 +2351,10 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options --offer fine grained control over the primitives by Xen. These impact Xen's --ability to protect itself, and/or Xen's ability to virtualise support for --guests to use. -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=`, `ibpb-entry=` and `bhb-entry=` -+options offer fine grained control over the primitives by Xen. These impact -+Xen's ability to protect itself, and/or Xen's ability to virtualise support -+for guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -@@ -2379,6 +2380,12 @@ guests to use. - Return Stack Overflow if appropriate microcode has been loaded, but for - performance reasons dom0 is unprotected by default. If it is necessary to - protect dom0 too, boot with `spec-ctrl=ibpb-entry`. -+* `bhb-entry=` offers control over whether BHB-clearing (Branch History -+ Buffer) sequences are used on entry to Xen. This is used by default on -+ hardware vulnerable to Branch History Injection, when the BHI_DIS_S control -+ is not available (see `bhi-dis-s`). The choice of scrubbing sequence can be -+ selected using the `bhb-seq=` option. If it is necessary to protect dom0 -+ too, boot with `spec-ctrl=bhb-entry`. - - If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be - used to select which of the thunks gets patched into the -@@ -2386,6 +2393,12 @@ used to select which of the thunks gets patched into the - (generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget, - minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget). - -+On all hardware, `bhb-seq=` can be used to select which of the BHB-clearing -+sequences gets used. This interacts with the `bhb-entry=` and `bhi-dis-s=` -+options in order to mitigate Branch History Injection on affected hardware. -+The default sequence is `short`, with `tsx` as an alternative available -+capable hardware that can be opted in to. 
-+ - On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - `ibrs=` option can be used to force or prevent Xen using the feature itself. - If Xen is not using IBRS itself, functionality is still set up so IBRS can be -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 546199fabe..b53e9c4e7a 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -45,6 +45,16 @@ static int8_t __ro_after_init opt_ibpb_entry_pv = -1; - static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; - static bool __ro_after_init opt_ibpb_entry_dom0; - -+static int8_t __ro_after_init opt_bhb_entry_pv = -1; -+static int8_t __ro_after_init opt_bhb_entry_hvm = -1; -+static bool __ro_after_init opt_bhb_entry_dom0; -+static enum bhb_thunk { -+ BHB_DEFAULT, -+ BHB_NONE, -+ BHB_TSX, -+ BHB_SHORT, -+} opt_bhb_seq __initdata; -+ - /* Cmdline controls for Xen's speculative settings. */ - static enum ind_thunk { - THUNK_DEFAULT, /* Decide which thunk to use at boot time. */ -@@ -128,8 +138,12 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_ibpb_entry_pv = 0; - opt_ibpb_entry_hvm = 0; - opt_ibpb_entry_dom0 = false; -+ opt_bhb_entry_pv = 0; -+ opt_bhb_entry_hvm = 0; -+ opt_bhb_entry_dom0 = false; - - opt_thunk = THUNK_JMP; -+ opt_bhb_seq = BHB_NONE; - opt_ibrs = 0; - opt_ibpb_ctxt_switch = false; - opt_ssbd = false; -@@ -158,6 +172,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_rsb_pv = val; - opt_verw_pv = val; - opt_ibpb_entry_pv = val; -+ opt_bhb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { -@@ -165,6 +180,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_rsb_hvm = val; - opt_verw_hvm = val; - opt_ibpb_entry_hvm = val; -+ opt_bhb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) - { -@@ -252,6 +268,28 @@ static int __init cf_check parse_spec_ctrl(const char *s) - break; - } - } -+ else if ( (val = parse_boolean("bhb-entry", s, ss)) != -1 ) -+ { -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_bhb_entry_pv = opt_bhb_entry_hvm = -+ opt_bhb_entry_dom0 = val; -+ break; -+ -+ case -2: -+ s += strlen("bhb-entry="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_bhb_entry_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_bhb_entry_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } -+ } - - /* Xen's speculative sidechannel mitigation settings. */ - else if ( !strncmp(s, "bti-thunk=", 10) ) -@@ -272,6 +310,19 @@ static int __init cf_check parse_spec_ctrl(const char *s) - else - rc = -EINVAL; - } -+ else if ( !strncmp(s, "bhb-seq=", 8) ) -+ { -+ s += strlen("bhb-seq="); -+ -+ if ( !cmdline_strcmp(s, "none") ) -+ opt_bhb_seq = BHB_NONE; -+ else if ( !cmdline_strcmp(s, "tsx") ) -+ opt_bhb_seq = BHB_TSX; -+ else if ( !cmdline_strcmp(s, "short") ) -+ opt_bhb_seq = BHB_SHORT; -+ else -+ rc = -EINVAL; -+ } - - /* Bits in MSR_SPEC_CTRL. */ - else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) -@@ -539,12 +590,16 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ printk(" Xen settings: %s%s%s%sSPEC_CTRL: %s%s%s%s%s%s, Other:%s%s%s%s%s%s\n", - thunk != THUNK_NONE ? "BTI-Thunk: " : "", - thunk == THUNK_NONE ? "" : - thunk == THUNK_RETPOLINE ? "RETPOLINE, " : - thunk == THUNK_LFENCE ? "LFENCE, " : - thunk == THUNK_JMP ? 
"JMP, " : "?, ", -+ opt_bhb_seq != BHB_NONE ? "BHB-Seq: " : "", -+ opt_bhb_seq == BHB_NONE ? "" : -+ opt_bhb_seq == BHB_TSX ? "TSX, " : -+ opt_bhb_seq == BHB_SHORT ? "SHORT, " : "?, ", - (!boot_cpu_has(X86_FEATURE_IBRSB) && - !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", -@@ -583,11 +638,11 @@ static void __init print_details(enum ind_thunk thunk) - * mitigation support for guests. - */ - #ifdef CONFIG_HVM -- printk(" Support for HVM VMs:%s%s%s%s%s%s%s\n", -+ printk(" Support for HVM VMs:%s%s%s%s%s%s%s%s\n", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || -- amd_virt_spec_ctrl || -+ cpu_has_bhb_seq || amd_virt_spec_ctrl || - opt_eager_fpu || opt_verw_hvm) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || -@@ -595,20 +650,23 @@ static void __init print_details(enum ind_thunk thunk) - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", - opt_verw_hvm ? " VERW" : "", -- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "", -+ cpu_has_bhb_seq ? " BHB-entry" : ""); - - #endif - #ifdef CONFIG_PV -- printk(" Support for PV VMs:%s%s%s%s%s%s\n", -+ printk(" Support for PV VMs:%s%s%s%s%s%s%s\n", - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || -+ cpu_has_bhb_seq || - opt_eager_fpu || opt_verw_pv) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", - opt_verw_pv ? " VERW" : "", -- boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : ""); -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "", -+ cpu_has_bhb_seq ? " BHB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", - opt_xpti_hwdom ? "enabled" : "disabled", -@@ -1616,16 +1674,85 @@ static void __init gds_calculations(void) - /* - * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html - */ -+static bool __init cpu_has_bug_bhi(void) -+{ -+ /* BHI is only known to affect Intel Family 6 processors at this time. */ -+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || -+ boot_cpu_data.x86 != 6 ) -+ return false; -+ -+ if ( boot_cpu_has(X86_FEATURE_BHI_NO) ) -+ return false; -+ -+ if ( cpu_has_hypervisor ) -+ return true; /* TODO: how to figure out out if we're really eIBRS levelled out? */ -+ -+ return cpu_has_eibrs; -+} -+ - static void __init bhi_calculations(void) - { -+ bool has_bhi = cpu_has_bug_bhi(); -+ -+ /* -+ * To mitigate BHI, we want to use BHI_DIS_S wherever possible, or the -+ * short sequence otherwise. Other forms are available on request. -+ * -+ * We are repsonsbile for performing default-conversion on opt_bhi_dis_s -+ * and opt_bhb_seq, irrespective of succeptibility to BHI. -+ */ -+ - if ( opt_bhi_dis_s == -1 ) -- opt_bhi_dis_s = !boot_cpu_has(X86_FEATURE_BHI_NO); -+ opt_bhi_dis_s = has_bhi; - - if ( !boot_cpu_has(X86_FEATURE_BHI_CTRL) ) - opt_bhi_dis_s = false; - - if ( opt_bhi_dis_s ) - default_xen_spec_ctrl |= SPEC_CTRL_BHI_DIS_S; -+ -+ if ( opt_bhb_seq == BHB_DEFAULT ) -+ { -+ /* -+ * If we're using BHI_DIS_S, or we're not succeptable, don't activate -+ * the thunks. 
-+ */
-+ if ( !has_bhi || opt_bhi_dis_s )
-+ opt_bhb_seq = BHB_NONE;
-+ else
-+ opt_bhb_seq = BHB_SHORT;
-+ }
-+
-+ /*
-+ * We can use the TSX sequence even if TSX is disabled for e.g. TAA
-+ * reasons.
-+ * However, fall back to the loop sequence if there is no trace of RTM at
-+ * all, as XBEGIN will #UD.
-+ */
-+ if ( opt_bhb_seq == BHB_TSX && !cpu_has_rtm && !cpu_has_rtm_always_abort &&
-+ !cpu_has_tsx_force_abort )
-+ opt_bhb_seq = BHB_SHORT;
-+
-+ /*
-+ * Only activate SCF_entry_bhb for guests if a sequence is in place.
-+ */
-+ if ( opt_bhb_entry_pv == -1 )
-+ opt_bhb_entry_pv = has_bhi && opt_bhb_seq != BHB_NONE;
-+ if ( opt_bhb_entry_hvm == -1 )
-+ opt_bhb_entry_hvm = has_bhi && opt_bhb_seq != BHB_NONE;
-+
-+ switch ( opt_bhb_seq )
-+ {
-+ case BHB_SHORT:
-+ setup_force_cpu_cap(X86_SPEC_BHB_LOOPS);
-+ break;
-+
-+ case BHB_TSX:
-+ setup_force_cpu_cap(X86_SPEC_BHB_TSX);
-+ break;
-+
-+ default:
-+ break;
-+ }
- }
- 
- void spec_ctrl_init_domain(struct domain *d)
-@@ -1638,9 +1765,13 @@ void spec_ctrl_init_domain(struct domain *d)
- bool pv = is_pv_domain(d);
- 
- bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) &&
- (d->domain_id != 0 || opt_ibpb_entry_dom0));
- 
-+ bool bhb = ((pv ? opt_bhb_entry_pv : opt_bhb_entry_hvm) &&
-+ (d->domain_id != 0 || opt_bhb_entry_dom0));
-+
- d->arch.scf =
- (verw ? SCF_verw : 0) |
- (ibpb ? SCF_entry_ibpb : 0) |
-+ (bhb ? SCF_entry_bhb : 0) |
- 0;
- }
- 
---
-2.44.0
-
diff --git a/0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch b/0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch
deleted file mode 100644
index e1e52702..00000000
--- a/0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch
+++ /dev/null
@@ -1,130 +0,0 @@
-From 60859cc99033beb84da035fb1664ce6e9b698bc6 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Fri, 22 Mar 2024 19:29:34 +0000
-Subject: [PATCH 542/542] x86/spec-ctrl: Support the "long" BHB loop sequence
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Out of an abundance of caution, implement the long loop too, and allow for
-it to be opted in to.
-
-This is part of XSA-456 / CVE-2024-2201.
-
-Signed-off-by: Andrew Cooper
-Acked-by: Roger Pau Monné
-(cherry picked from commit d5887c0decbd90e798b24ed696628645b04632fb)
----
- docs/misc/xen-command-line.pandoc | 4 ++--
- xen/arch/x86/bhb-thunk.S | 8 ++++++--
- xen/arch/x86/include/asm/cpufeatures.h | 1 +
- xen/arch/x86/spec_ctrl.c | 10 +++++++++-
- 4 files changed, 18 insertions(+), 5 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 0bd0588097..fba5a8221b 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2325,7 +2325,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
- ### spec-ctrl (x86)
- > `= List of [ , xen=, {pv,hvm}=,
- > {msr-sc,rsb,verw,{ibpb,bhb}-entry}=|{pv,hvm}=,
--> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx,
-+> bti-thunk=retpoline|lfence|jmp,bhb-seq=short|tsx|long,
- > {ibrs,ibpb,ssbd,psfd,
- > eager-fpu,l1d-flush,branch-harden,srb-lock,
- > unpriv-mmio,gds-mit,div-scrub,
- > bhi-dis-s}= ]`
-@@ -2397,7 +2397,7 @@ On all hardware, `bhb-seq=` can be used to select which of the BHB-clearing
- sequences gets used. This interacts with the `bhb-entry=` and `bhi-dis-s=`
- options in order to mitigate Branch History Injection on affected hardware.
- The default sequence is `short`, with `tsx` as an alternative available
--capable hardware that can be opted in to.
-+capable hardware, and `long` that can be opted in to. 
- - On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - `ibrs=` option can be used to force or prevent Xen using the feature itself. -diff --git a/xen/arch/x86/bhb-thunk.S b/xen/arch/x86/bhb-thunk.S -index f52cfb9bc2..7e866784f7 100644 ---- a/xen/arch/x86/bhb-thunk.S -+++ b/xen/arch/x86/bhb-thunk.S -@@ -56,9 +56,13 @@ ENTRY(clear_bhb_tsx) - * - * The "short" sequence (5 and 5) is for CPUs prior to Alder Lake / Sapphire - * Rapids (i.e. Cores prior to Golden Cove and/or Gracemont). -+ * -+ * The "long" sequence (12 and 7) is for Alder Lake / Sapphire Rapids -+ * (i.e. Golden Cove and/or Gracemont cores). However, such CPUs are expected -+ * to use BHI_DIS_S in preference. - */ - ENTRY(clear_bhb_loops) -- mov $5, %ecx -+ ALTERNATIVE "mov $5, %ecx", "mov $12, %ecx", X86_SPEC_BHB_LOOPS_LONG - - call 1f - jmp 5f -@@ -70,7 +74,7 @@ ENTRY(clear_bhb_loops) - int3 - - .align 64 --2: mov $5, %eax -+2: ALTERNATIVE "mov $5, %eax", "mov $7, %eax", X86_SPEC_BHB_LOOPS_LONG - - 3: jmp 4f - int3 -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index bada8912e0..ba3df174b7 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -58,6 +58,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for - - #define X86_SPEC_BHB_TSX X86_BUG(19) /* Use clear_bhb_tsx for BHI mitigation. */ - #define X86_SPEC_BHB_LOOPS X86_BUG(20) /* Use clear_bhb_loops for BHI mitigation.*/ -+#define X86_SPEC_BHB_LOOPS_LONG X86_BUG(21) /* Upgrade clear_bhb_loops to the "long" sequence. */ - - /* Total number of capability words, inc synth and bug words. */ - #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index b53e9c4e7a..7697f9ad3f 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -53,6 +53,7 @@ static enum bhb_thunk { - BHB_NONE, - BHB_TSX, - BHB_SHORT, -+ BHB_LONG, - } opt_bhb_seq __initdata; - - /* Cmdline controls for Xen's speculative settings. */ -@@ -320,6 +321,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_bhb_seq = BHB_TSX; - else if ( !cmdline_strcmp(s, "short") ) - opt_bhb_seq = BHB_SHORT; -+ else if ( !cmdline_strcmp(s, "long") ) -+ opt_bhb_seq = BHB_LONG; - else - rc = -EINVAL; - } -@@ -599,7 +602,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_bhb_seq != BHB_NONE ? "BHB-Seq: " : "", - opt_bhb_seq == BHB_NONE ? "" : - opt_bhb_seq == BHB_TSX ? "TSX, " : -- opt_bhb_seq == BHB_SHORT ? "SHORT, " : "?, ", -+ opt_bhb_seq == BHB_SHORT ? "SHORT, " : -+ opt_bhb_seq == BHB_LONG ? "LONG, " : "?, ", - (!boot_cpu_has(X86_FEATURE_IBRSB) && - !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? 
"IBRS+" : "IBRS-", -@@ -1742,6 +1746,10 @@ static void __init bhi_calculations(void) - - switch ( opt_bhb_seq ) - { -+ case BHB_LONG: -+ setup_force_cpu_cap(X86_SPEC_BHB_LOOPS_LONG); -+ fallthrough; -+ - case BHB_SHORT: - setup_force_cpu_cap(X86_SPEC_BHB_LOOPS); - break; --- -2.44.0 - diff --git a/config b/config index b8758bf5..4bebb459 100644 --- a/config +++ b/config @@ -70,6 +70,7 @@ CONFIG_INDIRECT_THUNK=y CONFIG_SPECULATIVE_HARDEN_ARRAY=y CONFIG_SPECULATIVE_HARDEN_BRANCH=y CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS=y +CONFIG_SPECULATIVE_HARDEN_LOCK=y # end of Speculative hardening CONFIG_DIT_DEFAULT=y diff --git a/rel b/rel index 7ed6ff82..d00491fd 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -5 +1 diff --git a/version b/version index ab268c20..66800301 100644 --- a/version +++ b/version @@ -1 +1 @@ -4.17.3 +4.17.4 diff --git a/xen.spec.in b/xen.spec.in index 5ef2f006..2d2fa2e3 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -104,89 +104,8 @@ Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch -Patch0315: 0315-pci-fail-device-assignment-if-phantom-functions-cann.patch -Patch0316: 0316-VT-d-Fix-else-vs-endif-misplacement.patch -Patch0317: 0317-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch -Patch0318: 0318-CirrusCI-drop-FreeBSD-12.patch -Patch0319: 0319-x86-intel-ensure-Global-Performance-Counter-Control-.patch -Patch0320: 0320-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch -Patch0321: 0321-x86-vmx-Disallow-the-use-of-inactivity-states.patch -Patch0322: 0322-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch -Patch0323: 0323-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch -Patch0324: 0324-tools-xentop-fix-sorting-bug-for-some-columns.patch -Patch0325: 0325-amd-vi-fix-IVMD-memory-type-checks.patch -Patch0326: 0326-x86-hvm-Fix-fast-singlestep-state-persistence.patch -Patch0327: 0327-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch -Patch0328: 0328-build-Replace-which-with-command-v.patch -Patch0329: 0329-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch -Patch0330: 0330-build-make-sure-build-fails-when-running-kconfig-fai.patch -Patch0331: 0331-x86emul-add-missing-EVEX.R-checks.patch -Patch0332: 0332-xen-livepatch-fix-norevert-test-hook-setup-typo.patch -Patch0333: 0333-xen-cmdline-fix-printf-format-specifier-in-no_config.patch -Patch0334: 0334-x86-altcall-use-a-union-as-register-type-for-functio.patch -Patch0335: 0335-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch -Patch0336: 0336-x86-account-for-shadow-stack-in-exception-from-stub-.patch -Patch0337: 0337-xen-arm-Fix-UBSAN-failure-in-start_xen.patch -Patch0338: 0338-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch -Patch0339: 0339-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch -Patch0340: 0340-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch -Patch0341: 0341-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch -Patch0342: 0342-x86-spec-do-not-print-thunk-option-selection-if-not-.patch -Patch0343: 0343-xen-livepatch-register-livepatch-regions-when-loaded.patch -Patch0344: 0344-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch -Patch0345: 0345-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch -Patch0346: 0346-xen-livepatch-properly-build-the-noapply-and-norever.patch -Patch0347: 
0347-libxl-Fix-segfault-in-device_model_spawn_outcome.patch -Patch0348: 0348-x86-altcall-always-use-a-temporary-parameter-stashin.patch -Patch0349: 0349-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch -Patch0350: 0350-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch -Patch0351: 0351-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch # Security fixes -Patch0500: 0500-xsa452-4.17-1.patch -Patch0501: 0501-xsa452-4.17-2.patch -Patch0502: 0502-xsa452-4.17-3.patch -Patch0503: 0503-xsa452-4.17-4.patch -Patch0504: 0504-xsa452-4.17-5.patch -Patch0505: 0505-xsa452-4.17-6.patch -Patch0506: 0506-xsa452-4.17-7.patch -Patch0507: 0507-xsa455-4.17.patch -# XSA-456 -Patch0508: 0508-x86-APIC-finish-genapic-conversion-to-altcall.patch -Patch0509: 0509-cpufreq-finish-conversion-to-altcall.patch -Patch0510: 0510-x86-HPET-avoid-an-indirect-call.patch -Patch0511: 0511-core-parking-use-alternative_call.patch -Patch0512: 0512-x86-MTRR-avoid-several-indirect-calls.patch -Patch0513: 0513-x86-PV-avoid-indirect-call-for-I-O-emulation-quirk-h.patch -Patch0514: 0514-x86-MCE-separate-BSP-only-initialization.patch -Patch0515: 0515-x86-MCE-switch-some-callback-invocations-to-altcall.patch -Patch0516: 0516-IRQ-generalize-gs-et_irq_regs.patch -Patch0517: 0517-x86-spec-ctrl-Expose-IPRED_CTRL-to-guests.patch -Patch0518: 0518-x86-spec-ctrl-Expose-RRSBA_CTRL-to-guests.patch -Patch0519: 0519-x86-spec-ctrl-Expose-BHI_CTRL-to-guests.patch -Patch0520: 0520-x86-arrange-for-ENDBR-zapping-from-vendor-_ctxt_swit.patch -Patch0521: 0521-x86-guest-finish-conversion-to-altcall.patch -Patch0522: 0522-x86-CPU-convert-vendor-hook-invocations-to-altcall.patch -Patch0523: 0523-VMX-tertiary-execution-control-infrastructure.patch -Patch0524: 0524-x86-spec-ctrl-Move-__read_mostly-data-into-__ro_afte.patch -Patch0525: 0525-x86-tsx-Cope-with-RTM_ALWAYS_ABORT-vs-RTM-mismatch.patch -Patch0526: 0526-x86-alternatives-fix-.init-section-reference-in-_app.patch -Patch0527: 0527-x86-cpuid-Don-t-expose-IPRED-RRSBA-BHI-_CTRL-to-PV-g.patch -Patch0528: 0528-x86-spec-ctrl-Rename-spec_ctrl_flags-to-scf.patch -Patch0529: 0529-x86-spec-ctrl-Rework-conditional-safety-for-SPEC_CTR.patch -Patch0530: 0530-x86-entry-Arrange-for-r14-to-be-STACK_END-across-SPE.patch -Patch0531: 0531-x86-spec_ctrl-Hold-SCF-in-ebx-across-SPEC_CTRL_ENTRY.patch -Patch0532: 0532-x86-spec-ctrl-Simplify-DO_COND_IBPB.patch -Patch0533: 0533-x86-spec-ctrl-Detail-the-safety-properties-in-SPEC_C.patch -Patch0534: 0534-x86-vmx-Add-support-for-virtualize-SPEC_CTRL.patch -Patch0535: 0535-x86-spec-ctrl-Widen-the-xen-last-default-_spec_ctrl-.patch -Patch0536: 0536-x86-Use-indirect-calls-in-reset-stack-infrastructure.patch -Patch0537: 0537-x86-Drop-INDIRECT_JMP.patch -Patch0538: 0538-x86-tsx-Expose-RTM_ALWAYS_ABORT-to-guests.patch -Patch0539: 0539-x86-spec-ctrl-Support-BHI_DIS_S-in-order-to-mitigate.patch -Patch0540: 0540-x86-spec-ctrl-Software-BHB-clearing-sequences.patch -Patch0541: 0541-x86-spec-ctrl-Wire-up-the-Native-BHI-software-sequen.patch -Patch0542: 0542-x86-spec-ctrl-Support-the-long-BHB-loop-sequence.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From 2ac96437ab3a55a3e57dbec22bb0d777c488f2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 25 Apr 2024 00:02:29 +0200 Subject: [PATCH 51/64] Backport a fix for XHCI console See patch description for details. 
Fixes QubesOS/qubes-issues#9030 --- ...ame-for-extra-reserved-device-memory.patch | 80 ++++++++++++++++ ...k-extra-reserved-device-memory-in-me.patch | 96 +++++++++++++++++++ xen.spec.in | 2 + 3 files changed, 178 insertions(+) create mode 100644 0315-IOMMU-store-name-for-extra-reserved-device-memory.patch create mode 100644 0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch diff --git a/0315-IOMMU-store-name-for-extra-reserved-device-memory.patch b/0315-IOMMU-store-name-for-extra-reserved-device-memory.patch new file mode 100644 index 00000000..fd1f41e7 --- /dev/null +++ b/0315-IOMMU-store-name-for-extra-reserved-device-memory.patch @@ -0,0 +1,80 @@ +From a97de92e76a62e786cb8a3b5cd75f9df5c96e8e6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + +Date: Tue, 19 Mar 2024 11:23:06 +0100 +Subject: [PATCH] IOMMU: store name for extra reserved device memory +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +It will be useful for error reporting in a subsequent patch. + +Signed-off-by: Marek Marczykowski-Górecki +Acked-by: Jan Beulich +--- + xen/drivers/char/xhci-dbc.c | 3 ++- + xen/drivers/passthrough/iommu.c | 5 ++++- + xen/include/xen/iommu.h | 3 ++- + 3 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/xen/drivers/char/xhci-dbc.c b/xen/drivers/char/xhci-dbc.c +index 3bf389be7d0b..8e2037f1a5f7 100644 +--- a/xen/drivers/char/xhci-dbc.c ++++ b/xen/drivers/char/xhci-dbc.c +@@ -1421,7 +1421,8 @@ void __init xhci_dbc_uart_init(void) + iommu_add_extra_reserved_device_memory( + PFN_DOWN(virt_to_maddr(&dbc_dma_bufs)), + PFN_UP(sizeof(dbc_dma_bufs)), +- uart->dbc.sbdf); ++ uart->dbc.sbdf, ++ "XHCI console"); + serial_register_uart(SERHND_XHCI, &dbc_uart_driver, &dbc_uart); + } + } +diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c +index 996c31be1284..03587c0cd680 100644 +--- a/xen/drivers/passthrough/iommu.c ++++ b/xen/drivers/passthrough/iommu.c +@@ -682,6 +682,7 @@ struct extra_reserved_range { + unsigned long start; + unsigned long nr; + pci_sbdf_t sbdf; ++ const char *name; + }; + static unsigned int __initdata nr_extra_reserved_ranges; + static struct extra_reserved_range __initdata +@@ -689,7 +690,8 @@ static struct extra_reserved_range __initdata + + int __init iommu_add_extra_reserved_device_memory(unsigned long start, + unsigned long nr, +- pci_sbdf_t sbdf) ++ pci_sbdf_t sbdf, ++ const char *name) + { + unsigned int idx; + +@@ -700,6 +702,7 @@ int __init iommu_add_extra_reserved_device_memory(unsigned long start, + extra_reserved_ranges[idx].start = start; + extra_reserved_ranges[idx].nr = nr; + extra_reserved_ranges[idx].sbdf = sbdf; ++ extra_reserved_ranges[idx].name = name; + + return 0; + } +diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h +index ef57f31417d0..a9c9457c07b2 100644 +--- a/xen/include/xen/iommu.h ++++ b/xen/include/xen/iommu.h +@@ -326,7 +326,8 @@ struct iommu_ops { + */ + extern int iommu_add_extra_reserved_device_memory(unsigned long start, + unsigned long nr, +- pci_sbdf_t sbdf); ++ pci_sbdf_t sbdf, ++ const char *name); + /* + * To be called by specific IOMMU driver during initialization, + * to fetch ranges registered with iommu_add_extra_reserved_device_memory(). 
+--
+2.44.0
+
diff --git a/0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch b/0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch
new file mode 100644
index 00000000..548fa990
--- /dev/null
+++ b/0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch
@@ -0,0 +1,96 @@
+From dd5101a6169f89b9e3f3b72f0b0fcdb38db2fb35 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+
+Date: Wed, 3 Apr 2024 09:34:22 +0200
+Subject: [PATCH] drivers/char: mark extra reserved device memory in memory map
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The IOMMU driver checks if RMRR/IVMD are marked as reserved in the memory
+map. This should be true for addresses coming from the firmware, but
+when extra pages used by Xen itself are included in the mapping, those
+are taken from usable RAM instead. Mark those pages as reserved too.
+
+Not marking the pages as reserved didn't cause issues before due to
+another bug in the IOMMU driver code, which was fixed in 83afa3135830
+("amd-vi: fix IVMD memory type checks").
+
+Failing to reserve memory will lead to a panic in the IOMMU setup code,
+and not including the page in the IOMMU mapping will lead to a broken
+console (due to IOMMU faults). The pages chosen by the XHCI console
+driver should still be usable by the CPU though, and the console code
+already copes with a slow console by dropping characters (a console
+printing nothing is just a special case of "slow"). When reserving
+fails, print an error message showing which pages failed and who
+requested them, to help find out why the XHCI console doesn't work.
+
+Fixes: 3a1a7b809ffa "drivers/char: mark DMA buffers as reserved for the XHCI"
+Signed-off-by: Marek Marczykowski-Górecki
+Acked-by: Jan Beulich
+---
+ xen/drivers/passthrough/iommu.c | 17 +++++++++++++++++
+ xen/include/xen/iommu.h | 5 ++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
+index 03587c0cd680..ba18136c461c 100644
+--- a/xen/drivers/passthrough/iommu.c
++++ b/xen/drivers/passthrough/iommu.c
+@@ -22,6 +22,10 @@
+ #include
+ #include
+
++#ifdef CONFIG_X86
++#include
++#endif
++
+ unsigned int __read_mostly iommu_dev_iotlb_timeout = 1000;
+ integer_param("iommu_dev_iotlb_timeout", iommu_dev_iotlb_timeout);
+
+@@ -715,6 +719,19 @@ int __init iommu_get_extra_reserved_device_memory(iommu_grdm_t *func,
+
+ for ( idx = 0; idx < nr_extra_reserved_ranges; idx++ )
+ {
++#ifdef CONFIG_X86
++ paddr_t start = pfn_to_paddr(extra_reserved_ranges[idx].start);
++ paddr_t end = pfn_to_paddr(extra_reserved_ranges[idx].start +
++ extra_reserved_ranges[idx].nr);
++
++ if ( !reserve_e820_ram(&e820, start, end) )
++ {
++ printk(XENLOG_ERR "Failed to reserve [%"PRIx64"-%"PRIx64") for %s, "
++ "skipping IOMMU mapping for it, some functionality may be broken\n",
++ start, end, extra_reserved_ranges[idx].name);
++ continue;
++ }
++#endif
+ ret = func(extra_reserved_ranges[idx].start,
+ extra_reserved_ranges[idx].nr,
+ extra_reserved_ranges[idx].sbdf.sbdf,
+diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
+index a9c9457c07b2..92db6f124f13 100644
+--- a/xen/include/xen/iommu.h
++++ b/xen/include/xen/iommu.h
+@@ -321,7 +321,8 @@ struct iommu_ops {
+ };
+
+ /*
+- * To be called by Xen internally, to register extra RMRR/IVMD ranges.
++ * To be called by Xen internally, to register extra RMRR/IVMD ranges for RAM
++ * pages.
+ * Needs to be called before IOMMU initialization. + */ + extern int iommu_add_extra_reserved_device_memory(unsigned long start, +@@ -331,6 +332,8 @@ extern int iommu_add_extra_reserved_device_memory(unsigned long start, + /* + * To be called by specific IOMMU driver during initialization, + * to fetch ranges registered with iommu_add_extra_reserved_device_memory(). ++ * This has a side effect of marking requested ranges as "reserved" in the ++ * memory map. + */ + extern int iommu_get_extra_reserved_device_memory(iommu_grdm_t *func, + void *ctxt); +-- +2.44.0 + diff --git a/xen.spec.in b/xen.spec.in index 2d2fa2e3..d998e861 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -104,6 +104,8 @@ Patch0307: 0307-x86-Replace-MTRR_-constants-with-X86_MT_-constants.patch Patch0308: 0308-x86-Replace-EPT_EMT_-constants-with-X86_MT_.patch Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch +Patch0315: 0315-IOMMU-store-name-for-extra-reserved-device-memory.patch +Patch0316: 0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch # Security fixes From 46b5abf7a393395b0197f4905741f44dbedf3702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Fri, 26 Apr 2024 05:46:51 +0200 Subject: [PATCH 52/64] version 4.17.4-2 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index d00491fd..0cfbf088 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -1 +2 From f83cb75485e25856411bb67cc38c97accdd8c238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 20 May 2024 04:09:04 +0200 Subject: [PATCH 53/64] archlinux: adjust makedepends Remove bin86 and dev86 needed only for hypervisor build (not applicable here). Remove yajl as already listed in depends. Add python-setuptools. --- archlinux/PKGBUILD.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index 6743362f..78044d82 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -10,7 +10,7 @@ arch=("x86_64") url="http://qubes-os.org/" license=('GPL') depends=(bridge-utils python-lxml libutil-linux lzo libsystemd yajl) -makedepends=(wget make gcc patch git bin86 dev86 iasl yajl pkg-config openssl pixman) +makedepends=(wget make gcc patch git iasl pkg-config openssl pixman python-setuptools) provides=('xen-qubes-vm-essentials') _patches=( From bc272838be06788575c0f78e37a84b2e3f04f0b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 20 May 2024 04:10:19 +0200 Subject: [PATCH 54/64] archlinux: fix number of checksums for patches --- archlinux/PKGBUILD.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index 78044d82..0965bc13 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -23,7 +23,7 @@ _patches=( 1103-Strip-build-path-directories-in-tools-xen-and-xen-ar.patch ) source=(xen-$_upstream_pkgver.tar.gz "${_patches[@]}") -md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) +md5sums=(SKIP SKIP SKIP SKIP SKIP SKIP SKIP SKIP) prepare() { cd "${pkgbase}" From 948f153fea58adde982191029353ba7b1ff21d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 20 May 2024 04:10:38 +0200 Subject: [PATCH 55/64] archlinux: do not skip directory for logs /var/log/xen needs to be owned by the package, otherwise xendriverdomain service fails to start. 
--- archlinux/PKGBUILD.in | 1 - 1 file changed, 1 deletion(-) diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in index 0965bc13..541fbe90 100644 --- a/archlinux/PKGBUILD.in +++ b/archlinux/PKGBUILD.in @@ -110,7 +110,6 @@ package() { rm -rf "$pkgdir"/var/lock rm -rf "$pkgdir"/var/lib/xen - rm -rf "$pkgdir"/var/log # From b2d5e5d127d50d1352c726a80e9f1594d85f4f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sat, 25 May 2024 03:58:10 +0200 Subject: [PATCH 56/64] version 4.17.4-3 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 0cfbf088..00750edc 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -2 +3 From 21dddee9175f8664d498b327c06945ecb4420dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 15 Jul 2024 13:36:08 +0200 Subject: [PATCH 57/64] Apply XSA-458 patch --- 0500-xsa458.patch | 38 ++++++++++++++++++++++++++++++++++++++ xen.spec.in | 1 + 2 files changed, 39 insertions(+) create mode 100644 0500-xsa458.patch diff --git a/0500-xsa458.patch b/0500-xsa458.patch new file mode 100644 index 00000000..8be0a901 --- /dev/null +++ b/0500-xsa458.patch @@ -0,0 +1,38 @@ +From: Jan Beulich +Subject: x86/IRQ: avoid double unlock in map_domain_pirq() + +Forever since its introduction the main loop in the function dealing +with multi-vector MSI had error exit points ("break") with different +properties: In one case no IRQ descriptor lock is being held. +Nevertheless the subsequent error cleanup path assumed such a lock would +uniformly need releasing. Identify the case by setting "desc" to NULL, +thus allowing the unlock to be skipped as necessary. + +This is CVE-2024-31143 / XSA-458. + +Coverity ID: 1605298 +Fixes: d1b6d0a02489 ("x86: enable multi-vector MSI") +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2273,6 +2273,7 @@ int map_domain_pirq( + + set_domain_irq_pirq(d, irq, info); + spin_unlock_irqrestore(&desc->lock, flags); ++ desc = NULL; + + info = NULL; + irq = create_irq(NUMA_NO_NODE, true); +@@ -2308,7 +2309,9 @@ int map_domain_pirq( + + if ( ret ) + { +- spin_unlock_irqrestore(&desc->lock, flags); ++ if ( desc ) ++ spin_unlock_irqrestore(&desc->lock, flags); ++ + pci_disable_msi(msi_desc); + if ( nr ) + { diff --git a/xen.spec.in b/xen.spec.in index 7ceea2b7..23550b5c 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -108,6 +108,7 @@ Patch0315: 0315-IOMMU-store-name-for-extra-reserved-device-memory.patch Patch0316: 0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch # Security fixes +Patch0500: 0500-xsa458.patch # Upstreamable patches Patch0604: 0604-libxl-create-writable-error-xenstore-dir.patch From e4266f1cb7465e6daa61a0e69c706342bab0ecda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 16 Jul 2024 14:02:42 +0200 Subject: [PATCH 58/64] version 4.17.4-4 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index 00750edc..b8626c4c 100644 --- a/rel +++ b/rel @@ -1 +1 @@ -3 +4 From 31c52f7fb0f23be5ad4a045a1770f62a3fffccca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Tue, 16 Jul 2024 16:34:37 +0200 Subject: [PATCH 59/64] rpm: cleanup build dependencies Remove no longer needed deps - we don't build stubdomain in this package anymore. 
---
 xen.spec.in | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/xen.spec.in b/xen.spec.in
index 7ceea2b7..fdff1e66 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -207,19 +207,12 @@ BuildRequires: dev86
 %endif
 BuildRequires: python%{python3_pkgversion}-devel ncurses-devel python%{python3_pkgversion}-setuptools
 BuildRequires: perl-interpreter perl-generators
-%ifarch %{ix86} x86_64
-# so that x86_64 builds pick up glibc32 correctly
-BuildRequires: /usr/include/gnu/stubs-32.h
-%endif
 # BEGIN QUBES SPECIFIC PART
 BuildRequires: autoconf
 BuildRequires: automake
 # END QUBES SPECIFIC PART
 BuildRequires: gettext
-BuildRequires: gnutls-devel
-BuildRequires: openssl-devel
-# For ioemu PCI passthrough
-BuildRequires: pciutils-devel
+BuildRequires: zlib-devel
 # Several tools now use uuid
 BuildRequires: libuuid-devel
 # iasl needed to build hvmloader

From 481b1985431230884be72696e3bdf849aa542269 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Fri, 19 Jul 2024 01:18:23 +0200
Subject: [PATCH 60/64] Fix Arch build on conflicting _FORTIFY_SOURCE values

---
 ...d-conflicting-_FORTIFY_SOURCE-values.patch | 40 +++++++++++++++++++
 archlinux/PKGBUILD.in | 1 +
 xen.spec.in | 2 +
 3 files changed, 43 insertions(+)
 create mode 100644 0653-python-avoid-conflicting-_FORTIFY_SOURCE-values.patch

diff --git a/0653-python-avoid-conflicting-_FORTIFY_SOURCE-values.patch b/0653-python-avoid-conflicting-_FORTIFY_SOURCE-values.patch
new file mode 100644
index 00000000..07c1ca28
--- /dev/null
+++ b/0653-python-avoid-conflicting-_FORTIFY_SOURCE-values.patch
@@ -0,0 +1,40 @@
+From 5e9e49c4f0ed9c54b63bf99d7b6a013005f94865 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+
+Date: Fri, 19 Jul 2024 01:15:13 +0200
+Subject: [PATCH] python: avoid conflicting _FORTIFY_SOURCE values
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The compile flags are combined from Python's build config (sysconfig
+module) and the CFLAGS environment. If both define _FORTIFY_SOURCE, but
+to different values, the build will fail. This is the case on Arch,
+where Python's sysconfig has -D_FORTIFY_SOURCE=2, while Arch's
+makepkg.conf has -D_FORTIFY_SOURCE=3. Resolve the conflict by undefining
+_FORTIFY_SOURCE first, and use the value from the CFLAGS environment.
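+
+As a rough illustration (not part of this patch), the underlying problem
+is an ordinary C preprocessor conflict, as if every translation unit
+started with:
+
+    #define _FORTIFY_SOURCE 2  /* from Python's sysconfig CFLAGS */
+    #define _FORTIFY_SOURCE 3  /* from makepkg.conf CFLAGS; the compiler
+                                  warns: "_FORTIFY_SOURCE" redefined */
+
+The -Wp,-U_FORTIFY_SOURCE added below simply cancels the first
+definition before the second one is seen.
+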
+Details:
+https://setuptools.pypa.io/en/latest/userguide/ext_modules.html
+
+Signed-off-by: Marek Marczykowski-Górecki
+---
+ tools/python/setup.py | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/tools/python/setup.py b/tools/python/setup.py
+index 02354f698653..a73c95a9e766 100644
+--- a/tools/python/setup.py
++++ b/tools/python/setup.py
+@@ -20,6 +20,9 @@ PATH_LIBXENCTRL = XEN_ROOT + "/tools/libs/ctrl"
+ PATH_LIBXENGUEST = XEN_ROOT + "/tools/libs/guest"
+ PATH_XENSTORE = XEN_ROOT + "/tools/libs/store"
+
++if "-D_FORTIFY_SOURCE=" in os.environ.get("CFLAGS", ""):
++ os.environ["CFLAGS"] = "-Wp,-U_FORTIFY_SOURCE " + os.environ["CFLAGS"]
++
+ xc = Extension("xc",
+ extra_compile_args = extra_compile_args,
+ include_dirs = [ PATH_XEN,
+--
+2.45.2
+
diff --git a/archlinux/PKGBUILD.in b/archlinux/PKGBUILD.in
index 541fbe90..4b425b17 100644
--- a/archlinux/PKGBUILD.in
+++ b/archlinux/PKGBUILD.in
@@ -14,6 +14,7 @@ makedepends=(wget make gcc patch git iasl pkg-config openssl pixman python-setup
 provides=('xen-qubes-vm-essentials')

 _patches=(
+ 0653-python-avoid-conflicting-_FORTIFY_SOURCE-values.patch
 1000-Do-not-access-network-during-the-build.patch
 1001-hotplug-store-block-params-for-cleanup.patch
 1020-xen-tools-qubes-vm.patch
diff --git a/xen.spec.in b/xen.spec.in
index fdff1e66..39dc5790 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -155,6 +155,8 @@ Patch0643: 0643-cpufreq-enable-HWP-by-default.patch
 PAtch0651: 0651-x86-msi-passthrough-all-MSI-X-vector-ctrl-writes-to-.patch
 PAtch0652: 0652-x86-hvm-Allow-writes-to-registers-on-the-same-page-a.patch
+Patch0653: 0653-python-avoid-conflicting-_FORTIFY_SOURCE-values.patch
+
 # S0ix support
 Patch0670: 0670-x86-hpet-Disable-legacy-replacement-mode-after-test-.patch
 Patch0671: 0671-x86-idle-Get-PC-8.10-counters-for-Tiger-and-Alder-La.patch

From 1d786afc9ccb2372461a478d386d45032733297f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
Date: Fri, 19 Jul 2024 12:03:47 +0200
Subject: [PATCH 61/64] Add XHCI DbC console fixes

Add patches fixing XHCI DbC console when sys-usb is running.
Technically not a backport yet, but this version is very close to being
committed upstream, so it should qualify as a backport soon.
---
 ...xen-list-add-LIST_HEAD_RO_AFTER_INIT.patch | 35 ++
 ...or-marking-only-part-of-a-MMIO-page-.patch | 507 ++++++++++++++++++
 ...-sub-page-ro-API-to-make-just-xhci-d.patch | 89 +++
 xen.spec.in | 3 +
 4 files changed, 634 insertions(+)
 create mode 100644 0317-xen-list-add-LIST_HEAD_RO_AFTER_INIT.patch
 create mode 100644 0318-x86-mm-add-API-for-marking-only-part-of-a-MMIO-page-.patch
 create mode 100644 0319-drivers-char-Use-sub-page-ro-API-to-make-just-xhci-d.patch

diff --git a/0317-xen-list-add-LIST_HEAD_RO_AFTER_INIT.patch b/0317-xen-list-add-LIST_HEAD_RO_AFTER_INIT.patch
new file mode 100644
index 00000000..5600fce8
--- /dev/null
+++ b/0317-xen-list-add-LIST_HEAD_RO_AFTER_INIT.patch
@@ -0,0 +1,35 @@
+From efc9b6dbe429c64ea2b6c06964cf3d7be1bc68de Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+
+Date: Sat, 22 Jun 2024 18:08:09 +0200
+Subject: [PATCH] xen/list: add LIST_HEAD_RO_AFTER_INIT
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Similar to LIST_HEAD_READ_MOSTLY.
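+
+As a sketch of the intended use (the actual user is a later patch in
+this series), a list head that is only populated during boot can be
+declared as:
+
+    static LIST_HEAD_RO_AFTER_INIT(subpage_ro_ranges);
+
+which expands to a self-pointing struct list_head annotated with
+__ro_after_init, so the head itself becomes read-only once booting
+completes.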
+
+Signed-off-by: Marek Marczykowski-Górecki
+
+---
+New in v5
+---
+ xen/include/xen/list.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xen/include/xen/list.h b/xen/include/xen/list.h
+index dc5a8c461b9c..6915a987cd1a 100644
+--- a/xen/include/xen/list.h
++++ b/xen/include/xen/list.h
+@@ -42,6 +42,9 @@ struct list_head {
+ #define LIST_HEAD_READ_MOSTLY(name) \
+ struct list_head __read_mostly name = LIST_HEAD_INIT(name)
+
++#define LIST_HEAD_RO_AFTER_INIT(name) \
++ struct list_head __ro_after_init name = LIST_HEAD_INIT(name)
++
+ static inline void INIT_LIST_HEAD(struct list_head *list)
+ {
+ list->next = list;
+--
+2.45.2
+
diff --git a/0318-x86-mm-add-API-for-marking-only-part-of-a-MMIO-page-.patch b/0318-x86-mm-add-API-for-marking-only-part-of-a-MMIO-page-.patch
new file mode 100644
index 00000000..99b592dd
--- /dev/null
+++ b/0318-x86-mm-add-API-for-marking-only-part-of-a-MMIO-page-.patch
@@ -0,0 +1,507 @@
+From 224cb5c6751a1b9488dd1d09c81559123eab67be Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+
+Date: Mon, 20 Mar 2023 21:19:25 +0100
+Subject: [PATCH] x86/mm: add API for marking only part of a MMIO page read
+ only
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In some cases, only a few registers on a page need to be write-protected.
+Examples include the USB3 console (64 bytes worth of registers) or MSI-X's
+PBA table (which doesn't need to span the whole table either), although
+in the latter case the spec forbids placing other registers on the same
+page. The current API allows only marking whole pages read-only,
+which sometimes may cover other registers that the guest may need to
+write into.
+
+Currently, when a guest tries to write to an MMIO page on the
+mmio_ro_ranges, it's either immediately crashed on EPT violation - if
+that's HVM, or if PV, it gets #PF. In case of Linux PV, if the access was
+from userspace (like, /dev/mem), it will try to fix up by updating page
+tables (that Xen again will force to read-only) and will hit that #PF
+again (looping endlessly). Both behaviors are undesirable if the guest
+could actually be allowed the write.
+
+Introduce an API that allows marking part of a page read-only. Since
+sub-page permissions are not a thing in page tables (they are in EPT,
+but not granular enough), do this via emulation (or simply the page fault
+handler for PV) that handles writes that are supposed to be allowed.
+The new subpage_mmio_ro_add() takes a start physical address and the
+region size in bytes. Both the start address and the size need to be
+8-byte aligned, as a practical simplification (this allows using a
+smaller bitmask, and a smaller granularity isn't really necessary now).
+It will internally add relevant pages to mmio_ro_ranges, but if either
+the start or end address is not page-aligned, it additionally adds that
+page to a list for sub-page R/O handling. The list holds a bitmask of
+which qwords are supposed to be read-only and an address where the page
+is mapped for write emulation - this mapping is done only on the first
+access. A plain list is used instead of a more efficient structure,
+because there aren't supposed to be many pages needing this precise r/o
+control.
+
+The mechanism this API is plugged into is slightly different for PV and
+HVM. For both paths, it's plugged into mmio_ro_emulated_write(). For PV,
+it's already called for a #PF on a read-only MMIO page. For HVM however,
+an EPT violation on a p2m_mmio_direct page results in a direct
+domain_crash() for non-hardware domains.
+To reach mmio_ro_emulated_write(), change how write violations for
+p2m_mmio_direct are handled - specifically, check if they relate to such
+a partially protected page via subpage_mmio_write_accept() and if so,
+call hvm_emulate_one_mmio() for them too. This decodes what the guest is
+trying to write and finally calls mmio_ro_emulated_write(). The EPT
+write violation is detected as npfec.write_access and npfec.present both
+being true (similar to other places), which may cover some other
+(future?) cases - if that happens, the emulator might get involved
+unnecessarily, but since it's limited to pages marked with
+subpage_mmio_ro_add() only, the impact is minimal.
+Both of those paths need the MFN to which the guest tried to write (to
+check which part of the page is supposed to be read-only, and where
+the page is mapped for writes). This information currently isn't
+available directly in mmio_ro_emulated_write(), but in both cases it is
+already resolved somewhere higher in the call tree. Pass it down to
+mmio_ro_emulated_write() via the new mmio_ro_emulate_ctxt.mfn field.
+
+This may give a bit more access to the instruction emulator to HVM
+guests (the change in hvm_hap_nested_page_fault()), but only for pages
+explicitly marked with subpage_mmio_ro_add() - that is, if the guest has
+a passed-through device partially used by Xen.
+As of the next patch, this applies only to configuration explicitly
+documented as not security supported.
+
+The subpage_mmio_ro_add() function cannot be called with overlapping
+ranges, nor on pages already added to mmio_ro_ranges separately.
+Successful calls would result in correct handling, but error paths may
+result in incorrect state (like pages removed from mmio_ro_ranges too
+early). The debug build has asserts for the relevant cases.
+
+Signed-off-by: Marek Marczykowski-Górecki
+---
+Shadow mode is not tested, but I don't expect it to work differently than
+HAP in areas related to this patch.
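+
+A minimal usage sketch (illustrative only; the real caller is the XHCI
+DbC patch later in this series), keeping a 64-byte register block R/O
+while the rest of its page stays writable:
+
+    /* base_paddr and reg_offset are hypothetical placeholders; both
+     * the sum and the size must be MMIO_RO_SUBPAGE_GRAN-aligned. */
+    if ( subpage_mmio_ro_add(base_paddr + reg_offset, 64) )
+        printk(XENLOG_WARNING "falling back to whole-page R/O\n");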
+ +Changes in v5: +- use subpage_mmio_find_page helper, simplifying several functions +- use LIST_HEAD_RO_AFTER_INIT +- don't use subpage_ro_lock in __init +- drop #ifdef in mm.h +- return error on unaligned size in subpage_mmio_ro_add() instead of + extending the size (in release build) +Changes in v4: +- rename SUBPAGE_MMIO_RO_ALIGN to MMIO_RO_SUBPAGE_GRAN +- guard subpage_mmio_write_accept with CONFIG_HVM, as it's used only + there +- rename ro_qwords to ro_elems +- use unsigned arguments for subpage_mmio_ro_remove_page() +- use volatile for __iomem +- do not set mmio_ro_ctxt.mfn for mmcfg case +- comment where fields of mmio_ro_ctxt are used +- use bool for result of __test_and_set_bit +- do not open-code mfn_to_maddr() +- remove leftover RCU +- mention hvm_hap_nested_page_fault() explicitly in the commit message +Changes in v3: +- use unsigned int for loop iterators +- use __set_bit/__clear_bit when under spinlock +- avoid ioremap() under spinlock +- do not cast away const +- handle unaligned parameters in release build +- comment fixes +- remove RCU - the add functions are __init and actual usage is only + much later after domains are running +- add checks overlapping ranges in debug build and document the + limitations +- change subpage_mmio_ro_add() so the error path doesn't potentially + remove pages from mmio_ro_ranges +- move printing message to avoid one goto in + subpage_mmio_write_emulate() +Changes in v2: +- Simplify subpage_mmio_ro_add() parameters +- add to mmio_ro_ranges from within subpage_mmio_ro_add() +- use ioremap() instead of caller-provided fixmap +- use 8-bytes granularity (largest supported single write) and a bitmap + instead of a rangeset +- clarify commit message +- change how it's plugged in for HVM domain, to not change the behavior for + read-only parts (keep it hitting domain_crash(), instead of ignoring + write) +- remove unused subpage_mmio_ro_remove() +--- + xen/arch/x86/hvm/emulate.c | 2 +- + xen/arch/x86/hvm/hvm.c | 4 +- + xen/arch/x86/include/asm/mm.h | 23 +++ + xen/arch/x86/mm.c | 262 ++++++++++++++++++++++++++++++++ + xen/arch/x86/pv/ro-page-fault.c | 6 +- + 5 files changed, 292 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c +index 27928dc3f3cb..296dfc4a3479 100644 +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -2739,7 +2739,7 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla) + .write = mmio_ro_emulated_write, + .validate = hvmemul_validate, + }; +- struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla }; ++ struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla, .mfn = _mfn(mfn) }; + struct hvm_emulate_ctxt ctxt; + const struct x86_emulate_ops *ops; + unsigned int seg, bdf; +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index a51d4c16cee8..02d1b3a3adcc 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1973,8 +1973,8 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, + goto out_put_gfn; + } + +- if ( (p2mt == p2m_mmio_direct) && is_hardware_domain(currd) && +- npfec.write_access && npfec.present && ++ if ( (p2mt == p2m_mmio_direct) && npfec.write_access && npfec.present && ++ (is_hardware_domain(currd) || subpage_mmio_write_accept(mfn, gla)) && + (hvm_emulate_one_mmio(mfn_x(mfn), gla) == X86EMUL_OKAY) ) + { + rc = 1; +diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h +index 5845b729c3f7..79a2b7f0d339 100644 +--- a/xen/arch/x86/include/asm/mm.h ++++ 
b/xen/arch/x86/include/asm/mm.h +@@ -518,9 +518,32 @@ extern struct rangeset *mmio_ro_ranges; + void memguard_guard_stack(void *p); + void memguard_unguard_stack(void *p); + ++/* ++ * Add more precise r/o marking for a MMIO page. Range specified here ++ * will still be R/O, but the rest of the page (not marked as R/O via another ++ * call) will have writes passed through. ++ * The start address and the size must be aligned to MMIO_RO_SUBPAGE_GRAN. ++ * ++ * This API cannot be used for overlapping ranges, nor for pages already added ++ * to mmio_ro_ranges separately. ++ * ++ * Since there is currently no subpage_mmio_ro_remove(), relevant device should ++ * not be hot-unplugged. ++ * ++ * Return values: ++ * - negative: error ++ * - 0: success ++ */ ++#define MMIO_RO_SUBPAGE_GRAN 8 ++int subpage_mmio_ro_add(paddr_t start, size_t size); ++bool subpage_mmio_write_accept(mfn_t mfn, unsigned long gla); ++ + struct mmio_ro_emulate_ctxt { + unsigned long cr2; ++ /* Used only for mmcfg case */ + unsigned int seg, bdf; ++ /* Used only for non-mmcfg case */ ++ mfn_t mfn; + }; + + int cf_check mmio_ro_emulated_write( +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 30ef59fbc5cd..8545ebed119d 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -168,6 +168,17 @@ bool __read_mostly machine_to_phys_mapping_valid; + + struct rangeset *__read_mostly mmio_ro_ranges; + ++/* Handling sub-page read-only MMIO regions */ ++struct subpage_ro_range { ++ struct list_head list; ++ mfn_t mfn; ++ void __iomem *mapped; ++ DECLARE_BITMAP(ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN); ++}; ++ ++static LIST_HEAD_RO_AFTER_INIT(subpage_ro_ranges); ++static DEFINE_SPINLOCK(subpage_ro_lock); ++ + static uint32_t base_disallow_mask; + /* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */ + #define L1_DISALLOW_MASK ((base_disallow_mask | _PAGE_GNTTAB) & ~_PAGE_GLOBAL) +@@ -4975,6 +4986,254 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) + return 0; + } + ++static void __iomem *subpage_mmio_find_page(mfn_t mfn) ++{ ++ struct subpage_ro_range *entry; ++ ++ list_for_each_entry(entry, &subpage_ro_ranges, list) ++ if ( mfn_eq(entry->mfn, mfn) ) ++ return entry; ++ ++ return NULL; ++} ++ ++/* ++ * Mark part of the page as R/O. ++ * Returns: ++ * - 0 on success - first range in the page ++ * - 1 on success - subsequent range in the page ++ * - <0 on error ++ */ ++static int __init subpage_mmio_ro_add_page( ++ mfn_t mfn, ++ unsigned int offset_s, ++ unsigned int offset_e) ++{ ++ struct subpage_ro_range *entry = NULL, *iter; ++ unsigned int i; ++ ++ entry = subpage_mmio_find_page(mfn); ++ if ( !entry ) ++ { ++ /* iter == NULL marks it was a newly allocated entry */ ++ iter = NULL; ++ entry = xzalloc(struct subpage_ro_range); ++ if ( !entry ) ++ return -ENOMEM; ++ entry->mfn = mfn; ++ } ++ ++ for ( i = offset_s; i <= offset_e; i += MMIO_RO_SUBPAGE_GRAN ) ++ { ++ bool oldbit = __test_and_set_bit(i / MMIO_RO_SUBPAGE_GRAN, ++ entry->ro_elems); ++ ASSERT(!oldbit); ++ } ++ ++ if ( !iter ) ++ list_add(&entry->list, &subpage_ro_ranges); ++ ++ return iter ? 
1 : 0; ++} ++ ++static void __init subpage_mmio_ro_remove_page( ++ mfn_t mfn, ++ unsigned int offset_s, ++ unsigned int offset_e) ++{ ++ struct subpage_ro_range *entry = NULL; ++ unsigned int i; ++ ++ entry = subpage_mmio_find_page(mfn); ++ if ( !entry ) ++ return; ++ ++ for ( i = offset_s; i <= offset_e; i += MMIO_RO_SUBPAGE_GRAN ) ++ __clear_bit(i / MMIO_RO_SUBPAGE_GRAN, entry->ro_elems); ++ ++ if ( !bitmap_empty(entry->ro_elems, PAGE_SIZE / MMIO_RO_SUBPAGE_GRAN) ) ++ return; ++ ++ list_del(&entry->list); ++ if ( entry->mapped ) ++ iounmap(entry->mapped); ++ xfree(entry); ++} ++ ++int __init subpage_mmio_ro_add( ++ paddr_t start, ++ size_t size) ++{ ++ mfn_t mfn_start = maddr_to_mfn(start); ++ paddr_t end = start + size - 1; ++ mfn_t mfn_end = maddr_to_mfn(end); ++ unsigned int offset_end = 0; ++ int rc; ++ bool subpage_start, subpage_end; ++ ++ ASSERT(IS_ALIGNED(start, MMIO_RO_SUBPAGE_GRAN)); ++ ASSERT(IS_ALIGNED(size, MMIO_RO_SUBPAGE_GRAN)); ++ if ( !IS_ALIGNED(size, MMIO_RO_SUBPAGE_GRAN) ) ++ return -EINVAL; ++ ++ if ( !size ) ++ return 0; ++ ++ if ( mfn_eq(mfn_start, mfn_end) ) ++ { ++ /* Both starting and ending parts handled at once */ ++ subpage_start = PAGE_OFFSET(start) || PAGE_OFFSET(end) != PAGE_SIZE - 1; ++ subpage_end = false; ++ } ++ else ++ { ++ subpage_start = PAGE_OFFSET(start); ++ subpage_end = PAGE_OFFSET(end) != PAGE_SIZE - 1; ++ } ++ ++ if ( subpage_start ) ++ { ++ offset_end = mfn_eq(mfn_start, mfn_end) ? ++ PAGE_OFFSET(end) : ++ (PAGE_SIZE - 1); ++ rc = subpage_mmio_ro_add_page(mfn_start, ++ PAGE_OFFSET(start), ++ offset_end); ++ if ( rc < 0 ) ++ goto err_unlock; ++ /* Check if not marking R/W part of a page intended to be fully R/O */ ++ ASSERT(rc || !rangeset_contains_singleton(mmio_ro_ranges, ++ mfn_x(mfn_start))); ++ } ++ ++ if ( subpage_end ) ++ { ++ rc = subpage_mmio_ro_add_page(mfn_end, 0, PAGE_OFFSET(end)); ++ if ( rc < 0 ) ++ goto err_unlock_remove; ++ /* Check if not marking R/W part of a page intended to be fully R/O */ ++ ASSERT(rc || !rangeset_contains_singleton(mmio_ro_ranges, ++ mfn_x(mfn_end))); ++ } ++ ++ rc = rangeset_add_range(mmio_ro_ranges, mfn_x(mfn_start), mfn_x(mfn_end)); ++ if ( rc ) ++ goto err_remove; ++ ++ return 0; ++ ++ err_remove: ++ if ( subpage_end ) ++ subpage_mmio_ro_remove_page(mfn_end, 0, PAGE_OFFSET(end)); ++ err_unlock_remove: ++ if ( subpage_start ) ++ subpage_mmio_ro_remove_page(mfn_start, PAGE_OFFSET(start), offset_end); ++ err_unlock: ++ return rc; ++} ++ ++static void __iomem *subpage_mmio_map_page( ++ struct subpage_ro_range *entry) ++{ ++ void __iomem *mapped_page; ++ ++ if ( entry->mapped ) ++ return entry->mapped; ++ ++ mapped_page = ioremap(mfn_to_maddr(entry->mfn), PAGE_SIZE); ++ ++ spin_lock(&subpage_ro_lock); ++ /* Re-check under the lock */ ++ if ( entry->mapped ) ++ { ++ spin_unlock(&subpage_ro_lock); ++ if ( mapped_page ) ++ iounmap(mapped_page); ++ return entry->mapped; ++ } ++ ++ entry->mapped = mapped_page; ++ spin_unlock(&subpage_ro_lock); ++ return entry->mapped; ++} ++ ++static void subpage_mmio_write_emulate( ++ mfn_t mfn, ++ unsigned int offset, ++ const void *data, ++ unsigned int len) ++{ ++ struct subpage_ro_range *entry; ++ volatile void __iomem *addr; ++ ++ entry = subpage_mmio_find_page(mfn); ++ if ( !entry ) ++ /* Do not print message for pages without any writable parts. 
*/ ++ return; ++ ++ if ( test_bit(offset / MMIO_RO_SUBPAGE_GRAN, entry->ro_elems) ) ++ { ++write_ignored: ++ gprintk(XENLOG_WARNING, ++ "ignoring write to R/O MMIO 0x%"PRI_mfn"%03x len %u\n", ++ mfn_x(mfn), offset, len); ++ return; ++ } ++ ++ addr = subpage_mmio_map_page(entry); ++ if ( !addr ) ++ { ++ gprintk(XENLOG_ERR, ++ "Failed to map page for MMIO write at 0x%"PRI_mfn"%03x\n", ++ mfn_x(mfn), offset); ++ return; ++ } ++ ++ switch ( len ) ++ { ++ case 1: ++ writeb(*(const uint8_t*)data, addr); ++ break; ++ case 2: ++ writew(*(const uint16_t*)data, addr); ++ break; ++ case 4: ++ writel(*(const uint32_t*)data, addr); ++ break; ++ case 8: ++ writeq(*(const uint64_t*)data, addr); ++ break; ++ default: ++ /* mmio_ro_emulated_write() already validated the size */ ++ ASSERT_UNREACHABLE(); ++ goto write_ignored; ++ } ++} ++ ++#ifdef CONFIG_HVM ++bool subpage_mmio_write_accept(mfn_t mfn, unsigned long gla) ++{ ++ unsigned int offset = PAGE_OFFSET(gla); ++ const struct subpage_ro_range *entry; ++ ++ entry = subpage_mmio_find_page(mfn); ++ if ( !entry ) ++ return false; ++ ++ if ( !test_bit(offset / MMIO_RO_SUBPAGE_GRAN, entry->ro_elems) ) ++ { ++ /* ++ * We don't know the write size at this point yet, so it could be ++ * an unaligned write, but accept it here anyway and deal with it ++ * later. ++ */ ++ return true; ++ } ++ ++ return false; ++} ++#endif ++ + int cf_check mmio_ro_emulated_write( + enum x86_segment seg, + unsigned long offset, +@@ -4993,6 +5252,9 @@ int cf_check mmio_ro_emulated_write( + return X86EMUL_UNHANDLEABLE; + } + ++ subpage_mmio_write_emulate(mmio_ro_ctxt->mfn, PAGE_OFFSET(offset), ++ p_data, bytes); ++ + return X86EMUL_OKAY; + } + +diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c +index f23ad5d184ea..367cbc7a04c5 100644 +--- a/xen/arch/x86/pv/ro-page-fault.c ++++ b/xen/arch/x86/pv/ro-page-fault.c +@@ -345,8 +345,10 @@ static int mmio_ro_do_page_fault(struct x86_emulate_ctxt *ctxt, + ctxt->data = &mmio_ro_ctxt; + if ( pci_ro_mmcfg_decode(mfn_x(mfn), &mmio_ro_ctxt.seg, &mmio_ro_ctxt.bdf) ) + return x86_emulate(ctxt, &mmcfg_intercept_ops); +- else +- return x86_emulate(ctxt, &mmio_ro_emulate_ops); ++ ++ mmio_ro_ctxt.mfn = mfn; ++ ++ return x86_emulate(ctxt, &mmio_ro_emulate_ops); + } + + int pv_ro_page_fault(unsigned long addr, struct cpu_user_regs *regs) +-- +2.45.2 + diff --git a/0319-drivers-char-Use-sub-page-ro-API-to-make-just-xhci-d.patch b/0319-drivers-char-Use-sub-page-ro-API-to-make-just-xhci-d.patch new file mode 100644 index 00000000..15cecfd5 --- /dev/null +++ b/0319-drivers-char-Use-sub-page-ro-API-to-make-just-xhci-d.patch @@ -0,0 +1,89 @@ +From 5015db14c2076ea7617d3bcdc5f7ac81953f6df8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + +Date: Fri, 24 Mar 2023 18:24:41 +0100 +Subject: [PATCH] drivers/char: Use sub-page ro API to make just xhci dbc cap + RO +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Not the whole page, which may contain other registers too. The XHCI +specification describes DbC as designed to be controlled by a different +driver, but does not mandate placing registers on a separate page. In fact +on Tiger Lake and newer (at least), this page do contain other registers +that Linux tries to use. And with share=yes, a domU would use them too. +Without this patch, PV dom0 would fail to initialize the controller, +while HVM would be killed on EPT violation. 
+ +With `share=yes`, this patch gives domU more access to the emulator +(although a HVM with any emulated device already has plenty of it). This +configuration is already documented as unsafe with untrusted guests and +not security supported. + +Signed-off-by: Marek Marczykowski-Górecki +--- +Changes in v4: +- restore mmio_ro_ranges in the fallback case +- set XHCI_SHARE_NONE in the fallback case +Changes in v3: +- indentation fix +- remove stale comment +- fallback to pci_ro_device() if subpage_mmio_ro_add() fails +- extend commit message +Changes in v2: + - adjust for simplified subpage_mmio_ro_add() API +--- + xen/drivers/char/xhci-dbc.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +diff --git a/xen/drivers/char/xhci-dbc.c b/xen/drivers/char/xhci-dbc.c +index c2a2129acf47..43ce96985c5d 100644 +--- a/xen/drivers/char/xhci-dbc.c ++++ b/xen/drivers/char/xhci-dbc.c +@@ -1215,20 +1215,28 @@ static void __init cf_check dbc_uart_init_postirq(struct serial_port *port) + break; + } + #ifdef CONFIG_X86 +- /* +- * This marks the whole page as R/O, which may include other registers +- * unrelated to DbC. Xen needs only DbC area protected, but it seems +- * Linux's XHCI driver (as of 5.18) works without writting to the whole +- * page, so keep it simple. +- */ +- if ( rangeset_add_range(mmio_ro_ranges, +- PFN_DOWN((uart->dbc.bar_val & PCI_BASE_ADDRESS_MEM_MASK) + +- uart->dbc.xhc_dbc_offset), +- PFN_UP((uart->dbc.bar_val & PCI_BASE_ADDRESS_MEM_MASK) + +- uart->dbc.xhc_dbc_offset + +- sizeof(*uart->dbc.dbc_reg)) - 1) ) +- printk(XENLOG_INFO +- "Error while adding MMIO range of device to mmio_ro_ranges\n"); ++ if ( subpage_mmio_ro_add( ++ (uart->dbc.bar_val & PCI_BASE_ADDRESS_MEM_MASK) + ++ uart->dbc.xhc_dbc_offset, ++ sizeof(*uart->dbc.dbc_reg)) ) ++ { ++ printk(XENLOG_WARNING ++ "Error while marking MMIO range of XHCI console as R/O, " ++ "making the whole device R/O (share=no)\n"); ++ uart->dbc.share = XHCI_SHARE_NONE; ++ if ( pci_ro_device(0, uart->dbc.sbdf.bus, uart->dbc.sbdf.devfn) ) ++ printk(XENLOG_WARNING ++ "Failed to mark read-only %pp used for XHCI console\n", ++ &uart->dbc.sbdf); ++ if ( rangeset_add_range(mmio_ro_ranges, ++ PFN_DOWN((uart->dbc.bar_val & PCI_BASE_ADDRESS_MEM_MASK) + ++ uart->dbc.xhc_dbc_offset), ++ PFN_UP((uart->dbc.bar_val & PCI_BASE_ADDRESS_MEM_MASK) + ++ uart->dbc.xhc_dbc_offset + ++ sizeof(*uart->dbc.dbc_reg)) - 1) ) ++ printk(XENLOG_INFO ++ "Error while adding MMIO range of device to mmio_ro_ranges\n"); ++ } + #endif + } + +-- +2.45.2 + diff --git a/xen.spec.in b/xen.spec.in index 8578768b..8c7df048 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -106,6 +106,9 @@ Patch0309: 0309-x86-Derive-XEN_MSR_PAT-from-its-individual-entries.patch Patch0314: 0314-drivers-char-support-up-to-1M-BAR0-of-xhci.patch Patch0315: 0315-IOMMU-store-name-for-extra-reserved-device-memory.patch Patch0316: 0316-drivers-char-mark-extra-reserved-device-memory-in-me.patch +Patch0317: 0317-xen-list-add-LIST_HEAD_RO_AFTER_INIT.patch +Patch0318: 0318-x86-mm-add-API-for-marking-only-part-of-a-MMIO-page-.patch +Patch0319: 0319-drivers-char-Use-sub-page-ro-API-to-make-just-xhci-d.patch # Security fixes Patch0500: 0500-xsa458.patch From d282d67401035105f33ccb0daf3f6fbcb2a2c69b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Sat, 20 Jul 2024 04:10:20 +0200 Subject: [PATCH 62/64] version 4.17.4-5 --- rel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel b/rel index b8626c4c..7ed6ff82 100644 --- a/rel 
+++ b/rel @@ -1 +1 @@ -4 +5 From cd375515cd955cd878672a4ff7decba32ed4754d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20=C5=BByjewski?= Date: Mon, 13 May 2024 00:27:20 +0300 Subject: [PATCH 63/64] xen.spec.in: update using variables in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tomasz Żyjewski --- xen.spec.in | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/xen.spec.in b/xen.spec.in index 8c7df048..6ea368bb 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -826,11 +826,11 @@ fi # BEGIN QUBES SPECIFIC PART # Guest autostart links -#%dir %attr(0700,root,root) %{_sysconfdir}/%{name}/auto +#%dir %attr(0700,root,root) %%{_sysconfdir}/%%{name}/auto # Autostart of guests -#%config(noreplace) %{_sysconfdir}/sysconfig/xendomains +#%config(noreplace) %%{_sysconfdir}/sysconfig/xendomains -#%{_unitdir}/xendomains.service +#%%{_unitdir}/xendomains.service # END QUBES SPECIFIC PART %files libs @@ -876,8 +876,8 @@ fi %{_unitdir}/xenconsoled.service %{_unitdir}/xen-watchdog.service # BEGIN QUBES SPECIFIC PART -#%{_unitdir}/xen-qemu-dom0-disk-backend.service -#%{_unitdir}/xendriverdomain.service +# %%{_unitdir}/xen-qemu-dom0-disk-backend.service +# %%{_unitdir}/xendriverdomain.service %{_unitdir}/xen-init-dom0.service %exclude %{_unitdir}/xendriverdomain.service # END QUBES SPECIFIC PART @@ -930,7 +930,7 @@ fi %{python3_sitearch}/xenfsimage*.so %{python3_sitearch}/grub # BEGIN QUBES SPECIFIC PART -# %{python3_sitearch}/pygrub-*.egg-info +# %%{python3_sitearch}/pygrub-*.egg-info # END QUBES SPECIFIC PART # The firmware @@ -965,18 +965,18 @@ fi # All xenstore CLI tools # BEGIN QUBES SPECIFIC PART -# %{_bindir}/qemu-*-xen +# %%{_bindir}/qemu-*-xen # END QUBES SPECIFIC PART %{_bindir}/xenstore %{_bindir}/xenstore-* # BEGIN QUBES SPECIFIC PART -#%{_bindir}/pygrub +# %%{_bindir}/pygrub # END QUBES SPECIFIC PART %{_bindir}/xentrace* -#%#{_bindir}/remus +# %%#{_bindir}/remus # XSM # BEGIN QUBES SPECIFIC PART -#%{_sbindir}/flask-* +# %%{_sbindir}/flask-* # END QUBES SPECIFIC PART # Misc stuff %ifnarch armv7hl aarch64 From 413d4c8714316e9656acca661962ceaedc44afd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20=C5=BByjewski?= Date: Mon, 13 May 2024 00:27:33 +0300 Subject: [PATCH 64/64] Add patches for TrenchBoot DRTM support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tomasz Żyjewski Signed-off-by: Sergii Dmytruk --- ...intel_txt.h-constants-and-accessors-.patch | 339 ++++++++ ...t-add-MLE-header-and-new-entry-point.patch | 108 +++ ...ly-add-early-TXT-tests-and-restore-M.patch | 305 ++++++++ ...table.h-Secure-Launch-Resource-Table.patch | 283 +++++++ 1304-xen-arch-x86-reserve-TXT-memory.patch | 240 ++++++ 1305-x86-intel_txt.c-restore-boot-MTRRs.patch | 120 +++ 1306-lib-sha1.c-add-file.patch | 308 ++++++++ ...or-early-hashing-and-extending-PCRs-.patch | 657 ++++++++++++++++ ...oot-choose-AP-stack-based-on-APIC-ID.patch | 166 ++++ 1309-x86-smpboot.c-TXT-AP-bringup.patch | 217 ++++++ ...ccess-x86_cpu_to_apicid-directly-use.patch | 265 +++++++ ...p-x86_cpu_to_apicid-use-cpu_data-cpu.patch | 156 ++++ ...-x86-smp-move-stack_base-to-cpu_data.patch | 222 ++++++ ...x86-smp-call-x2apic_ap_setup-earlier.patch | 48 ++ ...n-protect-against-recurrent-machine_.patch | 54 ++ ...ch-x86-smp-drop-booting_cpu-variable.patch | 121 +++ ...-arch-x86-smp-make-cpu_state-per-CPU.patch | 219 ++++++ ...ove-MONITOR-MWAIT-loop-for-TXT-AP-br.patch | 80 ++ 
...-t-send-INIT-SIPI-SIPI-if-AP-is-alre.patch | 82 ++ ...mp-start-APs-in-parallel-during-boot.patch | 128 +++ 1320-lib-sha256.c-add-file.patch | 305 ++++++++ ...m.c-support-extending-PCRs-of-TPM2.0.patch | 523 +++++++++++++ ...tpm.c-implement-event-log-for-TPM2.0.patch | 257 ++++++ 1323-arch-x86-process-DRTM-policy.patch | 282 +++++++ 1324-arch-x86-extract-slaunch-unit.patch | 731 ++++++++++++++++++ ...troduce-slaunch_slrt-global-variable.patch | 283 +++++++ 1326-x86-boot-find-MBI-and-SLRT-on-AMD.patch | 124 +++ ...neric-memory-mapping-and-protection-.patch | 174 +++++ ...-x86-support-slaunch-with-AMD-SKINIT.patch | 289 +++++++ xen.spec.in | 31 + 30 files changed, 7117 insertions(+) create mode 100644 1300-x86-include-asm-intel_txt.h-constants-and-accessors-.patch create mode 100644 1301-x86-boot-add-MLE-header-and-new-entry-point.patch create mode 100644 1302-x86-boot-txt_early-add-early-TXT-tests-and-restore-M.patch create mode 100644 1303-include-xen-slr_table.h-Secure-Launch-Resource-Table.patch create mode 100644 1304-xen-arch-x86-reserve-TXT-memory.patch create mode 100644 1305-x86-intel_txt.c-restore-boot-MTRRs.patch create mode 100644 1306-lib-sha1.c-add-file.patch create mode 100644 1307-x86-tpm.c-code-for-early-hashing-and-extending-PCRs-.patch create mode 100644 1308-x86-boot-choose-AP-stack-based-on-APIC-ID.patch create mode 100644 1309-x86-smpboot.c-TXT-AP-bringup.patch create mode 100644 1310-arch-x86-don-t-access-x86_cpu_to_apicid-directly-use.patch create mode 100644 1311-arch-x86-smp-drop-x86_cpu_to_apicid-use-cpu_data-cpu.patch create mode 100644 1312-arch-x86-smp-move-stack_base-to-cpu_data.patch create mode 100644 1313-arch-x86-smp-call-x2apic_ap_setup-earlier.patch create mode 100644 1314-arch-x86-shutdown-protect-against-recurrent-machine_.patch create mode 100644 1315-arch-x86-smp-drop-booting_cpu-variable.patch create mode 100644 1316-arch-x86-smp-make-cpu_state-per-CPU.patch create mode 100644 1317-arch-x86-smp-remove-MONITOR-MWAIT-loop-for-TXT-AP-br.patch create mode 100644 1318-arch-x86-smp-don-t-send-INIT-SIPI-SIPI-if-AP-is-alre.patch create mode 100644 1319-arch-x86-smp-start-APs-in-parallel-during-boot.patch create mode 100644 1320-lib-sha256.c-add-file.patch create mode 100644 1321-x86-tpm.c-support-extending-PCRs-of-TPM2.0.patch create mode 100644 1322-x86-tpm.c-implement-event-log-for-TPM2.0.patch create mode 100644 1323-arch-x86-process-DRTM-policy.patch create mode 100644 1324-arch-x86-extract-slaunch-unit.patch create mode 100644 1325-x86-boot-introduce-slaunch_slrt-global-variable.patch create mode 100644 1326-x86-boot-find-MBI-and-SLRT-on-AMD.patch create mode 100644 1327-arch-x86-move-generic-memory-mapping-and-protection-.patch create mode 100644 1328-arch-x86-support-slaunch-with-AMD-SKINIT.patch diff --git a/1300-x86-include-asm-intel_txt.h-constants-and-accessors-.patch b/1300-x86-include-asm-intel_txt.h-constants-and-accessors-.patch new file mode 100644 index 00000000..2ef46d90 --- /dev/null +++ b/1300-x86-include-asm-intel_txt.h-constants-and-accessors-.patch @@ -0,0 +1,339 @@ +From 3a32ed82780392fabd68ef8421f750579cbf457a Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Mon, 17 Apr 2023 20:10:13 +0200 +Subject: [PATCH 1300/1328] x86/include/asm/intel_txt.h: constants and + accessors for TXT registers and heap + +The file contains TXT register spaces base address, registers offsets, +error codes and inline functions for accessing structures stored on +TXT heap. 
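+
+As a hedged sketch of what the register accessors boil down to
+(illustration only; the actual helpers are in the header below), reading
+a TXT register is a volatile access at a fixed physical offset:
+
+    /* e.g. fetch the TXT error code from the public register space */
+    uint64_t err = *(volatile uint64_t *)
+        _txt(TXT_PUB_CONFIG_REGS_BASE + TXTCR_ERRORCODE);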
+ +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/include/asm/intel_txt.h | 267 +++++++++++++++++++++++++++ + xen/arch/x86/tboot.c | 20 +- + 2 files changed, 269 insertions(+), 18 deletions(-) + create mode 100644 xen/arch/x86/include/asm/intel_txt.h + +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +new file mode 100644 +index 0000000000..865161cf93 +--- /dev/null ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -0,0 +1,267 @@ ++/* ++ * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) ++ */ ++#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 ++#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 ++ ++/* The same set of registers is exposed twice (with different permissions) and ++ * they are allocated continuously with page alignment. */ ++#define NR_TXT_CONFIG_SIZE \ ++ (TXT_PUB_CONFIG_REGS_BASE - TXT_PRIV_CONFIG_REGS_BASE) ++ ++/* Offsets from pub/priv config space. */ ++#define TXTCR_STS 0x0000 ++#define TXTCR_ESTS 0x0008 ++#define TXTCR_ERRORCODE 0x0030 ++#define TXTCR_CMD_RESET 0x0038 ++#define TXTCR_CMD_CLOSE_PRIVATE 0x0048 ++#define TXTCR_DIDVID 0x0110 ++#define TXTCR_VER_EMIF 0x0200 ++#define TXTCR_CMD_UNLOCK_MEM_CONFIG 0x0218 ++#define TXTCR_SINIT_BASE 0x0270 ++#define TXTCR_SINIT_SIZE 0x0278 ++#define TXTCR_MLE_JOIN 0x0290 ++#define TXTCR_HEAP_BASE 0x0300 ++#define TXTCR_HEAP_SIZE 0x0308 ++#define TXTCR_SCRATCHPAD 0x0378 ++#define TXTCR_CMD_OPEN_LOCALITY1 0x0380 ++#define TXTCR_CMD_CLOSE_LOCALITY1 0x0388 ++#define TXTCR_CMD_OPEN_LOCALITY2 0x0390 ++#define TXTCR_CMD_CLOSE_LOCALITY2 0x0398 ++#define TXTCR_CMD_SECRETS 0x08e0 ++#define TXTCR_CMD_NO_SECRETS 0x08e8 ++#define TXTCR_E2STS 0x08f0 ++ ++/* ++ * Secure Launch Defined Error Codes used in MLE-initiated TXT resets. 
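++ * On a fatal error, the code below writes one of these values to
++ * TXTCR_ERRORCODE and then requests a platform reset (see txt_reset()),
++ * so the failure reason can be recovered afterwards.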
++ * ++ * TXT Specification ++ * Appendix I ACM Error Codes ++ */ ++#define SLAUNCH_ERROR_GENERIC 0xc0008001 ++#define SLAUNCH_ERROR_TPM_INIT 0xc0008002 ++#define SLAUNCH_ERROR_TPM_INVALID_LOG20 0xc0008003 ++#define SLAUNCH_ERROR_TPM_LOGGING_FAILED 0xc0008004 ++#define SLAUNCH_ERROR_REGION_STRADDLE_4GB 0xc0008005 ++#define SLAUNCH_ERROR_TPM_EXTEND 0xc0008006 ++#define SLAUNCH_ERROR_MTRR_INV_VCNT 0xc0008007 ++#define SLAUNCH_ERROR_MTRR_INV_DEF_TYPE 0xc0008008 ++#define SLAUNCH_ERROR_MTRR_INV_BASE 0xc0008009 ++#define SLAUNCH_ERROR_MTRR_INV_MASK 0xc000800a ++#define SLAUNCH_ERROR_MSR_INV_MISC_EN 0xc000800b ++#define SLAUNCH_ERROR_INV_AP_INTERRUPT 0xc000800c ++#define SLAUNCH_ERROR_INTEGER_OVERFLOW 0xc000800d ++#define SLAUNCH_ERROR_HEAP_WALK 0xc000800e ++#define SLAUNCH_ERROR_HEAP_MAP 0xc000800f ++#define SLAUNCH_ERROR_REGION_ABOVE_4GB 0xc0008010 ++#define SLAUNCH_ERROR_HEAP_INVALID_DMAR 0xc0008011 ++#define SLAUNCH_ERROR_HEAP_DMAR_SIZE 0xc0008012 ++#define SLAUNCH_ERROR_HEAP_DMAR_MAP 0xc0008013 ++#define SLAUNCH_ERROR_HI_PMR_BASE 0xc0008014 ++#define SLAUNCH_ERROR_HI_PMR_SIZE 0xc0008015 ++#define SLAUNCH_ERROR_LO_PMR_BASE 0xc0008016 ++#define SLAUNCH_ERROR_LO_PMR_SIZE 0xc0008017 ++#define SLAUNCH_ERROR_LO_PMR_MLE 0xc0008018 ++#define SLAUNCH_ERROR_INITRD_TOO_BIG 0xc0008019 ++#define SLAUNCH_ERROR_HEAP_ZERO_OFFSET 0xc000801a ++#define SLAUNCH_ERROR_WAKE_BLOCK_TOO_SMALL 0xc000801b ++#define SLAUNCH_ERROR_MLE_BUFFER_OVERLAP 0xc000801c ++#define SLAUNCH_ERROR_BUFFER_BEYOND_PMR 0xc000801d ++#define SLAUNCH_ERROR_OS_SINIT_BAD_VERSION 0xc000801e ++#define SLAUNCH_ERROR_EVENTLOG_MAP 0xc000801f ++#define SLAUNCH_ERROR_TPM_NUMBER_ALGS 0xc0008020 ++#define SLAUNCH_ERROR_TPM_UNKNOWN_DIGEST 0xc0008021 ++#define SLAUNCH_ERROR_TPM_INVALID_EVENT 0xc0008022 ++ ++#define SLAUNCH_BOOTLOADER_MAGIC 0x4c534254 ++ ++#ifndef __ASSEMBLY__ ++ ++/* We need to differentiate between pre- and post paging enabled. */ ++#ifdef __BOOT_DEFS_H__ ++#define _txt(x) _p(x) ++#else ++#include ++#include // __va() ++#define _txt(x) __va(x) ++#endif ++ ++/* ++ * Always use private space as some of registers are either read-only or not ++ * present in public space. 
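++ * (The private space is expected to be accessible here: Xen runs as the
++ * MLE, after GETSEC[SENTER] and before TXTCR_CMD_CLOSE_PRIVATE is
++ * written.)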
++ */ ++static inline uint64_t read_txt_reg(int reg_no) ++{ ++ volatile uint64_t *reg = _txt(TXT_PRIV_CONFIG_REGS_BASE + reg_no); ++ return *reg; ++} ++ ++static inline void write_txt_reg(int reg_no, uint64_t val) ++{ ++ volatile uint64_t *reg = _txt(TXT_PRIV_CONFIG_REGS_BASE + reg_no); ++ *reg = val; ++ /* This serves as TXT register barrier */ ++ (void)read_txt_reg(TXTCR_ESTS); ++} ++ ++static inline void txt_reset(uint32_t error) ++{ ++ write_txt_reg(TXTCR_ERRORCODE, error); ++ write_txt_reg(TXTCR_CMD_NO_SECRETS, 1); ++ write_txt_reg(TXTCR_CMD_UNLOCK_MEM_CONFIG, 1); ++ write_txt_reg(TXTCR_CMD_RESET, 1); ++ while (1); ++} ++ ++/* ++ * Secure Launch defined OS/MLE TXT Heap table ++ */ ++struct txt_os_mle_data { ++ uint32_t version; ++ uint32_t boot_params_addr; ++ uint32_t slrt; ++ uint32_t txt_info; ++ uint32_t ap_wake_block; ++ uint32_t ap_wake_block_size; ++ uint8_t mle_scratch[64]; ++} __packed; ++ ++/* ++ * TXT specification defined BIOS data TXT Heap table ++ */ ++struct txt_bios_data { ++ uint32_t version; /* Currently 5 for TPM 1.2 and 6 for TPM 2.0 */ ++ uint32_t bios_sinit_size; ++ uint64_t reserved1; ++ uint64_t reserved2; ++ uint32_t num_logical_procs; ++ /* Versions >= 3 && < 5 */ ++ uint32_t sinit_flags; ++ /* Versions >= 5 with updates in version 6 */ ++ uint32_t mle_flags; ++ /* Versions >= 4 */ ++ /* Ext Data Elements */ ++} __packed; ++ ++/* ++ * TXT specification defined OS/SINIT TXT Heap table ++ */ ++struct txt_os_sinit_data { ++ uint32_t version; /* Currently 6 for TPM 1.2 and 7 for TPM 2.0 */ ++ uint32_t flags; /* Reserved in version 6 */ ++ uint64_t mle_ptab; ++ uint64_t mle_size; ++ uint64_t mle_hdr_base; ++ uint64_t vtd_pmr_lo_base; ++ uint64_t vtd_pmr_lo_size; ++ uint64_t vtd_pmr_hi_base; ++ uint64_t vtd_pmr_hi_size; ++ uint64_t lcp_po_base; ++ uint64_t lcp_po_size; ++ uint32_t capabilities; ++ /* Version = 5 */ ++ uint64_t efi_rsdt_ptr; /* RSD*P* in versions >= 6 */ ++ /* Versions >= 6 */ ++ /* Ext Data Elements */ ++} __packed; ++ ++/* ++ * TXT specification defined SINIT/MLE TXT Heap table ++ */ ++struct txt_sinit_mle_data { ++ uint32_t version; /* Current values are 6 through 9 */ ++ /* Versions <= 8, fields until lcp_policy_control must be 0 for >= 9 */ ++ uint8_t bios_acm_id[20]; ++ uint32_t edx_senter_flags; ++ uint64_t mseg_valid; ++ uint8_t sinit_hash[20]; ++ uint8_t mle_hash[20]; ++ uint8_t stm_hash[20]; ++ uint8_t lcp_policy_hash[20]; ++ uint32_t lcp_policy_control; ++ /* Versions >= 7 */ ++ uint32_t rlp_wakeup_addr; ++ uint32_t reserved; ++ uint32_t num_of_sinit_mdrs; ++ uint32_t sinit_mdrs_table_offset; ++ uint32_t sinit_vtd_dmar_table_size; ++ uint32_t sinit_vtd_dmar_table_offset; ++ /* Versions >= 8 */ ++ uint32_t processor_scrtm_status; ++ /* Versions >= 9 */ ++ /* Ext Data Elements */ ++} __packed; ++ ++/* ++ * Functions to extract data from the Intel TXT Heap Memory. 
The layout ++ * of the heap is as follows: ++ * +------------------------------------+ ++ * | Size of Bios Data table (uint64_t) | ++ * +------------------------------------+ ++ * | Bios Data table | ++ * +------------------------------------+ ++ * | Size of OS MLE table (uint64_t) | ++ * +------------------------------------+ ++ * | OS MLE table | ++ * +-------------------------------- + ++ * | Size of OS SINIT table (uint64_t) | ++ * +------------------------------------+ ++ * | OS SINIT table | ++ * +------------------------------------+ ++ * | Size of SINIT MLE table (uint64_t) | ++ * +------------------------------------+ ++ * | SINIT MLE table | ++ * +------------------------------------+ ++ * ++ * NOTE: the table size fields include the 8 byte size field itself. ++ */ ++static inline uint64_t txt_bios_data_size(void *heap) ++{ ++ return *((uint64_t *)heap) - sizeof(uint64_t); ++} ++ ++static inline void *txt_bios_data_start(void *heap) ++{ ++ return heap + sizeof(uint64_t); ++} ++ ++static inline uint64_t txt_os_mle_data_size(void *heap) ++{ ++ return *((uint64_t *)(txt_bios_data_start(heap) + ++ txt_bios_data_size(heap))) - ++ sizeof(uint64_t); ++} ++ ++static inline void *txt_os_mle_data_start(void *heap) ++{ ++ return txt_bios_data_start(heap) + txt_bios_data_size(heap) + ++ sizeof(uint64_t); ++} ++ ++static inline uint64_t txt_os_sinit_data_size(void *heap) ++{ ++ return *((uint64_t *)(txt_os_mle_data_start(heap) + ++ txt_os_mle_data_size(heap))) - ++ sizeof(uint64_t); ++} ++ ++static inline void *txt_os_sinit_data_start(void *heap) ++{ ++ return txt_os_mle_data_start(heap) + txt_os_mle_data_size(heap) + ++ sizeof(uint64_t); ++} ++ ++static inline uint64_t txt_sinit_mle_data_size(void *heap) ++{ ++ return *((uint64_t *)(txt_os_sinit_data_start(heap) + ++ txt_os_sinit_data_size(heap))) - ++ sizeof(uint64_t); ++} ++ ++static inline void *txt_sinit_mle_data_start(void *heap) ++{ ++ return txt_os_sinit_data_start(heap) + txt_os_sinit_data_size(heap) + ++ sizeof(uint64_t); ++} ++ ++#endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c +index a2e9e97ed7..ffd1126dfc 100644 +--- a/xen/arch/x86/tboot.c ++++ b/xen/arch/x86/tboot.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + + /* tboot= */ +@@ -31,23 +32,6 @@ static vmac_t frametable_mac; /* MAC for frame table during S3 */ + static uint64_t __initdata txt_heap_base, __initdata txt_heap_size; + static uint64_t __initdata sinit_base, __initdata sinit_size; + +-/* +- * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) +- */ +- +-#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 +-#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 +- +-/* # pages for each config regs space - used by fixmap */ +-#define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \ +- TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT) +- +-/* offsets from pub/priv config space */ +-#define TXTCR_SINIT_BASE 0x0270 +-#define TXTCR_SINIT_SIZE 0x0278 +-#define TXTCR_HEAP_BASE 0x0300 +-#define TXTCR_HEAP_SIZE 0x0308 +- + #define SHA1_SIZE 20 + typedef uint8_t sha1_hash_t[SHA1_SIZE]; + +@@ -452,7 +436,7 @@ int __init tboot_protect_mem_regions(void) + + /* TXT Private Space */ + rc = e820_change_range_type(&e820, TXT_PRIV_CONFIG_REGS_BASE, +- TXT_PRIV_CONFIG_REGS_BASE + NR_TXT_CONFIG_PAGES * PAGE_SIZE, ++ TXT_PRIV_CONFIG_REGS_BASE + NR_TXT_CONFIG_SIZE, + E820_RESERVED, E820_UNUSABLE); + if ( !rc ) + return 0; +-- +2.46.0 + diff --git a/1301-x86-boot-add-MLE-header-and-new-entry-point.patch 
b/1301-x86-boot-add-MLE-header-and-new-entry-point.patch new file mode 100644 index 00000000..67d5664a --- /dev/null +++ b/1301-x86-boot-add-MLE-header-and-new-entry-point.patch @@ -0,0 +1,108 @@ +From aa2c99c6b62661866cc823b9f4c78bd1bd6cce43 Mon Sep 17 00:00:00 2001 +From: Kacper Stojek +Date: Wed, 31 Aug 2022 15:03:51 +0200 +Subject: [PATCH 1301/1328] x86/boot: add MLE header and new entry point + +MLE header is used with Intel TXT, together with MB2 headers. +Entrypoint is different, but it is used just to differentiate +from other entries by moving a magic number to EAX. Execution +environment is similar to that of Multiboot 2 and code falls +through to MB2's entry point. + +Signed-off-by: Kacper Stojek +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + docs/hypervisor-guide/x86/how-xen-boots.rst | 5 +++ + xen/arch/x86/boot/head.S | 45 +++++++++++++++++++++ + 2 files changed, 50 insertions(+) + +diff --git a/docs/hypervisor-guide/x86/how-xen-boots.rst b/docs/hypervisor-guide/x86/how-xen-boots.rst +index ca77d7c8a3..eb60a1cd80 100644 +--- a/docs/hypervisor-guide/x86/how-xen-boots.rst ++++ b/docs/hypervisor-guide/x86/how-xen-boots.rst +@@ -55,6 +55,11 @@ If ``CONFIG_PVH_GUEST`` was selected at build time, an Elf note is included + which indicates the ability to use the PVH boot protocol, and registers + ``__pvh_start`` as the entrypoint, entered in 32bit mode. + ++MLE header is used with Intel TXT, together with MB2 headers. Entrypoint is ++different, but it is used just to differentiate from other entries by moving ++a magic number to EAX. Execution environment is similar to that of Multiboot 2 ++and code falls through to ``start``. ++ + + xen.gz + ~~~~~~ +diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S +index 6bc64c9e86..1f7ce5f6ae 100644 +--- a/xen/arch/x86/boot/head.S ++++ b/xen/arch/x86/boot/head.S +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -114,6 +115,25 @@ multiboot2_header: + .size multiboot2_header, . - multiboot2_header + .type multiboot2_header, @object + ++ .balign 16 ++mle_header: ++ .long 0x9082ac5a /* UUID0 */ ++ .long 0x74a7476f /* UUID1 */ ++ .long 0xa2555c0f /* UUID2 */ ++ .long 0x42b651cb /* UUID3 */ ++ .long 0x00000034 /* MLE header size */ ++ .long 0x00020002 /* MLE version 2.2 */ ++ .long (slaunch_stub_entry - start) /* Linear entry point of MLE (SINIT virt. address) */ ++ .long 0x00000000 /* First valid page of MLE */ ++ .long 0x00000000 /* Offset within binary of first byte of MLE */ ++ .long (_end - start) /* Offset within binary of last byte + 1 of MLE */ ++ .long 0x00000223 /* Bit vector of MLE-supported capabilities */ ++ .long 0x00000000 /* Starting linear address of command line (unused) */ ++ .long 0x00000000 /* Ending linear address of command line (unused) */ ++ ++ .size mle_header, .-mle_header ++ .type mle_header, @object ++ + .section .init.rodata, "a", @progbits + + .Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!" +@@ -433,6 +453,31 @@ __pvh_start: + + #endif /* CONFIG_PVH_GUEST */ + ++ /* ++ * Entry point for TrenchBoot Secure Launch on Intel TXT platforms. ++ * ++ * CPU is in 32b protected mode with paging disabled. 
On entry: ++ * - %ebx = %ebp = SINIT physical base address ++ * - %edx = SENTER control flags ++ * - stack pointer is undefined ++ * - CS is flat 4GB code segment ++ * - DS, ES and SS are flat 4GB data segments ++ * ++ * Additional restrictions: ++ * - some MSRs are partially cleared, among them IA32_MISC_ENABLE, so ++ * some capabilities might be reported as disabled even if they are ++ * supported by CPU ++ * - interrupts (including NMIs and SMIs) are disabled and must be ++ * enabled later ++ * - trying to enter real mode results in reset ++ * - APs must be brought up by MONITOR or GETSEC[WAKEUP], depending on ++ * which is supported by a given SINIT ACM ++ */ ++slaunch_stub_entry: ++ movl $SLAUNCH_BOOTLOADER_MAGIC,%eax ++ ++ /* Fall through to Multiboot entry point. */ ++ + __start: + cld + cli +-- +2.46.0 + diff --git a/1302-x86-boot-txt_early-add-early-TXT-tests-and-restore-M.patch b/1302-x86-boot-txt_early-add-early-TXT-tests-and-restore-M.patch new file mode 100644 index 00000000..44b7569a --- /dev/null +++ b/1302-x86-boot-txt_early-add-early-TXT-tests-and-restore-M.patch @@ -0,0 +1,305 @@ +From 37d1ca20da76cfa0fc6ef1954731c41e60da674e Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Mon, 17 Apr 2023 20:09:54 +0200 +Subject: [PATCH 1302/1328] x86/boot/txt_early: add early TXT tests and restore + MBI pointer + +These tests validate that important parts of memory are protected +against DMA attacks, including Xen and MBI. Modules can be tested later, +when it is possible to report issues to user before invoking TXT reset. + +TPM event log validation is temporarily disabled due to issue with its +allocation by bootloader (GRUB) which will need to be modified to +address this. Ultimately event log will also have to be validated early +as it is used immediately after these tests to hold MBI measurements. +See larger comment in verify_pmr_ranges(). + +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/boot/Makefile | 2 +- + xen/arch/x86/boot/head.S | 25 +++++ + xen/arch/x86/boot/txt_early.c | 132 +++++++++++++++++++++++++++ + xen/arch/x86/include/asm/intel_txt.h | 28 ++++++ + xen/arch/x86/intel_txt.c | 11 +++ + 6 files changed, 198 insertions(+), 1 deletion(-) + create mode 100644 xen/arch/x86/boot/txt_early.c + create mode 100644 xen/arch/x86/intel_txt.c + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index 3e43fcaea9..e62b9ff9bc 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -57,6 +57,7 @@ obj-y += percpu.o + obj-y += physdev.o + obj-$(CONFIG_COMPAT) += x86_64/physdev.o + obj-y += psr.o ++obj-y += intel_txt.o + obj-y += setup.o + obj-y += shutdown.o + obj-y += smp.o +diff --git a/xen/arch/x86/boot/Makefile b/xen/arch/x86/boot/Makefile +index d6bc8fc084..34df17664a 100644 +--- a/xen/arch/x86/boot/Makefile ++++ b/xen/arch/x86/boot/Makefile +@@ -1,6 +1,6 @@ + obj-bin-y += head.o + +-head-bin-objs := cmdline.o reloc.o ++head-bin-objs := cmdline.o reloc.o txt_early.o + + nocov-y += $(head-bin-objs) + noubsan-y += $(head-bin-objs) +diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S +index 1f7ce5f6ae..6d8988f53c 100644 +--- a/xen/arch/x86/boot/head.S ++++ b/xen/arch/x86/boot/head.S +@@ -506,6 +506,10 @@ __start: + /* Bootloaders may set multiboot{1,2}.mem_lower to a nonzero value. */ + xor %edx,%edx + ++ /* Check for TrenchBoot slaunch bootloader. */ ++ cmp $SLAUNCH_BOOTLOADER_MAGIC,%eax ++ je .Lslaunch_proto ++ + /* Check for Multiboot2 bootloader. 
*/ + cmp $MULTIBOOT2_BOOTLOADER_MAGIC,%eax + je .Lmultiboot2_proto +@@ -521,6 +525,23 @@ __start: + cmovnz MB_mem_lower(%ebx),%edx + jmp trampoline_bios_setup + ++.Lslaunch_proto: ++ /* Save information that TrenchBoot slaunch was used. */ ++ movb $1, sym_esi(slaunch_active) ++ ++ /* Push arguments to stack and call txt_early_tests(). */ ++ push $sym_offs(__2M_rwdata_end) /* end of target image */ ++ push $sym_offs(_start) /* target base address */ ++ push %esi /* load base address */ ++ call txt_early_tests ++ ++ /* ++ * txt_early_tests() returns MBI address, move it to EBX, move magic ++ * number expected by Multiboot 2 to EAX and fall through. ++ */ ++ movl %eax,%ebx ++ movl $MULTIBOOT2_BOOTLOADER_MAGIC,%eax ++ + .Lmultiboot2_proto: + /* Skip Multiboot2 information fixed part. */ + lea (MB2_fixed_sizeof+MULTIBOOT2_TAG_ALIGN-1)(%ebx),%ecx +@@ -851,6 +872,10 @@ cmdline_parse_early: + reloc: + .incbin "reloc.bin" + ++ ALIGN ++txt_early_tests: ++ .incbin "txt_early.bin" ++ + ENTRY(trampoline_start) + #include "trampoline.S" + ENTRY(trampoline_end) +diff --git a/xen/arch/x86/boot/txt_early.c b/xen/arch/x86/boot/txt_early.c +new file mode 100644 +index 0000000000..23ee734c47 +--- /dev/null ++++ b/xen/arch/x86/boot/txt_early.c +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2022-2023 3mdeb Sp. z o.o. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program. If not, see . ++ */ ++ ++/* ++ * This entry point is entered from xen/arch/x86/boot/head.S with Xen base at ++ * 0x4(%esp). A pointer to MBI is returned in %eax. ++ */ ++asm ( ++ " .text \n" ++ " .globl _start \n" ++ "_start: \n" ++ " jmp txt_early_tests \n" ++ ); ++ ++#include "defs.h" ++#include "../include/asm/intel_txt.h" ++ ++static void verify_pmr_ranges(struct txt_os_mle_data *os_mle, ++ struct txt_os_sinit_data *os_sinit, ++ uint32_t load_base_addr, uint32_t tgt_base_addr, ++ uint32_t xen_size) ++{ ++ int check_high_pmr = 0; ++ ++ /* Verify the value of the low PMR base. It should always be 0. */ ++ if ( os_sinit->vtd_pmr_lo_base != 0 ) ++ txt_reset(SLAUNCH_ERROR_LO_PMR_BASE); ++ ++ /* ++ * Low PMR size should not be 0 on current platforms. There is an ongoing ++ * transition to TPR-based DMA protection instead of PMR-based; this is not ++ * yet supported by the code. ++ */ ++ if ( os_sinit->vtd_pmr_lo_size == 0 ) ++ txt_reset(SLAUNCH_ERROR_LO_PMR_SIZE); ++ ++ /* Check if regions overlap. Treat regions with no hole between as error. */ ++ if ( os_sinit->vtd_pmr_hi_size != 0 && ++ os_sinit->vtd_pmr_hi_base <= os_sinit->vtd_pmr_lo_size ) ++ txt_reset(SLAUNCH_ERROR_HI_PMR_BASE); ++ ++ /* All regions accessed by 32b code must be below 4G. */ ++ if ( os_sinit->vtd_pmr_hi_base + os_sinit->vtd_pmr_hi_size <= ++ 0x100000000ull ) ++ check_high_pmr = 1; ++ ++ /* ++ * ACM checks that TXT heap and MLE memory is protected against DMA. We have ++ * to check if MBI and whole Xen memory is protected. 
The latter is done in ++ * case bootloader failed to set whole image as MLE and to make sure that ++ * both pre- and post-relocation code is protected. ++ */ ++ ++ /* Check if all of Xen before relocation is covered by PMR. */ ++ if ( !is_in_pmr(os_sinit, load_base_addr, xen_size, check_high_pmr) ) ++ txt_reset(SLAUNCH_ERROR_LO_PMR_MLE); ++ ++ /* Check if all of Xen after relocation is covered by PMR. */ ++ if ( load_base_addr != tgt_base_addr && ++ !is_in_pmr(os_sinit, tgt_base_addr, xen_size, check_high_pmr) ) ++ txt_reset(SLAUNCH_ERROR_LO_PMR_MLE); ++ ++ /* Check if MBI is covered by PMR. MBI starts with 'uint32_t total_size'. */ ++ if ( !is_in_pmr(os_sinit, os_mle->boot_params_addr, ++ *(uint32_t *)os_mle->boot_params_addr, check_high_pmr) ) ++ txt_reset(SLAUNCH_ERROR_BUFFER_BEYOND_PMR); ++ ++ /* Check if TPM event log (if present) is covered by PMR. */ ++ /* ++ * FIXME: currently commented out as GRUB allocates it in a hole between ++ * PMR and reserved RAM, due to 2MB resolution of PMR. There are no other ++ * easy-to-use DMA protection mechanisms that would allow to protect that ++ * part of memory. TPR (TXT DMA Protection Range) gives 1MB resolution, but ++ * it still wouldn't be enough. ++ * ++ * One possible solution would be for GRUB to allocate log at lower address, ++ * but this would further increase memory space fragmentation. Another ++ * option is to align PMR up instead of down, making PMR cover part of ++ * reserved region, but it is unclear what the consequences may be. ++ * ++ * In tboot this issue was resolved by reserving leftover chunks of memory ++ * in e820 and/or UEFI memory map. This is also a valid solution, but would ++ * require more changes to GRUB than the ones listed above, as event log is ++ * allocated much earlier than PMRs. ++ */ ++ /* ++ if ( os_mle->evtlog_addr != 0 && os_mle->evtlog_size != 0 && ++ !is_in_pmr(os_sinit, os_mle->evtlog_addr, os_mle->evtlog_size, ++ check_high_pmr) ) ++ txt_reset(SLAUNCH_ERROR_BUFFER_BEYOND_PMR); ++ */ ++} ++ ++uint32_t __stdcall txt_early_tests(uint32_t load_base_addr, ++ uint32_t tgt_base_addr, ++ uint32_t tgt_end_addr) ++{ ++ void *txt_heap; ++ struct txt_os_mle_data *os_mle; ++ struct txt_os_sinit_data *os_sinit; ++ uint32_t size = tgt_end_addr - tgt_base_addr; ++ ++ /* Clear the TXT error registers for a clean start of day */ ++ write_txt_reg(TXTCR_ERRORCODE, 0); ++ ++ txt_heap = _p(read_txt_reg(TXTCR_HEAP_BASE)); ++ ++ if ( txt_os_mle_data_size(txt_heap) < sizeof(*os_mle) || ++ txt_os_sinit_data_size(txt_heap) < sizeof(*os_sinit) ) ++ txt_reset(SLAUNCH_ERROR_GENERIC); ++ ++ os_mle = txt_os_mle_data_start(txt_heap); ++ os_sinit = txt_os_sinit_data_start(txt_heap); ++ ++ verify_pmr_ranges(os_mle, os_sinit, load_base_addr, tgt_base_addr, size); ++ ++ return os_mle->boot_params_addr; ++} +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 865161cf93..696ed92bac 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -77,6 +77,8 @@ + + #ifndef __ASSEMBLY__ + ++extern bool slaunch_active; ++ + /* We need to differentiate between pre- and post paging enabled. */ + #ifdef __BOOT_DEFS_H__ + #define _txt(x) _p(x) +@@ -264,4 +266,30 @@ static inline void *txt_sinit_mle_data_start(void *heap) + sizeof(uint64_t); + } + ++static inline int is_in_pmr(struct txt_os_sinit_data *os_sinit, uint64_t base, ++ uint32_t size, int check_high) ++{ ++ /* Check for size overflow. 
*/ ++ if ( base + size < base ) ++ txt_reset(SLAUNCH_ERROR_INTEGER_OVERFLOW); ++ ++ /* Low range always starts at 0, so its size is also end address. */ ++ if ( base >= os_sinit->vtd_pmr_lo_base && ++ base + size <= os_sinit->vtd_pmr_lo_size ) ++ return 1; ++ ++ if ( check_high && os_sinit->vtd_pmr_hi_size != 0 ) ++ { ++ if ( os_sinit->vtd_pmr_hi_base + os_sinit->vtd_pmr_hi_size < ++ os_sinit->vtd_pmr_hi_size ) ++ txt_reset(SLAUNCH_ERROR_INTEGER_OVERFLOW); ++ if ( base >= os_sinit->vtd_pmr_hi_base && ++ base + size <= os_sinit->vtd_pmr_hi_base + ++ os_sinit->vtd_pmr_hi_size ) ++ return 1; ++ } ++ ++ return 0; ++} ++ + #endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c +new file mode 100644 +index 0000000000..d23727cc82 +--- /dev/null ++++ b/xen/arch/x86/intel_txt.c +@@ -0,0 +1,11 @@ ++#include ++#include ++#include ++#include ++ ++bool __initdata slaunch_active; ++ ++static void __maybe_unused compile_time_checks(void) ++{ ++ BUILD_BUG_ON(sizeof(slaunch_active) != 1); ++} +-- +2.46.0 + diff --git a/1303-include-xen-slr_table.h-Secure-Launch-Resource-Table.patch b/1303-include-xen-slr_table.h-Secure-Launch-Resource-Table.patch new file mode 100644 index 00000000..6f856bbd --- /dev/null +++ b/1303-include-xen-slr_table.h-Secure-Launch-Resource-Table.patch @@ -0,0 +1,283 @@ +From bbbd77a2758ebf4765c03a50176d52eafc6a5661 Mon Sep 17 00:00:00 2001 +From: Sergii Dmytruk +Date: Sun, 29 Oct 2023 00:29:30 +0300 +Subject: [PATCH 1303/1328] include/xen/slr_table.h: Secure Launch Resource + Table definitions + +The file provides constants, structures and several helper functions for +parsing SLRT. + +slr_add_entry() and slr_init_table() were omitted to not have issues +with memcpy() usage (it comes from different places for different +translation units). + +Signed-off-by: Sergii Dmytruk +--- + xen/include/xen/slr_table.h | 255 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 255 insertions(+) + create mode 100644 xen/include/xen/slr_table.h + +diff --git a/xen/include/xen/slr_table.h b/xen/include/xen/slr_table.h +new file mode 100644 +index 0000000000..21ca78baa6 +--- /dev/null ++++ b/xen/include/xen/slr_table.h +@@ -0,0 +1,255 @@ ++/* SPDX-License-Identifier: GPL-3.0 */ ++ ++/* ++ * Copyright (C) 2023 Oracle and/or its affiliates. 
++ * ++ * Secure Launch Resource Table definitions ++ */ ++ ++#ifndef _SLR_TABLE_H ++#define _SLR_TABLE_H ++ ++#define UEFI_SLR_TABLE_GUID \ ++ { 0x877a9b2a, 0x0385, 0x45d1, { 0xa0, 0x34, 0x9d, 0xac, 0x9c, 0x9e, 0x56, 0x5f }} ++ ++/* SLR table header values */ ++#define SLR_TABLE_MAGIC 0x4452544d ++#define SLR_TABLE_REVISION 1 ++ ++/* Current revisions for the policy and UEFI config */ ++#define SLR_POLICY_REVISION 1 ++#define SLR_UEFI_CONFIG_REVISION 1 ++ ++/* SLR defined architectures */ ++#define SLR_INTEL_TXT 1 ++#define SLR_AMD_SKINIT 2 ++ ++/* SLR defined bootloaders */ ++#define SLR_BOOTLOADER_INVALID 0 ++#define SLR_BOOTLOADER_GRUB 1 ++ ++/* Log formats */ ++#define SLR_DRTM_TPM12_LOG 1 ++#define SLR_DRTM_TPM20_LOG 2 ++ ++/* DRTM Policy Entry Flags */ ++#define SLR_POLICY_FLAG_MEASURED 0x1 ++#define SLR_POLICY_IMPLICIT_SIZE 0x2 ++ ++/* Array Lengths */ ++#define TPM_EVENT_INFO_LENGTH 32 ++#define TXT_VARIABLE_MTRRS_LENGTH 32 ++ ++/* Tags */ ++#define SLR_ENTRY_INVALID 0x0000 ++#define SLR_ENTRY_DL_INFO 0x0001 ++#define SLR_ENTRY_LOG_INFO 0x0002 ++#define SLR_ENTRY_DRTM_POLICY 0x0003 ++#define SLR_ENTRY_INTEL_INFO 0x0004 ++#define SLR_ENTRY_AMD_INFO 0x0005 ++#define SLR_ENTRY_ARM_INFO 0x0006 ++#define SLR_ENTRY_UEFI_INFO 0x0007 ++#define SLR_ENTRY_UEFI_CONFIG 0x0008 ++#define SLR_ENTRY_END 0xffff ++ ++/* Entity Types */ ++#define SLR_ET_UNSPECIFIED 0x0000 ++#define SLR_ET_SLRT 0x0001 ++#define SLR_ET_BOOT_PARAMS 0x0002 ++#define SLR_ET_SETUP_DATA 0x0003 ++#define SLR_ET_CMDLINE 0x0004 ++#define SLR_ET_UEFI_MEMMAP 0x0005 ++#define SLR_ET_RAMDISK 0x0006 ++#define SLR_ET_MULTIBOOT2_INFO 0x0007 ++#define SLR_ET_MULTIBOOT2_MODULE 0x0008 ++#define SLR_ET_TXT_OS2MLE 0x0010 ++#define SLR_ET_UNUSED 0xffff ++ ++/* ++ * Primary SLR Table Header ++ */ ++struct slr_table ++{ ++ uint32_t magic; ++ uint16_t revision; ++ uint16_t architecture; ++ uint32_t size; ++ uint32_t max_size; ++ /* entries[] */ ++} __packed; ++ ++/* ++ * Common SLRT Table Header ++ */ ++struct slr_entry_hdr ++{ ++ uint16_t tag; ++ uint16_t size; ++} __packed; ++ ++/* ++ * Boot loader context ++ */ ++struct slr_bl_context ++{ ++ uint16_t bootloader; ++ uint16_t reserved; ++ uint64_t context; ++} __packed; ++ ++/* ++ * DRTM Dynamic Launch Configuration ++ */ ++struct slr_entry_dl_info ++{ ++ struct slr_entry_hdr hdr; ++ struct slr_bl_context bl_context; ++ uint64_t dl_handler; ++ uint64_t dce_base; ++ uint32_t dce_size; ++ uint64_t dlme_entry; ++} __packed; ++ ++/* ++ * TPM Log Information ++ */ ++struct slr_entry_log_info ++{ ++ struct slr_entry_hdr hdr; ++ uint16_t format; ++ uint16_t reserved; ++ uint64_t addr; ++ uint32_t size; ++} __packed; ++ ++/* ++ * DRTM Measurement Policy ++ */ ++struct slr_entry_policy ++{ ++ struct slr_entry_hdr hdr; ++ uint16_t revision; ++ uint16_t nr_entries; ++ /* policy_entries[] */ ++} __packed; ++ ++/* ++ * DRTM Measurement Entry ++ */ ++struct slr_policy_entry ++{ ++ uint16_t pcr; ++ uint16_t entity_type; ++ uint16_t flags; ++ uint16_t reserved; ++ uint64_t entity; ++ uint64_t size; ++ char evt_info[TPM_EVENT_INFO_LENGTH]; ++} __packed; ++ ++/* ++ * Secure Launch defined MTRR saving structures ++ */ ++struct slr_txt_mtrr_pair ++{ ++ uint64_t mtrr_physbase; ++ uint64_t mtrr_physmask; ++} __packed; ++ ++struct slr_txt_mtrr_state ++{ ++ uint64_t default_mem_type; ++ uint64_t mtrr_vcnt; ++ struct slr_txt_mtrr_pair mtrr_pair[TXT_VARIABLE_MTRRS_LENGTH]; ++} __packed; ++ ++/* ++ * Intel TXT Info table ++ */ ++struct slr_entry_intel_info ++{ ++ struct slr_entry_hdr hdr; ++ uint64_t 
saved_misc_enable_msr; ++ struct slr_txt_mtrr_state saved_bsp_mtrrs; ++} __packed; ++ ++/* ++ * AMD SKINIT Info table ++ */ ++struct slr_entry_amd_info ++{ ++ struct slr_entry_hdr hdr; ++} __packed; ++ ++/* ++ * ARM DRTM Info table ++ */ ++struct slr_entry_arm_info ++{ ++ struct slr_entry_hdr hdr; ++} __packed; ++ ++struct slr_entry_uefi_config ++{ ++ struct slr_entry_hdr hdr; ++ uint16_t revision; ++ uint16_t nr_entries; ++ /* uefi_cfg_entries[] */ ++} __packed; ++ ++struct slr_uefi_cfg_entry ++{ ++ uint16_t pcr; ++ uint16_t reserved; ++ uint64_t cfg; /* address or value */ ++ uint32_t size; ++ char evt_info[TPM_EVENT_INFO_LENGTH]; ++} __packed; ++ ++static inline void * ++slr_end_of_entries(struct slr_table *table) ++{ ++ return (void *)table + table->size; ++} ++ ++static inline struct slr_entry_hdr * ++slr_next_entry(struct slr_table *table, struct slr_entry_hdr *curr) ++{ ++ struct slr_entry_hdr *next = (struct slr_entry_hdr *) ++ ((void *)curr + curr->size); ++ ++ if ( (void *)next >= slr_end_of_entries(table) ) ++ return NULL; ++ if ( next->tag == SLR_ENTRY_END ) ++ return NULL; ++ ++ return next; ++} ++ ++static inline struct slr_entry_hdr * ++slr_next_entry_by_tag (struct slr_table *table, ++ struct slr_entry_hdr *entry, ++ uint16_t tag) ++{ ++ if ( !entry ) /* Start from the beginning */ ++ entry = (struct slr_entry_hdr *)((void *)table + sizeof(*table)); ++ ++ for ( ; ; ) ++ { ++ if ( entry->tag == tag ) ++ return entry; ++ ++ entry = slr_next_entry(table, entry); ++ if ( !entry ) ++ return NULL; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * slr_add_entry() and slr_init_table() were omitted to not have issues with ++ * memcpy() usage. ++ */ ++ ++#endif /* _SLR_TABLE_H */ +-- +2.46.0 + diff --git a/1304-xen-arch-x86-reserve-TXT-memory.patch b/1304-xen-arch-x86-reserve-TXT-memory.patch new file mode 100644 index 00000000..bb6fdd45 --- /dev/null +++ b/1304-xen-arch-x86-reserve-TXT-memory.patch @@ -0,0 +1,240 @@ +From e600644d247fc44d392bf27b1e51a0e7d3ba4de5 Mon Sep 17 00:00:00 2001 +From: Kacper Stojek +Date: Fri, 2 Sep 2022 08:11:43 +0200 +Subject: [PATCH 1304/1328] xen/arch/x86: reserve TXT memory + +TXT heap is marked as reserved in e820 to protect against being allocated +and overwritten. + +Signed-off-by: Kacper Stojek +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/include/asm/intel_txt.h | 42 +++++++++++++ + xen/arch/x86/include/asm/mm.h | 3 + + xen/arch/x86/intel_txt.c | 94 ++++++++++++++++++++++++++++ + xen/arch/x86/setup.c | 12 +++- + 4 files changed, 148 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 696ed92bac..cc0ab5ac53 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -88,6 +88,8 @@ extern bool slaunch_active; + #define _txt(x) __va(x) + #endif + ++#include ++ + /* + * Always use private space as some of registers are either read-only or not + * present in public space. +@@ -292,4 +294,44 @@ static inline int is_in_pmr(struct txt_os_sinit_data *os_sinit, uint64_t base, + return 0; + } + ++/* ++ * This helper function is used to map memory using L2 page tables by aligning ++ * mapped regions to 2MB. This way page allocator (which at this point isn't ++ * yet initialized) isn't needed for creating new L1 mappings. The function ++ * also checks and skips memory already mapped by the prebuilt tables. 
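++ * Example: a 4 KiB request at 0x3ff000 is widened to the single 2 MiB
++ * superpage at 0x200000 before being handed to map_pages_to_xen().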
++ * ++ * There is no unmap_l2() because the function is meant to be used for code that ++ * accesses TXT registers and TXT heap soon after which Xen rebuilds memory ++ * maps, effectively dropping all existing mappings. ++ */ ++extern int map_l2(unsigned long paddr, unsigned long size); ++ ++/* evt_log is a physical address and the caller must map it to virtual, if ++ * needed. */ ++static inline void find_evt_log(void **evt_log, uint32_t *evt_log_size) ++{ ++ struct txt_os_mle_data *os_mle; ++ struct slr_table *slrt; ++ struct slr_entry_log_info *log_info; ++ ++ os_mle = txt_os_mle_data_start(_txt(read_txt_reg(TXTCR_HEAP_BASE))); ++ slrt = _txt(os_mle->slrt); ++ ++ log_info = (struct slr_entry_log_info *) ++ slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_LOG_INFO); ++ if ( log_info != NULL ) ++ { ++ *evt_log = _p(log_info->addr); ++ *evt_log_size = log_info->size; ++ } ++ else ++ { ++ *evt_log = NULL; ++ *evt_log_size = 0; ++ } ++} ++ ++extern void map_txt_mem_regions(void); ++extern void protect_txt_mem_regions(void); ++ + #endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h +index 5845b729c3..99ed61f54e 100644 +--- a/xen/arch/x86/include/asm/mm.h ++++ b/xen/arch/x86/include/asm/mm.h +@@ -98,6 +98,9 @@ + #define _PGC_need_scrub _PGC_allocated + #define PGC_need_scrub PGC_allocated + ++/* How much of the directmap is prebuilt at compile time. */ ++#define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT) ++ + #ifndef CONFIG_BIGMEM + /* + * This definition is solely for the use in struct page_info (and +diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c +index d23727cc82..368fde1dac 100644 +--- a/xen/arch/x86/intel_txt.c ++++ b/xen/arch/x86/intel_txt.c +@@ -1,7 +1,15 @@ + #include + #include + #include ++#include ++#include ++#include ++#include + #include ++#include ++#include ++ ++static uint64_t __initdata txt_heap_base, txt_heap_size; + + bool __initdata slaunch_active; + +@@ -9,3 +17,89 @@ static void __maybe_unused compile_time_checks(void) + { + BUILD_BUG_ON(sizeof(slaunch_active) != 1); + } ++ ++int __init map_l2(unsigned long paddr, unsigned long size) ++{ ++ unsigned long aligned_paddr = paddr & ~((1ULL << L2_PAGETABLE_SHIFT) - 1); ++ unsigned long pages = ((paddr + size) - aligned_paddr); ++ pages = ROUNDUP(pages, 1ULL << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT; ++ ++ if ( (aligned_paddr + pages * PAGE_SIZE) <= PREBUILT_MAP_LIMIT ) ++ return 0; ++ ++ if ( aligned_paddr < PREBUILT_MAP_LIMIT ) { ++ pages -= (PREBUILT_MAP_LIMIT - aligned_paddr) >> PAGE_SHIFT; ++ aligned_paddr = PREBUILT_MAP_LIMIT; ++ } ++ ++ return map_pages_to_xen((unsigned long)__va(aligned_paddr), ++ maddr_to_mfn(aligned_paddr), ++ pages, PAGE_HYPERVISOR); ++} ++ ++void __init map_txt_mem_regions(void) ++{ ++ void *evt_log_addr; ++ uint32_t evt_log_size; ++ ++ map_l2(TXT_PRIV_CONFIG_REGS_BASE, NR_TXT_CONFIG_SIZE); ++ ++ txt_heap_base = read_txt_reg(TXTCR_HEAP_BASE); ++ BUG_ON(txt_heap_base == 0); ++ ++ txt_heap_size = read_txt_reg(TXTCR_HEAP_SIZE); ++ BUG_ON(txt_heap_size == 0); ++ ++ map_l2(txt_heap_base, txt_heap_size); ++ ++ find_evt_log(&evt_log_addr, &evt_log_size); ++ if ( evt_log_addr != NULL ) ++ map_l2((unsigned long)evt_log_addr, evt_log_size); ++} ++ ++void __init protect_txt_mem_regions(void) ++{ ++ int rc; ++ ++ void *evt_log_addr; ++ uint32_t evt_log_size; ++ ++ uint64_t sinit_base, sinit_size; ++ ++ /* TXT Heap */ ++ BUG_ON(txt_heap_base == 0); ++ printk("SLAUNCH: reserving TXT heap (%#lx - %#lx)\n", txt_heap_base, ++ txt_heap_base + 
txt_heap_size); ++ rc = reserve_e820_ram(&e820_raw, txt_heap_base, ++ txt_heap_base + txt_heap_size); ++ BUG_ON(rc == 0); ++ ++ /* TXT TPM Event Log */ ++ find_evt_log(&evt_log_addr, &evt_log_size); ++ if ( evt_log_addr != NULL ) { ++ printk("SLAUNCH: reserving event log (%#lx - %#lx)\n", ++ (uint64_t)evt_log_addr, ++ (uint64_t)evt_log_addr + evt_log_size); ++ rc = reserve_e820_ram(&e820_raw, (uint64_t)evt_log_addr, ++ (uint64_t)evt_log_addr + evt_log_size); ++ BUG_ON(rc == 0); ++ } ++ ++ sinit_base = read_txt_reg(TXTCR_SINIT_BASE); ++ BUG_ON(sinit_base == 0); ++ ++ sinit_size = read_txt_reg(TXTCR_SINIT_SIZE); ++ BUG_ON(sinit_size == 0); ++ ++ /* SINIT */ ++ printk("SLAUNCH: reserving SINIT memory (%#lx - %#lx)\n", sinit_base, ++ sinit_base + sinit_size); ++ rc = reserve_e820_ram(&e820_raw, sinit_base, sinit_base + sinit_size); ++ BUG_ON(rc == 0); ++ ++ /* TXT Private Space */ ++ rc = e820_change_range_type(&e820_raw, TXT_PRIV_CONFIG_REGS_BASE, ++ TXT_PRIV_CONFIG_REGS_BASE + NR_TXT_CONFIG_SIZE, ++ E820_RAM, E820_UNUSABLE); ++ BUG_ON(rc == 0); ++} +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index d47f156711..06e5dec254 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -55,6 +55,7 @@ + #include + #include + #include ++#include + + /* opt_nosmp: If true, secondary processors are ignored. */ + static bool __initdata opt_nosmp; +@@ -849,9 +850,6 @@ static struct domain *__init create_dom0(const module_t *image, + return d; + } + +-/* How much of the directmap is prebuilt at compile time. */ +-#define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT) +- + void __init noreturn __start_xen(unsigned long mbi_p) + { + char *memmap_type = NULL; +@@ -1170,6 +1168,14 @@ void __init noreturn __start_xen(unsigned long mbi_p) + #endif + } + ++ if ( slaunch_active ) ++ { ++ /* Prepare for TXT-related code. */ ++ map_txt_mem_regions(); ++ /* Reserve TXT heap and SINIT. */ ++ protect_txt_mem_regions(); ++ } ++ + /* Sanitise the raw E820 map to produce a final clean version. */ + max_page = raw_max_page = init_e820(memmap_type, &e820_raw); + +-- +2.46.0 + diff --git a/1305-x86-intel_txt.c-restore-boot-MTRRs.patch b/1305-x86-intel_txt.c-restore-boot-MTRRs.patch new file mode 100644 index 00000000..375e402c --- /dev/null +++ b/1305-x86-intel_txt.c-restore-boot-MTRRs.patch @@ -0,0 +1,120 @@ +From 15e8a7295f605bc752ba8686b2b27802a8381f7b Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Wed, 19 Oct 2022 19:52:24 +0200 +Subject: [PATCH 1305/1328] x86/intel_txt.c: restore boot MTRRs + +In preparation for TXT SENTER call, GRUB had to modify MTRR settings +to be UC for everything except SINIT ACM. Old values are restored +from SLRT where they were saved by the bootloader. + +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/e820.c | 4 ++ + xen/arch/x86/include/asm/intel_txt.h | 1 + + xen/arch/x86/intel_txt.c | 59 ++++++++++++++++++++++++++++ + 3 files changed, 64 insertions(+) + +diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c +index c5911cf48d..f836fec189 100644 +--- a/xen/arch/x86/e820.c ++++ b/xen/arch/x86/e820.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + /* + * opt_mem: Limit maximum address of physical RAM. 
+@@ -455,6 +456,9 @@ static uint64_t __init mtrr_top_of_ram(void) + rdmsrl(MSR_MTRRcap, mtrr_cap); + rdmsrl(MSR_MTRRdefType, mtrr_def); + ++ if ( slaunch_active ) ++ txt_restore_mtrrs(e820_verbose); ++ + if ( e820_verbose ) + printk(" MTRR cap: %"PRIx64" type: %"PRIx64"\n", mtrr_cap, mtrr_def); + +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index cc0ab5ac53..2b528977e9 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -333,5 +333,6 @@ static inline void find_evt_log(void **evt_log, uint32_t *evt_log_size) + + extern void map_txt_mem_regions(void); + extern void protect_txt_mem_regions(void); ++extern void txt_restore_mtrrs(bool e820_verbose); + + #endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c +index 368fde1dac..90a3b6c04c 100644 +--- a/xen/arch/x86/intel_txt.c ++++ b/xen/arch/x86/intel_txt.c +@@ -103,3 +103,62 @@ void __init protect_txt_mem_regions(void) + E820_RAM, E820_UNUSABLE); + BUG_ON(rc == 0); + } ++ ++void __init txt_restore_mtrrs(bool e820_verbose) ++{ ++ struct txt_os_mle_data *os_mle; ++ struct slr_table *slrt; ++ struct slr_entry_intel_info *intel_info; ++ int os_mle_size; ++ uint64_t mtrr_cap, mtrr_def, base, mask; ++ unsigned int i; ++ ++ os_mle_size = txt_os_mle_data_size(__va(txt_heap_base)); ++ os_mle = txt_os_mle_data_start(__va(txt_heap_base)); ++ ++ if ( os_mle_size < sizeof(*os_mle) ) ++ panic("OS-MLE too small\n"); ++ ++ rdmsrl(MSR_MTRRcap, mtrr_cap); ++ rdmsrl(MSR_MTRRdefType, mtrr_def); ++ ++ if ( e820_verbose ) { ++ printk("MTRRs set previously for SINIT ACM:\n"); ++ printk(" MTRR cap: %"PRIx64" type: %"PRIx64"\n", mtrr_cap, mtrr_def); ++ ++ for ( i = 0; i < (uint8_t)mtrr_cap; i++ ) ++ { ++ rdmsrl(MSR_IA32_MTRR_PHYSBASE(i), base); ++ rdmsrl(MSR_IA32_MTRR_PHYSMASK(i), mask); ++ ++ printk(" MTRR[%d]: base %"PRIx64" mask %"PRIx64"\n", ++ i, base, mask); ++ } ++ } ++ ++ slrt = __va(os_mle->slrt); ++ intel_info = (struct slr_entry_intel_info *) ++ slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_INTEL_INFO); ++ ++ if ( (mtrr_cap & 0xFF) != intel_info->saved_bsp_mtrrs.mtrr_vcnt ) { ++ printk("Bootloader saved %ld MTRR values, but there should be %ld\n", ++ intel_info->saved_bsp_mtrrs.mtrr_vcnt, mtrr_cap & 0xFF); ++ /* Choose the smaller one to be on the safe side. */ ++ mtrr_cap = (mtrr_cap & 0xFF) > intel_info->saved_bsp_mtrrs.mtrr_vcnt ? ++ intel_info->saved_bsp_mtrrs.mtrr_vcnt : mtrr_cap; ++ } ++ ++ /* Restore MTRRs saved by bootloader. */ ++ wrmsrl(MSR_MTRRdefType, intel_info->saved_bsp_mtrrs.default_mem_type); ++ ++ for ( i = 0; i < (uint8_t)mtrr_cap; i++ ) ++ { ++ base = intel_info->saved_bsp_mtrrs.mtrr_pair[i].mtrr_physbase; ++ mask = intel_info->saved_bsp_mtrrs.mtrr_pair[i].mtrr_physmask; ++ wrmsrl(MSR_IA32_MTRR_PHYSBASE(i), base); ++ wrmsrl(MSR_IA32_MTRR_PHYSMASK(i), mask); ++ } ++ ++ if ( e820_verbose ) ++ printk("Restored MTRRs:\n"); /* Printed by caller, mtrr_top_of_ram(). */ ++} +-- +2.46.0 + diff --git a/1306-lib-sha1.c-add-file.patch b/1306-lib-sha1.c-add-file.patch new file mode 100644 index 00000000..01155752 --- /dev/null +++ b/1306-lib-sha1.c-add-file.patch @@ -0,0 +1,308 @@ +From 2ec50d9686083882f27e088270a1c9747f708d8f Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Tue, 25 Oct 2022 16:04:17 +0200 +Subject: [PATCH 1306/1328] lib/sha1.c: add file + +The code comes from [1] and is licensed under GPL-2.0 license. 
+It's a combination of:
+ - include/crypto/sha1.h
+ - include/crypto/sha1_base.h
+ - lib/crypto/sha1.c
+ - crypto/sha1_generic.c
+
+Changes:
+ - includes
+ - formatting
+ - renames and splicing of some trivial functions that are called once
+ - dropping of `int` return values (only zero was ever returned)
+ - getting rid of references to `struct shash_desc`
+
+[1]: https://github.com/torvalds/linux/tree/afdab700f65e14070d8ab92175544b1c62b8bf03
+
+Signed-off-by: Krystian Hebel
+Signed-off-by: Sergii Dmytruk
+---
+ xen/include/xen/sha1.h |  10 ++
+ xen/lib/Makefile       |   1 +
+ xen/lib/sha1.c         | 240 +++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 251 insertions(+)
+ create mode 100644 xen/include/xen/sha1.h
+ create mode 100644 xen/lib/sha1.c
+
+diff --git a/xen/include/xen/sha1.h b/xen/include/xen/sha1.h
+new file mode 100644
+index 0000000000..85be7b3c12
+--- /dev/null
++++ b/xen/include/xen/sha1.h
+@@ -0,0 +1,10 @@
++#ifndef __XEN_SHA1_H
++#define __XEN_SHA1_H
++
++#include
++
++#define SHA1_DIGEST_SIZE 20
++
++void sha1_hash(const u8 *data, unsigned int len, u8 *out);
++
++#endif /* !__XEN_SHA1_H */
+diff --git a/xen/lib/Makefile b/xen/lib/Makefile
+index b311ea739c..3820e0bfbb 100644
+--- a/xen/lib/Makefile
++++ b/xen/lib/Makefile
+@@ -28,6 +28,7 @@ lib-y += strrchr.o
+ lib-y += strsep.o
+ lib-y += strspn.o
+ lib-y += strstr.o
++lib-$(CONFIG_X86) += sha1.o
+ lib-$(CONFIG_X86) += xxhash32.o
+ lib-$(CONFIG_X86) += xxhash64.o
+ 
+diff --git a/xen/lib/sha1.c b/xen/lib/sha1.c
+new file mode 100644
+index 0000000000..ea4f5bf1c8
+--- /dev/null
++++ b/xen/lib/sha1.c
+@@ -0,0 +1,240 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * SHA1 routine optimized to do word accesses rather than byte accesses,
++ * and to avoid unnecessary copies into the context array.
++ *
++ * This was based on the git SHA1 implementation.
++ */
++
++#include
++#include
++#include
++#include
++
++/*
++ * If you have 32 registers or more, the compiler can (and should)
++ * try to change the array[] accesses into registers. However, on
++ * machines with less than ~25 registers, that won't really work,
++ * and at least gcc will make an unholy mess of it.
++ *
++ * So to avoid that mess which just slows things down, we force
++ * the stores to memory to actually happen (we might be better off
++ * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
++ * suggested by Artur Skawina - that will also make gcc unable to
++ * try to do the silly "optimize away loads" part because it won't
++ * see what the value will be).
++ *
++ * Ben Herrenschmidt reports that on PPC, the C version comes close
++ * to the optimized asm with this (ie on PPC you don't want that
++ * 'volatile', since there are lots of registers).
++ *
++ * On ARM we get the best code generation by forcing a full memory barrier
++ * between each SHA_ROUND, otherwise gcc happily get wild with spilling and
++ * the stack frame size simply explode and performance goes down the drain.
++ */
++
++#ifdef CONFIG_X86
++  #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
++#elif defined(CONFIG_ARM)
++  #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while ( 0 )
++#else
++  #define setW(x, val) (W(x) = (val))
++#endif
++
++/* This "rolls" over the 512-bit array */
++#define W(x) (array[(x) & 15])
++
++/*
++ * Where do we get the source from? The first 16 iterations get it from
++ * the input data, the next mix it from the 512-bit array.
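++ * (W() masks its index with 15, so the same 16-word workspace is reused
++ * as a circular buffer across all 80 rounds.)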
++ */ ++#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t) ++#define SHA_MIX(t) rol32(W(t + 13) ^ W(t + 8) ^ W(t + 2) ^ W(t), 1) ++ ++#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ ++ __u32 TEMP = input(t); setW(t, TEMP); \ ++ E += TEMP + rol32(A, 5) + (fn) + (constant); \ ++ B = ror32(B, 2); \ ++ TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; \ ++ } while ( 0 ) ++ ++#define T_0_15(t, A, B, C, D, E) \ ++ SHA_ROUND(t, SHA_SRC, (((C ^ D) & B) ^ D), 0x5a827999, A, B, C, D, E) ++#define T_16_19(t, A, B, C, D, E) \ ++ SHA_ROUND(t, SHA_MIX, (((C ^ D) & B) ^ D), 0x5a827999, A, B, C, D, E) ++#define T_20_39(t, A, B, C, D, E) \ ++ SHA_ROUND(t, SHA_MIX, (B ^ C ^ D), 0x6ed9eba1, A, B, C, D, E) ++#define T_40_59(t, A, B, C, D, E) \ ++ SHA_ROUND(t, SHA_MIX, ((B & C) + (D & (B ^ C))), 0x8f1bbcdc, A, B, C, \ ++ D, E) ++#define T_60_79(t, A, B, C, D, E) \ ++ SHA_ROUND(t, SHA_MIX, (B ^ C ^ D), 0xca62c1d6, A, B, C, D, E) ++ ++#define SHA1_BLOCK_SIZE 64 ++#define SHA1_WORKSPACE_WORDS 16 ++ ++struct sha1_state { ++ u32 state[SHA1_DIGEST_SIZE / 4]; ++ u64 count; ++ u8 buffer[SHA1_BLOCK_SIZE]; ++}; ++ ++typedef void sha1_block_fn(struct sha1_state *sst, const u8 *src, int blocks); ++ ++/** ++ * sha1_transform - single block SHA1 transform (deprecated) ++ * ++ * @digest: 160 bit digest to update ++ * @data: 512 bits of data to hash ++ * @array: 16 words of workspace (see note) ++ * ++ * This function executes SHA-1's internal compression function. It updates the ++ * 160-bit internal state (@digest) with a single 512-bit data block (@data). ++ * ++ * Don't use this function. SHA-1 is no longer considered secure. And even if ++ * you do have to use SHA-1, this isn't the correct way to hash something with ++ * SHA-1 as this doesn't handle padding and finalization. ++ * ++ * Note: If the hash is security sensitive, the caller should be sure ++ * to clear the workspace. This is left to the caller to avoid ++ * unnecessary clears between chained hashing operations. ++ */ ++void sha1_transform(__u32 *digest, const u8 *data, __u32 *array) ++{ ++ __u32 A, B, C, D, E; ++ unsigned int i = 0; ++ ++ A = digest[0]; ++ B = digest[1]; ++ C = digest[2]; ++ D = digest[3]; ++ E = digest[4]; ++ ++ /* Round 1 - iterations 0-16 take their input from 'data' */ ++ for ( ; i < 16; ++i ) ++ T_0_15(i, A, B, C, D, E); ++ ++ /* Round 1 - tail. 
Input from 512-bit mixing array */ ++ for ( ; i < 20; ++i ) ++ T_16_19(i, A, B, C, D, E); ++ ++ /* Round 2 */ ++ for ( ; i < 40; ++i ) ++ T_20_39(i, A, B, C, D, E); ++ ++ /* Round 3 */ ++ for ( ; i < 60; ++i ) ++ T_40_59(i, A, B, C, D, E); ++ ++ /* Round 4 */ ++ for ( ; i < 80; ++i ) ++ T_60_79(i, A, B, C, D, E); ++ ++ digest[0] += A; ++ digest[1] += B; ++ digest[2] += C; ++ digest[3] += D; ++ digest[4] += E; ++} ++ ++static void sha1_init(struct sha1_state *sctx) ++{ ++ sctx->state[0] = 0x67452301UL; ++ sctx->state[1] = 0xefcdab89UL; ++ sctx->state[2] = 0x98badcfeUL; ++ sctx->state[3] = 0x10325476UL; ++ sctx->state[4] = 0xc3d2e1f0UL; ++ sctx->count = 0; ++} ++ ++static void sha1_do_update(struct sha1_state *sctx, ++ const u8 *data, ++ unsigned int len, ++ sha1_block_fn *block_fn) ++{ ++ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; ++ ++ sctx->count += len; ++ ++ if ( unlikely((partial + len) >= SHA1_BLOCK_SIZE) ) ++ { ++ int blocks; ++ ++ if ( partial ) ++ { ++ int p = SHA1_BLOCK_SIZE - partial; ++ ++ memcpy(sctx->buffer + partial, data, p); ++ data += p; ++ len -= p; ++ ++ block_fn(sctx, sctx->buffer, 1); ++ } ++ ++ blocks = len / SHA1_BLOCK_SIZE; ++ len %= SHA1_BLOCK_SIZE; ++ ++ if ( blocks ) ++ { ++ block_fn(sctx, data, blocks); ++ data += blocks * SHA1_BLOCK_SIZE; ++ } ++ partial = 0; ++ } ++ if ( len ) ++ memcpy(sctx->buffer + partial, data, len); ++} ++ ++static void sha1_do_finalize(struct sha1_state *sctx, sha1_block_fn *block_fn) ++{ ++ const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); ++ __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); ++ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; ++ ++ sctx->buffer[partial++] = 0x80; ++ if ( partial > bit_offset ) ++ { ++ memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); ++ partial = 0; ++ ++ block_fn(sctx, sctx->buffer, 1); ++ } ++ ++ memset(sctx->buffer + partial, 0x0, bit_offset - partial); ++ *bits = cpu_to_be64(sctx->count << 3); ++ block_fn(sctx, sctx->buffer, 1); ++} ++ ++static void sha1_finish(struct sha1_state *sctx, u8 *out) ++{ ++ __be32 *digest = (__be32 *)out; ++ int i; ++ ++ for ( i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++ ) ++ put_unaligned_be32(sctx->state[i], digest++); ++ ++ memset(sctx, 0, sizeof(*sctx)); ++} ++ ++static void sha1_generic_block_fn(struct sha1_state *sctx, const u8 *src, ++ int blocks) ++{ ++ u32 temp[SHA1_WORKSPACE_WORDS]; ++ ++ while ( blocks-- ) ++ { ++ sha1_transform(sctx->state, src, temp); ++ src += SHA1_BLOCK_SIZE; ++ } ++ memset(temp, 0, sizeof(temp)); ++} ++ ++void sha1_hash(const u8 *data, unsigned int len, u8 *out) ++{ ++ struct sha1_state sctx; ++ ++ sha1_init(&sctx); ++ sha1_do_update(&sctx, data, len, sha1_generic_block_fn); ++ sha1_do_finalize(&sctx, sha1_generic_block_fn); ++ sha1_finish(&sctx, out); ++} +-- +2.46.0 + diff --git a/1307-x86-tpm.c-code-for-early-hashing-and-extending-PCRs-.patch b/1307-x86-tpm.c-code-for-early-hashing-and-extending-PCRs-.patch new file mode 100644 index 00000000..1235943d --- /dev/null +++ b/1307-x86-tpm.c-code-for-early-hashing-and-extending-PCRs-.patch @@ -0,0 +1,657 @@ +From abc7f9edb4efac004bbbc212213ec0e79e576f59 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Fri, 21 Oct 2022 18:46:33 +0200 +Subject: [PATCH 1307/1328] x86/tpm.c: code for early hashing and extending + PCRs (for TPM1.2) + +This file is built twice: for early 32b mode without paging to measure +MBI and for 64b code to measure dom0 kernel and initramfs. Since MBI +is small, the first case uses TPM to do the hashing. 
Kernel and +initramfs on the other hand are too big, sending them to the TPM would +take multiple minutes. + +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/boot/Makefile | 6 +- + xen/arch/x86/boot/head.S | 12 +- + xen/arch/x86/include/asm/intel_txt.h | 29 +- + xen/arch/x86/include/asm/tpm.h | 12 + + xen/arch/x86/intel_txt.c | 7 +- + xen/arch/x86/tpm.c | 446 +++++++++++++++++++++++++++ + 7 files changed, 502 insertions(+), 11 deletions(-) + create mode 100644 xen/arch/x86/include/asm/tpm.h + create mode 100644 xen/arch/x86/tpm.c + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index e62b9ff9bc..d967aabf29 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -66,6 +66,7 @@ obj-y += spec_ctrl.o + obj-y += srat.o + obj-y += string.o + obj-y += time.o ++obj-y += tpm.o + obj-y += traps.o + obj-y += tsx.o + obj-y += usercopy.o +diff --git a/xen/arch/x86/boot/Makefile b/xen/arch/x86/boot/Makefile +index 34df17664a..913fa9d2c6 100644 +--- a/xen/arch/x86/boot/Makefile ++++ b/xen/arch/x86/boot/Makefile +@@ -1,6 +1,6 @@ + obj-bin-y += head.o + +-head-bin-objs := cmdline.o reloc.o txt_early.o ++head-bin-objs := cmdline.o reloc.o txt_early.o tpm_early.o + + nocov-y += $(head-bin-objs) + noubsan-y += $(head-bin-objs) +@@ -29,6 +29,10 @@ LDFLAGS_DIRECT += $(LDFLAGS_DIRECT-y) + %.bin: %.lnk + $(OBJCOPY) -j .text -O binary $< $@ + ++$(obj)/tpm_early.o: XEN_CFLAGS += -D__EARLY_TPM__ ++$(obj)/tpm_early.o: $(src)/../tpm.c FORCE ++ $(call if_changed_rule,cc_o_c) ++ + %.lnk: %.o $(src)/build32.lds + $(LD) $(subst x86_64,i386,$(LDFLAGS_DIRECT)) -N -T $(filter %.lds,$^) -o $@ $< + +diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S +index 6d8988f53c..33c0779368 100644 +--- a/xen/arch/x86/boot/head.S ++++ b/xen/arch/x86/boot/head.S +@@ -536,10 +536,14 @@ __start: + call txt_early_tests + + /* +- * txt_early_tests() returns MBI address, move it to EBX, move magic +- * number expected by Multiboot 2 to EAX and fall through. ++ * txt_early_tests() returns MBI address, pass it to tpm_extend_mbi() ++ * and store for later in EBX. + */ ++ push %eax + movl %eax,%ebx ++ call tpm_extend_mbi ++ ++ /* Move magic number expected by Multiboot 2 to EAX and fall through. */ + movl $MULTIBOOT2_BOOTLOADER_MAGIC,%eax + + .Lmultiboot2_proto: +@@ -876,6 +880,10 @@ reloc: + txt_early_tests: + .incbin "txt_early.bin" + ++ ALIGN ++tpm_extend_mbi: ++ .incbin "tpm_early.bin" ++ + ENTRY(trampoline_start) + #include "trampoline.S" + ENTRY(trampoline_end) +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 2b528977e9..8505f37210 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -308,15 +308,11 @@ extern int map_l2(unsigned long paddr, unsigned long size); + + /* evt_log is a physical address and the caller must map it to virtual, if + * needed. 
*/
+-static inline void find_evt_log(void **evt_log, uint32_t *evt_log_size)
++static inline void find_evt_log(struct slr_table *slrt, void **evt_log,
++                                uint32_t *evt_log_size)
+ {
+-    struct txt_os_mle_data *os_mle;
+-    struct slr_table *slrt;
+     struct slr_entry_log_info *log_info;
+ 
+-    os_mle = txt_os_mle_data_start(_txt(read_txt_reg(TXTCR_HEAP_BASE)));
+-    slrt = _txt(os_mle->slrt);
+-
+     log_info = (struct slr_entry_log_info *)
+         slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_LOG_INFO);
+     if ( log_info != NULL )
+@@ -331,8 +327,29 @@ static inline void find_evt_log(void **evt_log, uint32_t *evt_log_size)
+     }
+ }
+ 
++/* Returns physical address. */
++static inline uint32_t txt_find_slrt(void)
++{
++    struct txt_os_mle_data *os_mle =
++        txt_os_mle_data_start(_txt(read_txt_reg(TXTCR_HEAP_BASE)));
++    return os_mle->slrt;
++}
++
+ extern void map_txt_mem_regions(void);
+ extern void protect_txt_mem_regions(void);
+ extern void txt_restore_mtrrs(bool e820_verbose);
+ 
++#define DRTM_LOC 2
++#define DRTM_CODE_PCR 17
++#define DRTM_DATA_PCR 18
++
++/*
++ * Secure Launch event log entry type. The TXT specification defines the
++ * base event value as 0x400 for DRTM values.
++ */
++#define TXT_EVTYPE_BASE 0x400
++#define TXT_EVTYPE_SLAUNCH (TXT_EVTYPE_BASE + 0x102)
++#define TXT_EVTYPE_SLAUNCH_START (TXT_EVTYPE_BASE + 0x103)
++#define TXT_EVTYPE_SLAUNCH_END (TXT_EVTYPE_BASE + 0x104)
++
+ #endif /* __ASSEMBLY__ */
+diff --git a/xen/arch/x86/include/asm/tpm.h b/xen/arch/x86/include/asm/tpm.h
+new file mode 100644
+index 0000000000..9bbdf63680
+--- /dev/null
++++ b/xen/arch/x86/include/asm/tpm.h
+@@ -0,0 +1,12 @@
++#ifndef _ASM_X86_TPM_H_
++#define _ASM_X86_TPM_H_
++
++#include
++
++#define TPM_TIS_BASE 0xFED40000
++#define TPM_TIS_SIZE 0x00010000
++
++void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size,
++                     uint32_t type, uint8_t *log_data, unsigned log_data_size);
++
++#endif /* _ASM_X86_TPM_H_ */
+diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c
+index 90a3b6c04c..f07a1044ee 100644
+--- a/xen/arch/x86/intel_txt.c
++++ b/xen/arch/x86/intel_txt.c
+@@ -5,6 +5,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -43,6 +44,7 @@ void __init map_txt_mem_regions(void)
+     uint32_t evt_log_size;
+ 
+     map_l2(TXT_PRIV_CONFIG_REGS_BASE, NR_TXT_CONFIG_SIZE);
++    map_l2(TPM_TIS_BASE, TPM_TIS_SIZE);
+ 
+     txt_heap_base = read_txt_reg(TXTCR_HEAP_BASE);
+     BUG_ON(txt_heap_base == 0);
+@@ -52,7 +54,8 @@ void __init map_txt_mem_regions(void)
+ 
+     map_l2(txt_heap_base, txt_heap_size);
+ 
+-    find_evt_log(&evt_log_addr, &evt_log_size);
++    find_evt_log(__va(txt_find_slrt()), &evt_log_addr, &evt_log_size);
++    /* The event log is mapped below only if one is actually present. */
+     if ( evt_log_addr != NULL )
+         map_l2((unsigned long)evt_log_addr, evt_log_size);
+ }
+@@ -75,7 +78,7 @@ void __init protect_txt_mem_regions(void)
+     BUG_ON(rc == 0);
+ 
+     /* TXT TPM Event Log */
+-    find_evt_log(&evt_log_addr, &evt_log_size);
++    find_evt_log(__va(txt_find_slrt()), &evt_log_addr, &evt_log_size);
+     if ( evt_log_addr != NULL ) {
+         printk("SLAUNCH: reserving event log (%#lx - %#lx)\n",
+                (uint64_t)evt_log_addr,
+diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c
+new file mode 100644
+index 0000000000..1bd271dbe2
+--- /dev/null
++++ b/xen/arch/x86/tpm.c
+@@ -0,0 +1,446 @@
++/*
++ * Copyright (c) 2022 3mdeb Sp. z o.o. All rights reserved.
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program. If not, see . ++ */ ++ ++#ifdef __EARLY_TPM__ ++/* ++ * This entry point is entered from xen/arch/x86/boot/head.S with MBI base at ++ * 0x4(%esp). ++ */ ++asm ( ++ " .text \n" ++ " .globl _start \n" ++ "_start: \n" ++ " jmp tpm_extend_mbi \n" ++ ); ++ ++#include "boot/defs.h" ++#include "include/asm/intel_txt.h" ++#include "include/asm/tpm.h" ++#ifdef __va ++#error "__va defined in non-paged mode!" ++#endif ++#define __va(x) _p(x) ++ ++/* ++ * The code is being compiled as a standalone binary without linking to any ++ * other part of Xen. Providing implementation of builtin functions in this ++ * case is necessary if compiler chooses to not use an inline builtin. ++ */ ++void *memcpy(void *dest, const void *src, size_t n) ++{ ++ const uint8_t *s = src; ++ uint8_t *d = dest; ++ ++ while ( n-- ) ++ *d++ = *s++; ++ ++ return dest; ++} ++ ++#else /* __EARLY_TPM__ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#endif /* __EARLY_TPM__ */ ++ ++#include ++ ++#define TPM_LOC_REG(loc, reg) (0x1000 * (loc) + (reg)) ++ ++#define TPM_ACCESS_(x) TPM_LOC_REG(x, 0x00) ++#define ACCESS_REQUEST_USE (1 << 1) ++#define ACCESS_ACTIVE_LOCALITY (1 << 5) ++#define TPM_INTF_CAPABILITY_(x) TPM_LOC_REG(x, 0x14) ++#define INTF_VERSION_MASK 0x70000000 ++#define TPM_STS_(x) TPM_LOC_REG(x, 0x18) ++#define TPM_FAMILY_MASK 0x0C000000 ++#define STS_DATA_AVAIL (1 << 4) ++#define STS_TPM_GO (1 << 5) ++#define STS_COMMAND_READY (1 << 6) ++#define STS_VALID (1 << 7) ++#define TPM_DATA_FIFO_(x) TPM_LOC_REG(x, 0x24) ++ ++#define swap16(x) __builtin_bswap16(x) ++#define swap32(x) __builtin_bswap32(x) ++#define memcpy(d, s, n) __builtin_memcpy(d, s, n) ++ ++static inline volatile uint32_t tis_read32(unsigned reg) ++{ ++ return *(volatile uint32_t *)__va(TPM_TIS_BASE + reg); ++} ++ ++static inline volatile uint8_t tis_read8(unsigned reg) ++{ ++ return *(volatile uint8_t *)__va(TPM_TIS_BASE + reg); ++} ++ ++static inline void tis_write8(unsigned reg, uint8_t val) ++{ ++ *(volatile uint8_t *)__va(TPM_TIS_BASE + reg) = val; ++} ++ ++static inline void request_locality(unsigned loc) ++{ ++ tis_write8(TPM_ACCESS_(loc), ACCESS_REQUEST_USE); ++ /* Check that locality was actually activated. */ ++ while ( (tis_read8(TPM_ACCESS_(loc)) & ACCESS_ACTIVE_LOCALITY) == 0 ); ++} ++ ++static inline void relinquish_locality(unsigned loc) ++{ ++ tis_write8(TPM_ACCESS_(loc), ACCESS_ACTIVE_LOCALITY); ++} ++ ++static void send_cmd(unsigned loc, uint8_t *buf, unsigned i_size, ++ unsigned *o_size) ++{ ++ /* ++ * Value of "data available" bit counts only when "valid" field is set as ++ * well. ++ */ ++ const unsigned data_avail = STS_VALID | STS_DATA_AVAIL; ++ ++ unsigned i; ++ ++ /* Make sure TPM can accept a command. */ ++ if ( (tis_read8(TPM_STS_(loc)) & STS_COMMAND_READY) == 0 ) { ++ /* Abort current command. */ ++ tis_write8(TPM_STS_(loc), STS_COMMAND_READY); ++ /* Wait until TPM is ready for a new one. 
*/ ++ while ( (tis_read8(TPM_STS_(loc)) & STS_COMMAND_READY) == 0 ); ++ } ++ ++ for ( i = 0; i < i_size; i++ ) ++ tis_write8(TPM_DATA_FIFO_(loc), buf[i]); ++ ++ tis_write8(TPM_STS_(loc), STS_TPM_GO); ++ ++ /* Wait for the first byte of response. */ ++ while ( (tis_read8(TPM_STS_(loc)) & data_avail) != data_avail); ++ ++ for ( i = 0; i < *o_size && tis_read8(TPM_STS_(loc)) & data_avail; i++ ) ++ buf[i] = tis_read8(TPM_DATA_FIFO_(loc)); ++ ++ if ( i < *o_size ) ++ *o_size = i; ++ ++ tis_write8(TPM_STS_(loc), STS_COMMAND_READY); ++} ++ ++static inline bool is_tpm12(void) ++{ ++ /* ++ * If one of these conditions is true: ++ * - INTF_CAPABILITY_x.interfaceVersion is 0 (TIS <= 1.21) ++ * - INTF_CAPABILITY_x.interfaceVersion is 2 (TIS == 1.3) ++ * - STS_x.tpmFamily is 0 ++ * we're dealing with TPM1.2. ++ */ ++ uint32_t intf_version = tis_read32(TPM_INTF_CAPABILITY_(0)) ++ & INTF_VERSION_MASK; ++ return (intf_version == 0x00000000 || intf_version == 0x20000000 || ++ (tis_read32(TPM_STS_(0)) & TPM_FAMILY_MASK) == 0); ++} ++ ++/****************************** TPM1.2 specific *******************************/ ++#define TPM_ORD_Extend 0x00000014 ++#define TPM_ORD_SHA1Start 0x000000A0 ++#define TPM_ORD_SHA1Update 0x000000A1 ++#define TPM_ORD_SHA1CompleteExtend 0x000000A3 ++ ++#define TPM_TAG_RQU_COMMAND 0x00C1 ++#define TPM_TAG_RSP_COMMAND 0x00C4 ++ ++/* All fields of following structs are big endian. */ ++struct tpm_cmd_hdr { ++ uint16_t tag; ++ uint32_t paramSize; ++ uint32_t ordinal; ++} __packed; ++ ++struct tpm_rsp_hdr { ++ uint16_t tag; ++ uint32_t paramSize; ++ uint32_t returnCode; ++} __packed; ++ ++struct extend_cmd { ++ struct tpm_cmd_hdr h; ++ uint32_t pcrNum; ++ uint8_t inDigest[SHA1_DIGEST_SIZE]; ++} __packed; ++ ++struct extend_rsp { ++ struct tpm_rsp_hdr h; ++ uint8_t outDigest[SHA1_DIGEST_SIZE]; ++} __packed; ++ ++struct sha1_start_cmd { ++ struct tpm_cmd_hdr h; ++} __packed; ++ ++struct sha1_start_rsp { ++ struct tpm_rsp_hdr h; ++ uint32_t maxNumBytes; ++} __packed; ++ ++struct sha1_update_cmd { ++ struct tpm_cmd_hdr h; ++ uint32_t numBytes; /* Must be a multiple of 64 */ ++ uint8_t hashData[]; ++} __packed; ++ ++struct sha1_update_rsp { ++ struct tpm_rsp_hdr h; ++} __packed; ++ ++struct sha1_complete_extend_cmd { ++ struct tpm_cmd_hdr h; ++ uint32_t pcrNum; ++ uint32_t hashDataSize; /* 0-64, inclusive */ ++ uint8_t hashData[]; ++} __packed; ++ ++struct sha1_complete_extend_rsp { ++ struct tpm_rsp_hdr h; ++ uint8_t hashValue[SHA1_DIGEST_SIZE]; ++ uint8_t outDigest[SHA1_DIGEST_SIZE]; ++} __packed; ++ ++struct TPM12_PCREvent { ++ uint32_t PCRIndex; ++ uint32_t Type; ++ uint8_t Digest[SHA1_DIGEST_SIZE]; ++ uint32_t Size; ++ uint8_t Data[]; ++}; ++ ++struct txt_ev_log_container_12 { ++ char Signature[20]; /* "TXT Event Container", null-terminated */ ++ uint8_t Reserved[12]; ++ uint8_t ContainerVerMajor; ++ uint8_t ContainerVerMinor; ++ uint8_t PCREventVerMajor; ++ uint8_t PCREventVerMinor; ++ uint32_t ContainerSize; /* Allocated size */ ++ uint32_t PCREventsOffset; ++ uint32_t NextEventOffset; ++ struct TPM12_PCREvent PCREvents[]; ++}; ++ ++#ifdef __EARLY_TPM__ ++/* ++ * TPM1.2 is required to support commands of up to 1101 bytes, vendors rarely ++ * go above that. Limit maximum size of block of data to be hashed to 1024. 
++ */ ++#define MAX_HASH_BLOCK 1024 ++#define CMD_RSP_BUF_SIZE (sizeof(struct sha1_update_cmd) + MAX_HASH_BLOCK) ++ ++union cmd_rsp { ++ struct sha1_start_cmd start_c; ++ struct sha1_start_rsp start_r; ++ struct sha1_update_cmd update_c; ++ struct sha1_update_rsp update_r; ++ struct sha1_complete_extend_cmd finish_c; ++ struct sha1_complete_extend_rsp finish_r; ++ uint8_t buf[CMD_RSP_BUF_SIZE]; ++}; ++ ++/* Returns true on success. */ ++static bool tpm12_hash_extend(unsigned loc, uint8_t *buf, unsigned size, ++ unsigned pcr, uint8_t *out_digest) ++{ ++ union cmd_rsp cmd_rsp; ++ unsigned max_bytes = MAX_HASH_BLOCK; ++ unsigned o_size = sizeof(cmd_rsp); ++ bool success = false; ++ ++ request_locality(loc); ++ ++ cmd_rsp.start_c = (struct sha1_start_cmd) { ++ .h.tag = swap16(TPM_TAG_RQU_COMMAND), ++ .h.paramSize = swap32(sizeof(struct sha1_start_cmd)), ++ .h.ordinal = swap32(TPM_ORD_SHA1Start), ++ }; ++ ++ send_cmd(loc, cmd_rsp.buf, sizeof(struct sha1_start_cmd), &o_size); ++ if ( o_size < sizeof(struct sha1_start_rsp) ) ++ goto error; ++ ++ if ( max_bytes > swap32(cmd_rsp.start_r.maxNumBytes) ) ++ max_bytes = swap32(cmd_rsp.start_r.maxNumBytes); ++ ++ while ( size > 64 ) { ++ if ( size < max_bytes ) ++ max_bytes = size & ~(64 - 1); ++ ++ o_size = sizeof(cmd_rsp); ++ ++ cmd_rsp.update_c = (struct sha1_update_cmd){ ++ .h.tag = swap16(TPM_TAG_RQU_COMMAND), ++ .h.paramSize = swap32(sizeof(struct sha1_update_cmd) + max_bytes), ++ .h.ordinal = swap32(TPM_ORD_SHA1Update), ++ .numBytes = swap32(max_bytes), ++ }; ++ memcpy(cmd_rsp.update_c.hashData, buf, max_bytes); ++ ++ send_cmd(loc, cmd_rsp.buf, sizeof(struct sha1_update_cmd) + max_bytes, ++ &o_size); ++ if ( o_size < sizeof(struct sha1_update_rsp) ) ++ goto error; ++ ++ size -= max_bytes; ++ buf += max_bytes; ++ } ++ ++ o_size = sizeof(cmd_rsp); ++ ++ cmd_rsp.finish_c = (struct sha1_complete_extend_cmd) { ++ .h.tag = swap16(TPM_TAG_RQU_COMMAND), ++ .h.paramSize = swap32(sizeof(struct sha1_complete_extend_cmd) + size), ++ .h.ordinal = swap32(TPM_ORD_SHA1CompleteExtend), ++ .pcrNum = swap32(pcr), ++ .hashDataSize = swap32(size), ++ }; ++ memcpy(cmd_rsp.finish_c.hashData, buf, size); ++ ++ send_cmd(loc, cmd_rsp.buf, sizeof(struct sha1_complete_extend_cmd) + size, ++ &o_size); ++ if ( o_size < sizeof(struct sha1_complete_extend_rsp) ) ++ goto error; ++ ++ if ( out_digest != NULL ) ++ memcpy(out_digest, cmd_rsp.finish_r.hashValue, SHA1_DIGEST_SIZE); ++ ++ success = true; ++ ++error: ++ relinquish_locality(loc); ++ return success; ++} ++ ++#else ++ ++union cmd_rsp { ++ struct extend_cmd extend_c; ++ struct extend_rsp extend_r; ++}; ++ ++/* Returns true on success. 
*/ ++static bool tpm12_hash_extend(unsigned loc, uint8_t *buf, unsigned size, ++ unsigned pcr, uint8_t *out_digest) ++{ ++ union cmd_rsp cmd_rsp; ++ unsigned o_size = sizeof(cmd_rsp); ++ ++ sha1_hash(buf, size, out_digest); ++ ++ request_locality(loc); ++ ++ cmd_rsp.extend_c = (struct extend_cmd) { ++ .h.tag = swap16(TPM_TAG_RQU_COMMAND), ++ .h.paramSize = swap32(sizeof(struct extend_cmd)), ++ .h.ordinal = swap32(TPM_ORD_Extend), ++ .pcrNum = swap32(pcr), ++ }; ++ ++ memcpy(cmd_rsp.extend_c.inDigest, out_digest, SHA1_DIGEST_SIZE); ++ ++ send_cmd(loc, (uint8_t *)&cmd_rsp, sizeof(struct extend_cmd), &o_size); ++ ++ relinquish_locality(loc); ++ ++ return (o_size >= sizeof(struct extend_rsp)); ++} ++ ++#endif /* __EARLY_TPM__ */ ++ ++static void *create_log_event12(struct txt_ev_log_container_12 *evt_log, ++ uint32_t evt_log_size, uint32_t pcr, ++ uint32_t type, uint8_t *data, ++ unsigned data_size) ++{ ++ struct TPM12_PCREvent *new_entry; ++ ++ new_entry = (void *)(((uint8_t *)evt_log) + evt_log->NextEventOffset); ++ ++ /* ++ * Check if there is enough space left for new entry. ++ * Note: it is possible to introduce a gap in event log if entry with big ++ * data_size is followed by another entry with smaller data. Maybe we should ++ * cap the event log size in such case? ++ */ ++ if ( evt_log->NextEventOffset + sizeof(struct TPM12_PCREvent) + data_size ++ > evt_log_size ) ++ return NULL; ++ ++ evt_log->NextEventOffset += sizeof(struct TPM12_PCREvent) + data_size; ++ ++ new_entry->PCRIndex = pcr; ++ new_entry->Type = type; ++ new_entry->Size = data_size; ++ ++ if ( data && data_size > 0 ) ++ memcpy(new_entry->Data, data, data_size); ++ ++ return new_entry->Digest; ++} ++ ++/************************** end of TPM1.2 specific ****************************/ ++ ++void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size, ++ uint32_t type, uint8_t *log_data, unsigned log_data_size) ++{ ++ void *evt_log_addr; ++ uint32_t evt_log_size; ++ ++ struct slr_table *slrt = __va(txt_find_slrt()); ++ ++ find_evt_log(slrt, &evt_log_addr, &evt_log_size); ++ evt_log_addr = __va(evt_log_addr); ++ ++ if ( is_tpm12() ) { ++ uint8_t sha1_digest[SHA1_DIGEST_SIZE]; ++ ++ struct txt_ev_log_container_12 *evt_log = evt_log_addr; ++ void *entry_digest = create_log_event12(evt_log, evt_log_size, pcr, ++ type, log_data, log_data_size); ++ ++ /* We still need to write computed hash somewhere. */ ++ if ( entry_digest == NULL ) ++ entry_digest = sha1_digest; ++ ++ if ( !tpm12_hash_extend(loc, buf, size, pcr, entry_digest) ) { ++#ifndef __EARLY_TPM__ ++ printk(XENLOG_ERR "Extending PCR%u failed\n", pcr); ++#endif ++ } ++ } ++} ++ ++#ifdef __EARLY_TPM__ ++void __stdcall tpm_extend_mbi(uint32_t *mbi) ++{ ++ /* MBI starts with uint32_t total_size. */ ++ tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)mbi, *mbi, ++ TXT_EVTYPE_SLAUNCH, NULL, 0); ++} ++#endif +-- +2.46.0 + diff --git a/1308-x86-boot-choose-AP-stack-based-on-APIC-ID.patch b/1308-x86-boot-choose-AP-stack-based-on-APIC-ID.patch new file mode 100644 index 00000000..1c54c056 --- /dev/null +++ b/1308-x86-boot-choose-AP-stack-based-on-APIC-ID.patch @@ -0,0 +1,166 @@ +From d71e552eb4eea07c92d028dda0084bdb63dfed51 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Wed, 16 Nov 2022 15:03:07 +0100 +Subject: [PATCH 1308/1328] x86/boot: choose AP stack based on APIC ID + +This is made as the first step of making parallel AP bring-up possible. +It should be enough for pre-C code. 
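+
+For illustration, the stack lookup performed by the new assembly below
+is roughly equivalent to this C sketch (a hypothetical helper, not code
+added by this patch; x86_cpu_to_apicid[] and stack_base[] are the
+existing arrays):
+
+    static void *ap_stack_for(uint32_t apicid)
+    {
+        unsigned int i;
+
+        for ( i = 0; i < NR_CPUS; i++ )
+        {
+            if ( x86_cpu_to_apicid[i] != apicid )
+                continue;
+            if ( stack_base[i] == NULL )
+                return NULL; /* no stack allocated: the AP halts */
+            /* Move to the stack top, below the struct cpu_info block. */
+            return stack_base[i] + STACK_SIZE - sizeof(struct cpu_info);
+        }
+
+        return NULL; /* unknown APIC ID: the AP halts */
+    }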
+ +Parallel AP bring-up is necessary because TXT by design releases all APs +at once. In addition to that it reduces number of IPIs (and more +importantly, delays between them) required to start all logical +processors. This results in significant reduction of boot time, even +when DRTM is not used, with performance gain growing with the number of +logical CPUs. + +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/boot/head.S | 1 + + xen/arch/x86/boot/trampoline.S | 21 +++++++++++++++++++++ + xen/arch/x86/boot/x86_64.S | 28 +++++++++++++++++++++++++++- + xen/arch/x86/include/asm/apicdef.h | 4 ++++ + xen/arch/x86/include/asm/msr-index.h | 3 +++ + xen/arch/x86/setup.c | 7 +++++++ + 6 files changed, 63 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S +index 33c0779368..7ea50f0098 100644 +--- a/xen/arch/x86/boot/head.S ++++ b/xen/arch/x86/boot/head.S +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + +diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S +index cdecf949b4..6440f31568 100644 +--- a/xen/arch/x86/boot/trampoline.S ++++ b/xen/arch/x86/boot/trampoline.S +@@ -72,6 +72,27 @@ trampoline_protmode_entry: + mov $X86_CR4_PAE,%ecx + mov %ecx,%cr4 + ++ /* ++ * Get APIC ID while we're in non-paged mode. Start by checking if ++ * x2APIC is enabled. ++ */ ++ mov $MSR_APIC_BASE, %ecx ++ rdmsr ++ test $APIC_BASE_EXTD, %eax ++ jnz .Lx2apic ++ ++ /* Not x2APIC, read from MMIO */ ++ and $APIC_BASE_ADDR_MASK, %eax ++ mov APIC_ID(%eax), %esp ++ shr $24, %esp ++ jmp 1f ++ ++.Lx2apic: ++ mov $(MSR_X2APIC_FIRST + (APIC_ID >> MSR_X2APIC_SHIFT)), %ecx ++ rdmsr ++ mov %eax, %esp ++1: ++ + /* Load pagetable base register. */ + mov $sym_offs(idle_pg_table),%eax + add bootsym_rel(trampoline_xen_phys_start,4,%eax) +diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S +index 5d12937a0e..ad2f5058f0 100644 +--- a/xen/arch/x86/boot/x86_64.S ++++ b/xen/arch/x86/boot/x86_64.S +@@ -15,7 +15,33 @@ ENTRY(__high_start) + mov $XEN_MINIMAL_CR4,%rcx + mov %rcx,%cr4 + +- mov stack_start(%rip),%rsp ++ test %ebx,%ebx ++ cmovz stack_start(%rip), %rsp ++ jz .L_stack_set ++ ++ /* APs only: get stack base from APIC ID saved in %esp. */ ++ mov $-1, %rax ++ lea x86_cpu_to_apicid(%rip), %rcx ++1: ++ add $1, %rax ++ cmp $NR_CPUS, %eax ++ jb 2f ++ hlt ++2: ++ cmp %esp, (%rcx, %rax, 4) ++ jne 1b ++ ++ /* %eax is now Xen CPU index. */ ++ lea stack_base(%rip), %rcx ++ mov (%rcx, %rax, 8), %rsp ++ ++ test %rsp,%rsp ++ jnz 1f ++ hlt ++1: ++ add $(STACK_SIZE - CPUINFO_sizeof), %rsp ++ ++.L_stack_set: + + /* Reset EFLAGS (subsumes CLI and CLD). */ + pushq $0 +diff --git a/xen/arch/x86/include/asm/apicdef.h b/xen/arch/x86/include/asm/apicdef.h +index 2440d83c8d..251d7f5465 100644 +--- a/xen/arch/x86/include/asm/apicdef.h ++++ b/xen/arch/x86/include/asm/apicdef.h +@@ -129,6 +129,10 @@ + + #define MAX_IO_APICS 128 + ++#ifndef __ASSEMBLY__ ++ + extern bool x2apic_enabled; + ++#endif /* !__ASSEMBLY__ */ ++ + #endif +diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h +index b9e878f8c7..57025b9bf7 100644 +--- a/xen/arch/x86/include/asm/msr-index.h ++++ b/xen/arch/x86/include/asm/msr-index.h +@@ -165,6 +165,9 @@ + #define MSR_X2APIC_FIRST 0x00000800 + #define MSR_X2APIC_LAST 0x000008ff + ++/* MSR offset can be obtained by shifting MMIO offset this number of bits to the right. 
*/ ++#define MSR_X2APIC_SHIFT 4 ++ + #define MSR_X2APIC_TPR 0x00000808 + #define MSR_X2APIC_PPR 0x0000080a + #define MSR_X2APIC_EOI 0x0000080b +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 06e5dec254..49694e10a2 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -1907,6 +1907,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) + */ + if ( !pv_shim ) + { ++ /* Separate loop to make parallel AP bringup possible. */ + for_each_present_cpu ( i ) + { + /* Set up cpu_to_node[]. */ +@@ -1914,6 +1915,12 @@ void __init noreturn __start_xen(unsigned long mbi_p) + /* Set up node_to_cpumask based on cpu_to_node[]. */ + numa_add_cpu(i); + ++ if ( stack_base[i] == NULL ) ++ stack_base[i] = cpu_alloc_stack(i); ++ } ++ ++ for_each_present_cpu ( i ) ++ { + if ( (park_offline_cpus || num_online_cpus() < max_cpus) && + !cpu_online(i) ) + { +-- +2.46.0 + diff --git a/1309-x86-smpboot.c-TXT-AP-bringup.patch b/1309-x86-smpboot.c-TXT-AP-bringup.patch new file mode 100644 index 00000000..13b7168f --- /dev/null +++ b/1309-x86-smpboot.c-TXT-AP-bringup.patch @@ -0,0 +1,217 @@ +From e98ee2d437797e7c2ba67c5e65850dc8d3118884 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Wed, 16 Nov 2022 15:06:18 +0100 +Subject: [PATCH 1309/1328] x86/smpboot.c: TXT AP bringup + +On Intel TXT, APs are started in one of two ways, depending on ACM +which reports it in its information table. In both cases, all APs are +started simultaneously after BSP requests them to do so. Two possible +ways are: +- GETSEC[WAKEUP] instruction, +- MONITOR address. + +GETSEC[WAKEUP] requires versions >= 7 of SINIT to MLE Data, but there is +no clear mapping of that version with regard to processor family and +it's not known which CPUs actually use it. It could have been designed +for TXT support on CPUs that lack MONITOR/MWAIT, because GETSEC[WAKEUP] +seems to be more complicated, in software and hardware alike. + +This patch implements only MONITOR approach, GETSEC[WAKEUP] support will +be added later once more details and means of testing are available and +if there is a practical need for it. + +With this patch, every AP goes through assembly part, and only when in +start_secondary() in C they re-enter MONITOR/MWAIT iff they are not the +AP that was asked to boot. The same address is reused for simplicity, +and on next wakeup call APs don't have to go through assembly part +again (GDT, paging, stack setting). + +Signed-off-by: Krystian Hebel +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/boot/trampoline.S | 19 ++++++++- + xen/arch/x86/include/asm/intel_txt.h | 6 +++ + xen/arch/x86/include/asm/processor.h | 1 + + xen/arch/x86/smpboot.c | 61 ++++++++++++++++++++++++++++ + 4 files changed, 86 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S +index 6440f31568..41eb3feaf0 100644 +--- a/xen/arch/x86/boot/trampoline.S ++++ b/xen/arch/x86/boot/trampoline.S +@@ -59,6 +59,16 @@ GLOBAL(trampoline_realmode_entry) + ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6) + + .code32 ++GLOBAL(txt_ap_entry) ++ /* ++ * APs enter here in protected mode without paging. GDT is set in JOIN ++ * structure, it points to trampoline_gdt. Interrupts are disabled by ++ * TXT (including NMI and SMI), so IDT doesn't matter at this point. ++ * The only missing point is telling that we are AP by saving non-zero ++ * value in EBX. ++ */ ++ mov $1, %ebx ++ + trampoline_protmode_entry: + /* Set up a few descriptors: on entry only CS is guaranteed good. 
*/ + mov $BOOT_DS,%eax +@@ -144,7 +154,7 @@ start64: + .word 0 + idt_48: .word 0, 0, 0 # base = limit = 0 + +-trampoline_gdt: ++GLOBAL(trampoline_gdt) + .word 0 /* 0x0000: unused (reused for GDTR) */ + gdt_48: + .word .Ltrampoline_gdt_end - trampoline_gdt - 1 +@@ -155,6 +165,13 @@ gdt_48: + .quad 0x00cf93000000ffff /* 0x0018: ring 0 data */ + .quad 0x00009b000000ffff /* 0x0020: real-mode code @ BOOT_TRAMPOLINE */ + .quad 0x000093000000ffff /* 0x0028: real-mode data @ BOOT_TRAMPOLINE */ ++ /* ++ * Intel TXT requires these two in exact order. This isn't compatible ++ * with order required by syscall, so we have duplicated entries... ++ * If order ever changes, update selector numbers in asm/intel_txt.h. ++ */ ++ .quad 0x00cf9b000000ffff /* 0x0030: ring 0 code, 32-bit mode */ ++ .quad 0x00cf93000000ffff /* 0x0038: ring 0 data */ + .Ltrampoline_gdt_end: + + /* Relocations for trampoline Real Mode segments. */ +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 8505f37210..59cdc3f2e9 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -75,10 +75,16 @@ + + #define SLAUNCH_BOOTLOADER_MAGIC 0x4c534254 + ++#define TXT_AP_BOOT_CS 0x0030 ++#define TXT_AP_BOOT_DS 0x0038 ++ + #ifndef __ASSEMBLY__ + + extern bool slaunch_active; + ++extern char txt_ap_entry[]; ++extern uint32_t trampoline_gdt[]; ++ + /* We need to differentiate between pre- and post paging enabled. */ + #ifdef __BOOT_DEFS_H__ + #define _txt(x) _p(x) +diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h +index f8f757a8a8..9e89f72912 100644 +--- a/xen/arch/x86/include/asm/processor.h ++++ b/xen/arch/x86/include/asm/processor.h +@@ -641,6 +641,7 @@ void set_in_mcu_opt_ctrl(uint32_t mask, uint32_t val); + enum ap_boot_method { + AP_BOOT_NORMAL, + AP_BOOT_SKINIT, ++ AP_BOOT_TXT, + }; + extern enum ap_boot_method ap_boot_method; + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index 7aa899dac3..8cd2bc4573 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -331,6 +332,29 @@ void start_secondary(void *unused) + */ + unsigned int cpu = booting_cpu; + ++ if ( ap_boot_method == AP_BOOT_TXT ) { ++ uint64_t misc_enable; ++ uint32_t my_apicid; ++ struct txt_sinit_mle_data *sinit_mle = ++ txt_sinit_mle_data_start(__va(read_txt_reg(TXTCR_HEAP_BASE))); ++ ++ /* TXT released us with MONITOR disabled in IA32_MISC_ENABLE. */ ++ rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); ++ wrmsrl(MSR_IA32_MISC_ENABLE, ++ misc_enable | MSR_IA32_MISC_ENABLE_MONITOR_ENABLE); ++ ++ /* get_apic_id() reads from x2APIC if it thinks it is enabled. */ ++ x2apic_ap_setup(); ++ my_apicid = get_apic_id(); ++ ++ while ( my_apicid != x86_cpu_to_apicid[cpu] ) { ++ asm volatile ("monitor; xor %0,%0; mwait" ++ :: "a"(__va(sinit_mle->rlp_wakeup_addr)), "c"(0), ++ "d"(0) : "memory"); ++ cpu = booting_cpu; ++ } ++ } ++ + /* Critical region without IDT or TSS. Any fault is deadly! 
*/ + + set_current(idle_vcpu[cpu]); +@@ -424,6 +448,33 @@ void start_secondary(void *unused) + startup_cpu_idle_loop(); + } + ++static int wake_aps_in_txt(unsigned long trampoline_rm) ++{ ++ struct txt_sinit_mle_data *sinit_mle = ++ txt_sinit_mle_data_start(__va(read_txt_reg(TXTCR_HEAP_BASE))); ++ uint32_t *wakeup_addr = __va(sinit_mle->rlp_wakeup_addr); ++ ++#define trampoline_relative(x) \ ++ (trampoline_rm + ((char *)(x) - trampoline_realmode_entry)) ++ ++ uint32_t join[4] = { ++ trampoline_gdt[1], /* GDT limit */ ++ trampoline_relative(trampoline_gdt), /* GDT base */ ++ TXT_AP_BOOT_CS, /* CS selector, DS = CS+8 */ ++ trampoline_relative(txt_ap_entry) /* EIP */ ++ }; ++ ++#undef trampoline_relative ++ ++ write_txt_reg(TXTCR_MLE_JOIN, __pa(join)); ++ ++ smp_mb(); ++ ++ *wakeup_addr = 1; ++ ++ return 0; ++} ++ + static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + { + unsigned long send_status = 0, accept_status = 0; +@@ -446,6 +497,9 @@ static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + if ( tboot_in_measured_env() && !tboot_wake_ap(phys_apicid, start_eip) ) + return 0; + ++ if ( ap_boot_method == AP_BOOT_TXT ) ++ return wake_aps_in_txt(start_eip); ++ + /* + * Be paranoid about clearing APIC errors. + */ +@@ -1153,6 +1207,13 @@ static struct notifier_block cpu_smpboot_nfb = { + + void __init smp_prepare_cpus(void) + { ++ /* ++ * If the platform is performing a Secure Launch via TXT, secondary ++ * CPUs (APs) will need to be woken up in a TXT-specific way. ++ */ ++ if ( slaunch_active && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) ++ ap_boot_method = AP_BOOT_TXT; ++ + register_cpu_notifier(&cpu_smpboot_nfb); + + mtrr_aps_sync_begin(); +-- +2.46.0 + diff --git a/1310-arch-x86-don-t-access-x86_cpu_to_apicid-directly-use.patch b/1310-arch-x86-don-t-access-x86_cpu_to_apicid-directly-use.patch new file mode 100644 index 00000000..c9a1bf5b --- /dev/null +++ b/1310-arch-x86-don-t-access-x86_cpu_to_apicid-directly-use.patch @@ -0,0 +1,265 @@ +From b9097f06f2ba539af16a58969f689b4ade373555 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Thu, 1 Jun 2023 16:05:18 +0200 +Subject: [PATCH 1310/1328] arch/x86: don't access x86_cpu_to_apicid[] + directly, use cpu_physical_id(cpu) + +This is done in preparation to move data from x86_cpu_to_apicid[] +elsewhere. 
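+
+For context: cpu_physical_id() is currently a thin wrapper in
+asm/smp.h,
+
+    #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
+
+so this conversion is purely mechanical. Once every user goes through
+the macro, the underlying storage can be moved by changing that single
+definition instead of touching each call site again.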
+ +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/acpi/cpu_idle.c | 4 ++-- + xen/arch/x86/acpi/lib.c | 2 +- + xen/arch/x86/apic.c | 2 +- + xen/arch/x86/cpu/mwait-idle.c | 4 ++-- + xen/arch/x86/domain.c | 2 +- + xen/arch/x86/mpparse.c | 6 +++--- + xen/arch/x86/numa.c | 2 +- + xen/arch/x86/platform_hypercall.c | 2 +- + xen/arch/x86/setup.c | 14 +++++++------- + xen/arch/x86/smpboot.c | 6 +++--- + xen/arch/x86/spec_ctrl.c | 2 +- + xen/arch/x86/sysctl.c | 2 +- + 12 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c +index 44347e3390..2ae666ec94 100644 +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -1260,7 +1260,7 @@ int get_cpu_id(u32 acpi_id) + + for ( i = 0; i < nr_cpu_ids; i++ ) + { +- if ( apic_id == x86_cpu_to_apicid[i] ) ++ if ( apic_id == cpu_physical_id(i) ) + return i; + } + +@@ -1320,7 +1320,7 @@ static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power) + + static void repark_cpu(int cpu_id) + { +- uint32_t apic_id = x86_cpu_to_apicid[cpu_id]; ++ uint32_t apic_id = cpu_physical_id(cpu_id); + + /* + * If we've just learned of more available C states, wake the CPU if +diff --git a/xen/arch/x86/acpi/lib.c b/xen/arch/x86/acpi/lib.c +index 43831b92d1..55d0ef919c 100644 +--- a/xen/arch/x86/acpi/lib.c ++++ b/xen/arch/x86/acpi/lib.c +@@ -89,7 +89,7 @@ unsigned int acpi_get_processor_id(unsigned int cpu) + { + unsigned int acpiid, apicid; + +- if ((apicid = x86_cpu_to_apicid[cpu]) == BAD_APICID) ++ if ((apicid = cpu_physical_id(cpu)) == BAD_APICID) + return INVALID_ACPIID; + + for (acpiid = 0; acpiid < ARRAY_SIZE(x86_acpiid_to_apicid); acpiid++) +diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c +index e43b779036..0feffbf305 100644 +--- a/xen/arch/x86/apic.c ++++ b/xen/arch/x86/apic.c +@@ -985,7 +985,7 @@ __next: + */ + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = get_apic_id(); +- x86_cpu_to_apicid[0] = get_apic_id(); ++ cpu_physical_id(0) = get_apic_id(); + + ioapic_init(); + } +diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c +index e3d3b8ba73..3b35fef774 100644 +--- a/xen/arch/x86/cpu/mwait-idle.c ++++ b/xen/arch/x86/cpu/mwait-idle.c +@@ -1221,8 +1221,8 @@ static void __init ivt_idle_state_table_update(void) + unsigned int cpu, max_apicid = boot_cpu_physical_apicid; + + for_each_present_cpu(cpu) +- if (max_apicid < x86_cpu_to_apicid[cpu]) +- max_apicid = x86_cpu_to_apicid[cpu]; ++ if (max_apicid < cpu_physical_id(cpu)) ++ max_apicid = cpu_physical_id(cpu); + switch (apicid_to_socket(max_apicid)) { + case 0: case 1: + /* 1 and 2 socket systems use default ivt_cstates */ +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 5dbd1d8a12..6738449761 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -1538,7 +1538,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg) + break; + + cpu_id.phys_id = +- (uint64_t)x86_cpu_to_apicid[v->vcpu_id] | ++ (uint64_t)cpu_physical_id(v->vcpu_id) | + ((uint64_t)acpi_get_processor_id(v->vcpu_id) << 32); + + rc = -EFAULT; +diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c +index d8ccab2449..b8cabebe7b 100644 +--- a/xen/arch/x86/mpparse.c ++++ b/xen/arch/x86/mpparse.c +@@ -187,7 +187,7 @@ static int MP_processor_info_x(struct mpc_config_processor *m, + " Processor with apicid %i ignored\n", apicid); + return cpu; + } +- x86_cpu_to_apicid[cpu] = apicid; ++ cpu_physical_id(cpu) = apicid; + cpumask_set_cpu(cpu, 
&cpu_present_map); + } + +@@ -822,12 +822,12 @@ void mp_unregister_lapic(uint32_t apic_id, uint32_t cpu) + if (!cpu || (apic_id == boot_cpu_physical_apicid)) + return; + +- if (x86_cpu_to_apicid[cpu] != apic_id) ++ if (cpu_physical_id(cpu) != apic_id) + return; + + physid_clear(apic_id, phys_cpu_present_map); + +- x86_cpu_to_apicid[cpu] = BAD_APICID; ++ cpu_physical_id(cpu) = BAD_APICID; + cpumask_clear_cpu(cpu, &cpu_present_map); + } + +diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c +index 322157fab7..22ff0923d4 100644 +--- a/xen/arch/x86/numa.c ++++ b/xen/arch/x86/numa.c +@@ -340,7 +340,7 @@ void __init init_cpu_to_node(void) + + for ( i = 0; i < nr_cpu_ids; i++ ) + { +- u32 apicid = x86_cpu_to_apicid[i]; ++ u32 apicid = cpu_physical_id(i); + if ( apicid == BAD_APICID ) + continue; + node = apicid < MAX_LOCAL_APIC ? apicid_to_node[apicid] : NUMA_NO_NODE; +diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c +index 27a799161a..890595340e 100644 +--- a/xen/arch/x86/platform_hypercall.c ++++ b/xen/arch/x86/platform_hypercall.c +@@ -588,7 +588,7 @@ ret_t do_platform_op( + } + else + { +- g_info->apic_id = x86_cpu_to_apicid[g_info->xen_cpuid]; ++ g_info->apic_id = cpu_physical_id(g_info->xen_cpuid); + g_info->acpi_id = acpi_get_processor_id(g_info->xen_cpuid); + ASSERT(g_info->apic_id != BAD_APICID); + g_info->flags = 0; +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 49694e10a2..1a4bab89a4 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -317,7 +317,7 @@ static void __init init_idle_domain(void) + void srat_detect_node(int cpu) + { + nodeid_t node; +- u32 apicid = x86_cpu_to_apicid[cpu]; ++ u32 apicid = cpu_physical_id(cpu); + + node = apicid < MAX_LOCAL_APIC ? apicid_to_node[apicid] : NUMA_NO_NODE; + if ( node == NUMA_NO_NODE ) +@@ -344,7 +344,7 @@ static void __init normalise_cpu_order(void) + + for_each_present_cpu ( i ) + { +- apicid = x86_cpu_to_apicid[i]; ++ apicid = cpu_physical_id(i); + min_diff = min_cpu = ~0u; + + /* +@@ -355,12 +355,12 @@ static void __init normalise_cpu_order(void) + j < nr_cpu_ids; + j = cpumask_next(j, &cpu_present_map) ) + { +- diff = x86_cpu_to_apicid[j] ^ apicid; ++ diff = cpu_physical_id(j) ^ apicid; + while ( diff & (diff-1) ) + diff &= diff-1; + if ( (diff < min_diff) || + ((diff == min_diff) && +- (x86_cpu_to_apicid[j] < x86_cpu_to_apicid[min_cpu])) ) ++ (cpu_physical_id(j) < cpu_physical_id(min_cpu))) ) + { + min_diff = diff; + min_cpu = j; +@@ -376,9 +376,9 @@ static void __init normalise_cpu_order(void) + + /* Switch the best-matching CPU with the next CPU in logical order. 
*/ + j = cpumask_next(i, &cpu_present_map); +- apicid = x86_cpu_to_apicid[min_cpu]; +- x86_cpu_to_apicid[min_cpu] = x86_cpu_to_apicid[j]; +- x86_cpu_to_apicid[j] = apicid; ++ apicid = cpu_physical_id(min_cpu); ++ cpu_physical_id(min_cpu) = cpu_physical_id(j); ++ cpu_physical_id(j) = apicid; + } + } + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index 8cd2bc4573..cf45ba21c6 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -347,7 +347,7 @@ void start_secondary(void *unused) + x2apic_ap_setup(); + my_apicid = get_apic_id(); + +- while ( my_apicid != x86_cpu_to_apicid[cpu] ) { ++ while ( my_apicid != cpu_physical_id(cpu) ) { + asm volatile ("monitor; xor %0,%0; mwait" + :: "a"(__va(sinit_mle->rlp_wakeup_addr)), "c"(0), + "d"(0) : "memory"); +@@ -1223,7 +1223,7 @@ void __init smp_prepare_cpus(void) + print_cpu_info(0); + + boot_cpu_physical_apicid = get_apic_id(); +- x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; ++ cpu_physical_id(0) = boot_cpu_physical_apicid; + + stack_base[0] = (void *)((unsigned long)stack_start & ~(STACK_SIZE - 1)); + +@@ -1443,7 +1443,7 @@ int __cpu_up(unsigned int cpu) + { + int apicid, ret; + +- if ( (apicid = x86_cpu_to_apicid[cpu]) == BAD_APICID ) ++ if ( (apicid = cpu_physical_id(cpu)) == BAD_APICID ) + return -ENODEV; + + if ( (!x2apic_enabled && apicid >= APIC_ALL_CPUS) || +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 7697f9ad3f..d3fc2ed637 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -717,7 +717,7 @@ static bool __init check_smt_enabled(void) + * has a non-zero thread id component indicates that SMT is active. + */ + for_each_present_cpu ( cpu ) +- if ( x86_cpu_to_apicid[cpu] & (boot_cpu_data.x86_num_siblings - 1) ) ++ if ( cpu_physical_id(cpu) & (boot_cpu_data.x86_num_siblings - 1) ) + return true; + + return false; +diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c +index 42dc360ad6..de9b75c1ec 100644 +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c +@@ -59,7 +59,7 @@ static long cf_check smt_up_down_helper(void *data) + for_each_present_cpu ( cpu ) + { + /* Skip primary siblings (those whose thread id is 0). */ +- if ( !(x86_cpu_to_apicid[cpu] & sibling_mask) ) ++ if ( !(cpu_physical_id(cpu) & sibling_mask) ) + continue; + + if ( !up && core_parking_remove(cpu) ) +-- +2.46.0 + diff --git a/1311-arch-x86-smp-drop-x86_cpu_to_apicid-use-cpu_data-cpu.patch b/1311-arch-x86-smp-drop-x86_cpu_to_apicid-use-cpu_data-cpu.patch new file mode 100644 index 00000000..4251b28a --- /dev/null +++ b/1311-arch-x86-smp-drop-x86_cpu_to_apicid-use-cpu_data-cpu.patch @@ -0,0 +1,156 @@ +From e6a90f0d3de0c5ec68a021fddfc9bfd91174dace Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Thu, 1 Jun 2023 17:01:59 +0200 +Subject: [PATCH 1311/1328] arch/x86/smp: drop x86_cpu_to_apicid, use + cpu_data[cpu].apicid instead + +Both fields held the same data. 
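+
+The accessor keeps its name but is now defined in asm/processor.h as
+
+    #define cpu_physical_id(cpu) cpu_data[cpu].apicid
+
+(see the hunk below). One consequence, also visible below: because
+initialize_cpu_data() copies boot_cpu_data over cpu_data[cpu], the APIC
+ID has to be saved and restored around that copy so it survives the
+overwrite.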
+ +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/boot/x86_64.S | 8 +++++--- + xen/arch/x86/include/asm/asm_defns.h | 2 +- + xen/arch/x86/include/asm/processor.h | 2 ++ + xen/arch/x86/include/asm/smp.h | 4 ---- + xen/arch/x86/numa.c | 15 +++++++-------- + xen/arch/x86/smpboot.c | 8 ++++---- + xen/arch/x86/x86_64/asm-offsets.c | 4 +++- + 7 files changed, 22 insertions(+), 21 deletions(-) + +diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S +index ad2f5058f0..ce280c914a 100644 +--- a/xen/arch/x86/boot/x86_64.S ++++ b/xen/arch/x86/boot/x86_64.S +@@ -20,15 +20,17 @@ ENTRY(__high_start) + jz .L_stack_set + + /* APs only: get stack base from APIC ID saved in %esp. */ +- mov $-1, %rax +- lea x86_cpu_to_apicid(%rip), %rcx ++ mov $0, %rax ++ lea cpu_data(%rip), %rcx ++ /* cpu_data[0] is BSP, skip it. */ + 1: + add $1, %rax ++ add $CPUINFO_X86_sizeof, %rcx + cmp $NR_CPUS, %eax + jb 2f + hlt + 2: +- cmp %esp, (%rcx, %rax, 4) ++ cmp %esp, CPUINFO_X86_apicid(%rcx) + jne 1b + + /* %eax is now Xen CPU index. */ +diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h +index abc6822b08..b6f9ec10b0 100644 +--- a/xen/arch/x86/include/asm/asm_defns.h ++++ b/xen/arch/x86/include/asm/asm_defns.h +@@ -166,7 +166,7 @@ register unsigned long current_stack_pointer asm("rsp"); + #endif + + #define CPUINFO_FEATURE_OFFSET(feature) \ +- (CPUINFO_features + (cpufeat_word(feature) * 4)) ++ (CPUINFO_X86_features + (cpufeat_word(feature) * 4)) + + #else + +diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h +index 9e89f72912..339d6e146e 100644 +--- a/xen/arch/x86/include/asm/processor.h ++++ b/xen/arch/x86/include/asm/processor.h +@@ -149,6 +149,8 @@ extern struct cpuinfo_x86 boot_cpu_data; + extern struct cpuinfo_x86 cpu_data[]; + #define current_cpu_data cpu_data[smp_processor_id()] + ++#define cpu_physical_id(cpu) cpu_data[cpu].apicid ++ + extern bool probe_cpuid_faulting(void); + extern void ctxt_switch_levelling(const struct vcpu *next); + extern void (*ctxt_switch_masking)(const struct vcpu *next); +diff --git a/xen/arch/x86/include/asm/smp.h b/xen/arch/x86/include/asm/smp.h +index 41a3b6a0da..b0cefdf814 100644 +--- a/xen/arch/x86/include/asm/smp.h ++++ b/xen/arch/x86/include/asm/smp.h +@@ -39,10 +39,6 @@ extern void (*mtrr_hook) (void); + + extern void zap_low_mappings(void); + +-extern u32 x86_cpu_to_apicid[]; +- +-#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +- + #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) + extern void cpu_exit_clear(unsigned int cpu); + extern void cpu_uninit(unsigned int cpu); +diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c +index 22ff0923d4..393e4108c7 100644 +--- a/xen/arch/x86/numa.c ++++ b/xen/arch/x86/numa.c +@@ -324,14 +324,13 @@ custom_param("numa", numa_setup); + /* + * Setup early cpu_to_node. + * +- * Populate cpu_to_node[] only if x86_cpu_to_apicid[], +- * and apicid_to_node[] tables have valid entries for a CPU. +- * This means we skip cpu_to_node[] initialisation for NUMA +- * emulation and faking node case (when running a kernel compiled +- * for NUMA on a non NUMA box), which is OK as cpu_to_node[] +- * is already initialized in a round robin manner at numa_init_array, +- * prior to this call, and this initialization is good enough +- * for the fake NUMA cases. ++ * Populate cpu_to_node[] only if cpu_data[], and apicid_to_node[] ++ * tables have valid entries for a CPU. 
This means we skip ++ * cpu_to_node[] initialisation for NUMA emulation and faking node ++ * case (when running a kernel compiled for NUMA on a non NUMA box), ++ * which is OK as cpu_to_node[] is already initialized in a round ++ * robin manner at numa_init_array, prior to this call, and this ++ * initialization is good enough for the fake NUMA cases. + */ + void __init init_cpu_to_node(void) + { +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index cf45ba21c6..df94e57269 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -73,10 +73,8 @@ unsigned int __read_mostly nr_sockets; + cpumask_t **__read_mostly socket_cpumask; + static cpumask_t *secondary_socket_cpumask; + +-struct cpuinfo_x86 cpu_data[NR_CPUS]; +- +-u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly = +- { [0 ... NR_CPUS-1] = BAD_APICID }; ++struct cpuinfo_x86 cpu_data[NR_CPUS] = ++ { [0 ... NR_CPUS-1] .apicid = BAD_APICID }; + + static int cpu_error; + static enum cpu_state { +@@ -93,7 +91,9 @@ void *stack_base[NR_CPUS]; + + void initialize_cpu_data(unsigned int cpu) + { ++ uint32_t apicid = cpu_physical_id(cpu); + cpu_data[cpu] = boot_cpu_data; ++ cpu_physical_id(cpu) = apicid; + } + + static bool smp_store_cpu_info(unsigned int id) +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index fba82d6436..cc5eb5a518 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -186,7 +186,9 @@ void __dummy__(void) + OFFSET(IRQSTAT_softirq_pending, irq_cpustat_t, __softirq_pending); + BLANK(); + +- OFFSET(CPUINFO_features, struct cpuinfo_x86, x86_capability); ++ OFFSET(CPUINFO_X86_features, struct cpuinfo_x86, x86_capability); ++ OFFSET(CPUINFO_X86_apicid, struct cpuinfo_x86, apicid); ++ DEFINE(CPUINFO_X86_sizeof, sizeof(struct cpuinfo_x86)); + BLANK(); + + OFFSET(MB_flags, multiboot_info_t, flags); +-- +2.46.0 + diff --git a/1312-arch-x86-smp-move-stack_base-to-cpu_data.patch b/1312-arch-x86-smp-move-stack_base-to-cpu_data.patch new file mode 100644 index 00000000..6e968888 --- /dev/null +++ b/1312-arch-x86-smp-move-stack_base-to-cpu_data.patch @@ -0,0 +1,222 @@ +From 9129e7d09307c71a700c63f4e62ee6eb0db01468 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Thu, 1 Jun 2023 19:27:22 +0200 +Subject: [PATCH 1312/1328] arch/x86/smp: move stack_base to cpu_data + +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/boot/x86_64.S | 5 ++--- + xen/arch/x86/include/asm/processor.h | 1 + + xen/arch/x86/include/asm/smp.h | 2 +- + xen/arch/x86/setup.c | 6 +++--- + xen/arch/x86/smpboot.c | 25 +++++++++++++------------ + xen/arch/x86/tboot.c | 4 ++-- + xen/arch/x86/traps.c | 4 ++-- + xen/arch/x86/x86_64/asm-offsets.c | 1 + + xen/include/xen/smp.h | 2 -- + 9 files changed, 25 insertions(+), 25 deletions(-) + +diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S +index ce280c914a..61040315f9 100644 +--- a/xen/arch/x86/boot/x86_64.S ++++ b/xen/arch/x86/boot/x86_64.S +@@ -33,9 +33,8 @@ ENTRY(__high_start) + cmp %esp, CPUINFO_X86_apicid(%rcx) + jne 1b + +- /* %eax is now Xen CPU index. */ +- lea stack_base(%rip), %rcx +- mov (%rcx, %rax, 8), %rsp ++ /* %rcx is now cpu_data[cpu], read stack base from it. 
*/ ++ mov CPUINFO_X86_stack_base(%rcx), %rsp + + test %rsp,%rsp + jnz 1f +diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h +index 339d6e146e..1f78785689 100644 +--- a/xen/arch/x86/include/asm/processor.h ++++ b/xen/arch/x86/include/asm/processor.h +@@ -137,6 +137,7 @@ struct cpuinfo_x86 { + __u32 phys_proc_id; /* package ID of each logical CPU */ + __u32 cpu_core_id; /* core ID of each logical CPU*/ + __u32 compute_unit_id; /* AMD compute unit ID of each logical CPU */ ++ void *stack_base; + unsigned short x86_clflush_size; + } __cacheline_aligned; + +diff --git a/xen/arch/x86/include/asm/smp.h b/xen/arch/x86/include/asm/smp.h +index b0cefdf814..4432a8ea1b 100644 +--- a/xen/arch/x86/include/asm/smp.h ++++ b/xen/arch/x86/include/asm/smp.h +@@ -76,7 +76,7 @@ extern cpumask_t **socket_cpumask; + * by certain scheduling code only. + */ + #define get_cpu_current(cpu) \ +- (get_cpu_info_from_stack((unsigned long)stack_base[cpu])->current_vcpu) ++ (get_cpu_info_from_stack((unsigned long)cpu_data[cpu].stack_base)->current_vcpu) + + extern unsigned int disabled_cpus; + extern bool unaccounted_cpus; +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 1a4bab89a4..f014b681d9 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -683,7 +683,7 @@ static void __init noreturn reinit_bsp_stack(void) + /* Update SYSCALL trampolines */ + percpu_traps_init(); + +- stack_base[0] = stack; ++ cpu_data[0].stack_base = stack; + + rc = setup_cpu_root_pgt(0); + if ( rc ) +@@ -1915,8 +1915,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) + /* Set up node_to_cpumask based on cpu_to_node[]. */ + numa_add_cpu(i); + +- if ( stack_base[i] == NULL ) +- stack_base[i] = cpu_alloc_stack(i); ++ if ( cpu_data[i].stack_base == NULL ) ++ cpu_data[i].stack_base = cpu_alloc_stack(i); + } + + for_each_present_cpu ( i ) +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index df94e57269..2a36812d6d 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -87,13 +87,15 @@ static enum cpu_state { + } cpu_state; + #define set_cpu_state(state) do { smp_mb(); cpu_state = (state); } while (0) + +-void *stack_base[NR_CPUS]; +- + void initialize_cpu_data(unsigned int cpu) + { + uint32_t apicid = cpu_physical_id(cpu); ++ void *stack = cpu_data[cpu].stack_base; ++ + cpu_data[cpu] = boot_cpu_data; ++ + cpu_physical_id(cpu) = apicid; ++ cpu_data[cpu].stack_base = stack; + } + + static bool smp_store_cpu_info(unsigned int id) +@@ -641,8 +643,6 @@ static int do_boot_cpu(int apicid, int cpu) + printk("Booting processor %d/%d eip %lx\n", + cpu, apicid, start_eip); + +- stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info); +- + /* This grunge runs the startup process for the targeted processor. */ + + set_cpu_state(CPU_STATE_INIT); +@@ -918,7 +918,7 @@ int setup_cpu_root_pgt(unsigned int cpu) + + /* Install direct map page table entries for stack, IDT, and TSS. 
*/ + for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE ) +- rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt); ++ rc = clone_mapping(__va(__pa(cpu_data[cpu].stack_base)) + off, rpt); + + if ( !rc ) + rc = clone_mapping(idt_tables[cpu], rpt); +@@ -1069,10 +1069,10 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove) + FREE_XENHEAP_PAGE(per_cpu(gdt, cpu)); + FREE_XENHEAP_PAGE(idt_tables[cpu]); + +- if ( stack_base[cpu] ) ++ if ( cpu_data[cpu].stack_base ) + { +- memguard_unguard_stack(stack_base[cpu]); +- FREE_XENHEAP_PAGES(stack_base[cpu], STACK_ORDER); ++ memguard_unguard_stack(cpu_data[cpu].stack_base); ++ FREE_XENHEAP_PAGES(cpu_data[cpu].stack_base, STACK_ORDER); + } + } + } +@@ -1106,11 +1106,11 @@ static int cpu_smpboot_alloc(unsigned int cpu) + if ( node != NUMA_NO_NODE ) + memflags = MEMF_node(node); + +- if ( stack_base[cpu] == NULL && +- (stack_base[cpu] = cpu_alloc_stack(cpu)) == NULL ) ++ if ( cpu_data[cpu].stack_base == NULL && ++ (cpu_data[cpu].stack_base = cpu_alloc_stack(cpu)) == NULL ) + goto out; + +- info = get_cpu_info_from_stack((unsigned long)stack_base[cpu]); ++ info = get_cpu_info_from_stack((unsigned long)cpu_data[cpu].stack_base); + info->processor_id = cpu; + info->per_cpu_offset = __per_cpu_offset[cpu]; + +@@ -1225,7 +1225,8 @@ void __init smp_prepare_cpus(void) + boot_cpu_physical_apicid = get_apic_id(); + cpu_physical_id(0) = boot_cpu_physical_apicid; + +- stack_base[0] = (void *)((unsigned long)stack_start & ~(STACK_SIZE - 1)); ++ cpu_data[0].stack_base = (void *) ++ ((unsigned long)stack_start & ~(STACK_SIZE - 1)); + + set_nr_sockets(); + +diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c +index ffd1126dfc..a06719061d 100644 +--- a/xen/arch/x86/tboot.c ++++ b/xen/arch/x86/tboot.c +@@ -238,9 +238,9 @@ static int mfn_in_guarded_stack(unsigned long mfn) + + for ( i = 0; i < nr_cpu_ids; i++ ) + { +- if ( !stack_base[i] ) ++ if ( !cpu_data[i].stack_base ) + continue; +- p = (void *)((unsigned long)stack_base[i] + STACK_SIZE - ++ p = (void *)((unsigned long)cpu_data[i].stack_base + STACK_SIZE - + PRIMARY_STACK_SIZE - PAGE_SIZE); + if ( mfn == virt_to_mfn(p) ) + return -1; +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index 7599bee361..77e69bf80d 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -623,9 +623,9 @@ void show_stack_overflow(unsigned int cpu, const struct cpu_user_regs *regs) + unsigned long curr_stack_base = esp & ~(STACK_SIZE - 1); + unsigned long esp_top, esp_bottom; + +- if ( _p(curr_stack_base) != stack_base[cpu] ) ++ if ( _p(curr_stack_base) != cpu_data[cpu].stack_base ) + printk("Current stack base %p differs from expected %p\n", +- _p(curr_stack_base), stack_base[cpu]); ++ _p(curr_stack_base), cpu_data[cpu].stack_base); + + esp_bottom = (esp | (STACK_SIZE - 1)) + 1; + esp_top = esp_bottom - PRIMARY_STACK_SIZE; +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index cc5eb5a518..a8d74fbd20 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -188,6 +188,7 @@ void __dummy__(void) + + OFFSET(CPUINFO_X86_features, struct cpuinfo_x86, x86_capability); + OFFSET(CPUINFO_X86_apicid, struct cpuinfo_x86, apicid); ++ OFFSET(CPUINFO_X86_stack_base, struct cpuinfo_x86, stack_base); + DEFINE(CPUINFO_X86_sizeof, sizeof(struct cpuinfo_x86)); + BLANK(); + +diff --git a/xen/include/xen/smp.h b/xen/include/xen/smp.h +index 0a9219173f..994fdc4742 100644 +--- a/xen/include/xen/smp.h ++++ b/xen/include/xen/smp.h +@@ -67,8 +67,6 
@@ void smp_send_call_function_mask(const cpumask_t *mask);
+ 
+ int alloc_cpu_id(void);
+ 
+-extern void *stack_base[NR_CPUS];
+-
+ void initialize_cpu_data(unsigned int cpu);
+ int setup_cpu_root_pgt(unsigned int cpu);
+ 
+-- 
+2.46.0
+ 
diff --git a/1313-arch-x86-smp-call-x2apic_ap_setup-earlier.patch b/1313-arch-x86-smp-call-x2apic_ap_setup-earlier.patch
new file mode 100644
index 00000000..951dfce1
--- /dev/null
+++ b/1313-arch-x86-smp-call-x2apic_ap_setup-earlier.patch
@@ -0,0 +1,48 @@
+From 4b8b09c88b6c0428b0fb5d7cabfa4b62d94332be Mon Sep 17 00:00:00 2001
+From: Krystian Hebel
+Date: Tue, 13 Jun 2023 15:44:36 +0200
+Subject: [PATCH 1313/1328] arch/x86/smp: call x2apic_ap_setup() earlier
+
+It used to be called from smp_callin(); however, BUG_ON() was invoked on
+multiple occasions before that. BUG_ON() may end up calling
+machine_restart(), which tries to get the APIC ID of the CPU running this
+code. If the BSP detected that x2APIC is enabled, get_apic_id() will try
+to use it for all CPUs. Enabling x2APIC on secondary CPUs earlier protects
+against an endless loop of #GP exceptions caused by attempts to read the
+IA32_X2APIC_APICID MSR while x2APIC is disabled in IA32_APIC_BASE.
+
+Signed-off-by: Krystian Hebel
+---
+ xen/arch/x86/smpboot.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 2a36812d6d..cfb0daf10a 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -196,7 +196,6 @@ static void smp_callin(void)
+ * update until we finish. We are free to set up this CPU: first the APIC.
+ */
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+- x2apic_ap_setup();
+ setup_local_APIC(false);
+ 
+ /* Save our processor parameters. */
+@@ -386,6 +385,14 @@ void start_secondary(void *unused)
+ get_cpu_info()->xen_cr3 = 0;
+ get_cpu_info()->pv_cr3 = 0;
+ 
++ /*
++ * BUG_ON() used in load_system_tables() and later code may end up calling
++ * machine_restart() which tries to get APIC ID for CPU running this code.
++ * If BSP detected that x2APIC is enabled, get_apic_id() will try to use it
++ * for _all_ CPUs. Enable x2APIC on secondary CPUs now so we won't end up
++ * with endless #GP loop.
++ */
++ x2apic_ap_setup();
+ load_system_tables();
+ 
+ /* Full exception support from here on in. */
+-- 
+2.46.0
+ 
diff --git a/1314-arch-x86-shutdown-protect-against-recurrent-machine_.patch b/1314-arch-x86-shutdown-protect-against-recurrent-machine_.patch
new file mode 100644
index 00000000..801e8ba0
--- /dev/null
+++ b/1314-arch-x86-shutdown-protect-against-recurrent-machine_.patch
@@ -0,0 +1,54 @@
+From f43fd9db331a4e76eeaddce06555bb430d62c436 Mon Sep 17 00:00:00 2001
+From: Krystian Hebel
+Date: Tue, 13 Jun 2023 15:56:12 +0200
+Subject: [PATCH 1314/1328] arch/x86/shutdown: protect against recurrent
+ machine_restart()
+
+If multiple CPUs call machine_restart() before the actual restart takes
+place, but after the boot CPU has declared itself not online, the ASSERT
+in on_selected_cpus() will fail. A few calls later, execution would end
+up in machine_restart() again, with another frame on the call stack for
+the new exception.
+
+To protect against running out of stack, the code checks whether the
+boot CPU is still online before calling on_selected_cpus().
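+
+The resulting logic in machine_restart() becomes (sketch condensed from
+the hunk below):
+
+    if ( get_apic_id() != boot_cpu_physical_apicid )
+    {
+        /* Not atomic: the boot CPU may still go offline right after
+           the check, in which case we just fall through to halt(). */
+        if ( cpu_online(0) )
+            on_selected_cpus(cpumask_of(0), __machine_restart,
+                             &delay_millisecs, 0);
+        for ( ; ; )
+            halt();
+    }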
+ +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/shutdown.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/shutdown.c b/xen/arch/x86/shutdown.c +index 7619544d14..32c70505ed 100644 +--- a/xen/arch/x86/shutdown.c ++++ b/xen/arch/x86/shutdown.c +@@ -577,9 +577,23 @@ void machine_restart(unsigned int delay_millisecs) + /* Ensure we are the boot CPU. */ + if ( get_apic_id() != boot_cpu_physical_apicid ) + { +- /* Send IPI to the boot CPU (logical cpu 0). */ +- on_selected_cpus(cpumask_of(0), __machine_restart, +- &delay_millisecs, 0); ++ /* ++ * Send IPI to the boot CPU (logical cpu 0). ++ * ++ * If multiple CPUs called machine_restart() before actual restart ++ * took place, but after boot CPU declared itself not online, ASSERT ++ * in on_selected_cpus() will fail. Few calls later we would end up ++ * here again, with another frame on call stack for new exception. ++ * To protect against running out of stack, check if boot CPU is ++ * online. ++ * ++ * Note this is not an atomic operation, so it is possible for ++ * on_selected_cpus() to be called once after boot CPU is offline ++ * before we hit halt() below. ++ */ ++ if ( cpu_online(0) ) ++ on_selected_cpus(cpumask_of(0), __machine_restart, ++ &delay_millisecs, 0); + for ( ; ; ) + halt(); + } +-- +2.46.0 + diff --git a/1315-arch-x86-smp-drop-booting_cpu-variable.patch b/1315-arch-x86-smp-drop-booting_cpu-variable.patch new file mode 100644 index 00000000..9ee166c4 --- /dev/null +++ b/1315-arch-x86-smp-drop-booting_cpu-variable.patch @@ -0,0 +1,121 @@ +From 489fab9280499956e54a25e3c2ce242f438fde48 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Tue, 13 Jun 2023 18:58:21 +0200 +Subject: [PATCH 1315/1328] arch/x86/smp: drop booting_cpu variable + +CPU id is obtained as a side effect of searching for appropriate +stack for AP. It can be used as a parameter to start_secondary(). +Coincidentally this also makes further work on making AP bring-up +code parallel easier. + +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/boot/x86_64.S | 13 +++++++++---- + xen/arch/x86/smpboot.c | 16 +++++----------- + 2 files changed, 14 insertions(+), 15 deletions(-) + +diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S +index 61040315f9..a5f1f681c9 100644 +--- a/xen/arch/x86/boot/x86_64.S ++++ b/xen/arch/x86/boot/x86_64.S +@@ -20,20 +20,24 @@ ENTRY(__high_start) + jz .L_stack_set + + /* APs only: get stack base from APIC ID saved in %esp. */ +- mov $0, %rax ++ mov $0, %rbx + lea cpu_data(%rip), %rcx + /* cpu_data[0] is BSP, skip it. */ + 1: +- add $1, %rax ++ add $1, %rbx + add $CPUINFO_X86_sizeof, %rcx +- cmp $NR_CPUS, %eax ++ cmp $NR_CPUS, %rbx + jb 2f + hlt + 2: + cmp %esp, CPUINFO_X86_apicid(%rcx) + jne 1b + +- /* %rcx is now cpu_data[cpu], read stack base from it. */ ++ /* ++ * At this point: ++ * - %rcx is cpu_data[cpu], read stack base from it, ++ * - %rbx (callee-save) is Xen cpu number, pass it to start_secondary(). ++ */ + mov CPUINFO_X86_stack_base(%rcx), %rsp + + test %rsp,%rsp +@@ -101,6 +105,7 @@ ENTRY(__high_start) + .L_ap_cet_done: + #endif /* CONFIG_XEN_SHSTK || CONFIG_XEN_IBT */ + ++ mov %rbx, %rdi + call start_secondary + BUG /* start_secondary() shouldn't return. 
*/ + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index cfb0daf10a..e9c6837e99 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -234,8 +234,6 @@ static void smp_callin(void) + cpu_relax(); + } + +-static int booting_cpu; +- + /* CPUs for which sibling maps can be computed. */ + static cpumask_t cpu_sibling_setup_map; + +@@ -323,15 +321,14 @@ static void set_cpu_sibling_map(unsigned int cpu) + } + } + +-void start_secondary(void *unused) ++void start_secondary(unsigned int cpu) + { + struct cpu_info *info = get_cpu_info(); + + /* +- * Dont put anything before smp_callin(), SMP booting is so fragile that we ++ * Don't put anything before smp_callin(), SMP booting is so fragile that we + * want to limit the things done here to the most necessary things. + */ +- unsigned int cpu = booting_cpu; + + if ( ap_boot_method == AP_BOOT_TXT ) { + uint64_t misc_enable; +@@ -352,7 +349,6 @@ void start_secondary(void *unused) + asm volatile ("monitor; xor %0,%0; mwait" + :: "a"(__va(sinit_mle->rlp_wakeup_addr)), "c"(0), + "d"(0) : "memory"); +- cpu = booting_cpu; + } + } + +@@ -381,9 +377,9 @@ void start_secondary(void *unused) + */ + spin_debug_disable(); + +- get_cpu_info()->use_pv_cr3 = false; +- get_cpu_info()->xen_cr3 = 0; +- get_cpu_info()->pv_cr3 = 0; ++ info->use_pv_cr3 = false; ++ info->xen_cr3 = 0; ++ info->pv_cr3 = 0; + + /* + * BUG_ON() used in load_system_tables() and later code may end up calling +@@ -637,8 +633,6 @@ static int do_boot_cpu(int apicid, int cpu) + */ + mtrr_save_state(); + +- booting_cpu = cpu; +- + start_eip = bootsym_phys(trampoline_realmode_entry); + + /* start_eip needs be page aligned, and below the 1M boundary. */ +-- +2.46.0 + diff --git a/1316-arch-x86-smp-make-cpu_state-per-CPU.patch b/1316-arch-x86-smp-make-cpu_state-per-CPU.patch new file mode 100644 index 00000000..38417e69 --- /dev/null +++ b/1316-arch-x86-smp-make-cpu_state-per-CPU.patch @@ -0,0 +1,219 @@ +From ce4b2d1ce181dc66afe12ac61fa975e9eb34e095 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Fri, 16 Jun 2023 12:18:23 +0200 +Subject: [PATCH 1316/1328] arch/x86/smp: make cpu_state per-CPU + +This will be used for parallel AP bring-up. + +CPU_STATE_INIT changed direction. It was previously set by BSP and never +consumed by AP. Now it signals that AP got through assembly part of +initialization and waits for BSP to call notifiers that set up data +structures required for further initialization. + +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/include/asm/processor.h | 1 + + xen/arch/x86/smpboot.c | 80 +++++++++++++++++----------- + 2 files changed, 49 insertions(+), 32 deletions(-) + +diff --git a/xen/arch/x86/include/asm/processor.h b/xen/arch/x86/include/asm/processor.h +index 1f78785689..0e9d9a0d88 100644 +--- a/xen/arch/x86/include/asm/processor.h ++++ b/xen/arch/x86/include/asm/processor.h +@@ -138,6 +138,7 @@ struct cpuinfo_x86 { + __u32 cpu_core_id; /* core ID of each logical CPU*/ + __u32 compute_unit_id; /* AMD compute unit ID of each logical CPU */ + void *stack_base; ++ unsigned int cpu_state; + unsigned short x86_clflush_size; + } __cacheline_aligned; + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index e9c6837e99..f8c3db2107 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -77,15 +77,18 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] = + { [0 ... 
NR_CPUS-1] .apicid = BAD_APICID }; + + static int cpu_error; +-static enum cpu_state { ++enum cpu_state { + CPU_STATE_DYING, /* slave -> master: I am dying */ + CPU_STATE_DEAD, /* slave -> master: I am completely dead */ +- CPU_STATE_INIT, /* master -> slave: Early bringup phase 1 */ +- CPU_STATE_CALLOUT, /* master -> slave: Early bringup phase 2 */ ++ CPU_STATE_INIT, /* slave -> master: Early bringup phase 1 completed */ ++ CPU_STATE_CALLOUT, /* master -> slave: Start early bringup phase 2 */ + CPU_STATE_CALLIN, /* slave -> master: Completed phase 2 */ + CPU_STATE_ONLINE /* master -> slave: Go fully online now. */ +-} cpu_state; +-#define set_cpu_state(state) do { smp_mb(); cpu_state = (state); } while (0) ++}; ++#define set_cpu_state(cpu, state) do { \ ++ smp_mb(); \ ++ cpu_data[cpu].cpu_state = (state); \ ++} while (0) + + void initialize_cpu_data(unsigned int cpu) + { +@@ -180,16 +183,7 @@ static void synchronize_tsc_slave(unsigned int slave) + static void smp_callin(void) + { + unsigned int cpu = smp_processor_id(); +- int i, rc; +- +- /* Wait 2s total for startup. */ +- Dprintk("Waiting for CALLOUT.\n"); +- for ( i = 0; cpu_state != CPU_STATE_CALLOUT; i++ ) +- { +- BUG_ON(i >= 200); +- cpu_relax(); +- mdelay(10); +- } ++ int rc; + + /* + * The boot CPU has finished the init stage and is spinning on cpu_state +@@ -225,12 +219,12 @@ static void smp_callin(void) + } + + /* Allow the master to continue. */ +- set_cpu_state(CPU_STATE_CALLIN); ++ set_cpu_state(cpu, CPU_STATE_CALLIN); + + synchronize_tsc_slave(cpu); + + /* And wait for our final Ack. */ +- while ( cpu_state != CPU_STATE_ONLINE ) ++ while ( cpu_data[cpu].cpu_state != CPU_STATE_ONLINE ) + cpu_relax(); + } + +@@ -325,6 +319,9 @@ void start_secondary(unsigned int cpu) + { + struct cpu_info *info = get_cpu_info(); + ++ /* Tell BSP that we are awake. */ ++ set_cpu_state(cpu, CPU_STATE_INIT); ++ + /* + * Don't put anything before smp_callin(), SMP booting is so fragile that we + * want to limit the things done here to the most necessary things. +@@ -354,6 +351,10 @@ void start_secondary(unsigned int cpu) + + /* Critical region without IDT or TSS. Any fault is deadly! */ + ++ /* Wait until data set up by CPU_UP_PREPARE notifiers is ready. */ ++ while ( cpu_data[cpu].cpu_state != CPU_STATE_CALLOUT ) ++ cpu_relax(); ++ + set_current(idle_vcpu[cpu]); + this_cpu(curr_vcpu) = idle_vcpu[cpu]; + rdmsrl(MSR_EFER, this_cpu(efer)); +@@ -646,26 +647,35 @@ static int do_boot_cpu(int apicid, int cpu) + + /* This grunge runs the startup process for the targeted processor. */ + +- set_cpu_state(CPU_STATE_INIT); +- + /* Starting actual IPI sequence... */ + boot_error = wakeup_secondary_cpu(apicid, start_eip); + + if ( !boot_error ) + { +- /* Allow AP to start initializing. */ +- set_cpu_state(CPU_STATE_CALLOUT); +- Dprintk("After Callout %d.\n", cpu); +- +- /* Wait 5s total for a response. */ +- for ( timeout = 0; timeout < 50000; timeout++ ) ++ /* Wait 2s total for a response. */ ++ for ( timeout = 0; timeout < 20000; timeout++ ) + { +- if ( cpu_state != CPU_STATE_CALLOUT ) ++ if ( cpu_data[cpu].cpu_state == CPU_STATE_INIT ) + break; + udelay(100); + } + +- if ( cpu_state == CPU_STATE_CALLIN ) ++ if ( cpu_data[cpu].cpu_state == CPU_STATE_INIT ) ++ { ++ /* Allow AP to start initializing. */ ++ set_cpu_state(cpu, CPU_STATE_CALLOUT); ++ Dprintk("After Callout %d.\n", cpu); ++ ++ /* Wait 5s total for a response. 
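++             * (This is phase 2: the AP already reported CPU_STATE_INIT,
++             * CPU_STATE_CALLOUT was just set, and the AP now runs up to
++             * smp_callin(), which moves it to CPU_STATE_CALLIN.)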
*/ ++ for ( timeout = 0; timeout < 500000; timeout++ ) ++ { ++ if ( cpu_data[cpu].cpu_state != CPU_STATE_CALLOUT ) ++ break; ++ udelay(10); ++ } ++ } ++ ++ if ( cpu_data[cpu].cpu_state == CPU_STATE_CALLIN ) + { + /* number CPUs logically, starting from 1 (BSP is 0) */ + Dprintk("OK.\n"); +@@ -673,7 +683,7 @@ static int do_boot_cpu(int apicid, int cpu) + synchronize_tsc_master(cpu); + Dprintk("CPU has booted.\n"); + } +- else if ( cpu_state == CPU_STATE_DEAD ) ++ else if ( cpu_data[cpu].cpu_state == CPU_STATE_DEAD ) + { + smp_rmb(); + rc = cpu_error; +@@ -744,7 +754,7 @@ unsigned long alloc_stub_page(unsigned int cpu, unsigned long *mfn) + void cpu_exit_clear(unsigned int cpu) + { + cpu_uninit(cpu); +- set_cpu_state(CPU_STATE_DEAD); ++ set_cpu_state(cpu, CPU_STATE_DEAD); + } + + static int clone_mapping(const void *ptr, root_pgentry_t *rpt) +@@ -1229,6 +1239,12 @@ void __init smp_prepare_cpus(void) + cpu_data[0].stack_base = (void *) + ((unsigned long)stack_start & ~(STACK_SIZE - 1)); + ++ /* Set state as CALLOUT so APs won't change it in initialize_cpu_data() */ ++ boot_cpu_data.cpu_state = CPU_STATE_CALLOUT; ++ ++ /* Not really used anywhere, but set it just in case. */ ++ set_cpu_state(0, CPU_STATE_ONLINE); ++ + set_nr_sockets(); + + socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets); +@@ -1335,7 +1351,7 @@ void __cpu_disable(void) + { + int cpu = smp_processor_id(); + +- set_cpu_state(CPU_STATE_DYING); ++ set_cpu_state(cpu, CPU_STATE_DYING); + + local_irq_disable(); + clear_local_APIC(); +@@ -1360,7 +1376,7 @@ void __cpu_die(unsigned int cpu) + unsigned int i = 0; + enum cpu_state seen_state; + +- while ( (seen_state = cpu_state) != CPU_STATE_DEAD ) ++ while ( (seen_state = cpu_data[cpu].cpu_state) != CPU_STATE_DEAD ) + { + BUG_ON(seen_state != CPU_STATE_DYING); + mdelay(100); +@@ -1461,7 +1477,7 @@ int __cpu_up(unsigned int cpu) + + time_latch_stamps(); + +- set_cpu_state(CPU_STATE_ONLINE); ++ set_cpu_state(cpu, CPU_STATE_ONLINE); + while ( !cpu_online(cpu) ) + { + cpu_relax(); +-- +2.46.0 + diff --git a/1317-arch-x86-smp-remove-MONITOR-MWAIT-loop-for-TXT-AP-br.patch b/1317-arch-x86-smp-remove-MONITOR-MWAIT-loop-for-TXT-AP-br.patch new file mode 100644 index 00000000..27d35854 --- /dev/null +++ b/1317-arch-x86-smp-remove-MONITOR-MWAIT-loop-for-TXT-AP-br.patch @@ -0,0 +1,80 @@ +From 69e5ee9fbefe3a1ca17237ab7bd889942db28470 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Fri, 16 Jun 2023 14:31:27 +0200 +Subject: [PATCH 1317/1328] arch/x86/smp: remove MONITOR/MWAIT loop for TXT AP + bringup + +This is no longer necessary, since AP loops on cpu_state and CPU +index is passed as argument. + +In addition, move TXT JOIN structure to static data. There is no +guarantee that it would be consumed before it is overwritten on BSP +stack. + +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/smpboot.c | 37 +++++++++---------------------------- + 1 file changed, 9 insertions(+), 28 deletions(-) + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index f8c3db2107..4eb5506ff9 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -327,28 +327,6 @@ void start_secondary(unsigned int cpu) + * want to limit the things done here to the most necessary things. + */ + +- if ( ap_boot_method == AP_BOOT_TXT ) { +- uint64_t misc_enable; +- uint32_t my_apicid; +- struct txt_sinit_mle_data *sinit_mle = +- txt_sinit_mle_data_start(__va(read_txt_reg(TXTCR_HEAP_BASE))); +- +- /* TXT released us with MONITOR disabled in IA32_MISC_ENABLE. 
*/ +- rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); +- wrmsrl(MSR_IA32_MISC_ENABLE, +- misc_enable | MSR_IA32_MISC_ENABLE_MONITOR_ENABLE); +- +- /* get_apic_id() reads from x2APIC if it thinks it is enabled. */ +- x2apic_ap_setup(); +- my_apicid = get_apic_id(); +- +- while ( my_apicid != cpu_physical_id(cpu) ) { +- asm volatile ("monitor; xor %0,%0; mwait" +- :: "a"(__va(sinit_mle->rlp_wakeup_addr)), "c"(0), +- "d"(0) : "memory"); +- } +- } +- + /* Critical region without IDT or TSS. Any fault is deadly! */ + + /* Wait until data set up by CPU_UP_PREPARE notifiers is ready. */ +@@ -459,16 +437,19 @@ static int wake_aps_in_txt(unsigned long trampoline_rm) + struct txt_sinit_mle_data *sinit_mle = + txt_sinit_mle_data_start(__va(read_txt_reg(TXTCR_HEAP_BASE))); + uint32_t *wakeup_addr = __va(sinit_mle->rlp_wakeup_addr); ++ static uint32_t join[4] = {0}; ++ ++ /* Check if already started. */ ++ if ( join[0] != 0 ) ++ return -1; + + #define trampoline_relative(x) \ + (trampoline_rm + ((char *)(x) - trampoline_realmode_entry)) + +- uint32_t join[4] = { +- trampoline_gdt[1], /* GDT limit */ +- trampoline_relative(trampoline_gdt), /* GDT base */ +- TXT_AP_BOOT_CS, /* CS selector, DS = CS+8 */ +- trampoline_relative(txt_ap_entry) /* EIP */ +- }; ++ join[0] = trampoline_gdt[1]; /* GDT limit */ ++ join[1] = trampoline_relative(trampoline_gdt); /* GDT base */ ++ join[2] = TXT_AP_BOOT_CS; /* CS selector, DS = CS+8 */ ++ join[3] = trampoline_relative(txt_ap_entry); /* EIP */ + + #undef trampoline_relative + +-- +2.46.0 + diff --git a/1318-arch-x86-smp-don-t-send-INIT-SIPI-SIPI-if-AP-is-alre.patch b/1318-arch-x86-smp-don-t-send-INIT-SIPI-SIPI-if-AP-is-alre.patch new file mode 100644 index 00000000..ecb6c1d6 --- /dev/null +++ b/1318-arch-x86-smp-don-t-send-INIT-SIPI-SIPI-if-AP-is-alre.patch @@ -0,0 +1,82 @@ +From 1f048091eec0bdd868c7427d124f3ceede305296 Mon Sep 17 00:00:00 2001 +From: Krystian Hebel +Date: Fri, 16 Jun 2023 14:41:17 +0200 +Subject: [PATCH 1318/1328] arch/x86/smp: don't send INIT-SIPI-SIPI if AP is + already running + +This is another requirement for parallel AP bringup. + +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/smpboot.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index 4eb5506ff9..ce37266b13 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -607,7 +607,6 @@ int alloc_cpu_id(void) + static int do_boot_cpu(int apicid, int cpu) + { + int timeout, boot_error = 0, rc = 0; +- unsigned long start_eip; + + /* + * Save current MTRR state in case it was changed since early boot +@@ -615,21 +614,31 @@ static int do_boot_cpu(int apicid, int cpu) + */ + mtrr_save_state(); + +- start_eip = bootsym_phys(trampoline_realmode_entry); ++ /* Check if AP is already up. */ ++ if ( cpu_data[cpu].cpu_state != CPU_STATE_INIT ) ++ { ++ /* This grunge runs the startup process for the targeted processor. */ ++ unsigned long start_eip; ++ start_eip = bootsym_phys(trampoline_realmode_entry); + +- /* start_eip needs be page aligned, and below the 1M boundary. */ +- if ( start_eip & ~0xff000 ) +- panic("AP trampoline %#lx not suitably positioned\n", start_eip); ++ /* start_eip needs be page aligned, and below the 1M boundary. 
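++         * (The SIPI start-up vector is an 8-bit page number: an AP begins
++         * execution at vector << 12, which is exactly what the ~0xff000
++         * mask checks.)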
*/
++        if ( start_eip & ~0xff000 )
++            panic("AP trampoline %#lx not suitably positioned\n", start_eip);
+ 
+-    /* So we see what's up */
+-    if ( opt_cpu_info )
+-        printk("Booting processor %d/%d eip %lx\n",
+-               cpu, apicid, start_eip);
++        /* So we see what's up */
++        if ( opt_cpu_info )
++            printk("AP trampoline at %lx\n", start_eip);
+ 
+-    /* This grunge runs the startup process for the targeted processor. */
++        /* mark "stuck" area as not stuck */
++        bootsym(trampoline_cpu_started) = 0;
++        smp_mb();
+ 
+-    /* Starting actual IPI sequence... */
+-    boot_error = wakeup_secondary_cpu(apicid, start_eip);
++        /* Starting actual IPI sequence... */
++        boot_error = wakeup_secondary_cpu(apicid, start_eip);
++    }
++
++    if ( opt_cpu_info )
++        printk("Booting processor %d/%d\n", cpu, apicid);
+ 
+     if ( !boot_error )
+     {
+@@ -688,10 +697,6 @@ static int do_boot_cpu(int apicid, int cpu)
+         rc = -EIO;
+     }
+ 
+-    /* mark "stuck" area as not stuck */
+-    bootsym(trampoline_cpu_started) = 0;
+-    smp_mb();
+-
+     return rc;
+ }
+ 
+-- 
+2.46.0
+
diff --git a/1319-arch-x86-smp-start-APs-in-parallel-during-boot.patch b/1319-arch-x86-smp-start-APs-in-parallel-during-boot.patch
new file mode 100644
index 00000000..86b6324e
--- /dev/null
+++ b/1319-arch-x86-smp-start-APs-in-parallel-during-boot.patch
@@ -0,0 +1,128 @@
+From d308d88523f956a1e34bf3b79c2c2e0bfa3ad3dd Mon Sep 17 00:00:00 2001
+From: Krystian Hebel
+Date: Fri, 16 Jun 2023 15:45:32 +0200
+Subject: [PATCH 1319/1328] arch/x86/smp: start APs in parallel during boot
+
+Multiple delays are required when sending IPIs and waiting for
+responses. During boot, 4 such IPIs were sent per AP. With this
+change, only one set of broadcast IPIs is sent. This reduces boot time,
+especially for platforms with a large number of cores.
+
+Single-CPU initialization is still possible; it is used for hotplug.
+
+During wakeup from S3, APs are started one by one. It should be possible
+to enable parallel execution there as well, but I don't have a way of
+testing it as of now. 
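+
+Rough shape of the resulting boot flow (an illustration only; the real
+code is in the setup.c and smpboot.c hunks below):
+
+    /* One broadcast INIT-SIPI-SIPI wakes all APs at once... */
+    smp_send_init_sipi_sipi_allbutself();
+
+    /* ...then each AP is brought online without further wakeup IPIs. */
+    for_each_present_cpu ( i )
+        cpu_up(i);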
+ +Signed-off-by: Krystian Hebel +--- + xen/arch/x86/include/asm/smp.h | 1 + + xen/arch/x86/setup.c | 2 ++ + xen/arch/x86/smpboot.c | 31 +++++++++++++++++++++++++++---- + 3 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/include/asm/smp.h b/xen/arch/x86/include/asm/smp.h +index 4432a8ea1b..68c8a349fc 100644 +--- a/xen/arch/x86/include/asm/smp.h ++++ b/xen/arch/x86/include/asm/smp.h +@@ -31,6 +31,7 @@ DECLARE_PER_CPU(cpumask_var_t, send_ipi_cpumask); + extern bool park_offline_cpus; + + void smp_send_nmi_allbutself(void); ++void smp_send_init_sipi_sipi_allbutself(void); + + void send_IPI_mask(const cpumask_t *, int vector); + void send_IPI_self(int vector); +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index f014b681d9..22cbdbdc88 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -1919,6 +1919,8 @@ void __init noreturn __start_xen(unsigned long mbi_p) + cpu_data[i].stack_base = cpu_alloc_stack(i); + } + ++ smp_send_init_sipi_sipi_allbutself(); ++ + for_each_present_cpu ( i ) + { + if ( (park_offline_cpus || num_online_cpus() < max_cpus) && +diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c +index ce37266b13..c732e5ace7 100644 +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -464,7 +464,7 @@ static int wake_aps_in_txt(unsigned long trampoline_rm) + + static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + { +- unsigned long send_status = 0, accept_status = 0; ++ unsigned long send_status = 0, accept_status = 0, sh = 0; + int maxlvt, timeout, i; + + /* +@@ -487,6 +487,12 @@ static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + if ( ap_boot_method == AP_BOOT_TXT ) + return wake_aps_in_txt(start_eip); + ++ /* ++ * Use destination shorthand for broadcasting IPIs during boot. ++ */ ++ if ( phys_apicid == BAD_APICID ) ++ sh = APIC_DEST_ALLBUT; ++ + /* + * Be paranoid about clearing APIC errors. + */ +@@ -500,7 +506,7 @@ static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + /* + * Turn INIT on target chip via IPI + */ +- apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, ++ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT | sh, + phys_apicid); + + if ( !x2apic_enabled ) +@@ -517,7 +523,7 @@ static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + + Dprintk("Deasserting INIT.\n"); + +- apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); ++ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT | sh, phys_apicid); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; +@@ -554,7 +560,7 @@ static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) + * STARTUP IPI + * Boot on the stack + */ +- apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid); ++ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12) | sh, phys_apicid); + + if ( !x2apic_enabled ) + { +@@ -1202,6 +1208,23 @@ static struct notifier_block cpu_smpboot_nfb = { + .notifier_call = cpu_smpboot_callback + }; + ++void smp_send_init_sipi_sipi_allbutself(void) ++{ ++ unsigned long start_eip; ++ start_eip = bootsym_phys(trampoline_realmode_entry); ++ ++ /* start_eip needs be page aligned, and below the 1M boundary. */ ++ if ( start_eip & ~0xff000 ) ++ panic("AP trampoline %#lx not suitably positioned\n", start_eip); ++ ++ /* So we see what's up */ ++ if ( opt_cpu_info ) ++ printk("Booting APs in parallel, eip %lx\n", start_eip); ++ ++ /* Starting actual broadcast IPI sequence... 
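++     * (BAD_APICID makes wakeup_secondary_cpu() use the all-but-self
++     * destination shorthand instead of targeting a single APIC ID.)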
*/
++    wakeup_secondary_cpu(BAD_APICID, start_eip);
++}
++
+ void __init smp_prepare_cpus(void)
+ {
+     /*
+-- 
+2.46.0
+
diff --git a/1320-lib-sha256.c-add-file.patch b/1320-lib-sha256.c-add-file.patch
new file mode 100644
index 00000000..59cac038
--- /dev/null
+++ b/1320-lib-sha256.c-add-file.patch
@@ -0,0 +1,305 @@
+From 14730214ed562757e88a3e08d7adb1bbf0ae9861 Mon Sep 17 00:00:00 2001
+From: Sergii Dmytruk
+Date: Mon, 26 Jun 2023 00:17:15 +0300
+Subject: [PATCH 1320/1328] lib/sha256.c: add file
+
+The code comes from [1] and is licensed under GPL-2.0 or a later version
+of the license. It's a combination of:
+ - include/crypto/sha2.h
+ - include/crypto/sha256_base.h
+ - lib/crypto/sha256.c
+ - crypto/sha256_generic.c
+
+Changes:
+ - includes
+ - formatting
+ - renames and splicing of some trivial functions that are called once
+ - dropping of `int` return values (only zero was ever returned)
+ - getting rid of references to `struct shash_desc`
+
+[1]: https://github.com/torvalds/linux/tree/afdab700f65e14070d8ab92175544b1c62b8bf03
+
+Signed-off-by: Sergii Dmytruk
+---
+ xen/include/xen/sha256.h |  10 ++
+ xen/lib/Makefile         |   1 +
+ xen/lib/sha256.c         | 238 +++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 249 insertions(+)
+ create mode 100644 xen/include/xen/sha256.h
+ create mode 100644 xen/lib/sha256.c
+
+diff --git a/xen/include/xen/sha256.h b/xen/include/xen/sha256.h
+new file mode 100644
+index 0000000000..0a483b6fd6
+--- /dev/null
++++ b/xen/include/xen/sha256.h
+@@ -0,0 +1,10 @@
++#ifndef __XEN_SHA256_H
++#define __XEN_SHA256_H
++
++#include 
++
++#define SHA256_DIGEST_SIZE 32
++
++void sha256_hash(const u8 *data, unsigned int len, u8 *out);
++
++#endif /* !__XEN_SHA256_H */
+diff --git a/xen/lib/Makefile b/xen/lib/Makefile
+index 3820e0bfbb..40f7960ef1 100644
+--- a/xen/lib/Makefile
++++ b/xen/lib/Makefile
+@@ -29,6 +29,7 @@ lib-y += strsep.o
+ lib-y += strspn.o
+ lib-y += strstr.o
+ lib-$(CONFIG_X86) += sha1.o
++lib-$(CONFIG_X86) += sha256.o
+ lib-$(CONFIG_X86) += xxhash32.o
+ lib-$(CONFIG_X86) += xxhash64.o
+ 
+diff --git a/xen/lib/sha256.c b/xen/lib/sha256.c
+new file mode 100644
+index 0000000000..e1507c7dd5
+--- /dev/null
++++ b/xen/lib/sha256.c
+@@ -0,0 +1,238 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++/*
++ * SHA-256, as specified in
++ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
++ *
++ * SHA-256 code by Jean-Luc Cooke .
++ *
++ * Copyright (c) Jean-Luc Cooke 
++ * Copyright (c) Andrew McDonald 
++ * Copyright (c) 2002 James Morris 
++ * Copyright (c) 2014 Red Hat Inc. 
++ */ ++ ++#include ++#include ++#include ++ ++#define SHA256_BLOCK_SIZE 64 ++ ++struct sha256_state { ++ u32 state[SHA256_DIGEST_SIZE / 4]; ++ u64 count; ++ u8 buf[SHA256_BLOCK_SIZE]; ++}; ++ ++typedef void sha256_block_fn(struct sha256_state *sst, u8 const *src, ++ int blocks); ++ ++static const u32 SHA256_K[] = { ++ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, ++ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, ++ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, ++ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, ++ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, ++ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, ++ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, ++ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, ++ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, ++ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, ++ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, ++ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, ++ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, ++ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, ++ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, ++ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, ++}; ++ ++static u32 Ch(u32 x, u32 y, u32 z) ++{ ++ return z ^ (x & (y ^ z)); ++} ++ ++static u32 Maj(u32 x, u32 y, u32 z) ++{ ++ return (x & y) | (z & (x | y)); ++} ++ ++#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) ++#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) ++#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) ++#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) ++ ++static void LOAD_OP(int I, u32 *W, const u8 *input) ++{ ++ W[I] = get_unaligned_be32((__u32 *)input + I); ++} ++ ++static void BLEND_OP(int I, u32 *W) ++{ ++ W[I] = s1(W[I - 2]) + W[I - 7] + s0(W[I - 15]) + W[I - 16]; ++} ++ ++#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) do { \ ++ u32 t1, t2; \ ++ t1 = h + e1(e) + Ch(e, f, g) + SHA256_K[i] + W[i]; \ ++ t2 = e0(a) + Maj(a, b, c); \ ++ d += t1; \ ++ h = t1 + t2; \ ++ } while ( 0 ) ++ ++static void sha256_init(struct sha256_state *sctx) ++{ ++ sctx->state[0] = 0x6a09e667UL; ++ sctx->state[1] = 0xbb67ae85UL; ++ sctx->state[2] = 0x3c6ef372UL; ++ sctx->state[3] = 0xa54ff53aUL; ++ sctx->state[4] = 0x510e527fUL; ++ sctx->state[5] = 0x9b05688cUL; ++ sctx->state[6] = 0x1f83d9abUL; ++ sctx->state[7] = 0x5be0cd19UL; ++ sctx->count = 0; ++} ++ ++static void sha256_do_update(struct sha256_state *sctx, ++ const u8 *data, ++ unsigned int len, ++ sha256_block_fn *block_fn) ++{ ++ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; ++ ++ sctx->count += len; ++ ++ if ( unlikely((partial + len) >= SHA256_BLOCK_SIZE) ) ++ { ++ int blocks; ++ ++ if ( partial ) ++ { ++ int p = SHA256_BLOCK_SIZE - partial; ++ ++ memcpy(sctx->buf + partial, data, p); ++ data += p; ++ len -= p; ++ ++ block_fn(sctx, sctx->buf, 1); ++ } ++ ++ blocks = len / SHA256_BLOCK_SIZE; ++ len %= SHA256_BLOCK_SIZE; ++ ++ if ( blocks ) ++ { ++ block_fn(sctx, data, blocks); ++ data += blocks * SHA256_BLOCK_SIZE; ++ } ++ partial = 0; ++ } ++ if ( len ) ++ memcpy(sctx->buf + partial, data, len); ++} ++ ++static void sha256_do_finalize(struct sha256_state *sctx, ++ sha256_block_fn *block_fn) ++{ ++ const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); ++ __be64 *bits = (__be64 *)(sctx->buf + bit_offset); ++ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; ++ ++ sctx->buf[partial++] = 0x80; ++ if ( partial > bit_offset ) ++ { ++ memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial); ++ partial = 0; ++ ++ block_fn(sctx, sctx->buf, 1); ++ } ++ ++ memset(sctx->buf 
+ partial, 0x0, bit_offset - partial); ++ *bits = cpu_to_be64(sctx->count << 3); ++ block_fn(sctx, sctx->buf, 1); ++} ++ ++static void sha256_finish(struct sha256_state *sctx, u8 *out, ++ unsigned int digest_size) ++{ ++ __be32 *digest = (__be32 *)out; ++ int i; ++ ++ for ( i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32) ) ++ put_unaligned_be32(sctx->state[i], digest++); ++ ++ memset(sctx, 0, sizeof(*sctx)); ++} ++ ++static void sha256_transform(u32 *state, const u8 *input, u32 *W) ++{ ++ u32 a, b, c, d, e, f, g, h; ++ int i; ++ ++ /* load the input */ ++ for ( i = 0; i < 16; i += 8 ) ++ { ++ LOAD_OP(i + 0, W, input); ++ LOAD_OP(i + 1, W, input); ++ LOAD_OP(i + 2, W, input); ++ LOAD_OP(i + 3, W, input); ++ LOAD_OP(i + 4, W, input); ++ LOAD_OP(i + 5, W, input); ++ LOAD_OP(i + 6, W, input); ++ LOAD_OP(i + 7, W, input); ++ } ++ ++ /* now blend */ ++ for ( i = 16; i < 64; i += 8 ) ++ { ++ BLEND_OP(i + 0, W); ++ BLEND_OP(i + 1, W); ++ BLEND_OP(i + 2, W); ++ BLEND_OP(i + 3, W); ++ BLEND_OP(i + 4, W); ++ BLEND_OP(i + 5, W); ++ BLEND_OP(i + 6, W); ++ BLEND_OP(i + 7, W); ++ } ++ ++ /* load the state into our registers */ ++ a = state[0]; b = state[1]; c = state[2]; d = state[3]; ++ e = state[4]; f = state[5]; g = state[6]; h = state[7]; ++ ++ /* now iterate */ ++ for ( i = 0; i < 64; i += 8 ) ++ { ++ SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); ++ SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); ++ SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); ++ SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); ++ SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); ++ SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); ++ SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); ++ SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); ++ } ++ ++ state[0] += a; state[1] += b; state[2] += c; state[3] += d; ++ state[4] += e; state[5] += f; state[6] += g; state[7] += h; ++} ++ ++static void sha256_transform_blocks(struct sha256_state *sctx, ++ const u8 *input, int blocks) ++{ ++ u32 W[64]; ++ ++ do { ++ sha256_transform(sctx->state, input, W); ++ input += SHA256_BLOCK_SIZE; ++ } while ( --blocks ); ++ ++ memset(W, 0, sizeof(W)); ++} ++ ++void sha256_hash(const u8 *data, unsigned int len, u8 *out) ++{ ++ struct sha256_state sctx; ++ ++ sha256_init(&sctx); ++ sha256_do_update(&sctx, data, len, sha256_transform_blocks); ++ sha256_do_finalize(&sctx, sha256_transform_blocks); ++ sha256_finish(&sctx, out, SHA256_DIGEST_SIZE); ++} +-- +2.46.0 + diff --git a/1321-x86-tpm.c-support-extending-PCRs-of-TPM2.0.patch b/1321-x86-tpm.c-support-extending-PCRs-of-TPM2.0.patch new file mode 100644 index 00000000..5ce618b8 --- /dev/null +++ b/1321-x86-tpm.c-support-extending-PCRs-of-TPM2.0.patch @@ -0,0 +1,523 @@ +From 916d06aeb4b76006526f4bdc727480aaadde40e0 Mon Sep 17 00:00:00 2001 +From: Sergii Dmytruk +Date: Wed, 28 Jun 2023 20:23:24 +0300 +Subject: [PATCH 1321/1328] x86/tpm.c: support extending PCRs of TPM2.0 + +SHA1 and SHA256 is hardcoded here, but their support by TPM is checked +for. Addition of event log for TPM2.0 will generalize the code further. + +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/tpm.c | 446 +++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 434 insertions(+), 12 deletions(-) + +diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c +index 1bd271dbe2..e3e4bda841 100644 +--- a/xen/arch/x86/tpm.c ++++ b/xen/arch/x86/tpm.c +@@ -40,6 +40,15 @@ asm ( + * other part of Xen. Providing implementation of builtin functions in this + * case is necessary if compiler chooses to not use an inline builtin. 
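+ * (memset() is added below because the TPM2.0 code introduces
+ * zero-initialized structures that the compiler may lower to real
+ * memset() calls in this freestanding environment.)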
+ */ ++void *memset(void *dest, int c, size_t n) ++{ ++ uint8_t *d = dest; ++ ++ while ( n-- ) ++ *d++ = c; ++ ++ return dest; ++} + void *memcpy(void *dest, const void *src, size_t n) + { + const uint8_t *s = src; +@@ -62,6 +71,7 @@ void *memcpy(void *dest, const void *src, size_t n) + #endif /* __EARLY_TPM__ */ + + #include ++#include + + #define TPM_LOC_REG(loc, reg) (0x1000 * (loc) + (reg)) + +@@ -80,6 +90,7 @@ void *memcpy(void *dest, const void *src, size_t n) + + #define swap16(x) __builtin_bswap16(x) + #define swap32(x) __builtin_bswap32(x) ++#define memset(s, c, n) __builtin_memset(s, c, n) + #define memcpy(d, s, n) __builtin_memcpy(d, s, n) + + static inline volatile uint32_t tis_read32(unsigned reg) +@@ -160,14 +171,15 @@ static inline bool is_tpm12(void) + (tis_read32(TPM_STS_(0)) & TPM_FAMILY_MASK) == 0); + } + +-/****************************** TPM1.2 specific *******************************/ +-#define TPM_ORD_Extend 0x00000014 +-#define TPM_ORD_SHA1Start 0x000000A0 +-#define TPM_ORD_SHA1Update 0x000000A1 +-#define TPM_ORD_SHA1CompleteExtend 0x000000A3 ++/****************************** TPM1.2 & TPM2.0 *******************************/ + +-#define TPM_TAG_RQU_COMMAND 0x00C1 +-#define TPM_TAG_RSP_COMMAND 0x00C4 ++/* ++ * TPM1.2 is required to support commands of up to 1101 bytes, vendors rarely ++ * go above that. Limit maximum size of block of data to be hashed to 1024. ++ * ++ * TPM2.0 should support hashing of at least 1024 bytes. ++ */ ++#define MAX_HASH_BLOCK 1024 + + /* All fields of following structs are big endian. */ + struct tpm_cmd_hdr { +@@ -182,6 +194,17 @@ struct tpm_rsp_hdr { + uint32_t returnCode; + } __packed; + ++/****************************** TPM1.2 specific *******************************/ ++ ++#define TPM_ORD_Extend 0x00000014 ++#define TPM_ORD_SHA1Start 0x000000A0 ++#define TPM_ORD_SHA1Update 0x000000A1 ++#define TPM_ORD_SHA1CompleteExtend 0x000000A3 ++ ++#define TPM_TAG_RQU_COMMAND 0x00C1 ++#define TPM_TAG_RSP_COMMAND 0x00C4 ++ ++/* All fields of following structs are big endian. */ + struct extend_cmd { + struct tpm_cmd_hdr h; + uint32_t pcrNum; +@@ -247,11 +270,6 @@ struct txt_ev_log_container_12 { + }; + + #ifdef __EARLY_TPM__ +-/* +- * TPM1.2 is required to support commands of up to 1101 bytes, vendors rarely +- * go above that. Limit maximum size of block of data to be hashed to 1024. +- */ +-#define MAX_HASH_BLOCK 1024 + #define CMD_RSP_BUF_SIZE (sizeof(struct sha1_update_cmd) + MAX_HASH_BLOCK) + + union cmd_rsp { +@@ -406,6 +424,382 @@ static void *create_log_event12(struct txt_ev_log_container_12 *evt_log, + + /************************** end of TPM1.2 specific ****************************/ + ++/****************************** TPM2.0 specific *******************************/ ++ ++/* ++ * These constants are for TPM2.0 but don't have a distinct prefix to match ++ * names in the specification. 
++ */ ++ ++#define TPM_HT_PCR 0x00 ++ ++#define TPM_RH_NULL 0x40000007 ++#define TPM_RS_PW 0x40000009 ++ ++#define HR_SHIFT 24 ++#define HR_PCR (TPM_HT_PCR << HR_SHIFT) ++ ++#define TPM_ST_NO_SESSIONS 0x8001 ++#define TPM_ST_SESSIONS 0x8002 ++ ++#define TPM_ALG_SHA1 0x0004 ++#define TPM_ALG_SHA256 0x000b ++#define TPM_ALG_NULL 0x0010 ++ ++#define TPM2_PCR_Extend 0x00000182 ++#define TPM2_PCR_HashSequenceStart 0x00000186 ++#define TPM2_PCR_SequenceUpdate 0x0000015C ++#define TPM2_PCR_EventSequenceComplete 0x00000185 ++ ++#define PUT_BYTES(p, bytes, size) do { \ ++ memcpy((p), (bytes), (size)); \ ++ (p) += (size); \ ++ } while ( 0 ) ++ ++#define PUT_16BIT(p, data) do { \ ++ *(uint16_t *)(p) = swap16(data); \ ++ (p) += 2; \ ++ } while ( 0 ) ++ ++/* All fields of following structs are big endian. */ ++struct tpm2_session_header { ++ uint32_t handle; ++ uint16_t nonceSize; ++ uint8_t nonce[0]; ++ uint8_t attrs; ++ uint16_t hmacSize; ++ uint8_t hmac[0]; ++} __packed; ++ ++struct tpm2_extend_cmd { ++ struct tpm_cmd_hdr h; ++ uint32_t pcrHandle; ++ uint32_t sessionHdrSize; ++ struct tpm2_session_header pcrSession; ++ uint32_t hashCount; ++ uint8_t hashes[0]; ++} __packed; ++ ++struct tpm2_extend_rsp { ++ struct tpm_rsp_hdr h; ++} __packed; ++ ++struct tpm2_sequence_start_cmd { ++ struct tpm_cmd_hdr h; ++ uint16_t hmacSize; ++ uint8_t hmac[0]; ++ uint16_t hashAlg; ++} __packed; ++ ++struct tpm2_sequence_start_rsp { ++ struct tpm_rsp_hdr h; ++ uint32_t sequenceHandle; ++} __packed; ++ ++struct tpm2_sequence_update_cmd { ++ struct tpm_cmd_hdr h; ++ uint32_t sequenceHandle; ++ uint32_t sessionHdrSize; ++ struct tpm2_session_header session; ++ uint16_t dataSize; ++ uint8_t data[0]; ++} __packed; ++ ++struct tpm2_sequence_update_rsp { ++ struct tpm_rsp_hdr h; ++} __packed; ++ ++struct tpm2_sequence_complete_cmd { ++ struct tpm_cmd_hdr h; ++ uint32_t pcrHandle; ++ uint32_t sequenceHandle; ++ uint32_t sessionHdrSize; ++ struct tpm2_session_header pcrSession; ++ struct tpm2_session_header sequenceSession; ++ uint16_t dataSize; ++ uint8_t data[0]; ++} __packed; ++ ++struct tpm2_sequence_complete_rsp { ++ struct tpm_rsp_hdr h; ++ uint32_t paramSize; ++ uint32_t hashCount; ++ uint8_t hashes[0]; ++ /* ++ * Each hash is represented as: ++ * struct { ++ * uint16_t hashAlg; ++ * uint8_t hash[size of hashAlg]; ++ * }; ++ */ ++} __packed; ++ ++/* ++ * These two structure are for convenience, they don't correspond to anything in ++ * any spec. ++ */ ++struct tpm2_log_hash { ++ uint16_t alg; /* TPM_ALG_* */ ++ uint16_t size; ++ uint8_t *data; /* Non-owning reference to a buffer inside log entry. */ ++}; ++/* Should be more than enough for now and awhile in the future. 
*/ ++#define MAX_HASH_COUNT 8 ++struct tpm2_log_hashes { ++ uint32_t count; ++ struct tpm2_log_hash hashes[MAX_HASH_COUNT]; ++}; ++ ++#ifdef __EARLY_TPM__ ++ ++union tpm2_cmd_rsp { ++ uint8_t b[sizeof(struct tpm2_sequence_update_cmd) + MAX_HASH_BLOCK]; ++ struct tpm_cmd_hdr c; ++ struct tpm_rsp_hdr r; ++ struct tpm2_sequence_start_cmd start_c; ++ struct tpm2_sequence_start_rsp start_r; ++ struct tpm2_sequence_update_cmd update_c; ++ struct tpm2_sequence_update_rsp update_r; ++ struct tpm2_sequence_complete_cmd finish_c; ++ struct tpm2_sequence_complete_rsp finish_r; ++}; ++ ++static uint32_t tpm2_hash_extend(unsigned loc, uint8_t *buf, unsigned size, ++ unsigned pcr, ++ struct tpm2_log_hashes *log_hashes) ++{ ++ uint32_t seq_handle; ++ unsigned max_bytes = MAX_HASH_BLOCK; ++ ++ union tpm2_cmd_rsp cmd_rsp; ++ unsigned o_size; ++ unsigned i; ++ uint8_t *p; ++ uint32_t rc; ++ ++ cmd_rsp.start_c = (struct tpm2_sequence_start_cmd) { ++ .h.tag = swap16(TPM_ST_NO_SESSIONS), ++ .h.paramSize = swap32(sizeof(cmd_rsp.start_c)), ++ .h.ordinal = swap32(TPM2_PCR_HashSequenceStart), ++ .hashAlg = swap16(TPM_ALG_NULL), /* Compute all supported hashes. */ ++ }; ++ ++ request_locality(loc); ++ ++ o_size = sizeof(cmd_rsp); ++ send_cmd(loc, cmd_rsp.b, swap32(cmd_rsp.c.paramSize), &o_size); ++ ++ if ( cmd_rsp.r.tag == swap16(TPM_ST_NO_SESSIONS) && ++ cmd_rsp.r.paramSize == swap32(10) ) { ++ rc = swap32(cmd_rsp.r.returnCode); ++ if ( rc != 0 ) ++ goto error; ++ } ++ ++ seq_handle = swap32(cmd_rsp.start_r.sequenceHandle); ++ ++ while ( size > 64 ) { ++ if ( size < max_bytes ) ++ max_bytes = size & ~(64 - 1); ++ ++ cmd_rsp.update_c = (struct tpm2_sequence_update_cmd) { ++ .h.tag = swap16(TPM_ST_SESSIONS), ++ .h.paramSize = swap32(sizeof(cmd_rsp.update_c) + max_bytes), ++ .h.ordinal = swap32(TPM2_PCR_SequenceUpdate), ++ .sequenceHandle = swap32(seq_handle), ++ .sessionHdrSize = swap32(sizeof(struct tpm2_session_header)), ++ .session.handle = swap32(TPM_RS_PW), ++ .dataSize = swap16(max_bytes), ++ }; ++ ++ memcpy(cmd_rsp.update_c.data, buf, max_bytes); ++ ++ o_size = sizeof(cmd_rsp); ++ send_cmd(loc, cmd_rsp.b, swap32(cmd_rsp.c.paramSize), &o_size); ++ ++ if ( cmd_rsp.r.tag == swap16(TPM_ST_NO_SESSIONS) && ++ cmd_rsp.r.paramSize == swap32(10) ) { ++ rc = swap32(cmd_rsp.r.returnCode); ++ if ( rc != 0 ) ++ goto error; ++ } ++ ++ size -= max_bytes; ++ buf += max_bytes; ++ } ++ ++ cmd_rsp.finish_c = (struct tpm2_sequence_complete_cmd) { ++ .h.tag = swap16(TPM_ST_SESSIONS), ++ .h.paramSize = swap32(sizeof(cmd_rsp.finish_c) + size), ++ .h.ordinal = swap32(TPM2_PCR_EventSequenceComplete), ++ .pcrHandle = swap32(HR_PCR + pcr), ++ .sequenceHandle = swap32(seq_handle), ++ .sessionHdrSize = swap32(sizeof(struct tpm2_session_header)*2), ++ .pcrSession.handle = swap32(TPM_RS_PW), ++ .sequenceSession.handle = swap32(TPM_RS_PW), ++ .dataSize = swap16(size), ++ }; ++ ++ memcpy(cmd_rsp.finish_c.data, buf, size); ++ ++ o_size = sizeof(cmd_rsp); ++ send_cmd(loc, cmd_rsp.b, swap32(cmd_rsp.c.paramSize), &o_size); ++ ++ if ( cmd_rsp.r.tag == swap16(TPM_ST_NO_SESSIONS) && ++ cmd_rsp.r.paramSize == swap32(10) ) { ++ rc = swap32(cmd_rsp.r.returnCode); ++ if ( rc != 0 ) ++ goto error; ++ } ++ ++ p = cmd_rsp.finish_r.hashes; ++ for ( i = 0; i < swap32(cmd_rsp.finish_r.hashCount); ++i ) { ++ unsigned j; ++ uint16_t hash_type; ++ ++ hash_type = swap16(*(uint16_t *)p); ++ p += sizeof(uint16_t); ++ ++ for ( j = 0; j < log_hashes->count; ++j ) { ++ struct tpm2_log_hash *hash = &log_hashes->hashes[j]; ++ if ( hash->alg == hash_type ) { ++ 
memcpy(hash->data, p, hash->size); ++ p += hash->size; ++ break; ++ } ++ } ++ ++ if ( j == log_hashes->count ) { ++ /* Can't continue parsing without knowing hash size. */ ++ break; ++ } ++ } ++ ++ rc = 0; ++ ++error: ++ relinquish_locality(loc); ++ return rc; ++} ++ ++#else ++ ++union tpm2_cmd_rsp { ++ /* Enough space for multiple hashes. */ ++ uint8_t b[sizeof(struct tpm2_extend_cmd) + 1024]; ++ struct tpm_cmd_hdr c; ++ struct tpm_rsp_hdr r; ++ struct tpm2_extend_cmd extend_c; ++ struct tpm2_extend_rsp extend_r; ++}; ++ ++static uint32_t tpm20_pcr_extend(unsigned loc, uint32_t pcr_handle, ++ const struct tpm2_log_hashes *log_hashes) ++{ ++ union tpm2_cmd_rsp cmd_rsp; ++ unsigned o_size; ++ unsigned i; ++ uint8_t *p; ++ ++ cmd_rsp.extend_c = (struct tpm2_extend_cmd) { ++ .h.tag = swap16(TPM_ST_SESSIONS), ++ .h.ordinal = swap32(TPM2_PCR_Extend), ++ .pcrHandle = swap32(pcr_handle), ++ .sessionHdrSize = swap32(sizeof(struct tpm2_session_header)), ++ .pcrSession.handle = swap32(TPM_RS_PW), ++ .hashCount = swap32(log_hashes->count), ++ }; ++ ++ p = cmd_rsp.extend_c.hashes; ++ for ( i = 0; i < log_hashes->count; ++i ) { ++ const struct tpm2_log_hash *hash = &log_hashes->hashes[i]; ++ ++ if ( p + sizeof(uint16_t) + hash->size > &cmd_rsp.b[sizeof(cmd_rsp)] ) { ++ printk(XENLOG_ERR "Hit TPM message size implementation limit: %ld\n", ++ sizeof(cmd_rsp)); ++ return -1; ++ } ++ ++ *(uint16_t *)p = swap16(hash->alg); ++ p += sizeof(uint16_t); ++ ++ memcpy(p, hash->data, hash->size); ++ p += hash->size; ++ } ++ ++ /* Fill in command size (size of the whole buffer). */ ++ cmd_rsp.extend_c.h.paramSize = swap32(sizeof(cmd_rsp.extend_c) + ++ (p - cmd_rsp.extend_c.hashes)), ++ ++ o_size = sizeof(cmd_rsp); ++ send_cmd(loc, cmd_rsp.b, swap32(cmd_rsp.c.paramSize), &o_size); ++ ++ return swap32(cmd_rsp.r.returnCode); ++} ++ ++static bool tpm_supports_hash(unsigned loc, const struct tpm2_log_hash *hash) ++{ ++ uint32_t rc; ++ struct tpm2_log_hashes hashes = { ++ .count = 1, ++ .hashes[0] = *hash, ++ }; ++ ++ /* This is a valid way of checking hash support, using it to not implement ++ * TPM2_GetCapability(). */ ++ rc = tpm20_pcr_extend(loc, /*pcr_handle=*/TPM_RH_NULL, &hashes); ++ ++ return rc == 0; ++} ++ ++static uint32_t tpm2_hash_extend(unsigned loc, uint8_t *buf, unsigned size, ++ unsigned pcr, ++ const struct tpm2_log_hashes *log_hashes) ++{ ++ uint32_t rc; ++ unsigned i; ++ struct tpm2_log_hashes supported_hashes = {0}; ++ ++ request_locality(loc); ++ ++ for ( i = 0; i < log_hashes->count; ++i ) { ++ const struct tpm2_log_hash *hash = &log_hashes->hashes[i]; ++ if ( !tpm_supports_hash(loc, hash) ) { ++ printk(XENLOG_WARNING "Skipped hash unsupported by TPM: %d\n", ++ hash->alg); ++ continue; ++ } ++ ++ if ( hash->alg == TPM_ALG_SHA1 ) { ++ sha1_hash(buf, size, hash->data); ++ } else if ( hash->alg == TPM_ALG_SHA256 ) { ++ sha256_hash(buf, size, hash->data); ++ } else { ++ /* This is called "OneDigest" in TXT Software Development Guide. 
*/ ++ memset(hash->data, 0, size); ++ hash->data[0] = 1; ++ } ++ ++ if ( supported_hashes.count == MAX_HASH_COUNT ) { ++ printk(XENLOG_ERR "Hit hash count implementation limit: %d\n", ++ MAX_HASH_COUNT); ++ return -1; ++ } ++ ++ supported_hashes.hashes[supported_hashes.count] = *hash; ++ ++supported_hashes.count; ++ } ++ ++ rc = tpm20_pcr_extend(loc, HR_PCR + pcr, &supported_hashes); ++ relinquish_locality(loc); ++ ++ return rc; ++} ++ ++#endif /* __EARLY_TPM__ */ ++ ++/************************** end of TPM2.0 specific ****************************/ ++ + void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size, + uint32_t type, uint8_t *log_data, unsigned log_data_size) + { +@@ -431,6 +825,34 @@ void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size, + if ( !tpm12_hash_extend(loc, buf, size, pcr, entry_digest) ) { + #ifndef __EARLY_TPM__ + printk(XENLOG_ERR "Extending PCR%u failed\n", pcr); ++#endif ++ } ++ } else { ++ uint8_t sha1_digest[SHA1_DIGEST_SIZE]; ++ uint8_t sha256_digest[SHA256_DIGEST_SIZE]; ++ uint32_t rc; ++ ++ struct tpm2_log_hashes log_hashes = { ++ .count = 2, ++ .hashes = { ++ { ++ .alg = TPM_ALG_SHA1, ++ .size = SHA1_DIGEST_SIZE, ++ .data = sha1_digest, ++ }, ++ { ++ .alg = TPM_ALG_SHA256, ++ .size = SHA256_DIGEST_SIZE, ++ .data = sha256_digest, ++ }, ++ }, ++ }; ++ ++ rc = tpm2_hash_extend(loc, buf, size, pcr, &log_hashes); ++ if ( rc != 0 ) { ++#ifndef __EARLY_TPM__ ++ printk(XENLOG_ERR "Extending PCR%u failed with TPM error: 0x%08x\n", ++ pcr, rc); + #endif + } + } +-- +2.46.0 + diff --git a/1322-x86-tpm.c-implement-event-log-for-TPM2.0.patch b/1322-x86-tpm.c-implement-event-log-for-TPM2.0.patch new file mode 100644 index 00000000..7125da5d --- /dev/null +++ b/1322-x86-tpm.c-implement-event-log-for-TPM2.0.patch @@ -0,0 +1,257 @@ +From 9ab27bc21b1747b95120805a61676b236333fc65 Mon Sep 17 00:00:00 2001 +From: Sergii Dmytruk +Date: Sat, 1 Jul 2023 00:41:35 +0300 +Subject: [PATCH 1322/1328] x86/tpm.c: implement event log for TPM2.0 + +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/include/asm/intel_txt.h | 33 ++++++ + xen/arch/x86/tpm.c | 163 +++++++++++++++++++++++---- + 2 files changed, 172 insertions(+), 24 deletions(-) + +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 59cdc3f2e9..8f9f4d9570 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -202,6 +202,39 @@ struct txt_sinit_mle_data { + /* Ext Data Elements */ + } __packed; + ++/* Types of extended data. */ ++#define TXT_HEAP_EXTDATA_TYPE_END 0 ++#define TXT_HEAP_EXTDATA_TYPE_BIOS_SPEC_VER 1 ++#define TXT_HEAP_EXTDATA_TYPE_ACM 2 ++#define TXT_HEAP_EXTDATA_TYPE_STM 3 ++#define TXT_HEAP_EXTDATA_TYPE_CUSTOM 4 ++#define TXT_HEAP_EXTDATA_TYPE_MADT 6 ++#define TXT_HEAP_EXTDATA_TYPE_EVENT_LOG_POINTER2_1 8 ++#define TXT_HEAP_EXTDATA_TYPE_MCFG 9 ++#define TXT_HEAP_EXTDATA_TYPE_TPR_REQ 13 ++#define TXT_HEAP_EXTDATA_TYPE_DTPR 14 ++#define TXT_HEAP_EXTDATA_TYPE_CEDT 15 ++ ++/* ++ * Self-describing data structure that is used for extensions to TXT heap ++ * tables. ++ */ ++struct txt_ext_data_element { ++ uint32_t type; /* One of TXT_HEAP_EXTDATA_TYPE_*. */ ++ uint32_t size; ++ uint8_t data[0]; /* size bytes. */ ++} __packed; ++ ++/* ++ * Extended data describing TPM 2.0 log. 
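++ * (next_record_offset is where create_log_event20() appends the next
++ * event inside the allocated_event_container_size-byte buffer located
++ * at physical_address.)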
++ */ ++struct heap_event_log_pointer_element2_1 { ++ uint64_t physical_address; ++ uint32_t allocated_event_container_size; ++ uint32_t first_record_offset; ++ uint32_t next_record_offset; ++} __packed; ++ + /* + * Functions to extract data from the Intel TXT Heap Memory. The layout + * of the heap is as follows: +diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c +index e3e4bda841..283d735b38 100644 +--- a/xen/arch/x86/tpm.c ++++ b/xen/arch/x86/tpm.c +@@ -550,6 +550,44 @@ struct tpm2_log_hashes { + struct tpm2_log_hash hashes[MAX_HASH_COUNT]; + }; + ++struct tpm2_pcr_event_header { ++ uint32_t pcrIndex; ++ uint32_t eventType; ++ uint32_t digestCount; ++ uint8_t digests[0]; ++ /* ++ * Each hash is represented as: ++ * struct { ++ * uint16_t hashAlg; ++ * uint8_t hash[size of hashAlg]; ++ * }; ++ */ ++ /* uint32_t eventSize; */ ++ /* uint8_t event[0]; */ ++} __packed; ++ ++struct tpm2_digest_sizes { ++ uint16_t algId; ++ uint16_t digestSize; ++} __packed; ++ ++struct tpm2_spec_id_event { ++ uint32_t pcrIndex; ++ uint32_t eventType; ++ uint8_t digest[20]; ++ uint32_t eventSize; ++ uint8_t signature[16]; ++ uint32_t platformClass; ++ uint8_t specVersionMinor; ++ uint8_t specVersionMajor; ++ uint8_t specErrata; ++ uint8_t uintnSize; ++ uint32_t digestCount; ++ struct tpm2_digest_sizes digestSizes[0]; /* variable number of members */ ++ /* uint8_t vendorInfoSize; */ ++ /* uint8_t vendorInfo[vendorInfoSize]; */ ++} __packed; ++ + #ifdef __EARLY_TPM__ + + union tpm2_cmd_rsp { +@@ -770,15 +808,12 @@ static uint32_t tpm2_hash_extend(unsigned loc, uint8_t *buf, unsigned size, + continue; + } + +- if ( hash->alg == TPM_ALG_SHA1 ) { ++ if ( hash->alg == TPM_ALG_SHA1 ) + sha1_hash(buf, size, hash->data); +- } else if ( hash->alg == TPM_ALG_SHA256 ) { ++ else if ( hash->alg == TPM_ALG_SHA256 ) + sha256_hash(buf, size, hash->data); +- } else { +- /* This is called "OneDigest" in TXT Software Development Guide. */ +- memset(hash->data, 0, size); +- hash->data[0] = 1; +- } ++ else ++ /* create_log_event20() took care of initializing the digest. */; + + if ( supported_hashes.count == MAX_HASH_COUNT ) { + printk(XENLOG_ERR "Hit hash count implementation limit: %d\n", +@@ -798,6 +833,99 @@ static uint32_t tpm2_hash_extend(unsigned loc, uint8_t *buf, unsigned size, + + #endif /* __EARLY_TPM__ */ + ++static struct heap_event_log_pointer_element2_1 *find_evt_log_ext_data(void) ++{ ++ struct txt_os_sinit_data *os_sinit; ++ struct txt_ext_data_element *ext_data; ++ ++ os_sinit = txt_os_sinit_data_start(__va(read_txt_reg(TXTCR_HEAP_BASE))); ++ ext_data = (void *)((uint8_t *)os_sinit + sizeof(*os_sinit)); ++ ++ /* ++ * Find TXT_HEAP_EXTDATA_TYPE_EVENT_LOG_POINTER2_1 which is necessary to ++ * know where to put the next entry. 
++ */ ++ while ( ext_data->type != TXT_HEAP_EXTDATA_TYPE_END ) { ++ if ( ext_data->type == TXT_HEAP_EXTDATA_TYPE_EVENT_LOG_POINTER2_1 ) ++ break; ++ ext_data = (void *)&ext_data->data[ext_data->size]; ++ } ++ ++ if ( ext_data->type == TXT_HEAP_EXTDATA_TYPE_END ) ++ return NULL; ++ ++ return (void *)&ext_data->data[0]; ++} ++ ++static struct tpm2_log_hashes ++create_log_event20(struct tpm2_spec_id_event *evt_log, uint32_t evt_log_size, ++ uint32_t pcr, uint32_t type, uint8_t *data, ++ unsigned data_size) ++{ ++ struct tpm2_log_hashes log_hashes = {0}; ++ ++ struct heap_event_log_pointer_element2_1 *log_ext_data; ++ struct tpm2_pcr_event_header *new_entry; ++ uint32_t entry_size; ++ unsigned i; ++ uint8_t *p; ++ ++ log_ext_data = find_evt_log_ext_data(); ++ if ( log_ext_data == NULL ) ++ return log_hashes; ++ ++ entry_size = sizeof(*new_entry); ++ for ( i = 0; i < evt_log->digestCount; ++i ) { ++ entry_size += sizeof(uint16_t); /* hash type */ ++ entry_size += evt_log->digestSizes[i].digestSize; ++ } ++ entry_size += sizeof(uint32_t); /* data size field */ ++ entry_size += data_size; ++ ++ /* ++ * Check if there is enough space left for new entry. ++ * Note: it is possible to introduce a gap in event log if entry with big ++ * data_size is followed by another entry with smaller data. Maybe we should ++ * cap the event log size in such case? ++ */ ++ if ( log_ext_data->next_record_offset + entry_size > evt_log_size ) ++ return log_hashes; ++ ++ new_entry = (void *)((uint8_t *)evt_log + log_ext_data->next_record_offset); ++ log_ext_data->next_record_offset += entry_size; ++ ++ new_entry->pcrIndex = pcr; ++ new_entry->eventType = type; ++ new_entry->digestCount = evt_log->digestCount; ++ ++ p = &new_entry->digests[0]; ++ for ( i = 0; i < evt_log->digestCount; ++i ) { ++ uint16_t alg = evt_log->digestSizes[i].algId; ++ uint16_t size = evt_log->digestSizes[i].digestSize; ++ ++ *(uint16_t *)p = alg; ++ p += sizeof(uint16_t); ++ ++ log_hashes.hashes[i].alg = alg; ++ log_hashes.hashes[i].size = size; ++ log_hashes.hashes[i].data = p; ++ p += size; ++ ++ /* This is called "OneDigest" in TXT Software Development Guide. 
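++         * Every digest in the new entry starts out as this placeholder
++         * (0x01 followed by zeroes); tpm2_hash_extend() later overwrites
++         * the ones Xen can actually compute. 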
*/
++        memset(log_hashes.hashes[i].data, 0, size);
++        log_hashes.hashes[i].data[0] = 1;
++    }
++    log_hashes.count = evt_log->digestCount;
++
++    *(uint32_t *)p = data_size;
++    p += sizeof(uint32_t);
++
++    if ( data && data_size > 0 )
++        memcpy(p, data, data_size);
++
++    return log_hashes;
++}
++
+ /************************** end of TPM2.0 specific ****************************/
+ 
+ void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size,
+@@ -828,25 +956,12 @@ void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size,
+ #endif
+         }
+     } else {
+-        uint8_t sha1_digest[SHA1_DIGEST_SIZE];
+-        uint8_t sha256_digest[SHA256_DIGEST_SIZE];
+         uint32_t rc;
+ 
+-        struct tpm2_log_hashes log_hashes = {
+-            .count = 2,
+-            .hashes = {
+-                {
+-                    .alg = TPM_ALG_SHA1,
+-                    .size = SHA1_DIGEST_SIZE,
+-                    .data = sha1_digest,
+-                },
+-                {
+-                    .alg = TPM_ALG_SHA256,
+-                    .size = SHA256_DIGEST_SIZE,
+-                    .data = sha256_digest,
+-                },
+-            },
+-        };
++        struct tpm2_spec_id_event *evt_log = evt_log_addr;
++        struct tpm2_log_hashes log_hashes =
++            create_log_event20(evt_log, evt_log_size, pcr, type, log_data,
++                               log_data_size);
+ 
+         rc = tpm2_hash_extend(loc, buf, size, pcr, &log_hashes);
+         if ( rc != 0 ) {
+-- 
+2.46.0
+
diff --git a/1323-arch-x86-process-DRTM-policy.patch b/1323-arch-x86-process-DRTM-policy.patch
new file mode 100644
index 00000000..30aa7d70
--- /dev/null
+++ b/1323-arch-x86-process-DRTM-policy.patch
@@ -0,0 +1,282 @@
+From 34f1eaca4d3e7f005b62aed22b568a610ef02ba1 Mon Sep 17 00:00:00 2001
+From: Sergii Dmytruk
+Date: Sun, 29 Oct 2023 00:42:04 +0300
+Subject: [PATCH 1323/1328] arch/x86: process DRTM policy
+
+Go through entries in the DRTM policy of the SLRT to hash and extend the
+data that they describe into the corresponding PCRs.
+
+Signed-off-by: Sergii Dmytruk
+---
+ xen/arch/x86/include/asm/tpm.h |  11 ++
+ xen/arch/x86/setup.c           |  10 ++
+ xen/arch/x86/tpm.c             | 193 +++++++++++++++++++++++++++
+ 3 files changed, 214 insertions(+)
+
+diff --git a/xen/arch/x86/include/asm/tpm.h b/xen/arch/x86/include/asm/tpm.h
+index 9bbdf63680..3ca5d3528d 100644
+--- a/xen/arch/x86/include/asm/tpm.h
++++ b/xen/arch/x86/include/asm/tpm.h
+@@ -2,6 +2,7 @@
+ #define _ASM_X86_TPM_H_
+ 
+ #include 
++#include 
+ 
+ #define TPM_TIS_BASE 0xFED40000
+ #define TPM_TIS_SIZE 0x00010000
+@@ -9,4 +10,14 @@
+ void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size,
+                      uint32_t type, uint8_t *log_data, unsigned log_data_size);
+ 
++/* Measures essential parts of SLR table before making use of them. */
++void tpm_measure_slrt(void);
++
++/* Takes measurements of DRTM policy entries except for MBI and SLRT, which
++ * should have been measured by the time this is called. Also performs sanity
++ * checks of the policy and panics on failure. In particular, the function
++ * verifies that DRTM is consistent with MultibootInfo (MBI) (the MBI address
++ * is assumed to be virtual). */
++void tpm_process_drtm_policy(const multiboot_info_t *mbi);
++
+ #endif /* _ASM_X86_TPM_H_ */
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 22cbdbdc88..519e3311f5 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -56,6 +56,7 @@
+ #include 
+ #include 
+ #include 
++#include 
+ 
+ /* opt_nosmp: If true, secondary processors are ignored. */
+ static bool __initdata opt_nosmp;
+@@ -1172,6 +1173,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+     {
+         /* Prepare for TXT-related code. 
*/ + map_txt_mem_regions(); ++ /* Measure SLRT here because it gets used by init_e820(), the rest is ++ * measured below by tpm_process_drtm_policy(). */ ++ tpm_measure_slrt(); + /* Reserve TXT heap and SINIT. */ + protect_txt_mem_regions(); + } +@@ -1194,6 +1198,12 @@ void __init noreturn __start_xen(unsigned long mbi_p) + /* Create a temporary copy of the E820 map. */ + memcpy(&boot_e820, &e820, sizeof(e820)); + ++ /* Process all yet unmeasured DRTM entries after E820 initialization to not ++ * do this while memory is uncached (too slow). This must also happen before ++ * fields of Multiboot modules change their format below. */ ++ if ( slaunch_active ) ++ tpm_process_drtm_policy(mbi); ++ + /* Early kexec reservation (explicit static start address). */ + nr_pages = 0; + for ( i = 0; i < e820.nr_map; i++ ) +diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c +index 283d735b38..58cddf7c8c 100644 +--- a/xen/arch/x86/tpm.c ++++ b/xen/arch/x86/tpm.c +@@ -980,4 +980,197 @@ void __stdcall tpm_extend_mbi(uint32_t *mbi) + tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)mbi, *mbi, + TXT_EVTYPE_SLAUNCH, NULL, 0); + } ++#else ++static struct slr_table *slr_get_table(void) ++{ ++ uint32_t slrt_pa = txt_find_slrt(); ++ struct slr_table *slrt = __va(slrt_pa); ++ ++ map_l2(slrt_pa, PAGE_SIZE); ++ ++ if ( slrt->magic != SLR_TABLE_MAGIC ) ++ panic("SLRT has invalid magic value: %#08x!\n", slrt->magic); ++ /* XXX: are newer revisions allowed? */ ++ if ( slrt->revision != SLR_TABLE_REVISION ) ++ panic("SLRT is of unsupported revision: %#04x!\n", slrt->revision); ++ if ( slrt->architecture != SLR_INTEL_TXT ) ++ panic("SLRT is for unexpected architecture: %#04x!\n", ++ slrt->architecture); ++ if ( slrt->size > slrt->max_size ) ++ panic("SLRT is larger than its max size: %#08x > %#08x!\n", ++ slrt->size, slrt->max_size); ++ ++ if ( slrt->size > PAGE_SIZE ) ++ map_l2(slrt_pa, slrt->size); ++ ++ return slrt; ++} ++ ++void tpm_measure_slrt(void) ++{ ++ struct slr_table *slrt = slr_get_table(); ++ ++ if ( slrt->revision == 1 ) { ++ /* In revision one of the SLRT, only Intel info table is measured. */ ++ struct slr_entry_intel_info *intel_info = ++ (void *)slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_INTEL_INFO); ++ if ( intel_info == NULL ) ++ panic("SLRT is missing Intel-specific information!\n"); ++ ++ tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)intel_info, ++ sizeof(*intel_info), TXT_EVTYPE_SLAUNCH, NULL, 0); ++ } else { ++ /* ++ * slr_get_table() checks that the revision is valid, so we must not ++ * get here unless the code is wrong. ++ */ ++ panic("Unhandled SLRT revision: %d!\n", slrt->revision); ++ } ++} ++ ++static struct slr_entry_policy *slr_get_policy(struct slr_table *slrt) ++{ ++ struct slr_entry_policy *policy; ++ ++ policy = (struct slr_entry_policy *) ++ slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_DRTM_POLICY); ++ if (policy == NULL) ++ panic("SLRT is missing DRTM policy!\n"); ++ ++ /* XXX: are newer revisions allowed? 
*/
++    if ( policy->revision != SLR_POLICY_REVISION )
++        panic("DRTM policy in SLRT is of unsupported revision: %#04x!\n",
++              policy->revision);
++
++    return policy;
++}
++
++static void check_drtm_policy(struct slr_table *slrt,
++                              struct slr_entry_policy *policy,
++                              struct slr_policy_entry *policy_entry,
++                              const multiboot_info_t *mbi)
++{
++    uint32_t i;
++    module_t *mods;
++    uint32_t num_mod_entries;
++
++    if ( policy->nr_entries < 2 )
++        panic("DRTM policy in SLRT contains fewer than 2 entries (%d)!\n",
++              policy->nr_entries);
++
++    /* MBI policy entry must be the first one, so that measuring order matches
++     * policy order. */
++    if ( policy_entry[0].entity_type != SLR_ET_MULTIBOOT2_INFO )
++        panic("First entry of DRTM policy in SLRT is not MBI: %#04x!\n",
++              policy_entry[0].entity_type);
++    if ( policy_entry[0].pcr != DRTM_DATA_PCR )
++        panic("MBI was measured to %d instead of %d PCR!\n",
++              policy_entry[0].pcr, DRTM_DATA_PCR);
++
++    /* SLRT policy entry must be the second one. */
++    if ( policy_entry[1].entity_type != SLR_ET_SLRT )
++        panic("Second entry of DRTM policy in SLRT is not SLRT: %#04x!\n",
++              policy_entry[1].entity_type);
++    if ( policy_entry[1].pcr != DRTM_DATA_PCR )
++        panic("SLRT was measured to %d instead of %d PCR!\n",
++              policy_entry[1].pcr, DRTM_DATA_PCR);
++    if ( policy_entry[1].entity != (uint64_t)__pa(slrt) )
++        panic("SLRT address (%#08lx) differs from its DRTM entry (%#08lx)\n",
++              __pa(slrt), policy_entry[1].entity);
++
++    mods = __va(mbi->mods_addr);
++    for ( i = 0; i < mbi->mods_count; i++ ) {
++        uint16_t j;
++        uint64_t start = mods[i].mod_start;
++        uint64_t size = mods[i].mod_end - mods[i].mod_start;
++
++        for ( j = 0; j < policy->nr_entries; j++ ) {
++            if ( policy_entry[j].entity_type != SLR_ET_MULTIBOOT2_MODULE )
++                continue;
++
++            if ( policy_entry[j].entity == start &&
++                 policy_entry[j].size == size )
++                break;
++        }
++
++        if ( j >= policy->nr_entries ) {
++            panic("Couldn't find Multiboot module \"%s\" (at %d) in DRTM of Secure Launch\n",
++                  (const char *)__va(mods[i].string), i);
++        }
++    }
++
++    num_mod_entries = 0;
++    for ( i = 0; i < policy->nr_entries; i++ ) {
++        if ( policy_entry[i].entity_type == SLR_ET_MULTIBOOT2_MODULE )
++            num_mod_entries++;
++    }
++
++    if ( mbi->mods_count != num_mod_entries ) {
++        panic("Unexpected number of Multiboot modules: %d instead of %d\n",
++              (int)mbi->mods_count, (int)num_mod_entries);
++    }
++}
++
++void tpm_process_drtm_policy(const multiboot_info_t *mbi)
++{
++    struct slr_table *slrt;
++    struct slr_entry_policy *policy;
++    struct slr_policy_entry *policy_entry;
++    uint16_t i;
++
++    slrt = slr_get_table();
++
++    policy = slr_get_policy(slrt);
++    policy_entry = (struct slr_policy_entry *)
++        ((uint8_t *)policy + sizeof(*policy));
++
++    check_drtm_policy(slrt, policy, policy_entry, mbi);
++    /* MBI was measured in tpm_extend_mbi(). */
++    policy_entry[0].flags |= SLR_POLICY_FLAG_MEASURED;
++    /* SLRT was measured in tpm_measure_slrt(). */
++    policy_entry[1].flags |= SLR_POLICY_FLAG_MEASURED;
++
++    for ( i = 2; i < policy->nr_entries; i++ ) {
++        uint64_t start = policy_entry[i].entity;
++        uint64_t size = policy_entry[i].size;
++
++        /* No already measured entries are expected here. 
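++         * (Entries 0 and 1, MBI and SLRT, had their flags set just above;
++         * any other entry arriving pre-measured means the policy was
++         * processed out of order.)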
*/ ++ if ( policy_entry[i].flags & SLR_POLICY_FLAG_MEASURED ) ++ panic("DRTM entry at %d was measured out of order!\n", i); ++ ++ switch ( policy_entry[i].entity_type ) { ++ case SLR_ET_MULTIBOOT2_INFO: ++ panic("Duplicated MBI entry in DRTM of Secure Launch at %d\n", i); ++ case SLR_ET_SLRT: ++ panic("Duplicated SLRT entry in DRTM of Secure Launch at %d\n", i); ++ ++ case SLR_ET_UNSPECIFIED: ++ case SLR_ET_BOOT_PARAMS: ++ case SLR_ET_SETUP_DATA: ++ case SLR_ET_CMDLINE: ++ case SLR_ET_UEFI_MEMMAP: ++ case SLR_ET_RAMDISK: ++ case SLR_ET_MULTIBOOT2_MODULE: ++ case SLR_ET_TXT_OS2MLE: ++ /* Measure this entry below. */ ++ break; ++ ++ case SLR_ET_UNUSED: ++ /* Skip this entry. */ ++ continue; ++ } ++ ++ if ( policy_entry[i].flags & SLR_POLICY_IMPLICIT_SIZE ) ++ panic("Unexpected implicitly-sized DRTM entry of Secure Launch at %d\n", ++ i); ++ ++ map_l2(start, size); ++ tpm_hash_extend(DRTM_LOC, policy_entry[i].pcr, __va(start), size, ++ TXT_EVTYPE_SLAUNCH, (uint8_t *)policy_entry[i].evt_info, ++ strnlen(policy_entry[i].evt_info, ++ TPM_EVENT_INFO_LENGTH)); ++ ++ policy_entry[i].flags |= SLR_POLICY_FLAG_MEASURED; ++ } ++} + #endif +-- +2.46.0 + diff --git a/1324-arch-x86-extract-slaunch-unit.patch b/1324-arch-x86-extract-slaunch-unit.patch new file mode 100644 index 00000000..54498d1b --- /dev/null +++ b/1324-arch-x86-extract-slaunch-unit.patch @@ -0,0 +1,731 @@ +From 7665e099a92dfa3665da8098ac41681928124ae5 Mon Sep 17 00:00:00 2001 +From: Sergii Dmytruk +Date: Sat, 16 Mar 2024 21:25:16 +0200 +Subject: [PATCH 1324/1328] arch/x86: extract slaunch unit + +To collect its core functionality in one place instead of having some in +intel_txt and other in tpm units. + +TXT_EVTYPE_* now live in and are called +DLE_EVTYPE_* despite being based on TXT specification. This way code +for non-Intel won't need to include TXT header. + +No functional changes. + +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/Makefile | 1 + + xen/arch/x86/e820.c | 1 + + xen/arch/x86/include/asm/intel_txt.h | 50 ------ + xen/arch/x86/include/asm/slaunch.h | 54 ++++++ + xen/arch/x86/intel_txt.c | 27 +-- + xen/arch/x86/setup.c | 1 + + xen/arch/x86/slaunch.c | 239 +++++++++++++++++++++++++++ + xen/arch/x86/smpboot.c | 1 + + xen/arch/x86/tpm.c | 197 +--------------------- + 9 files changed, 301 insertions(+), 270 deletions(-) + create mode 100644 xen/arch/x86/include/asm/slaunch.h + create mode 100644 xen/arch/x86/slaunch.c + +diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile +index d967aabf29..884929e051 100644 +--- a/xen/arch/x86/Makefile ++++ b/xen/arch/x86/Makefile +@@ -60,6 +60,7 @@ obj-y += psr.o + obj-y += intel_txt.o + obj-y += setup.o + obj-y += shutdown.o ++obj-y += slaunch.o + obj-y += smp.o + obj-y += smpboot.o + obj-y += spec_ctrl.o +diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c +index f836fec189..5b2d960a88 100644 +--- a/xen/arch/x86/e820.c ++++ b/xen/arch/x86/e820.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + /* + * opt_mem: Limit maximum address of physical RAM. 
+diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 8f9f4d9570..112f670816 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -80,8 +80,6 @@ + + #ifndef __ASSEMBLY__ + +-extern bool slaunch_active; +- + extern char txt_ap_entry[]; + extern uint32_t trampoline_gdt[]; + +@@ -94,8 +92,6 @@ extern uint32_t trampoline_gdt[]; + #define _txt(x) __va(x) + #endif + +-#include +- + /* + * Always use private space as some of registers are either read-only or not + * present in public space. +@@ -333,39 +329,6 @@ static inline int is_in_pmr(struct txt_os_sinit_data *os_sinit, uint64_t base, + return 0; + } + +-/* +- * This helper function is used to map memory using L2 page tables by aligning +- * mapped regions to 2MB. This way page allocator (which at this point isn't +- * yet initialized) isn't needed for creating new L1 mappings. The function +- * also checks and skips memory already mapped by the prebuilt tables. +- * +- * There is no unmap_l2() because the function is meant to be used for code that +- * accesses TXT registers and TXT heap soon after which Xen rebuilds memory +- * maps, effectively dropping all existing mappings. +- */ +-extern int map_l2(unsigned long paddr, unsigned long size); +- +-/* evt_log is a physical address and the caller must map it to virtual, if +- * needed. */ +-static inline void find_evt_log(struct slr_table *slrt, void **evt_log, +- uint32_t *evt_log_size) +-{ +- struct slr_entry_log_info *log_info; +- +- log_info = (struct slr_entry_log_info *) +- slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_LOG_INFO); +- if ( log_info != NULL ) +- { +- *evt_log = _p(log_info->addr); +- *evt_log_size = log_info->size; +- } +- else +- { +- *evt_log = NULL; +- *evt_log_size = 0; +- } +-} +- + /* Returns physical address. */ + static inline uint32_t txt_find_slrt(void) + { +@@ -378,17 +341,4 @@ extern void map_txt_mem_regions(void); + extern void protect_txt_mem_regions(void); + extern void txt_restore_mtrrs(bool e820_verbose); + +-#define DRTM_LOC 2 +-#define DRTM_CODE_PCR 17 +-#define DRTM_DATA_PCR 18 +- +-/* +- * Secure Launch event log entry type. The TXT specification defines the +- * base event value as 0x400 for DRTM values. +- */ +-#define TXT_EVTYPE_BASE 0x400 +-#define TXT_EVTYPE_SLAUNCH (TXT_EVTYPE_BASE + 0x102) +-#define TXT_EVTYPE_SLAUNCH_START (TXT_EVTYPE_BASE + 0x103) +-#define TXT_EVTYPE_SLAUNCH_END (TXT_EVTYPE_BASE + 0x104) +- + #endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/include/asm/slaunch.h b/xen/arch/x86/include/asm/slaunch.h +new file mode 100644 +index 0000000000..03c686061a +--- /dev/null ++++ b/xen/arch/x86/include/asm/slaunch.h +@@ -0,0 +1,54 @@ ++#ifndef _ASM_X86_SLAUNCH_H_ ++#define _ASM_X86_SLAUNCH_H_ ++ ++#include ++#include ++ ++#define DRTM_LOC 2 ++#define DRTM_CODE_PCR 17 ++#define DRTM_DATA_PCR 18 ++ ++/* ++ * Secure Launch event log entry types. The TXT specification defines the ++ * base event value as 0x400 for DRTM values. ++ */ ++#define TXT_EVTYPE_BASE 0x400 ++#define DLE_EVTYPE_SLAUNCH (TXT_EVTYPE_BASE + 0x102) ++#define DLE_EVTYPE_SLAUNCH_START (TXT_EVTYPE_BASE + 0x103) ++#define DLE_EVTYPE_SLAUNCH_END (TXT_EVTYPE_BASE + 0x104) ++ ++extern bool slaunch_active; ++ ++/* evt_log is a physical address and the caller must map it to virtual, if ++ * needed. 
*/
++static inline void find_evt_log(struct slr_table *slrt, void **evt_log,
++                                uint32_t *evt_log_size)
++{
++    struct slr_entry_log_info *log_info =
++        (void *)slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_LOG_INFO);
++
++    if ( log_info != NULL )
++    {
++        *evt_log = _p(log_info->addr);
++        *evt_log_size = log_info->size;
++    }
++    else
++    {
++        *evt_log = NULL;
++        *evt_log_size = 0;
++    }
++}
++
++/*
++ * This helper function is used to map memory using L2 page tables by aligning
++ * mapped regions to 2MB. This way page allocator (which at this point isn't
++ * yet initialized) isn't needed for creating new L1 mappings. The function
++ * also checks and skips memory already mapped by the prebuilt tables.
++ *
++ * There is no unmap_l2() because the function is meant to be used for code that
++ * accesses TXT registers and TXT heap soon after which Xen rebuilds memory
++ * maps, effectively dropping all existing mappings.
++ */
++extern int map_l2(unsigned long paddr, unsigned long size);
++
++#endif /* _ASM_X86_SLAUNCH_H_ */
+diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c
+index f07a1044ee..36fda89cf2 100644
+--- a/xen/arch/x86/intel_txt.c
++++ b/xen/arch/x86/intel_txt.c
+@@ -5,6 +5,7 @@
+ #include
+ #include
+ #include
++#include <asm/slaunch.h>
+ #include
+ #include
+ #include
+@@ -12,32 +13,6 @@
+ 
+ static uint64_t __initdata txt_heap_base, txt_heap_size;
+ 
+-bool __initdata slaunch_active;
+-
+-static void __maybe_unused compile_time_checks(void)
+-{
+-    BUILD_BUG_ON(sizeof(slaunch_active) != 1);
+-}
+-
+-int __init map_l2(unsigned long paddr, unsigned long size)
+-{
+-    unsigned long aligned_paddr = paddr & ~((1ULL << L2_PAGETABLE_SHIFT) - 1);
+-    unsigned long pages = ((paddr + size) - aligned_paddr);
+-    pages = ROUNDUP(pages, 1ULL << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
+-
+-    if ( (aligned_paddr + pages * PAGE_SIZE) <= PREBUILT_MAP_LIMIT )
+-        return 0;
+-
+-    if ( aligned_paddr < PREBUILT_MAP_LIMIT ) {
+-        pages -= (PREBUILT_MAP_LIMIT - aligned_paddr) >> PAGE_SHIFT;
+-        aligned_paddr = PREBUILT_MAP_LIMIT;
+-    }
+-
+-    return map_pages_to_xen((unsigned long)__va(aligned_paddr),
+-                            maddr_to_mfn(aligned_paddr),
+-                            pages, PAGE_HYPERVISOR);
+-}
+-
+ void __init map_txt_mem_regions(void)
+ {
+     void *evt_log_addr;
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 519e3311f5..75e5dbb725 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -56,6 +56,7 @@
+ #include
+ #include
+ #include
++#include <asm/slaunch.h>
+ #include
+ 
+ /* opt_nosmp: If true, secondary processors are ignored.
*/ +diff --git a/xen/arch/x86/slaunch.c b/xen/arch/x86/slaunch.c +new file mode 100644 +index 0000000000..07500ee8db +--- /dev/null ++++ b/xen/arch/x86/slaunch.c +@@ -0,0 +1,239 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++bool __initdata slaunch_active; ++ ++static void __maybe_unused compile_time_checks(void) ++{ ++ BUILD_BUG_ON(sizeof(slaunch_active) != 1); ++} ++ ++int __init map_l2(unsigned long paddr, unsigned long size) ++{ ++ unsigned long aligned_paddr = paddr & ~((1ULL << L2_PAGETABLE_SHIFT) - 1); ++ unsigned long pages = ((paddr + size) - aligned_paddr); ++ pages = ROUNDUP(pages, 1ULL << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT; ++ ++ if ( (aligned_paddr + pages * PAGE_SIZE) <= PREBUILT_MAP_LIMIT ) ++ return 0; ++ ++ if ( aligned_paddr < PREBUILT_MAP_LIMIT ) ++ { ++ pages -= (PREBUILT_MAP_LIMIT - aligned_paddr) >> PAGE_SHIFT; ++ aligned_paddr = PREBUILT_MAP_LIMIT; ++ } ++ ++ return map_pages_to_xen((unsigned long)__va(aligned_paddr), ++ maddr_to_mfn(aligned_paddr), ++ pages, PAGE_HYPERVISOR); ++} ++ ++static struct slr_table *slr_get_table(void) ++{ ++ uint32_t slrt_pa = txt_find_slrt(); ++ struct slr_table *slrt = __va(slrt_pa); ++ ++ map_l2(slrt_pa, PAGE_SIZE); ++ ++ if ( slrt->magic != SLR_TABLE_MAGIC ) ++ panic("SLRT has invalid magic value: %#08x!\n", slrt->magic); ++ /* XXX: are newer revisions allowed? */ ++ if ( slrt->revision != SLR_TABLE_REVISION ) ++ panic("SLRT is of unsupported revision: %#04x!\n", slrt->revision); ++ if ( slrt->architecture != SLR_INTEL_TXT ) ++ panic("SLRT is for unexpected architecture: %#04x!\n", ++ slrt->architecture); ++ if ( slrt->size > slrt->max_size ) ++ panic("SLRT is larger than its max size: %#08x > %#08x!\n", ++ slrt->size, slrt->max_size); ++ ++ if ( slrt->size > PAGE_SIZE ) ++ map_l2(slrt_pa, slrt->size); ++ ++ return slrt; ++} ++ ++void tpm_measure_slrt(void) ++{ ++ struct slr_table *slrt = slr_get_table(); ++ ++ if ( slrt->revision == 1 ) ++ { ++ /* In revision one of the SLRT, only Intel info table is measured. */ ++ struct slr_entry_intel_info *intel_info = ++ (void *)slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_INTEL_INFO); ++ if ( intel_info == NULL ) ++ panic("SLRT is missing Intel-specific information!\n"); ++ ++ tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)intel_info, ++ sizeof(*intel_info), DLE_EVTYPE_SLAUNCH, NULL, 0); ++ } ++ else ++ { ++ /* ++ * slr_get_table() checks that the revision is valid, so we must not ++ * get here unless the code is wrong. ++ */ ++ panic("Unhandled SLRT revision: %d!\n", slrt->revision); ++ } ++} ++ ++static struct slr_entry_policy *slr_get_policy(struct slr_table *slrt) ++{ ++ struct slr_entry_policy *policy; ++ ++ policy = (struct slr_entry_policy *) ++ slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_DRTM_POLICY); ++ if (policy == NULL) ++ panic("SLRT is missing DRTM policy!\n"); ++ ++ /* XXX: are newer revisions allowed? 
*/
++    if ( policy->revision != SLR_POLICY_REVISION )
++        panic("DRTM policy in SLRT is of unsupported revision: %#04x!\n",
++              policy->revision);
++
++    return policy;
++}
++
++static void check_drtm_policy(struct slr_table *slrt,
++                              struct slr_entry_policy *policy,
++                              struct slr_policy_entry *policy_entry,
++                              const multiboot_info_t *mbi)
++{
++    uint32_t i;
++    module_t *mods;
++    uint32_t num_mod_entries;
++
++    if ( policy->nr_entries < 2 )
++        panic("DRTM policy in SLRT contains less than 2 entries (%d)!\n",
++              policy->nr_entries);
++
++    /* MBI policy entry must be the first one, so that measuring order matches
++     * policy order. */
++    if ( policy_entry[0].entity_type != SLR_ET_MULTIBOOT2_INFO )
++        panic("First entry of DRTM policy in SLRT is not MBI: %#04x!\n",
++              policy_entry[0].entity_type);
++    if ( policy_entry[0].pcr != DRTM_DATA_PCR )
++        panic("MBI was measured to %d instead of %d PCR!\n", DRTM_DATA_PCR,
++              policy_entry[0].pcr);
++
++    /* SLRT policy entry must be the second one. */
++    if ( policy_entry[1].entity_type != SLR_ET_SLRT )
++        panic("Second entry of DRTM policy in SLRT is not SLRT: %#04x!\n",
++              policy_entry[1].entity_type);
++    if ( policy_entry[1].pcr != DRTM_DATA_PCR )
++        panic("SLRT was measured to %d instead of %d PCR!\n", DRTM_DATA_PCR,
++              policy_entry[1].pcr);
++    if ( policy_entry[1].entity != (uint64_t)__pa(slrt) )
++        panic("SLRT address (%#08lx) differs from its DRTM entry (%#08lx)\n",
++              __pa(slrt), policy_entry[1].entity);
++
++    mods = __va(mbi->mods_addr);
++    for ( i = 0; i < mbi->mods_count; i++ )
++    {
++        uint16_t j;
++        uint64_t start = mods[i].mod_start;
++        uint64_t size = mods[i].mod_end - mods[i].mod_start;
++
++        for ( j = 0; j < policy->nr_entries; j++ )
++        {
++            if ( policy_entry[j].entity_type != SLR_ET_MULTIBOOT2_MODULE )
++                continue;
++
++            if ( policy_entry[j].entity == start &&
++                 policy_entry[j].size == size )
++                break;
++        }
++
++        if ( j >= policy->nr_entries )
++        {
++            panic("Couldn't find Multiboot module \"%s\" (at %d) in DRTM of Secure Launch\n",
++                  (const char *)__va(mods[i].string), i);
++        }
++    }
++
++    num_mod_entries = 0;
++    for ( i = 0; i < policy->nr_entries; i++ )
++    {
++        if ( policy_entry[i].entity_type == SLR_ET_MULTIBOOT2_MODULE )
++            num_mod_entries++;
++    }
++
++    if ( mbi->mods_count != num_mod_entries )
++    {
++        panic("Unexpected number of Multiboot modules: %d instead of %d\n",
++              (int)mbi->mods_count, (int)num_mod_entries);
++    }
++}
++
++void tpm_process_drtm_policy(const multiboot_info_t *mbi)
++{
++    struct slr_table *slrt;
++    struct slr_entry_policy *policy;
++    struct slr_policy_entry *policy_entry;
++    uint16_t i;
++
++    slrt = slr_get_table();
++
++    policy = slr_get_policy(slrt);
++    policy_entry = (struct slr_policy_entry *)
++        ((uint8_t *)policy + sizeof(*policy));
++
++    check_drtm_policy(slrt, policy, policy_entry, mbi);
++    /* MBI was measured in tpm_extend_mbi(). */
++    policy_entry[0].flags |= SLR_POLICY_FLAG_MEASURED;
++    /* SLRT was measured in tpm_measure_slrt(). */
++    policy_entry[1].flags |= SLR_POLICY_FLAG_MEASURED;
++
++    for ( i = 2; i < policy->nr_entries; i++ )
++    {
++        uint64_t start = policy_entry[i].entity;
++        uint64_t size = policy_entry[i].size;
++
++        /* No already measured entries are expected here. */
++        if ( policy_entry[i].flags & SLR_POLICY_FLAG_MEASURED )
++            panic("DRTM entry at %d was measured out of order!\n", i);
++
++        switch ( policy_entry[i].entity_type )
++        {
++        case SLR_ET_MULTIBOOT2_INFO:
++            panic("Duplicated MBI entry in DRTM of Secure Launch at %d\n", i);
++        case SLR_ET_SLRT:
++            panic("Duplicated SLRT entry in DRTM of Secure Launch at %d\n", i);
++
++        case SLR_ET_UNSPECIFIED:
++        case SLR_ET_BOOT_PARAMS:
++        case SLR_ET_SETUP_DATA:
++        case SLR_ET_CMDLINE:
++        case SLR_ET_UEFI_MEMMAP:
++        case SLR_ET_RAMDISK:
++        case SLR_ET_MULTIBOOT2_MODULE:
++        case SLR_ET_TXT_OS2MLE:
++            /* Measure this entry below. */
++            break;
++
++        case SLR_ET_UNUSED:
++            /* Skip this entry. */
++            continue;
++        }
++
++        if ( policy_entry[i].flags & SLR_POLICY_IMPLICIT_SIZE )
++            panic("Unexpected implicitly-sized DRTM entry of Secure Launch at %d\n",
++                  i);
++
++        map_l2(start, size);
++        tpm_hash_extend(DRTM_LOC, policy_entry[i].pcr, __va(start), size,
++                        DLE_EVTYPE_SLAUNCH, (uint8_t *)policy_entry[i].evt_info,
++                        strnlen(policy_entry[i].evt_info,
++                                TPM_EVENT_INFO_LENGTH));
++
++        policy_entry[i].flags |= SLR_POLICY_FLAG_MEASURED;
++    }
++}
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index c732e5ace7..ecbc645ae1 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -40,6 +40,7 @@
+ #include
+ #include
+ #include
++#include <asm/slaunch.h>
+ #include
+ #include
+ #include
+diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c
+index 58cddf7c8c..b075dafaf2 100644
+--- a/xen/arch/x86/tpm.c
++++ b/xen/arch/x86/tpm.c
+@@ -29,6 +29,7 @@ asm (
+ 
+ #include "boot/defs.h"
+ #include "include/asm/intel_txt.h"
++#include "include/asm/slaunch.h"
+ #include "include/asm/tpm.h"
+ #ifdef __va
+ #error "__va defined in non-paged mode!"
+@@ -66,6 +67,7 @@ void *memcpy(void *dest, const void *src, size_t n)
+ #include
+ #include
+ #include
++#include <asm/slaunch.h>
+ #include
+ 
+ #endif /* __EARLY_TPM__ */
+@@ -978,199 +980,6 @@ void __stdcall tpm_extend_mbi(uint32_t *mbi)
+ {
+     /* MBI starts with uint32_t total_size. */
+     tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)mbi, *mbi,
+-                    TXT_EVTYPE_SLAUNCH, NULL, 0);
+-}
+-#else
+-static struct slr_table *slr_get_table(void)
+-{
+-    uint32_t slrt_pa = txt_find_slrt();
+-    struct slr_table *slrt = __va(slrt_pa);
+-
+-    map_l2(slrt_pa, PAGE_SIZE);
+-
+-    if ( slrt->magic != SLR_TABLE_MAGIC )
+-        panic("SLRT has invalid magic value: %#08x!\n", slrt->magic);
+-    /* XXX: are newer revisions allowed? */
+-    if ( slrt->revision != SLR_TABLE_REVISION )
+-        panic("SLRT is of unsupported revision: %#04x!\n", slrt->revision);
+-    if ( slrt->architecture != SLR_INTEL_TXT )
+-        panic("SLRT is for unexpected architecture: %#04x!\n",
+-              slrt->architecture);
+-    if ( slrt->size > slrt->max_size )
+-        panic("SLRT is larger than its max size: %#08x > %#08x!\n",
+-              slrt->size, slrt->max_size);
+-
+-    if ( slrt->size > PAGE_SIZE )
+-        map_l2(slrt_pa, slrt->size);
+-
+-    return slrt;
+-}
+-
+-void tpm_measure_slrt(void)
+-{
+-    struct slr_table *slrt = slr_get_table();
+-
+-    if ( slrt->revision == 1 ) {
+-        /* In revision one of the SLRT, only Intel info table is measured. */
+-        struct slr_entry_intel_info *intel_info =
+-            (void *)slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_INTEL_INFO);
+-        if ( intel_info == NULL )
+-            panic("SLRT is missing Intel-specific information!\n");
+-
+-        tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)intel_info,
+-                        sizeof(*intel_info), TXT_EVTYPE_SLAUNCH, NULL, 0);
+-    } else {
+-        /*
+-         * slr_get_table() checks that the revision is valid, so we must not
+-         * get here unless the code is wrong.
+-         */
+-        panic("Unhandled SLRT revision: %d!\n", slrt->revision);
+-    }
+-}
+-
+-static struct slr_entry_policy *slr_get_policy(struct slr_table *slrt)
+-{
+-    struct slr_entry_policy *policy;
+-
+-    policy = (struct slr_entry_policy *)
+-        slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_DRTM_POLICY);
+-    if (policy == NULL)
+-        panic("SLRT is missing DRTM policy!\n");
+-
+-    /* XXX: are newer revisions allowed? */
+-    if ( policy->revision != SLR_POLICY_REVISION )
+-        panic("DRTM policy in SLRT is of unsupported revision: %#04x!\n",
+-              policy->revision);
+-
+-    return policy;
+-}
+-
+-static void check_drtm_policy(struct slr_table *slrt,
+-                              struct slr_entry_policy *policy,
+-                              struct slr_policy_entry *policy_entry,
+-                              const multiboot_info_t *mbi)
+-{
+-    uint32_t i;
+-    module_t *mods;
+-    uint32_t num_mod_entries;
+-
+-    if ( policy->nr_entries < 2 )
+-        panic("DRTM policy in SLRT contains less than 2 entries (%d)!\n",
+-              policy->nr_entries);
+-
+-    /* MBI policy entry must be the first one, so that measuring order matches
+-     * policy order. */
+-    if ( policy_entry[0].entity_type != SLR_ET_MULTIBOOT2_INFO )
+-        panic("First entry of DRTM policy in SLRT is not MBI: %#04x!\n",
+-              policy_entry[0].entity_type);
+-    if ( policy_entry[0].pcr != DRTM_DATA_PCR )
+-        panic("MBI was measured to %d instead of %d PCR!\n", DRTM_DATA_PCR,
+-              policy_entry[0].pcr);
+-
+-    /* SLRT policy entry must be the second one. */
+-    if ( policy_entry[1].entity_type != SLR_ET_SLRT )
+-        panic("Second entry of DRTM policy in SLRT is not SLRT: %#04x!\n",
+-              policy_entry[1].entity_type);
+-    if ( policy_entry[1].pcr != DRTM_DATA_PCR )
+-        panic("SLRT was measured to %d instead of %d PCR!\n", DRTM_DATA_PCR,
+-              policy_entry[1].pcr);
+-    if ( policy_entry[1].entity != (uint64_t)__pa(slrt) )
+-        panic("SLRT address (%#08lx) differs from its DRTM entry (%#08lx)\n",
+-              __pa(slrt), policy_entry[1].entity);
+-
+-    mods = __va(mbi->mods_addr);
+-    for ( i = 0; i < mbi->mods_count; i++ ) {
+-        uint16_t j;
+-        uint64_t start = mods[i].mod_start;
+-        uint64_t size = mods[i].mod_end - mods[i].mod_start;
+-
+-        for ( j = 0; j < policy->nr_entries; j++ ) {
+-            if ( policy_entry[j].entity_type != SLR_ET_MULTIBOOT2_MODULE )
+-                continue;
+-
+-            if ( policy_entry[j].entity == start &&
+-                 policy_entry[j].size == size )
+-                break;
+-        }
+-
+-        if ( j >= policy->nr_entries ) {
+-            panic("Couldn't find Multiboot module \"%s\" (at %d) in DRTM of Secure Launch\n",
+-                  (const char *)__va(mods[i].string), i);
+-        }
+-    }
+-
+-    num_mod_entries = 0;
+-    for ( i = 0; i < policy->nr_entries; i++ ) {
+-        if ( policy_entry[i].entity_type == SLR_ET_MULTIBOOT2_MODULE )
+-            num_mod_entries++;
+-    }
+-
+-    if ( mbi->mods_count != num_mod_entries ) {
+-        panic("Unexpected number of Multiboot modules: %d instead of %d\n",
+-              (int)mbi->mods_count, (int)num_mod_entries);
+-    }
+-}
+-
+-void tpm_process_drtm_policy(const multiboot_info_t *mbi)
+-{
+-    struct slr_table *slrt;
+-    struct slr_entry_policy *policy;
+-    struct slr_policy_entry *policy_entry;
+-    uint16_t i;
+-
+-    slrt = slr_get_table();
+-
+-    policy = slr_get_policy(slrt);
+-    policy_entry = (struct slr_policy_entry *)
+-        ((uint8_t *)policy + sizeof(*policy));
+-
+-    check_drtm_policy(slrt, policy, policy_entry, mbi);
+-    /* MBI was measured in tpm_extend_mbi(). */
+-    policy_entry[0].flags |= SLR_POLICY_FLAG_MEASURED;
+-    /* SLRT was measured in tpm_measure_slrt(). */
+-    policy_entry[1].flags |= SLR_POLICY_FLAG_MEASURED;
+-
+-    for ( i = 2; i < policy->nr_entries; i++ ) {
+-        uint64_t start = policy_entry[i].entity;
+-        uint64_t size = policy_entry[i].size;
+-
+-        /* No already measured entries are expected here. */
+-        if ( policy_entry[i].flags & SLR_POLICY_FLAG_MEASURED )
+-            panic("DRTM entry at %d was measured out of order!\n", i);
+-
+-        switch ( policy_entry[i].entity_type ) {
+-        case SLR_ET_MULTIBOOT2_INFO:
+-            panic("Duplicated MBI entry in DRTM of Secure Launch at %d\n", i);
+-        case SLR_ET_SLRT:
+-            panic("Duplicated SLRT entry in DRTM of Secure Launch at %d\n", i);
+-
+-        case SLR_ET_UNSPECIFIED:
+-        case SLR_ET_BOOT_PARAMS:
+-        case SLR_ET_SETUP_DATA:
+-        case SLR_ET_CMDLINE:
+-        case SLR_ET_UEFI_MEMMAP:
+-        case SLR_ET_RAMDISK:
+-        case SLR_ET_MULTIBOOT2_MODULE:
+-        case SLR_ET_TXT_OS2MLE:
+-            /* Measure this entry below. */
+-            break;
+-
+-        case SLR_ET_UNUSED:
+-            /* Skip this entry. */
+-            continue;
+-        }
+-
+-        if ( policy_entry[i].flags & SLR_POLICY_IMPLICIT_SIZE )
+-            panic("Unexpected implicitly-sized DRTM entry of Secure Launch at %d\n",
+-                  i);
+-
+-        map_l2(start, size);
+-        tpm_hash_extend(DRTM_LOC, policy_entry[i].pcr, __va(start), size,
+-                        TXT_EVTYPE_SLAUNCH, (uint8_t *)policy_entry[i].evt_info,
+-                        strnlen(policy_entry[i].evt_info,
+-                                TPM_EVENT_INFO_LENGTH));
+-
+-        policy_entry[i].flags |= SLR_POLICY_FLAG_MEASURED;
+-    }
++                    DLE_EVTYPE_SLAUNCH, NULL, 0);
+ }
+ #endif
+-- 
+2.46.0
+
diff --git a/1325-x86-boot-introduce-slaunch_slrt-global-variable.patch b/1325-x86-boot-introduce-slaunch_slrt-global-variable.patch
new file mode 100644
index 00000000..cb505af9
--- /dev/null
+++ b/1325-x86-boot-introduce-slaunch_slrt-global-variable.patch
@@ -0,0 +1,283 @@
+From 0480b882d6b56c020a57d6f05d6ae30924775274 Mon Sep 17 00:00:00 2001
+From: Sergii Dmytruk
+Date: Thu, 21 Mar 2024 19:35:10 +0200
+Subject: [PATCH 1325/1328] x86/boot: introduce slaunch_slrt global variable
+
+It holds the physical address of the SLRT. The value is produced by
+slaunch_early (previously known as txt_early), gets set in assembly and
+is then used by the main C code, which doesn't need to know how we got
+it (that differs between CPU vendors).
+
+This change additionally renames txt_early.c to slaunch_early.c.
+
+Signed-off-by: Sergii Dmytruk
+---
+ xen/arch/x86/boot/Makefile | 2 +-
+ xen/arch/x86/boot/head.S | 32 +++++++++++++------
+ .../x86/boot/{txt_early.c => slaunch_early.c} | 19 ++++++++---
+ xen/arch/x86/include/asm/intel_txt.h | 8 -----
+ xen/arch/x86/include/asm/slaunch.h | 1 +
+ xen/arch/x86/intel_txt.c | 4 +--
+ xen/arch/x86/slaunch.c | 9 +++---
+ xen/arch/x86/tpm.c | 12 ++++---
+ 8 files changed, 52 insertions(+), 35 deletions(-)
+ rename xen/arch/x86/boot/{txt_early.c => slaunch_early.c} (91%)
+
+diff --git a/xen/arch/x86/boot/Makefile b/xen/arch/x86/boot/Makefile
+index 913fa9d2c6..e1cb421ad3 100644
+--- a/xen/arch/x86/boot/Makefile
++++ b/xen/arch/x86/boot/Makefile
+@@ -1,6 +1,6 @@
+ obj-bin-y += head.o
+ 
+-head-bin-objs := cmdline.o reloc.o txt_early.o tpm_early.o
++head-bin-objs := cmdline.o reloc.o slaunch_early.o tpm_early.o
+ 
+ nocov-y += $(head-bin-objs)
+ noubsan-y += $(head-bin-objs)
+diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
+index 7ea50f0098..4efe47e85a 100644
+--- a/xen/arch/x86/boot/head.S
++++ b/xen/arch/x86/boot/head.S
+@@ -527,21 +527,33 @@ __start:
+         jmp     trampoline_bios_setup
+ 
+ .Lslaunch_proto:
++        /* Upon reaching here, CPU state mostly matches the one set up by the
++         * bootloader with ESP, ESI and EDX being clobbered above. */
++
+         /* Save information that TrenchBoot slaunch was used. */
+         movb    $1, sym_esi(slaunch_active)
+ 
+-        /* Push arguments to stack and call txt_early_tests(). */
++        /* Prepare space for output parameter of slaunch_early_tests(), which is
++         * a structure of two uint32_t fields. */
++        sub     $8, %esp
++
++        /* Push arguments to stack and call slaunch_early_tests(). */
++        push    %esp                        /* pointer to output structure */
+         push    $sym_offs(__2M_rwdata_end) /* end of target image */
+         push    $sym_offs(_start)          /* target base address */
+         push    %esi                       /* load base address */
+-        call    txt_early_tests
++        call    slaunch_early_tests
+ 
+-        /*
+-         * txt_early_tests() returns MBI address, pass it to tpm_extend_mbi()
+-         * and store for later in EBX.
+-         */
+-        push    %eax
+-        movl    %eax,%ebx
++        /* Move outputs of slaunch_early_tests() from stack into registers.
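++         * The two pops below match the 8 bytes reserved with "sub $8, %esp"
++         * above; the first structure field (the MBI address) sits at the
++         * lower address, so it is popped first.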
*/ ++ pop %ebx /* physical MBI address */ ++ pop %edx /* physical SLRT address */ ++ ++ /* Save physical address of SLRT for C code. */ ++ mov %edx, sym_esi(slaunch_slrt) ++ ++ /* Push arguments to stack and call tpm_extend_mbi(). */ ++ push %edx /* SLRT address because early code has no slaunch_slrt */ ++ push %ebx /* MBI address */ + call tpm_extend_mbi + + /* Move magic number expected by Multiboot 2 to EAX and fall through. */ +@@ -878,8 +890,8 @@ reloc: + .incbin "reloc.bin" + + ALIGN +-txt_early_tests: +- .incbin "txt_early.bin" ++slaunch_early_tests: ++ .incbin "slaunch_early.bin" + + ALIGN + tpm_extend_mbi: +diff --git a/xen/arch/x86/boot/txt_early.c b/xen/arch/x86/boot/slaunch_early.c +similarity index 91% +rename from xen/arch/x86/boot/txt_early.c +rename to xen/arch/x86/boot/slaunch_early.c +index 23ee734c47..3edf40778a 100644 +--- a/xen/arch/x86/boot/txt_early.c ++++ b/xen/arch/x86/boot/slaunch_early.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2022-2023 3mdeb Sp. z o.o. All rights reserved. ++ * Copyright (c) 2022-2024 3mdeb Sp. z o.o. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -23,11 +23,18 @@ asm ( + " .text \n" + " .globl _start \n" + "_start: \n" +- " jmp txt_early_tests \n" ++ " jmp slaunch_early_tests \n" + ); + + #include "defs.h" + #include "../include/asm/intel_txt.h" ++#include "../include/asm/slaunch.h" ++ ++struct early_tests_results ++{ ++ uint32_t mbi_pa; ++ uint32_t slrt_pa; ++} __packed; + + static void verify_pmr_ranges(struct txt_os_mle_data *os_mle, + struct txt_os_sinit_data *os_sinit, +@@ -105,9 +112,10 @@ static void verify_pmr_ranges(struct txt_os_mle_data *os_mle, + */ + } + +-uint32_t __stdcall txt_early_tests(uint32_t load_base_addr, ++void __stdcall slaunch_early_tests(uint32_t load_base_addr, + uint32_t tgt_base_addr, +- uint32_t tgt_end_addr) ++ uint32_t tgt_end_addr, ++ struct early_tests_results *result) + { + void *txt_heap; + struct txt_os_mle_data *os_mle; +@@ -128,5 +136,6 @@ uint32_t __stdcall txt_early_tests(uint32_t load_base_addr, + + verify_pmr_ranges(os_mle, os_sinit, load_base_addr, tgt_base_addr, size); + +- return os_mle->boot_params_addr; ++ result->mbi_pa = os_mle->boot_params_addr; ++ result->slrt_pa = os_mle->slrt; + } +diff --git a/xen/arch/x86/include/asm/intel_txt.h b/xen/arch/x86/include/asm/intel_txt.h +index 112f670816..e6c6a06be1 100644 +--- a/xen/arch/x86/include/asm/intel_txt.h ++++ b/xen/arch/x86/include/asm/intel_txt.h +@@ -329,14 +329,6 @@ static inline int is_in_pmr(struct txt_os_sinit_data *os_sinit, uint64_t base, + return 0; + } + +-/* Returns physical address. */ +-static inline uint32_t txt_find_slrt(void) +-{ +- struct txt_os_mle_data *os_mle = +- txt_os_mle_data_start(_txt(read_txt_reg(TXTCR_HEAP_BASE))); +- return os_mle->slrt; +-} +- + extern void map_txt_mem_regions(void); + extern void protect_txt_mem_regions(void); + extern void txt_restore_mtrrs(bool e820_verbose); +diff --git a/xen/arch/x86/include/asm/slaunch.h b/xen/arch/x86/include/asm/slaunch.h +index 03c686061a..70101bb5d6 100644 +--- a/xen/arch/x86/include/asm/slaunch.h ++++ b/xen/arch/x86/include/asm/slaunch.h +@@ -18,6 +18,7 @@ + #define DLE_EVTYPE_SLAUNCH_END (TXT_EVTYPE_BASE + 0x104) + + extern bool slaunch_active; ++extern uint32_t slaunch_slrt; /* physical address */ + + /* evt_log is a physical address and the caller must map it to virtual, if + * needed. 
*/ +diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c +index 36fda89cf2..c3471dfe1f 100644 +--- a/xen/arch/x86/intel_txt.c ++++ b/xen/arch/x86/intel_txt.c +@@ -29,7 +29,7 @@ void __init map_txt_mem_regions(void) + + map_l2(txt_heap_base, txt_heap_size); + +- find_evt_log(__va(txt_find_slrt()), &evt_log_addr, &evt_log_size); ++ find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size); + map_l2((unsigned long)evt_log_addr, evt_log_size); + if ( evt_log_addr != NULL ) + map_l2((unsigned long)evt_log_addr, evt_log_size); +@@ -53,7 +53,7 @@ void __init protect_txt_mem_regions(void) + BUG_ON(rc == 0); + + /* TXT TPM Event Log */ +- find_evt_log(__va(txt_find_slrt()), &evt_log_addr, &evt_log_size); ++ find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size); + if ( evt_log_addr != NULL ) { + printk("SLAUNCH: reserving event log (%#lx - %#lx)\n", + (uint64_t)evt_log_addr, +diff --git a/xen/arch/x86/slaunch.c b/xen/arch/x86/slaunch.c +index 07500ee8db..cfb53a0eb5 100644 +--- a/xen/arch/x86/slaunch.c ++++ b/xen/arch/x86/slaunch.c +@@ -1,5 +1,4 @@ + #include +-#include + #include + #include + #include +@@ -9,6 +8,7 @@ + #include + + bool __initdata slaunch_active; ++uint32_t __initdata slaunch_slrt; + + static void __maybe_unused compile_time_checks(void) + { +@@ -37,10 +37,9 @@ int __init map_l2(unsigned long paddr, unsigned long size) + + static struct slr_table *slr_get_table(void) + { +- uint32_t slrt_pa = txt_find_slrt(); +- struct slr_table *slrt = __va(slrt_pa); ++ struct slr_table *slrt = __va(slaunch_slrt); + +- map_l2(slrt_pa, PAGE_SIZE); ++ map_l2(slaunch_slrt, PAGE_SIZE); + + if ( slrt->magic != SLR_TABLE_MAGIC ) + panic("SLRT has invalid magic value: %#08x!\n", slrt->magic); +@@ -55,7 +54,7 @@ static struct slr_table *slr_get_table(void) + slrt->size, slrt->max_size); + + if ( slrt->size > PAGE_SIZE ) +- map_l2(slrt_pa, slrt->size); ++ map_l2(slaunch_slrt, slrt->size); + + return slrt; + } +diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c +index b075dafaf2..f05f1bc37b 100644 +--- a/xen/arch/x86/tpm.c ++++ b/xen/arch/x86/tpm.c +@@ -36,6 +36,8 @@ asm ( + #endif + #define __va(x) _p(x) + ++uint32_t slaunch_slrt; ++ + /* + * The code is being compiled as a standalone binary without linking to any + * other part of Xen. Providing implementation of builtin functions in this +@@ -936,9 +938,7 @@ void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size, + void *evt_log_addr; + uint32_t evt_log_size; + +- struct slr_table *slrt = __va(txt_find_slrt()); +- +- find_evt_log(slrt, &evt_log_addr, &evt_log_size); ++ find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size); + evt_log_addr = __va(evt_log_addr); + + if ( is_tpm12() ) { +@@ -976,8 +976,12 @@ void tpm_hash_extend(unsigned loc, unsigned pcr, uint8_t *buf, unsigned size, + } + + #ifdef __EARLY_TPM__ +-void __stdcall tpm_extend_mbi(uint32_t *mbi) ++void __stdcall tpm_extend_mbi(uint32_t *mbi, uint32_t slrt_pa) + { ++ /* Early TPM code isn't linked with the rest but still needs to have this ++ * variable with correct value. */ ++ slaunch_slrt = slrt_pa; ++ + /* MBI starts with uint32_t total_size. 
*/
+     tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)mbi, *mbi,
+                     DLE_EVTYPE_SLAUNCH, NULL, 0);
+-- 
+2.46.0
+
diff --git a/1326-x86-boot-find-MBI-and-SLRT-on-AMD.patch b/1326-x86-boot-find-MBI-and-SLRT-on-AMD.patch
new file mode 100644
index 00000000..bc7ae259
--- /dev/null
+++ b/1326-x86-boot-find-MBI-and-SLRT-on-AMD.patch
@@ -0,0 +1,124 @@
+From cecbab10cb5e25050d6c684f624f4b7052fb24c5 Mon Sep 17 00:00:00 2001
+From: Sergii Dmytruk
+Date: Thu, 21 Mar 2024 19:41:06 +0200
+Subject: [PATCH 1326/1328] x86/boot: find MBI and SLRT on AMD
+
+secure-kernel-loader on AMD with SKINIT passes the MBI as a parameter
+to the Multiboot kernel.
+
+Another thing of interest is the location of the SLRT, which is stored
+as bootloader data right after SKL.
+
+Signed-off-by: Sergii Dmytruk
+---
+ xen/arch/x86/boot/head.S | 2 ++
+ xen/arch/x86/boot/slaunch_early.c | 30 ++++++++++++++++++++++++++++++
+ xen/arch/x86/include/asm/slaunch.h | 17 +++++++++++++++++
+ 3 files changed, 49 insertions(+)
+
+diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
+index 4efe47e85a..093f894850 100644
+--- a/xen/arch/x86/boot/head.S
++++ b/xen/arch/x86/boot/head.S
+@@ -539,6 +539,8 @@ __start:
+ 
+         /* Push arguments to stack and call slaunch_early_tests(). */
+         push    %esp                        /* pointer to output structure */
++        push    %ebp                        /* Slaunch parameter on AMD */
++        push    %ebx                        /* Multiboot parameter */
+         push    $sym_offs(__2M_rwdata_end) /* end of target image */
+         push    $sym_offs(_start)          /* target base address */
+         push    %esi                       /* load base address */
+diff --git a/xen/arch/x86/boot/slaunch_early.c b/xen/arch/x86/boot/slaunch_early.c
+index 3edf40778a..a33db3bd98 100644
+--- a/xen/arch/x86/boot/slaunch_early.c
++++ b/xen/arch/x86/boot/slaunch_early.c
+@@ -29,6 +29,7 @@ asm (
+ #include "defs.h"
+ #include "../include/asm/intel_txt.h"
+ #include "../include/asm/slaunch.h"
++#include "../include/asm/x86-vendors.h"
+ 
+ struct early_tests_results
+ {
+@@ -36,6 +37,16 @@
+     uint32_t slrt_pa;
+ } __packed;
+ 
++static bool is_intel_cpu(void)
++{
++    /* No boot_cpu_data in early code. */
++    uint32_t eax, ebx, ecx, edx;
++    cpuid(0x00000000, &eax, &ebx, &ecx, &edx);
++    return ebx == X86_VENDOR_INTEL_EBX
++        && ecx == X86_VENDOR_INTEL_ECX
++        && edx == X86_VENDOR_INTEL_EDX;
++}
++
+ static void verify_pmr_ranges(struct txt_os_mle_data *os_mle,
+                               struct txt_os_sinit_data *os_sinit,
+                               uint32_t load_base_addr, uint32_t tgt_base_addr,
+@@ -115,6 +126,8 @@ static void verify_pmr_ranges(struct txt_os_mle_data *os_mle,
+ void __stdcall slaunch_early_tests(uint32_t load_base_addr,
+                                    uint32_t tgt_base_addr,
+                                    uint32_t tgt_end_addr,
++                                   uint32_t multiboot_param,
++                                   uint32_t slaunch_param,
+                                    struct early_tests_results *result)
+ {
+     void *txt_heap;
+@@ -122,6 +135,23 @@ void __stdcall slaunch_early_tests(uint32_t load_base_addr,
+     struct txt_os_sinit_data *os_sinit;
+     uint32_t size = tgt_end_addr - tgt_base_addr;
+ 
++    if ( !is_intel_cpu() )
++    {
++        /*
++         * Not an Intel CPU. Currently the only other option is AMD with SKINIT
++         * and secure-kernel-loader.
++         */
++
++        const uint16_t *sl_header = (void *)slaunch_param;
++        /* secure-kernel-loader passes MBI as a parameter to the Multiboot
++         * kernel. */
++        result->mbi_pa = multiboot_param;
++        /* The fourth 16-bit integer of SKL's header is an offset to
++         * bootloader's data, which is SLRT.
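++         * (That is, the SLRT sits at slaunch_param + sl_header[3];
++         * sl_header views SKL's header as an array of 16-bit fields.)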
*/ ++ result->slrt_pa = slaunch_param + sl_header[3]; ++ return; ++ } ++ + /* Clear the TXT error registers for a clean start of day */ + write_txt_reg(TXTCR_ERRORCODE, 0); + +diff --git a/xen/arch/x86/include/asm/slaunch.h b/xen/arch/x86/include/asm/slaunch.h +index 70101bb5d6..70b017b065 100644 +--- a/xen/arch/x86/include/asm/slaunch.h ++++ b/xen/arch/x86/include/asm/slaunch.h +@@ -17,6 +17,23 @@ + #define DLE_EVTYPE_SLAUNCH_START (TXT_EVTYPE_BASE + 0x103) + #define DLE_EVTYPE_SLAUNCH_END (TXT_EVTYPE_BASE + 0x104) + ++#ifndef cpuid ++/* ++ * Generic CPUID function ++ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx ++ * resulting in stale register contents being returned. ++ * ++ * Copied from processor.h because that header can't be included by early code. ++ */ ++#define cpuid(_op,_eax,_ebx,_ecx,_edx) \ ++ asm volatile ( "cpuid" \ ++ : "=a" (*(int *)(_eax)), \ ++ "=b" (*(int *)(_ebx)), \ ++ "=c" (*(int *)(_ecx)), \ ++ "=d" (*(int *)(_edx)) \ ++ : "0" (_op), "2" (0) ) ++#endif ++ + extern bool slaunch_active; + extern uint32_t slaunch_slrt; /* physical address */ + +-- +2.46.0 + diff --git a/1327-arch-x86-move-generic-memory-mapping-and-protection-.patch b/1327-arch-x86-move-generic-memory-mapping-and-protection-.patch new file mode 100644 index 00000000..ac356f57 --- /dev/null +++ b/1327-arch-x86-move-generic-memory-mapping-and-protection-.patch @@ -0,0 +1,174 @@ +From 16aaa2d86404a09f6ca8bccf885fee963248a501 Mon Sep 17 00:00:00 2001 +From: Sergii Dmytruk +Date: Fri, 22 Mar 2024 00:40:12 +0200 +Subject: [PATCH 1327/1328] arch/x86: move generic memory mapping and + protection to slaunch.c + +Signed-off-by: Sergii Dmytruk +--- + xen/arch/x86/include/asm/slaunch.h | 4 +++ + xen/arch/x86/intel_txt.c | 24 ------------------ + xen/arch/x86/setup.c | 11 +++++---- + xen/arch/x86/slaunch.c | 39 ++++++++++++++++++++++++++++++ + 4 files changed, 49 insertions(+), 29 deletions(-) + +diff --git a/xen/arch/x86/include/asm/slaunch.h b/xen/arch/x86/include/asm/slaunch.h +index 70b017b065..ea20a97e94 100644 +--- a/xen/arch/x86/include/asm/slaunch.h ++++ b/xen/arch/x86/include/asm/slaunch.h +@@ -57,6 +57,10 @@ static inline void find_evt_log(struct slr_table *slrt, void **evt_log, + } + } + ++void map_slaunch_mem_regions(void); ++ ++void protect_slaunch_mem_regions(void); ++ + /* + * This helper function is used to map memory using L2 page tables by aligning + * mapped regions to 2MB. 
This way page allocator (which at this point isn't +diff --git a/xen/arch/x86/intel_txt.c b/xen/arch/x86/intel_txt.c +index c3471dfe1f..cc9a6d01b0 100644 +--- a/xen/arch/x86/intel_txt.c ++++ b/xen/arch/x86/intel_txt.c +@@ -15,11 +15,7 @@ static uint64_t __initdata txt_heap_base, txt_heap_size; + + void __init map_txt_mem_regions(void) + { +- void *evt_log_addr; +- uint32_t evt_log_size; +- + map_l2(TXT_PRIV_CONFIG_REGS_BASE, NR_TXT_CONFIG_SIZE); +- map_l2(TPM_TIS_BASE, TPM_TIS_SIZE); + + txt_heap_base = read_txt_reg(TXTCR_HEAP_BASE); + BUG_ON(txt_heap_base == 0); +@@ -28,20 +24,11 @@ void __init map_txt_mem_regions(void) + BUG_ON(txt_heap_size == 0); + + map_l2(txt_heap_base, txt_heap_size); +- +- find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size); +- map_l2((unsigned long)evt_log_addr, evt_log_size); +- if ( evt_log_addr != NULL ) +- map_l2((unsigned long)evt_log_addr, evt_log_size); + } + + void __init protect_txt_mem_regions(void) + { + int rc; +- +- void *evt_log_addr; +- uint32_t evt_log_size; +- + uint64_t sinit_base, sinit_size; + + /* TXT Heap */ +@@ -52,17 +39,6 @@ void __init protect_txt_mem_regions(void) + txt_heap_base + txt_heap_size); + BUG_ON(rc == 0); + +- /* TXT TPM Event Log */ +- find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size); +- if ( evt_log_addr != NULL ) { +- printk("SLAUNCH: reserving event log (%#lx - %#lx)\n", +- (uint64_t)evt_log_addr, +- (uint64_t)evt_log_addr + evt_log_size); +- rc = reserve_e820_ram(&e820_raw, (uint64_t)evt_log_addr, +- (uint64_t)evt_log_addr + evt_log_size); +- BUG_ON(rc == 0); +- } +- + sinit_base = read_txt_reg(TXTCR_SINIT_BASE); + BUG_ON(sinit_base == 0); + +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 75e5dbb725..e53b973603 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -55,7 +55,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -1172,13 +1171,15 @@ void __init noreturn __start_xen(unsigned long mbi_p) + + if ( slaunch_active ) + { +- /* Prepare for TXT-related code. */ +- map_txt_mem_regions(); ++ /* Prepare for accesses to essential data structures setup by boot ++ * environment. */ ++ map_slaunch_mem_regions(); ++ + /* Measure SLRT here because it gets used by init_e820(), the rest is + * measured below by tpm_process_drtm_policy(). */ + tpm_measure_slrt(); +- /* Reserve TXT heap and SINIT. */ +- protect_txt_mem_regions(); ++ ++ protect_slaunch_mem_regions(); + } + + /* Sanitise the raw E820 map to produce a final clean version. */ +diff --git a/xen/arch/x86/slaunch.c b/xen/arch/x86/slaunch.c +index cfb53a0eb5..0f05a7cb2c 100644 +--- a/xen/arch/x86/slaunch.c ++++ b/xen/arch/x86/slaunch.c +@@ -1,4 +1,6 @@ + #include ++#include ++#include + #include + #include + #include +@@ -35,6 +37,43 @@ int __init map_l2(unsigned long paddr, unsigned long size) + pages, PAGE_HYPERVISOR); + } + ++void __init map_slaunch_mem_regions(void) ++{ ++ void *evt_log_addr; ++ uint32_t evt_log_size; ++ ++ map_l2(TPM_TIS_BASE, TPM_TIS_SIZE); ++ ++ find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size); ++ if ( evt_log_addr != NULL ) ++ map_l2((unsigned long)evt_log_addr, evt_log_size); ++ ++ /* Vendor-specific part. 
*/
++    map_txt_mem_regions();
++}
++
++void __init protect_slaunch_mem_regions(void)
++{
++    int rc;
++
++    void *evt_log_addr;
++    uint32_t evt_log_size;
++
++    find_evt_log(__va(slaunch_slrt), &evt_log_addr, &evt_log_size);
++    if ( evt_log_addr != NULL )
++    {
++        printk("SLAUNCH: reserving event log (%#lx - %#lx)\n",
++               (uint64_t)evt_log_addr,
++               (uint64_t)evt_log_addr + evt_log_size);
++        rc = reserve_e820_ram(&e820_raw, (uint64_t)evt_log_addr,
++                              (uint64_t)evt_log_addr + evt_log_size);
++        BUG_ON(rc == 0);
++    }
++
++    /* Vendor-specific part. */
++    protect_txt_mem_regions();
++}
++
+ static struct slr_table *slr_get_table(void)
+-- 
+2.46.0
+
diff --git a/1328-arch-x86-support-slaunch-with-AMD-SKINIT.patch b/1328-arch-x86-support-slaunch-with-AMD-SKINIT.patch
new file mode 100644
index 00000000..f46c025c
--- /dev/null
+++ b/1328-arch-x86-support-slaunch-with-AMD-SKINIT.patch
@@ -0,0 +1,289 @@
+From 6c55f68a5187ff603563b1fb361c5bf8209dbf7f Mon Sep 17 00:00:00 2001
+From: Sergii Dmytruk
+Date: Sun, 17 Mar 2024 00:58:26 +0200
+Subject: [PATCH 1328/1328] arch/x86: support slaunch with AMD SKINIT
+
+This mostly involves not running Intel-specific code when on AMD.
+
+There are only a few new AMD-specific implementation details:
+ - finding the SLB's start and size and then mapping and protecting it
+ - managing the offset for adding the next TPM log entry (the
+   TXT-compatible data prepared by SKL is stored inside the vendor data
+   field of the TCG log header)
+
+Signed-off-by: Sergii Dmytruk
+---
+ xen/arch/x86/e820.c | 2 +-
+ xen/arch/x86/include/asm/slaunch.h | 2 +
+ xen/arch/x86/slaunch.c | 68 ++++++++++++++++++++++++------
+ xen/arch/x86/tpm.c | 62 ++++++++++++++++++++++++++-
+ 4 files changed, 118 insertions(+), 16 deletions(-)
+
+diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
+index 5b2d960a88..896921b1c5 100644
+--- a/xen/arch/x86/e820.c
++++ b/xen/arch/x86/e820.c
+@@ -457,7 +457,7 @@ static uint64_t __init mtrr_top_of_ram(void)
+     rdmsrl(MSR_MTRRcap, mtrr_cap);
+     rdmsrl(MSR_MTRRdefType, mtrr_def);
+ 
+-    if ( slaunch_active )
++    if ( slaunch_active && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+         txt_restore_mtrrs(e820_verbose);
+ 
+     if ( e820_verbose )
+diff --git a/xen/arch/x86/include/asm/slaunch.h b/xen/arch/x86/include/asm/slaunch.h
+index ea20a97e94..2af67a17f7 100644
+--- a/xen/arch/x86/include/asm/slaunch.h
++++ b/xen/arch/x86/include/asm/slaunch.h
+@@ -11,6 +11,8 @@
+ /*
+  * Secure Launch event log entry types. The TXT specification defines the
+  * base event value as 0x400 for DRTM values.
++ *
++ * Using the same values for AMD SKINIT.
+  */
+ #define TXT_EVTYPE_BASE 0x400
+ #define DLE_EVTYPE_SLAUNCH (TXT_EVTYPE_BASE + 0x102)
+diff --git a/xen/arch/x86/slaunch.c b/xen/arch/x86/slaunch.c
+index 0f05a7cb2c..b18b882f74 100644
+--- a/xen/arch/x86/slaunch.c
++++ b/xen/arch/x86/slaunch.c
+@@ -9,6 +9,10 @@
+ #include
+ #include
+ 
++/* SLB is 64k, 64k-aligned */
++#define SKINIT_SLB_SIZE  0x10000
++#define SKINIT_SLB_ALIGN 0x10000
++
+ bool __initdata slaunch_active;
+ uint32_t __initdata slaunch_slrt;
+ 
+@@ -37,6 +41,19 @@ int __init map_l2(unsigned long paddr, unsigned long size)
+                             pages, PAGE_HYPERVISOR);
+ }
+ 
++static uint32_t get_slb_start(void)
++{
++    /* The runtime computation relies on size being a power of 2 and equal to
++     * alignment. Make sure these assumptions hold.
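++     * (BUILD_BUG_ON() turns these into compile-time checks, so they add no
++     * run-time cost.)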
*/ ++ BUILD_BUG_ON(SKINIT_SLB_SIZE != SKINIT_SLB_ALIGN); ++ BUILD_BUG_ON(SKINIT_SLB_SIZE == 0); ++ BUILD_BUG_ON((SKINIT_SLB_SIZE & (SKINIT_SLB_SIZE - 1)) != 0); ++ ++ /* Rounding any address within SLB down to alignment gives SLB base and ++ * SLRT is inside SLB on AMD. */ ++ return slaunch_slrt & ~(SKINIT_SLB_SIZE - 1); ++} ++ + void __init map_slaunch_mem_regions(void) + { + void *evt_log_addr; +@@ -49,7 +66,14 @@ void __init map_slaunch_mem_regions(void) + map_l2((unsigned long)evt_log_addr, evt_log_size); + + /* Vendor-specific part. */ +- map_txt_mem_regions(); ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) ++ { ++ map_txt_mem_regions(); ++ } ++ else if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) ++ { ++ map_l2(get_slb_start(), SKINIT_SLB_SIZE); ++ } + } + + void __init protect_slaunch_mem_regions(void) +@@ -71,11 +95,25 @@ void __init protect_slaunch_mem_regions(void) + } + + /* Vendor-specific part. */ +- protect_txt_mem_regions(); ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) ++ { ++ protect_txt_mem_regions(); ++ } ++ else if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) ++ { ++ uint64_t slb_start = get_slb_start(); ++ uint64_t slb_end = slb_start + SKINIT_SLB_SIZE; ++ printk("SLAUNCH: reserving SLB (%#lx - %#lx)\n", slb_start, slb_end); ++ e820_change_range_type(&e820_raw, slb_start, slb_end, ++ E820_RAM, E820_RESERVED); ++ } + } + + static struct slr_table *slr_get_table(void) + { ++ bool intel_cpu = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); ++ uint16_t slrt_architecture = intel_cpu ? SLR_INTEL_TXT : SLR_AMD_SKINIT; ++ + struct slr_table *slrt = __va(slaunch_slrt); + + map_l2(slaunch_slrt, PAGE_SIZE); +@@ -85,9 +123,9 @@ static struct slr_table *slr_get_table(void) + /* XXX: are newer revisions allowed? */ + if ( slrt->revision != SLR_TABLE_REVISION ) + panic("SLRT is of unsupported revision: %#04x!\n", slrt->revision); +- if ( slrt->architecture != SLR_INTEL_TXT ) +- panic("SLRT is for unexpected architecture: %#04x!\n", +- slrt->architecture); ++ if ( slrt->architecture != slrt_architecture ) ++ panic("SLRT is for unexpected architecture: %#04x != %#04x!\n", ++ slrt->architecture, slrt_architecture); + if ( slrt->size > slrt->max_size ) + panic("SLRT is larger than its max size: %#08x > %#08x!\n", + slrt->size, slrt->max_size); +@@ -104,14 +142,18 @@ void tpm_measure_slrt(void) + + if ( slrt->revision == 1 ) + { +- /* In revision one of the SLRT, only Intel info table is measured. */ +- struct slr_entry_intel_info *intel_info = +- (void *)slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_INTEL_INFO); +- if ( intel_info == NULL ) +- panic("SLRT is missing Intel-specific information!\n"); +- +- tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)intel_info, +- sizeof(*intel_info), DLE_EVTYPE_SLAUNCH, NULL, 0); ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) ++ { ++ /* In revision one of the SLRT, only Intel info table is ++ * measured. 
*/ ++ struct slr_entry_intel_info *intel_info = ++ (void *)slr_next_entry_by_tag(slrt, NULL, SLR_ENTRY_INTEL_INFO); ++ if ( intel_info == NULL ) ++ panic("SLRT is missing Intel-specific information!\n"); ++ ++ tpm_hash_extend(DRTM_LOC, DRTM_DATA_PCR, (uint8_t *)intel_info, ++ sizeof(*intel_info), DLE_EVTYPE_SLAUNCH, NULL, 0); ++ } + } + else + { +diff --git a/xen/arch/x86/tpm.c b/xen/arch/x86/tpm.c +index f05f1bc37b..6442c7110f 100644 +--- a/xen/arch/x86/tpm.c ++++ b/xen/arch/x86/tpm.c +@@ -31,6 +31,7 @@ asm ( + #include "include/asm/intel_txt.h" + #include "include/asm/slaunch.h" + #include "include/asm/tpm.h" ++#include "include/asm/x86-vendors.h" + #ifdef __va + #error "__va defined in non-paged mode!" + #endif +@@ -63,15 +64,31 @@ void *memcpy(void *dest, const void *src, size_t n) + return dest; + } + ++static bool is_amd_cpu(void) ++{ ++ /* No boot_cpu_data in early code. */ ++ uint32_t eax, ebx, ecx, edx; ++ cpuid(0x00000000, &eax, &ebx, &ecx, &edx); ++ return ebx == X86_VENDOR_AMD_EBX ++ && ecx == X86_VENDOR_AMD_ECX ++ && edx == X86_VENDOR_AMD_EDX; ++} ++ + #else /* __EARLY_TPM__ */ + + #include + #include + #include + #include ++#include + #include + #include + ++static bool is_amd_cpu(void) ++{ ++ return boot_cpu_data.x86_vendor == X86_VENDOR_AMD; ++} ++ + #endif /* __EARLY_TPM__ */ + + #include +@@ -260,6 +277,21 @@ struct TPM12_PCREvent { + uint8_t Data[]; + }; + ++struct tpm1_spec_id_event { ++ uint32_t pcrIndex; ++ uint32_t eventType; ++ uint8_t digest[20]; ++ uint32_t eventSize; ++ uint8_t signature[16]; ++ uint32_t platformClass; ++ uint8_t specVersionMinor; ++ uint8_t specVersionMajor; ++ uint8_t specErrata; ++ uint8_t uintnSize; ++ uint8_t vendorInfoSize; ++ uint8_t vendorInfo[0]; /* variable number of members */ ++} __packed; ++ + struct txt_ev_log_container_12 { + char Signature[20]; /* "TXT Event Container", null-terminated */ + uint8_t Reserved[12]; +@@ -402,6 +434,15 @@ static void *create_log_event12(struct txt_ev_log_container_12 *evt_log, + { + struct TPM12_PCREvent *new_entry; + ++ if ( is_amd_cpu() ) { ++ /* ++ * On AMD, TXT-compatible structure is stored as vendor data of ++ * TCG-defined event log header. ++ */ ++ struct tpm1_spec_id_event *spec_id = (void *)evt_log; ++ evt_log = (struct txt_ev_log_container_12 *)&spec_id->vendorInfo[0]; ++ } ++ + new_entry = (void *)(((uint8_t *)evt_log) + evt_log->NextEventOffset); + + /* +@@ -837,11 +878,28 @@ static uint32_t tpm2_hash_extend(unsigned loc, uint8_t *buf, unsigned size, + + #endif /* __EARLY_TPM__ */ + +-static struct heap_event_log_pointer_element2_1 *find_evt_log_ext_data(void) ++static struct heap_event_log_pointer_element2_1 * ++find_evt_log_ext_data(struct tpm2_spec_id_event *evt_log) + { + struct txt_os_sinit_data *os_sinit; + struct txt_ext_data_element *ext_data; + ++ if ( is_amd_cpu() ) { ++ /* ++ * Event log pointer is defined by TXT specification, but ++ * secure-kernel-loader provides a compatible structure in vendor data ++ * of the log. ++ */ ++ const uint8_t *data_size = ++ (void *)&evt_log->digestSizes[evt_log->digestCount]; ++ ++ if ( *data_size != sizeof(struct heap_event_log_pointer_element2_1) ) ++ return NULL; ++ ++ /* Vendor data directly follows one-byte size. 
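++     * (In the TCG spec-ID event layout used here, digestSizes[digestCount]
++     * is followed by a uint8_t vendorInfoSize and then vendorInfo[] itself,
++     * which is where secure-kernel-loader places the log pointer element.)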
*/ ++ return (void *)(data_size + 1); ++ } ++ + os_sinit = txt_os_sinit_data_start(__va(read_txt_reg(TXTCR_HEAP_BASE))); + ext_data = (void *)((uint8_t *)os_sinit + sizeof(*os_sinit)); + +@@ -874,7 +932,7 @@ create_log_event20(struct tpm2_spec_id_event *evt_log, uint32_t evt_log_size, + unsigned i; + uint8_t *p; + +- log_ext_data = find_evt_log_ext_data(); ++ log_ext_data = find_evt_log_ext_data(evt_log); + if ( log_ext_data == NULL ) + return log_hashes; + +-- +2.46.0 + diff --git a/xen.spec.in b/xen.spec.in index 6ea368bb..c258ec9f 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -201,6 +201,37 @@ Patch1200: 1200-hypercall-XENMEM_get_mfn_from_pfn.patch Patch1201: 1201-patch-gvt-hvmloader.patch.patch Patch1202: 1202-libxl-Add-partially-Intel-GVT-g-support-xengt-device.patch +# Secure Launch support patches +Patch1300: 1300-x86-include-asm-intel_txt.h-constants-and-accessors-.patch +Patch1301: 1301-x86-boot-add-MLE-header-and-new-entry-point.patch +Patch1302: 1302-x86-boot-txt_early-add-early-TXT-tests-and-restore-M.patch +Patch1303: 1303-include-xen-slr_table.h-Secure-Launch-Resource-Table.patch +Patch1304: 1304-xen-arch-x86-reserve-TXT-memory.patch +Patch1305: 1305-x86-intel_txt.c-restore-boot-MTRRs.patch +Patch1306: 1306-lib-sha1.c-add-file.patch +Patch1307: 1307-x86-tpm.c-code-for-early-hashing-and-extending-PCRs-.patch +Patch1308: 1308-x86-boot-choose-AP-stack-based-on-APIC-ID.patch +Patch1309: 1309-x86-smpboot.c-TXT-AP-bringup.patch +Patch1310: 1310-arch-x86-don-t-access-x86_cpu_to_apicid-directly-use.patch +Patch1311: 1311-arch-x86-smp-drop-x86_cpu_to_apicid-use-cpu_data-cpu.patch +Patch1312: 1312-arch-x86-smp-move-stack_base-to-cpu_data.patch +Patch1313: 1313-arch-x86-smp-call-x2apic_ap_setup-earlier.patch +Patch1314: 1314-arch-x86-shutdown-protect-against-recurrent-machine_.patch +Patch1315: 1315-arch-x86-smp-drop-booting_cpu-variable.patch +Patch1316: 1316-arch-x86-smp-make-cpu_state-per-CPU.patch +Patch1317: 1317-arch-x86-smp-remove-MONITOR-MWAIT-loop-for-TXT-AP-br.patch +Patch1318: 1318-arch-x86-smp-don-t-send-INIT-SIPI-SIPI-if-AP-is-alre.patch +Patch1319: 1319-arch-x86-smp-start-APs-in-parallel-during-boot.patch +Patch1320: 1320-lib-sha256.c-add-file.patch +Patch1321: 1321-x86-tpm.c-support-extending-PCRs-of-TPM2.0.patch +Patch1322: 1322-x86-tpm.c-implement-event-log-for-TPM2.0.patch +Patch1323: 1323-arch-x86-process-DRTM-policy.patch +Patch1324: 1324-arch-x86-extract-slaunch-unit.patch +Patch1325: 1325-x86-boot-introduce-slaunch_slrt-global-variable.patch +Patch1326: 1326-x86-boot-find-MBI-and-SLRT-on-AMD.patch +Patch1327: 1327-arch-x86-move-generic-memory-mapping-and-protection-.patch +Patch1328: 1328-arch-x86-support-slaunch-with-AMD-SKINIT.patch + %if %build_qemutrad BuildRequires: libidn-devel zlib-devel SDL-devel curl-devel BuildRequires: libX11-devel gtk2-devel libaio-devel
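(To close, an illustration of the SLB base recovery performed by patch 1328's
get_slb_start(): the SLB is 64 KiB in size and 64 KiB-aligned, and on AMD the
SLRT lives inside it, so masking the low bits of the SLRT address yields the
SLB base. A minimal standalone sketch under those assumptions; only
SKINIT_SLB_SIZE comes from the patches, everything else is illustrative.)

    #include <stdint.h>

    #define SKINIT_SLB_SIZE 0x10000 /* 64 KiB, 64 KiB-aligned (patch 1328) */

    /* Round an address inside the SLB down to the SLB's 64 KiB boundary,
     * exactly the masking that get_slb_start() performs. */
    static uint32_t slb_base(uint32_t slrt_pa)
    {
        return slrt_pa & ~(SKINIT_SLB_SIZE - 1);
    }

    /* Example: slrt_pa = 0x80012345 yields an SLB base of 0x80010000. */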