Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64-SVE: Add SVE registers to pal context #103801

Merged
merged 29 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
72a681f
ARM64-SVE: Add SVE registers to pal context
a74nh Jun 19, 2024
5e19a3e
fix debug sizes
a74nh Jun 21, 2024
b38dacd
Add SVE defines if missing from Linux host
a74nh Jun 21, 2024
5fc68cc
More missing defines
a74nh Jun 21, 2024
41580bc
More missing defines
a74nh Jun 21, 2024
6c8a283
Add cast
a74nh Jun 21, 2024
7d08124
Move SVE registers after debug registers
a74nh Jun 24, 2024
5f918a9
Fix dbgtargetcontext
a74nh Jun 24, 2024
7cff0c1
Remove SVE from debug context
a74nh Jun 24, 2024
3f287c5
Move ffr
a74nh Jun 24, 2024
e9e6a4e
Add SVE registers to asmconstants
a74nh Jun 24, 2024
a2c17dd
Remove Z registers from context
a74nh Jun 24, 2024
7c3256b
backup/restore SVE in Context2.S
a74nh Jun 25, 2024
ba17c2b
Remove unused SVE128 struct
a74nh Jun 25, 2024
73404ff
Add XStateFeaturesMask
a74nh Jun 25, 2024
ea6979a
restore instrsarm64sve.h changes
a74nh Jun 25, 2024
dd12f03
Restore SIZEOF__CONTEXT for windows
a74nh Jun 25, 2024
29acc33
Fix AsmOffsets.cs for windows
a74nh Jun 25, 2024
a21aee0
Fix AsmOffsets.cs for windows
a74nh Jun 25, 2024
2e549cd
Restore missing ldr
a74nh Jun 26, 2024
d43f5d5
Check size of SVE data returned from the kernel
a74nh Jun 27, 2024
ed15cc3
16 P registers
a74nh Jun 27, 2024
063f41b
Copy context based on XSTATE_MASK_SVE
a74nh Jun 28, 2024
8be931b
Move context handling inside XSTATE checks
a74nh Jun 28, 2024
6fb9141
Set CONTEXT_XSTATE
a74nh Jun 28, 2024
d2c2e10
Remove __pad and fix sizes
a74nh Jun 28, 2024
f0a1dba
Fix context sizes
a74nh Jun 28, 2024
178e266
Fix context sizes
a74nh Jun 28, 2024
29933a8
Only read/write OS context SVE registers on 128bit
a74nh Jun 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@ class AsmOffsets
public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0;
#endif // TARGET_UNIX
#elif TARGET_ARM64
#if TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x9e0;
public const int OFFSETOF__REGDISPLAY__SP = 0x938;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940;
#else // TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x940;
public const int OFFSETOF__REGDISPLAY__SP = 0x898;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0;
#endif // TARGET_UNIX
#elif TARGET_ARM
public const int SIZEOF__REGDISPLAY = 0x410;
public const int OFFSETOF__REGDISPLAY__SP = 0x3ec;
Expand Down Expand Up @@ -71,9 +77,15 @@ class AsmOffsets
public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8;
#endif // TARGET_UNIX
#elif TARGET_ARM64
#if TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x9d0;
public const int OFFSETOF__REGDISPLAY__SP = 0x930;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938;
#else // TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x930;
public const int OFFSETOF__REGDISPLAY__SP = 0x890;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898;
#endif // TARGET_UNIX
#elif TARGET_ARM
public const int SIZEOF__REGDISPLAY = 0x408;
public const int OFFSETOF__REGDISPLAY__SP = 0x3e8;
Expand Down Expand Up @@ -111,9 +123,13 @@ class AsmOffsets
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20;
#else // TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0;
#endif // TARGET_UNIx
#endif // TARGET_UNIX
#elif TARGET_ARM64
#if TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0;
#else // TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390;
#endif // TARGET_UNIX
#elif TARGET_ARM
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0;
#elif TARGET_X86
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/debug/inc/dbgtargetcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,12 @@ typedef DECLSPEC_ALIGN(16) struct {

} DT_CONTEXT;


// Compile-time size check of the ARM64 DT_CONTEXT against T_CONTEXT.
// When not cross-compiling and not targeting Windows, DT_CONTEXT must end exactly
// where T_CONTEXT's XStateFeaturesMask field begins; otherwise the two structures
// must have the same size.
#if !defined(CROSS_COMPILE) && !defined(TARGET_WINDOWS)
static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, XStateFeaturesMask), "DT_CONTEXT must not include the SVE registers on ARM64");
#else
static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size on ARM64");
#endif

#elif defined(DTCONTEXT_IS_LOONGARCH64)

Expand Down
19 changes: 18 additions & 1 deletion src/coreclr/pal/inc/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1844,6 +1844,12 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY {
#define CONTEXT_EXCEPTION_REQUEST 0x40000000L
#define CONTEXT_EXCEPTION_REPORTING 0x80000000L

// ContextFlags value marking the extended-state part of the context:
// the CONTEXT_ARM64 architecture flag combined with bit 0x40.
#define CONTEXT_XSTATE (CONTEXT_ARM64 | 0x40L)

// Bit index of the SVE feature; used below to build XSTATE_MASK_SVE.
#define XSTATE_SVE (0)

// 64-bit mask with only the SVE feature bit set.
// NOTE(review): presumably tested against the XStateFeaturesMask field of CONTEXT - confirm at use sites.
#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE))

//
// This flag is set by the unwinder if it has unwound to a call
// site, and cleared whenever it unwinds through a trap frame.
Expand Down Expand Up @@ -1944,7 +1950,18 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
/* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS];
/* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS];
/* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS];
/* +0x390 */

/* +0x390 */ DWORD64 XStateFeaturesMask;

//
// Sve Registers
//
// TODO-SVE: Support Vector register sizes >128bit
// For 128bit, Z and V registers fully overlap, so there is no need to load/store both.
/* +0x398 */ DWORD Vl;
/* +0x39c */ DWORD Ffr;
/* +0x3a0 */ DWORD P[16];
/* +0x3e0 */

} CONTEXT, *PCONTEXT, *LPCONTEXT;

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/pal/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ if(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET
add_definitions(-DXSTATE_SUPPORTED)
endif(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)

if(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)
# Currently the _xstate is not available on Alpine Linux
add_definitions(-DXSTATE_SUPPORTED)
endif(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)

if(CLR_CMAKE_TARGET_LINUX_MUSL)
# Setting RLIMIT_NOFILE breaks debugging of coreclr on Alpine Linux for some reason
add_definitions(-DDONT_SET_RLIMIT_NOFILE)
Expand Down
47 changes: 45 additions & 2 deletions src/coreclr/pal/src/arch/arm64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@

#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)

// Bit position and mask of the XSTATE flag tested in CONTEXT_ContextFlags.
#define CONTEXT_XSTATE_BIT (6)
#define CONTEXT_XSTATE (1 << CONTEXT_XSTATE_BIT)

// Bit position of the SVE feature tested in the XStateFeaturesMask field.
#define XSTATE_SVE_BIT (0)

// Mask form of XSTATE_SVE_BIT. Built from XSTATE_SVE_BIT because this header
// does not define XSTATE_SVE.
#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE_BIT))


#define CONTEXT_ContextFlags 0
#define CONTEXT_Cpsr CONTEXT_ContextFlags+4
Expand Down Expand Up @@ -54,6 +61,7 @@
#define CONTEXT_Lr CONTEXT_Fp+8
#define CONTEXT_Sp CONTEXT_Lr+8
#define CONTEXT_Pc CONTEXT_Sp+8

#define CONTEXT_NEON_OFFSET CONTEXT_Pc+8
#define CONTEXT_V0 0
#define CONTEXT_V1 CONTEXT_V0+16
Expand Down Expand Up @@ -89,7 +97,42 @@
#define CONTEXT_V31 CONTEXT_V30+16
#define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16
#define CONTEXT_Fpcr 0
#define CONTEXT_Fpsr CONTEXT_Fpcr+8
jkotas marked this conversation as resolved.
Show resolved Hide resolved
#define CONTEXT_Size ((CONTEXT_NEON_OFFSET + CONTEXT_Fpsr + 8 + 0xf) & ~0xf)
#define CONTEXT_Fpsr CONTEXT_Fpcr+4
#define CONTEXT_NEON_SIZE CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+4

#define CONTEXT_DEBUG_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_NEON_SIZE
#define CONTEXT_DEBUG_SIZE 120 // (8*4)+(8*8)+(2*4)+(2*8)

#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_DEBUG_OFFSET+CONTEXT_DEBUG_SIZE

// TODO-SVE: Support Vector register sizes >128bit

#define CONTEXT_SVE_OFFSET CONTEXT_XSTATEFEATURESMASK_OFFSET+8
#define CONTEXT_VL_OFFSET 0

// SVE register offsets are multiples of the vector length
#define CONTEXT_SVE_REGS_OFFSET CONTEXT_VL_OFFSET+4
#define CONTEXT_FFR_VL 0
#define CONTEXT_P0_VL CONTEXT_FFR_VL+1
#define CONTEXT_P1_VL CONTEXT_P0_VL+1
#define CONTEXT_P2_VL CONTEXT_P1_VL+1
#define CONTEXT_P3_VL CONTEXT_P2_VL+1
#define CONTEXT_P4_VL CONTEXT_P3_VL+1
#define CONTEXT_P5_VL CONTEXT_P4_VL+1
#define CONTEXT_P6_VL CONTEXT_P5_VL+1
#define CONTEXT_P7_VL CONTEXT_P6_VL+1
#define CONTEXT_P8_VL CONTEXT_P7_VL+1
#define CONTEXT_P9_VL CONTEXT_P8_VL+1
#define CONTEXT_P10_VL CONTEXT_P9_VL+1
#define CONTEXT_P11_VL CONTEXT_P10_VL+1
#define CONTEXT_P12_VL CONTEXT_P11_VL+1
#define CONTEXT_P13_VL CONTEXT_P12_VL+1
#define CONTEXT_P14_VL CONTEXT_P13_VL+1
#define CONTEXT_P15_VL CONTEXT_P14_VL+1

#define CONTEXT_SVE_REGS_SIZE ((CONTEXT_P15_VL+1) * 4)
#define CONTEXT_SVE_SIZE CONTEXT_SVE_REGS_SIZE + 8

#define CONTEXT_Size CONTEXT_SVE_OFFSET + CONTEXT_SVE_SIZE

#endif
69 changes: 67 additions & 2 deletions src/coreclr/pal/src/arch/arm64/context2.S
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//
// Implementation of _CONTEXT_CaptureContext for the ARM platform.
// Implementation of _CONTEXT_CaptureContext for the ARM64 platform.
// This function is processor dependent. It is used by exception handling,
// and always applies to the current thread.
//
Expand All @@ -12,6 +12,7 @@
// Incoming:
// x0: Context*
//
.arch_extension sve
LEAF_ENTRY CONTEXT_CaptureContext, _TEXT
PROLOG_STACK_ALLOC 32
.cfi_adjust_cfa_offset 32
Expand Down Expand Up @@ -69,7 +70,6 @@ LOCAL_LABEL(Done_CONTEXT_CONTROL):
stp x26, x27, [x0, CONTEXT_X26]
str x28, [x0, CONTEXT_X28]


LOCAL_LABEL(Done_CONTEXT_INTEGER):
ldr w1, [x0, CONTEXT_ContextFlags]
// clangs assembler doesn't seem to support the mov Wx, imm32 yet
Expand Down Expand Up @@ -104,6 +104,41 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET

LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT):
ldr w1, [x0, CONTEXT_ContextFlags]
// clangs assembler doesn't seem to support the mov Wx, imm32 yet
movz w2, #0x40, lsl #16
movk w2, #0x40
mov w3, w2
and w2, w1, w2
cmp w2, w3
b.ne LOCAL_LABEL(Done_CONTEXT_SVE)

ldr x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should check the CONTEXT_XSTATE in CONTEXT_ContextFlags first and check the features mask only if the CONTEXT_XSTATE is set.

tbz x1, #XSTATE_SVE_BIT, LOCAL_LABEL(Done_CONTEXT_SVE)

add x0, x0, CONTEXT_SVE_OFFSET
str p0, [x0, CONTEXT_P0_VL, MUL VL]
jkotas marked this conversation as resolved.
Show resolved Hide resolved
str p1, [x0, CONTEXT_P1_VL, MUL VL]
str p2, [x0, CONTEXT_P2_VL, MUL VL]
str p3, [x0, CONTEXT_P3_VL, MUL VL]
str p4, [x0, CONTEXT_P4_VL, MUL VL]
str p5, [x0, CONTEXT_P5_VL, MUL VL]
str p6, [x0, CONTEXT_P6_VL, MUL VL]
str p7, [x0, CONTEXT_P7_VL, MUL VL]
str p8, [x0, CONTEXT_P8_VL, MUL VL]
str p9, [x0, CONTEXT_P9_VL, MUL VL]
str p10, [x0, CONTEXT_P10_VL, MUL VL]
str p11, [x0, CONTEXT_P11_VL, MUL VL]
str p12, [x0, CONTEXT_P12_VL, MUL VL]
str p13, [x0, CONTEXT_P13_VL, MUL VL]
str p14, [x0, CONTEXT_P14_VL, MUL VL]
str p15, [x0, CONTEXT_P15_VL, MUL VL]
rdffr p0.b
str p0, [x0, CONTEXT_FFR_VL, MUL VL]
ldr p0, [x0, 0, MUL VL]
sub x0, x0, CONTEXT_SVE_OFFSET

LOCAL_LABEL(Done_CONTEXT_SVE):

EPILOG_STACK_FREE 32
ret
Expand All @@ -124,6 +159,7 @@ LEAF_ENTRY RtlCaptureContext, _TEXT
orr w1, w1, #0x4
orr w1, w1, #0x8
str w1, [x0, CONTEXT_ContextFlags]
str xzr, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET]
ldr x1, [sp]
EPILOG_STACK_FREE 16
b C_FUNC(CONTEXT_CaptureContext)
Expand All @@ -133,6 +169,7 @@ LEAF_END RtlCaptureContext, _TEXT
// x0: Context*
// x1: Exception*
//
.arch_extension sve
LEAF_ENTRY RtlRestoreContext, _TEXT

#ifdef HAS_ADDRESS_SANITIZER
Expand All @@ -154,6 +191,34 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
// since we potentially clobber x0 below, we'll bank it in x16
mov x16, x0

ldr w17, [x16, CONTEXT_ContextFlags]
tbz w17, #CONTEXT_XSTATE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE)

ldr w17, [x16, CONTEXT_XSTATEFEATURESMASK_OFFSET]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should check the CONTEXT_XSTATE in CONTEXT_ContextFlags first and check the features mask only if the CONTEXT_XSTATE is set.

tbz w17, #XSTATE_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE)

add x16, x16, CONTEXT_SVE_OFFSET
ldr p0, [x16, CONTEXT_FFR_VL, MUL VL]
wrffr p0.b
ldr p0, [x16, CONTEXT_P0_VL, MUL VL]
ldr p1, [x16, CONTEXT_P1_VL, MUL VL]
ldr p2, [x16, CONTEXT_P2_VL, MUL VL]
ldr p3, [x16, CONTEXT_P3_VL, MUL VL]
ldr p4, [x16, CONTEXT_P4_VL, MUL VL]
ldr p5, [x16, CONTEXT_P5_VL, MUL VL]
ldr p6, [x16, CONTEXT_P6_VL, MUL VL]
ldr p7, [x16, CONTEXT_P7_VL, MUL VL]
ldr p8, [x16, CONTEXT_P8_VL, MUL VL]
ldr p9, [x16, CONTEXT_P9_VL, MUL VL]
ldr p10, [x16, CONTEXT_P10_VL, MUL VL]
ldr p11, [x16, CONTEXT_P11_VL, MUL VL]
ldr p12, [x16, CONTEXT_P12_VL, MUL VL]
ldr p13, [x16, CONTEXT_P13_VL, MUL VL]
ldr p14, [x16, CONTEXT_P14_VL, MUL VL]
ldr p15, [x16, CONTEXT_P15_VL, MUL VL]
sub x16, x16, CONTEXT_SVE_OFFSET

LOCAL_LABEL(No_Restore_CONTEXT_SVE):
ldr w17, [x16, CONTEXT_ContextFlags]
tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT)

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/pal/src/exception/signal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex

ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT;

#if defined(HOST_AMD64)
#if defined(HOST_AMD64) || defined(HOST_ARM64)
contextFlags |= CONTEXT_XSTATE;
#endif

Expand Down Expand Up @@ -1005,7 +1005,7 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext

ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT;

#if defined(HOST_AMD64)
#if defined(HOST_AMD64) || defined(HOST_ARM64)
contextFlags |= CONTEXT_XSTATE;
#endif

Expand Down
Loading
Loading