Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Optimize Memmove unrolling for constant src #108576

Merged
merged 9 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 97 additions & 1 deletion src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2590,6 +2590,96 @@ AssertionIndex Compiler::optAssertionIsSubtype(GenTree* tree, GenTree* methodTab
return NO_ASSERTION_INDEX;
}

//------------------------------------------------------------------------------
// optVNBasedFoldExpr_Call_Memmove: Unrolls NI_System_SpanHelpers_Memmove/CORINFO_HELP_MEMCPY
//    if possible. This function effectively duplicates LowerCallMemmove.
//    However, unlike LowerCallMemmove, it is able to optimize src into constants with help of VN.
//
// Arguments:
//    call - NI_System_SpanHelpers_Memmove/CORINFO_HELP_MEMCPY call to unroll
//
// Return Value:
//    Returns a new tree or nullptr if nothing is changed.
//
// Notes:
//    - The length argument must be a VN constant, otherwise nothing is changed.
//    - A zero length folds the call into a NOTHING node wrapped with the call's side effects.
//    - Lengths above getUnrollThreshold(Memcpy) are left untouched.
//    - The scratch buffer used to read the source bytes is released on every exit path
//      that follows its allocation.
//
GenTree* Compiler::optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call)
{
    JITDUMP("See if we can optimize NI_System_SpanHelpers_Memmove with help of VN...\n");
    assert(call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove) ||
           call->IsHelperCall(this, CORINFO_HELP_MEMCPY));

    CallArg* dstArg = call->gtArgs.GetUserArgByIndex(0);
    CallArg* srcArg = call->gtArgs.GetUserArgByIndex(1);
    CallArg* lenArg = call->gtArgs.GetUserArgByIndex(2);
    ValueNum lenVN  = vnStore->VNConservativeNormalValue(lenArg->GetNode()->gtVNPair);
    if (!vnStore->IsVNConstant(lenVN))
    {
        JITDUMP("...length is not a constant - bail out.\n");
        return nullptr;
    }

    size_t len = vnStore->CoercedConstantValue<size_t>(lenVN);
    if (len == 0)
    {
        // Memmove(dst, src, 0) -> no-op.
        // Memmove doesn't dereference src/dst pointers if length is 0.
        JITDUMP("...length is 0 -> optimize to no-op.\n");
        return gtWrapWithSideEffects(gtNewNothingNode(), call, GTF_ALL_EFFECT, true);
    }

    if (len > getUnrollThreshold(Memcpy))
    {
        JITDUMP("...length is too big to unroll - bail out.\n");
        return nullptr;
    }

    // If GetImmutableDataFromAddress returns true, it means that the src is a read-only constant.
    // Thus, dst and src do not overlap (if they do - it is UB).
    uint8_t* buffer = getAllocator().allocate<uint8_t>(len);
    if (!GetImmutableDataFromAddress(srcArg->GetNode(), (int)len, buffer))
    {
        JITDUMP("...src is not a constant - fallback to LowerCallMemmove.\n");
        // Release the scratch buffer on the bail-out path as well, mirroring the success path below.
        getAllocator().deallocate(buffer);
        return nullptr;
    }

    // If dstArg is not simple, we replace the arg directly with a temp assignment and
    // continue using that temp - it allows us to reliably extract all side effects.
    GenTree* dst = fgMakeMultiUse(&dstArg->LateNodeRef());

    // Now we're going to emit a chain of STOREIND via COMMA nodes.
    // The very first tree is expected to be side-effects from the original call (including all args).
    GenTree* result = nullptr;
    gtExtractSideEffList(call, &result, GTF_ALL_EFFECT, true);

    unsigned lenRemaining = (unsigned)len;
    while (lenRemaining > 0)
    {
        // Byte offset (from the start of both src and dst) of the chunk stored in this iteration.
        const ssize_t offset = (ssize_t)len - (ssize_t)lenRemaining;

        // Clone dst and add offset if necessary.
        GenTree* currDst = gtCloneExpr(dst);
        if (offset != 0)
        {
            currDst = gtNewOperNode(GT_ADD, dst->TypeGet(), currDst, gtNewIconNode(offset, TYP_I_IMPL));
        }

        // Create an unaligned STOREIND node using the largest possible word size.
        var_types        type     = roundDownMaxType(lenRemaining);
        GenTree*         srcCns   = gtNewGenericCon(type, buffer + offset);
        GenTreeStoreInd* storeInd = gtNewStoreIndNode(type, currDst, srcCns, GTF_IND_UNALIGNED);
        fgUpdateConstTreeValueNumber(srcCns);

        // Merge with the previous result.
        result = result == nullptr ? storeInd : gtNewOperNode(GT_COMMA, TYP_VOID, result, storeInd);

        lenRemaining -= genTypeSize(type);
    }

    JITDUMP("...optimized into STOREIND(s)!:\n");
    DISPTREE(result);
    getAllocator().deallocate(buffer);
    return result;
}

//------------------------------------------------------------------------------
// optVNBasedFoldExpr_Call: Folds given call using VN to a simpler tree.
//
Expand Down Expand Up @@ -2654,6 +2744,11 @@ GenTree* Compiler::optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, G
break;
}

if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove) || call->IsHelperCall(this, CORINFO_HELP_MEMCPY))
{
return optVNBasedFoldExpr_Call_Memmove(call);
}

return nullptr;
}

Expand Down Expand Up @@ -6372,7 +6467,8 @@ Compiler::fgWalkResult Compiler::optVNBasedFoldCurStmt(BasicBlock* block,
break;

case GT_CALL:
if (!tree->AsCall()->IsPure(this))
// The checks aren't for correctness, but to avoid unnecessary work.
if (!tree->AsCall()->IsPure(this) && !tree->AsCall()->IsSpecialIntrinsic())
{
return WALK_CONTINUE;
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5956,6 +5956,7 @@ class Compiler
}
}

// Tries to read 'size' bytes of immutable data located at 'address' into 'pValue'; returns true on success.
bool GetImmutableDataFromAddress(GenTree* address, int size, uint8_t* pValue);
bool GetObjectHandleAndOffset(GenTree* tree, ssize_t* byteOffset, CORINFO_OBJECT_HANDLE* pObj);

// Convert a BYTE which represents the VM's CorInfoGCtype to the JIT's var_types
Expand Down Expand Up @@ -8040,6 +8041,7 @@ class Compiler
GenTree* optVNBasedFoldConstExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
GenTree* optVNBasedFoldExpr(BasicBlock* block, GenTree* parent, GenTree* tree);
GenTree* optVNBasedFoldExpr_Call(BasicBlock* block, GenTree* parent, GenTreeCall* call);
// Tries to unroll a Memmove/CORINFO_HELP_MEMCPY call with the help of VN; returns nullptr if nothing is changed.
GenTree* optVNBasedFoldExpr_Call_Memmove(GenTreeCall* call);
GenTree* optExtractSideEffListFromConst(GenTree* tree);

AssertionIndex GetAssertionCount()
Expand Down
39 changes: 39 additions & 0 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12024,6 +12024,45 @@ bool Compiler::fgGetStaticFieldSeqAndAddress(ValueNumStore* vnStore,
return false;
}

//----------------------------------------------------------------------------------
// GetImmutableDataFromAddress: Attempt to fetch the raw bytes stored at the location
//    described by an address tree. Only immutable (invariant) data is returned.
//
// Arguments:
//    address - tree node representing the address to read from
//    size    - number of bytes to read
//    pValue  - [out] buffer receiving the bytes that were read
//
// Return Value:
//    true if the bytes were obtained, false otherwise
//
bool Compiler::GetImmutableDataFromAddress(GenTree* address, int size, uint8_t* pValue)
{
    assert(vnStore != nullptr);

    ssize_t offs = 0;

    // First possibility: the address is a non-gc object handle (plus an offset).
    CORINFO_OBJECT_HANDLE objHnd    = NO_OBJECT_HANDLE;
    const bool            isObjAddr = GetObjectHandleAndOffset(address, &offs, &objHnd);
    if (isObjAddr && ((size_t)offs <= INT32_MAX))
    {
        assert(objHnd != NO_OBJECT_HANDLE);
        if (!info.compCompHnd->isObjectImmutable(objHnd))
        {
            return false;
        }
        return info.compCompHnd->getObjectContent(objHnd, pValue, size, (int)offs);
    }

    // Second possibility: the address points into a static read-only field (including RVA).
    FieldSeq* fldSeq = nullptr;
    if (!fgGetStaticFieldSeqAndAddress(vnStore, address, &offs, &fldSeq) || ((size_t)offs > INT32_MAX))
    {
        return false;
    }

    CORINFO_FIELD_HANDLE fldHnd = fldSeq->GetFieldHandle();
    if (fldHnd == nullptr)
    {
        return false;
    }
    return info.compCompHnd->getStaticFieldContent(fldHnd, pValue, size, (int)offs);
}

//----------------------------------------------------------------------------------
// GetObjectHandleAndOffset: Try to obtain a constant object handle with an offset from
// the given tree.
Expand Down
Loading