-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Vulkan AS rebuild-on-replay: Copy AS input buffers on build #3426
base: v1.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3948,6 +3948,107 @@ void ImgRefs::Split(bool splitAspects, bool splitLevels, bool splitLayers) | |
areLayersSplit = newSplitLayerCount > 1; | ||
} | ||
|
||
void QueryPoolInfo::Add(uint32_t firstQuery, rdcarray<uint64_t> values) | ||
{ | ||
Reset(firstQuery, (uint32_t)values.size()); | ||
|
||
m_Entries.reserve(m_Entries.size() + values.size()); | ||
for(uint64_t value : values) | ||
m_Entries.emplace_back(firstQuery++, value); | ||
|
||
std::sort(m_Entries.begin(), m_Entries.end()); | ||
} | ||
|
||
void QueryPoolInfo::Reset(uint32_t firstQuery, uint32_t queryCount) | ||
{ | ||
m_Entries.removeIf([&](const auto &entry) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please avoid [inline code reference lost in capture]. Also, please use explicit lambda capture rather than unconditional capture, to be clear about what is stored in the lambda. |
||
return (entry.index >= firstQuery) && (entry.index < (firstQuery + queryCount)); | ||
}); | ||
} | ||
|
||
void QueryPoolInfo::Replace(uint32_t firstQuery, uint32_t queryCount, void *pData, | ||
VkDeviceSize stride, VkQueryResultFlags flags) const | ||
{ | ||
const auto writeEntry = [&](Entry queryPoolInfoEntry) { | ||
const size_t num_bytes = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4; | ||
|
||
byte *pStart = (byte *)pData + (queryPoolInfoEntry.index * stride); | ||
memcpy(pStart, &queryPoolInfoEntry.value, num_bytes); | ||
}; | ||
|
||
Replace(firstQuery, queryCount, writeEntry); | ||
} | ||
|
||
void QueryPoolInfo::Replace(uint32_t firstQuery, uint32_t queryCount, | ||
const std::function<void(uint32_t, rdcarray<uint64_t>)> &writeEntry) const | ||
{ | ||
rdcarray<Entry> entries; | ||
entries.reserve(queryCount); | ||
|
||
// Swap out any AS compaction sizes with the replacements | ||
Replace(firstQuery, queryCount, [&](Entry entry) { entries.push_back(entry); }); | ||
|
||
std::sort(entries.begin(), entries.end()); | ||
|
||
// Now batch into contiguous ranges and dispatch | ||
for(size_t i = 0; i < entries.size();) | ||
{ | ||
uint32_t queryIndex = entries[i].index; | ||
rdcarray<uint64_t> batch; | ||
|
||
while(queryIndex == entries[++i].index) | ||
{ | ||
batch.push_back(entries[i].value); | ||
++queryIndex; | ||
} | ||
|
||
writeEntry(queryIndex, std::move(batch)); | ||
} | ||
} | ||
|
||
bool QueryPoolInfo::HasReplacementEntries(uint32_t firstQuery, uint32_t queryCount) const | ||
{ | ||
uint32_t start, end; | ||
rdctie(start, end) = GetIntersection(firstQuery, queryCount); | ||
return start <= end; | ||
} | ||
|
||
rdcpair<uint32_t, uint32_t> QueryPoolInfo::GetIntersection(uint32_t firstQuery, | ||
uint32_t queryCount) const | ||
{ | ||
if(m_Entries.empty()) | ||
return {1, 0}; // Invalid | ||
|
||
const uint32_t start = RDCMAX(firstQuery, m_Entries.front().index); | ||
const uint32_t end = RDCMIN(firstQuery + queryCount - 1, (uint32_t)m_Entries.back().index); | ||
return {start, end}; | ||
} | ||
|
||
void QueryPoolInfo::Replace(uint32_t firstQuery, uint32_t queryCount, | ||
const std::function<void(Entry)> &writeEntry) const | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason for this function to be separated out? It looks like it's only called from the other `Replace` overloads. |
||
{ | ||
if(!m_Entries.empty()) | ||
{ | ||
// Find the intersection of the two query ranges | ||
uint32_t start, end; | ||
rdctie(start, end) = GetIntersection(firstQuery, queryCount); | ||
if(end < start) | ||
return; | ||
|
||
uint32_t j = 0; | ||
for(uint32_t i = start; i < end; ++i) | ||
{ | ||
// The indices are sparse but ordered | ||
while(i != m_Entries[j].index) | ||
{ | ||
++j; | ||
} | ||
|
||
writeEntry(m_Entries[j]); | ||
} | ||
} | ||
} | ||
|
||
VkResourceRecord::~VkResourceRecord() | ||
{ | ||
// bufferviews and imageviews have non-owning pointers to the sparseinfo struct | ||
|
@@ -3996,8 +4097,11 @@ VkResourceRecord::~VkResourceRecord() | |
if(resType == eResCommandPool) | ||
SAFE_DELETE(cmdPoolInfo); | ||
|
||
if(resType == eResAccelerationStructureKHR) | ||
SAFE_DELETE(accelerationStructureInfo); | ||
if(resType == eResQueryPool) | ||
SAFE_DELETE(queryPoolInfo); | ||
|
||
if(resType == eResAccelerationStructureKHR && accelerationStructureInfo) | ||
accelerationStructureInfo->Release(); | ||
} | ||
|
||
void VkResourceRecord::MarkImageFrameReferenced(VkResourceRecord *img, const ImageRange &range, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2236,6 +2236,41 @@ inline FrameRefType MarkMemoryReferenced(std::unordered_map<ResourceId, MemRefs> | |
return MarkMemoryReferenced(memRefs, mem, offset, size, refType, ComposeFrameRefs); | ||
} | ||
|
||
// Used to replace QueryPool results | ||
class QueryPoolInfo | ||
{ | ||
public: | ||
void Add(uint32_t firstQuery, rdcarray<uint64_t> values); | ||
|
||
void Reset(uint32_t firstQuery, uint32_t queryCount); | ||
|
||
void Replace(uint32_t firstQuery, uint32_t queryCount, void *pData, VkDeviceSize stride, | ||
VkQueryResultFlags flags) const; | ||
|
||
// Calls writeEntry with matching contiguous entries, buffered into an array. | ||
void Replace(uint32_t firstQuery, uint32_t queryCount, | ||
const std::function<void(uint32_t, rdcarray<uint64_t>)> &writeEntry) const; | ||
|
||
bool HasReplacementEntries(uint32_t firstQuery, uint32_t queryCount) const; | ||
|
||
private: | ||
struct Entry | ||
{ | ||
Entry(uint32_t i, uint64_t v) : index(i), value(v) {} | ||
bool operator<(Entry other) const { return index < other.index; } | ||
|
||
uint32_t index; | ||
uint64_t value; | ||
}; | ||
|
||
rdcpair<uint32_t, uint32_t> GetIntersection(uint32_t firstQuery, uint32_t queryCount) const; | ||
|
||
void Replace(uint32_t firstQuery, uint32_t queryCount, | ||
const std::function<void(Entry)> &writeEntry) const; | ||
|
||
rdcarray<Entry> m_Entries; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thinking about this more generally, query pools are single-typed so we know at creation time whether we're either going to replace all results or none of them (when the query pool type is compacted size). Do you have any empirical data or intuition on how large those query pools are? I would assume either 1, some small fixed count (UE5 seems to use 64, configurable), or maybe the application's expected max number of BLASs (some thousands?). Unless that seems unreasonable or you're otherwise worried about having a huge number of possible queries with only a few actually being valid, I think this code could be simplified a good deal by a) only allocating this struct when needed for patching and b) pre-allocating all the entries up front. Then you don't have to worry about intersecting, resizing this array on the fly, etc. We just read all our values out of it. |
||
}; | ||
|
||
struct DescUpdateTemplate; | ||
struct ImageLayouts; | ||
struct VkAccelerationStructureInfo; | ||
|
@@ -2332,6 +2367,7 @@ struct VkResourceRecord : public ResourceRecord | |
DescPoolInfo *descPoolInfo; // only for descriptor pools | ||
CmdPoolInfo *cmdPoolInfo; // only for command pools | ||
uint32_t queueFamilyIndex; // only for queues | ||
QueryPoolInfo *queryPoolInfo; // only for query pools | ||
VkAccelerationStructureInfo *accelerationStructureInfo; // only for acceleration structures | ||
}; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This state seems redundant here - we don't do this during replay, and we don't need to differentiate between background and active capturing. Doing a quick look through unless I'm missing something this value is (ultimately) not used.
Same applies below to
CopyAccelerationStructure
.