Skip to content

Commit

Permalink
Replace queried AS compaction sizes with full size
Browse files Browse the repository at this point in the history
The spec does not mandate that the compacted size of an acceleration structure (AS) is the same between capture and replay, so we effectively disable compaction by always returning the full AS size when the compacted size is queried.
  • Loading branch information
cmannett85-arm committed Sep 24, 2024
1 parent e68d4cf commit f53a115
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 10 deletions.
104 changes: 104 additions & 0 deletions renderdoc/driver/vulkan/vk_resources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3948,6 +3948,107 @@ void ImgRefs::Split(bool splitAspects, bool splitLevels, bool splitLayers)
areLayersSplit = newSplitLayerCount > 1;
}

// Records replacement results for a run of consecutive queries: values[k] is stored against
// query index (firstQuery + k). Any entries previously recorded for those queries are
// discarded first.
void QueryPoolInfo::Add(uint32_t firstQuery, rdcarray<uint64_t> values)
{
  const size_t newCount = values.size();

  // Drop stale entries covering the same query range before recording the new ones
  Reset(firstQuery, (uint32_t)newCount);

  m_Entries.reserve(m_Entries.size() + newCount);

  uint32_t queryIndex = firstQuery;
  for(size_t k = 0; k < newCount; ++k, ++queryIndex)
    m_Entries.emplace_back(queryIndex, values[k]);

  // Entry::operator< compares the query index, so this keeps the list ordered by index
  std::sort(m_Entries.begin(), m_Entries.end());
}

// Discards every recorded entry whose query index lies in [firstQuery, firstQuery + queryCount).
void QueryPoolInfo::Reset(uint32_t firstQuery, uint32_t queryCount)
{
  const uint32_t rangeEnd = firstQuery + queryCount;

  m_Entries.removeIf([firstQuery, rangeEnd](const auto &entry) {
    if(entry.index < firstQuery)
      return false;

    return entry.index < rangeEnd;
  });
}

void QueryPoolInfo::Replace(uint32_t firstQuery, uint32_t queryCount, void *pData,
VkDeviceSize stride, VkQueryResultFlags flags) const
{
const auto writeEntry = [&](Entry queryPoolInfoEntry) {
const size_t num_bytes = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;

byte *pStart = (byte *)pData + (queryPoolInfoEntry.index * stride);
memcpy(pStart, &queryPoolInfoEntry.value, num_bytes);
};

Replace(firstQuery, queryCount, writeEntry);
}

void QueryPoolInfo::Replace(uint32_t firstQuery, uint32_t queryCount,
const std::function<void(uint32_t, rdcarray<uint64_t>)> &writeEntry) const
{
rdcarray<Entry> entries;
entries.reserve(queryCount);

// Swap out any AS compaction sizes with the replacements
Replace(firstQuery, queryCount, [&](Entry entry) { entries.push_back(entry); });

std::sort(entries.begin(), entries.end());

// Now batch into contiguous ranges and dispatch
for(size_t i = 0; i < entries.size();)
{
uint32_t queryIndex = entries[i].index;
rdcarray<uint64_t> batch;

while(queryIndex == entries[++i].index)
{
batch.push_back(entries[i].value);
++queryIndex;
}

writeEntry(queryIndex, std::move(batch));
}
}

bool QueryPoolInfo::HasReplacementEntries(uint32_t firstQuery, uint32_t queryCount) const
{
uint32_t start, end;
rdctie(start, end) = GetIntersection(firstQuery, queryCount);
return start <= end;
}

// Computes the inclusive overlap between the requested query range and the span from the
// lowest to the highest recorded query index.
//
// Returns {start, end}; start > end means there is no overlap. Because the recorded entries
// are sparse, a valid overlap does not guarantee that every (or any) index inside it has an
// entry.
rdcpair<uint32_t, uint32_t> QueryPoolInfo::GetIntersection(uint32_t firstQuery,
                                                           uint32_t queryCount) const
{
  // Nothing recorded, or an empty request (which would otherwise underflow below)
  if(m_Entries.empty() || queryCount == 0)
    return {1, 0};    // Invalid

  // Clamp the last requested index so that firstQuery + queryCount - 1 cannot wrap
  const uint32_t maxSpan = 0xFFFFFFFFU - firstQuery;
  const uint32_t lastQuery = firstQuery + RDCMIN(queryCount - 1, maxSpan);

  const uint32_t start = RDCMAX(firstQuery, (uint32_t)m_Entries.front().index);
  const uint32_t end = RDCMIN(lastQuery, (uint32_t)m_Entries.back().index);
  return {start, end};
}

void QueryPoolInfo::Replace(uint32_t firstQuery, uint32_t queryCount,
const std::function<void(Entry)> &writeEntry) const
{
if(!m_Entries.empty())
{
// Find the intersection of the two query ranges
uint32_t start, end;
rdctie(start, end) = GetIntersection(firstQuery, queryCount);
if(end < start)
return;

uint32_t j = 0;
for(uint32_t i = start; i < end; ++i)
{
// The indices are sparse but ordered
while(i != m_Entries[j].index)
{
++j;
}

writeEntry(m_Entries[j]);
}
}
}

VkResourceRecord::~VkResourceRecord()
{
// bufferviews and imageviews have non-owning pointers to the sparseinfo struct
Expand Down Expand Up @@ -3996,6 +4097,9 @@ VkResourceRecord::~VkResourceRecord()
if(resType == eResCommandPool)
SAFE_DELETE(cmdPoolInfo);

if(resType == eResQueryPool)
SAFE_DELETE(queryPoolInfo);

if(resType == eResAccelerationStructureKHR && accelerationStructureInfo)
accelerationStructureInfo->Release();
}
Expand Down
36 changes: 36 additions & 0 deletions renderdoc/driver/vulkan/vk_resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -2236,6 +2236,41 @@ inline FrameRefType MarkMemoryReferenced(std::unordered_map<ResourceId, MemRefs>
return MarkMemoryReferenced(memRefs, mem, offset, size, refType, ComposeFrameRefs);
}

// Used to replace QueryPool results.
// Holds a list of (query index, value) entries. Add() records values for consecutive queries
// and the Replace() overloads hand the recorded values back for a requested query range.
class QueryPoolInfo
{
public:
// Records values[k] as the replacement result for query (firstQuery + k). Entries previously
// recorded for those queries are discarded first, and the list is re-sorted by query index.
void Add(uint32_t firstQuery, rdcarray<uint64_t> values);

// Discards recorded entries whose index is in [firstQuery, firstQuery + queryCount).
void Reset(uint32_t firstQuery, uint32_t queryCount);

// Copies recorded values for the given query range into the result buffer pData, whose
// results are 'stride' bytes apart. Each value is written as 8 bytes when flags contains
// VK_QUERY_RESULT_64_BIT and as 4 bytes otherwise.
void Replace(uint32_t firstQuery, uint32_t queryCount, void *pData, VkDeviceSize stride,
VkQueryResultFlags flags) const;

// Calls writeEntry with matching contiguous entries, buffered into an array.
void Replace(uint32_t firstQuery, uint32_t queryCount,
const std::function<void(uint32_t, rdcarray<uint64_t>)> &writeEntry) const;

// Reports whether the recorded entries overlap the given query range, i.e. whether Replace()
// may have something to write for it.
bool HasReplacementEntries(uint32_t firstQuery, uint32_t queryCount) const;

private:
// A single recorded replacement: the query index and the value to report for it.
struct Entry
{
Entry(uint32_t i, uint64_t v) : index(i), value(v) {}
// Orders entries by query index only; the value does not take part in the comparison.
bool operator<(Entry other) const { return index < other.index; }

uint32_t index;
uint64_t value;
};

// Returns the inclusive (start, end) overlap between the requested query range and the span
// from the first to the last recorded index. start > end means there is no overlap.
rdcpair<uint32_t, uint32_t> GetIntersection(uint32_t firstQuery, uint32_t queryCount) const;

// Calls writeEntry for recorded entries inside the requested query range.
void Replace(uint32_t firstQuery, uint32_t queryCount,
const std::function<void(Entry)> &writeEntry) const;

// Recorded replacements; Add() keeps this sorted by query index.
rdcarray<Entry> m_Entries;
};

struct DescUpdateTemplate;
struct ImageLayouts;
struct VkAccelerationStructureInfo;
Expand Down Expand Up @@ -2332,6 +2367,7 @@ struct VkResourceRecord : public ResourceRecord
DescPoolInfo *descPoolInfo; // only for descriptor pools
CmdPoolInfo *cmdPoolInfo; // only for command pools
uint32_t queueFamilyIndex; // only for queues
QueryPoolInfo *queryPoolInfo; // only for query pools
VkAccelerationStructureInfo *accelerationStructureInfo; // only for acceleration structures
};

Expand Down
55 changes: 47 additions & 8 deletions renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4546,6 +4546,37 @@ void WrappedVulkan::vkCmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQ
}
record->MarkBufferFrameReferenced(GetRecord(destBuffer), destOffset, size,
eFrameRef_PartialWrite);

const QueryPoolInfo *qpInfo = GetRecord(queryPool)->queryPoolInfo;
if(qpInfo->HasReplacementEntries(firstQuery, queryCount))
{
// We want to record these commands into the capture so they are replayed
VkMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
NULL,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
};
vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 1, &barrier, 0, VK_NULL_HANDLE, 0,
VK_NULL_HANDLE);

qpInfo->Replace(
firstQuery, queryCount, [&](uint32_t queryIndexStart, rdcarray<uint64_t> results) {
const size_t size = (size_t)(results.size() * destStride);
RDCASSERT(size < (1 << 16));

const size_t resultSize = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;
byte *tmp = new byte[size];
size_t i = 0;
for(byte *ptr = tmp; ptr < (ptr + size); ptr += destStride)
memcpy(ptr, &results[i++], resultSize);

vkCmdUpdateBuffer(commandBuffer, destBuffer, destOffset + (queryIndexStart * destStride),
size, (const uint32_t *)tmp);
delete[] tmp;
});
}
}
}

Expand Down Expand Up @@ -8018,27 +8049,35 @@ void WrappedVulkan::vkCmdWriteAccelerationStructuresPropertiesKHR(
for(uint32_t i = 0; i < accelerationStructureCount; ++i)
unwrappedASes[i] = Unwrap(pAccelerationStructures[i]);

// The compacted size can vary between capture and replay, so to ensure we always have enough
// memory we return the full AS size
if(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR)
{
auto &qpInfo = GetRecord(queryPool)->queryPoolInfo;

rdcarray<uint64_t> sizes;
sizes.reserve(accelerationStructureCount);
for(uint32_t i = 0; i < accelerationStructureCount; ++i)
sizes.push_back(GetRecord(pAccelerationStructures[i])->memSize);

qpInfo->Add(firstQuery, std::move(sizes));
}

ObjDisp(commandBuffer)
->CmdWriteAccelerationStructuresPropertiesKHR(Unwrap(commandBuffer),
accelerationStructureCount, unwrappedASes,
queryType, Unwrap(queryPool), firstQuery);
}

// CPU-side VK_KHR_acceleration_structure calls are not supported for now
// NOTE(review): as shown here the body has two return statements back to back, so the final
// `return VK_ERROR_UNKNOWN;` can never execute. This looks like the removed and added lines of
// a diff displayed together - confirm which of the two bodies is the intended one.
VkResult WrappedVulkan::vkWriteAccelerationStructuresPropertiesKHR(
VkDevice device, uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType,
size_t dataSize, void *pData, size_t stride)
{
// Build an array of unwrapped acceleration structure handles in memory from GetTempMemory()
byte *memory = GetTempMemory(sizeof(VkAccelerationStructureKHR) * accelerationStructureCount);
VkAccelerationStructureKHR *unwrappedASes = (VkAccelerationStructureKHR *)memory;
for(uint32_t i = 0; i < accelerationStructureCount; ++i)
unwrappedASes[i] = Unwrap(pAccelerationStructures[i]);

// Forward the call through the device's dispatch table using the unwrapped handles
return ObjDisp(device)->WriteAccelerationStructuresPropertiesKHR(
Unwrap(device), accelerationStructureCount, unwrappedASes, queryType, dataSize, pData, stride);
// NOTE(review): unreachable as written, see the note above the function
return VK_ERROR_UNKNOWN;
}

// CPU-side VK_KHR_acceleration_structure calls are not supported for now
VkResult WrappedVulkan::vkCopyAccelerationStructureKHR(VkDevice device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureInfoKHR *pInfo)
Expand Down
12 changes: 10 additions & 2 deletions renderdoc/driver/vulkan/wrappers/vk_misc_funcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1649,6 +1649,8 @@ VkResult WrappedVulkan::vkCreateQueryPool(VkDevice device, const VkQueryPoolCrea
}

VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pQueryPool);
record->queryPoolInfo = new QueryPoolInfo();

record->AddChunk(chunk);
}
else
Expand All @@ -1665,8 +1667,12 @@ VkResult WrappedVulkan::vkGetQueryPoolResults(VkDevice device, VkQueryPool query
size_t dataSize, void *pData, VkDeviceSize stride,
VkQueryResultFlags flags)
{
return ObjDisp(device)->GetQueryPoolResults(Unwrap(device), Unwrap(queryPool), firstQuery,
queryCount, dataSize, pData, stride, flags);
VkResult result = ObjDisp(device)->GetQueryPoolResults(
Unwrap(device), Unwrap(queryPool), firstQuery, queryCount, dataSize, pData, stride, flags);

GetRecord(queryPool)->queryPoolInfo->Replace(firstQuery, queryCount, pData, stride, flags);

return result;
}

template <typename SerialiserType>
Expand Down Expand Up @@ -1699,6 +1705,8 @@ void WrappedVulkan::vkResetQueryPool(VkDevice device, VkQueryPool queryPool, uin
SERIALISE_TIME_CALL(
ObjDisp(device)->ResetQueryPool(Unwrap(device), Unwrap(queryPool), firstQuery, queryCount));

GetRecord(queryPool)->queryPoolInfo->Reset(firstQuery, queryCount);

if(IsActiveCapturing(m_State))
{
CACHE_THREAD_SERIALISER();
Expand Down

0 comments on commit f53a115

Please sign in to comment.