From 01e60e05adc07933a3b6d771d2986f0028b4d7bc Mon Sep 17 00:00:00 2001 From: Kishimisu Date: Sun, 2 Jun 2024 03:37:31 +0200 Subject: [PATCH] Update to 1.0.4 --- dist/cjs/radix-sort-cjs.js | 575 +++++++++++++++++++++++---- dist/cjs/radix-sort-cjs.js.map | 2 +- dist/esm/radix-sort-esm.js | 575 +++++++++++++++++++++++---- dist/esm/radix-sort-esm.js.map | 2 +- dist/umd/radix-sort-umd.js | 610 +++++++++++++++++++++-------- dist/umd/radix-sort-umd.js.map | 2 +- dist/umd/radix-sort-umd.min.js | 2 +- dist/umd/radix-sort-umd.min.js.map | 2 +- package-lock.json | 4 +- package.json | 2 +- 10 files changed, 1450 insertions(+), 326 deletions(-) diff --git a/dist/cjs/radix-sort-cjs.js b/dist/cjs/radix-sort-cjs.js index c6bed33..331b570 100644 --- a/dist/cjs/radix-sort-cjs.js +++ b/dist/cjs/radix-sort-cjs.js @@ -207,6 +207,45 @@ fn add_block_sums( items[ELM_ID + 1] += blockSum; }`; +/** + * Find the best dispatch size x and y dimensions to minimize unused workgroups + * + * @param {GPUDevice} device - The GPU device + * @param {int} workgroup_count - Number of workgroups to dispatch + * @returns + */ +function find_optimal_dispatch_size(device, workgroup_count) { + const dispatchSize = { + x: workgroup_count, + y: 1 + }; + + if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) { + const x = Math.floor(Math.sqrt(workgroup_count)); + const y = Math.ceil(workgroup_count / x); + + dispatchSize.x = x; + dispatchSize.y = y; + } + + return dispatchSize +} + +function create_buffer_from_data({device, label, data, usage = 0}) { + const dispatchSizes = device.createBuffer({ + label: label, + usage: usage, + size: data.length * 4, + mappedAtCreation: true + }); + + const dispatchData = new Uint32Array(dispatchSizes.getMappedRange()); + dispatchData.set(data); + dispatchSizes.unmap(); + + return dispatchSizes +} + class PrefixSumKernel { /** * Perform a parallel prefix sum on the given data buffer @@ -245,31 +284,14 @@ class PrefixSumKernel { this.create_pass_recursive(data, count); } - find_optimal_dispatch_size(item_count) { - const { maxComputeWorkgroupsPerDimension } = this.device.limits; - - let workgroup_count = Math.ceil(item_count / this.items_per_workgroup); - let x = workgroup_count; - let y = 1; - - if (workgroup_count > maxComputeWorkgroupsPerDimension) { - x = Math.floor(Math.sqrt(workgroup_count)); - y = Math.ceil(workgroup_count / x); - workgroup_count = x * y; - } - - return { - workgroup_count, - dispatchSize: { x, y }, - } - } - create_pass_recursive(data, count) { // Find best dispatch x and y dimensions to minimize unused threads - const { workgroup_count, dispatchSize } = this.find_optimal_dispatch_size(count); + const workgroup_count = Math.ceil(count / this.items_per_workgroup); + const dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count); // Create buffer for block sums const blockSumBuffer = this.device.createBuffer({ + label: 'prefix-sum-block-sum', size: workgroup_count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); @@ -350,11 +372,17 @@ class PrefixSumKernel { } } - dispatch(pass) { - for (const { pipeline, bindGroup, dispatchSize } of this.pipelines) { + get_dispatch_chain() { + return this.pipelines.flatMap(p => [ p.dispatchSize.x, p.dispatchSize.y, 1 ]) + } + + dispatch(pass, dispatchSize, offset = 0) { + for (let i = 0; i < this.pipelines.length; i++) { + const { pipeline, bindGroup } = this.pipelines[i]; + pass.setPipeline(pipeline); pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1); + pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4); } } } @@ -607,6 +635,263 @@ fn radix_sort_reorder( outputValues[sorted_position] = v; }`; +const checkSortSource = (isFirstPass = false, isLastPass = false, isFullCheck = false) => /* wgsl */ ` + +@group(0) @binding(0) var input: array; +@group(0) @binding(1) var output: array; +@group(0) @binding(2) var original: array; +@group(0) @binding(3) var is_sorted: u32; + +override WORKGROUP_SIZE_X: u32; +override WORKGROUP_SIZE_Y: u32; +override THREADS_PER_WORKGROUP: u32; +override ELEMENT_COUNT: u32; +override START_ELEMENT: u32; + +var s_data: array; + +@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1) +fn check_sort( + @builtin(workgroup_id) w_id: vec3, + @builtin(num_workgroups) w_dim: vec3, + @builtin(local_invocation_index) TID: u32, // Local thread ID +) { + let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x; + let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT; + let GID = TID + WID; // Global thread ID + + // Load data into shared memory + ${ isFirstPass ? first_pass_load_data : "s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);" } + + // Perform parallel reduction + for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { + workgroupBarrier(); + if (TID % (2u * d) == 0u) { + s_data[TID] += s_data[TID + d]; + } + } + workgroupBarrier(); + + // Write reduction result + ${ isLastPass ? last_pass(isFullCheck) : write_reduction_result } +}`; + +const write_reduction_result = /* wgsl */ ` + if (TID == 0) { + output[WORKGROUP_ID] = s_data[0]; + } +`; + +const first_pass_load_data = /* wgsl */ ` + let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1; + + // Load current element into shared memory + // Also load next element for comparison + let elm = select(0u, input[GID], GID < ELEMENT_COUNT); + let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1); + s_data[TID] = elm; + workgroupBarrier(); + + s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next); +`; + +const last_pass = (isFullCheck) => /* wgsl */ ` + let fullDispatchLength = arrayLength(&output); + let dispatchIndex = TID * 3; + + if (dispatchIndex >= fullDispatchLength) { + return; + } + + ${isFullCheck ? last_pass_full : last_pass_fast} +`; + +const last_pass_fast = /* wgsl */ ` + output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u); +`; + +const last_pass_full = /* wgsl */ ` + if (TID == 0 && s_data[0] == 0) { + is_sorted = 1u; + } + + output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0); +`; + +class CheckSortKernel { + /** + * CheckSortKernel - Performs a parralel reduction to check if an array is sorted. + * + * @param {GPUDevice} device + * @param {GPUBuffer} data - The buffer containing the data to check + * @param {GPUBuffer} result - The result dispatch size buffer + * @param {GPUBuffer} original - The original dispatch size buffer + * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted + * @param {number} count - The number of elements to check + * @param {number} start - The index to start checking from + * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check + * @param {object} workgroup_size - The workgroup size in x and y dimensions + */ + constructor({ + device, + data, + result, + original, + is_sorted, + count, + start = 0, + full_check = true, + workgroup_size = { x: 16, y: 16 }, + }) { + this.device = device; + this.count = count; + this.start = start; + this.full_check = full_check; + this.workgroup_size = workgroup_size; + this.threads_per_workgroup = workgroup_size.x * workgroup_size.y; + + this.pipelines = []; + + this.buffers = { + data, + result, + original, + is_sorted, + outputs: [] + }; + + this.create_passes_recursive(data, count); + } + + // Find the best dispatch size for each pass to minimize unused workgroups + static find_optimal_dispatch_chain(device, item_count, workgroup_size) { + const threads_per_workgroup = workgroup_size.x * workgroup_size.y; + const sizes = []; + + do { + // Number of workgroups required to process all items + const target_workgroup_count = Math.ceil(item_count / threads_per_workgroup); + + // Optimal dispatch size and updated workgroup count + const dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count); + + sizes.push(dispatchSize.x, dispatchSize.y, 1); + item_count = target_workgroup_count; + } while (item_count > 1) + + return sizes + } + + create_passes_recursive(buffer, count, passIndex = 0) { + const workgroup_count = Math.ceil(count / this.threads_per_workgroup); + + const isFirstPass = passIndex === 0; + const isLastPass = workgroup_count <= 1; + + const outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({ + label: `check-sort-${this.full_check ? 'full' : 'fast'}-${passIndex}`, + size: workgroup_count * 4, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + }); + + const bindGroupLayout = this.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' } + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' } + }, + // Last pass bindings + ...(isLastPass ? [{ + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' } + }] : []), + ] + }); + + const bindGroup = this.device.createBindGroup({ + layout: bindGroupLayout, + entries: [ + { + binding: 0, + resource: { buffer: buffer } + }, + { + binding: 1, + resource: { buffer: outputBuffer } + }, + // Last pass buffers + ...(isLastPass ? [{ + binding: 2, + resource: { buffer: this.buffers.original } + }, { + binding: 3, + resource: { buffer: this.buffers.is_sorted } + }] : []), + ] + }); + + const pipelineLayout = this.device.createPipelineLayout({ + bindGroupLayouts: [bindGroupLayout] + }); + + const element_count = isFirstPass ? this.start + count : count; + const start_element = isFirstPass ? this.start : 0; + + const checkSortPipeline = this.device.createComputePipeline({ + layout: pipelineLayout, + compute: { + module: this.device.createShaderModule({ + code: checkSortSource(isFirstPass, isLastPass, this.full_check), + label: 'check-sort', + }), + entryPoint: 'check_sort', + constants: { + 'WORKGROUP_SIZE_X': this.workgroup_size.x, + 'WORKGROUP_SIZE_Y': this.workgroup_size.y, + 'THREADS_PER_WORKGROUP': this.threads_per_workgroup, + 'ELEMENT_COUNT': element_count, + 'START_ELEMENT': start_element, + }, + } + }); + + this.buffers.outputs.push(outputBuffer); + this.pipelines.push({ pipeline: checkSortPipeline, bindGroup }); + + if (!isLastPass) { + this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1); + } + } + + dispatch(pass, dispatchSize, offset = 0) { + for (let i = 0; i < this.pipelines.length; i++) { + const { pipeline, bindGroup } = this.pipelines[i]; + + const dispatchIndirect = (this.full_check || i < this.pipelines.length - 1); + + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + + if (dispatchIndirect) + pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4); + else + // Only the last dispatch of the fast check kernel is constant to (1, 1, 1) + pass.dispatchWorkgroups(1, 1, 1); + } + } +} + class RadixSortKernel { /** * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values @@ -621,6 +906,7 @@ class RadixSortKernel { * @param {number} count - Number of elements to sort * @param {number} bit_count - Number of bits per element (default: 32) * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two + * @param {boolean} check_order - Enable "order checking" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false) * @param {boolean} local_shuffle - Enable "local shuffling" optimization for the radix sort kernel (default: false) * @param {boolean} avoid_bank_conflicts - Enable "avoiding bank conflicts" optimization for the prefix sum kernel (default: false) */ @@ -631,19 +917,22 @@ class RadixSortKernel { count, bit_count = 32, workgroup_size = { x: 16, y: 16 }, + check_order = false, local_shuffle = false, avoid_bank_conflicts = false, } = {}) { if (device == null) throw new Error('No device provided') if (keys == null) throw new Error('No keys buffer provided') if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter') - if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter') + if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter') if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter') + if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4') this.device = device; this.count = count; this.bit_count = bit_count; this.workgroup_size = workgroup_size; + this.check_order = check_order; this.local_shuffle = local_shuffle; this.avoid_bank_conflicts = avoid_bank_conflicts; @@ -657,34 +946,16 @@ class RadixSortKernel { this.shaderModules = {}; // GPUShaderModules this.buffers = {}; // GPUBuffers this.pipelines = []; // List of passes + this.kernels = {}; // Find best dispatch x and y dimensions to minimize unused threads - this.find_optimal_dispatch_size(); + this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count); // Create shader modules from wgsl code this.create_shader_modules(); - - // Create GPU buffers - this.create_buffers(keys, values); // Create multi-pass pipelines - this.create_pipelines(); - } - - find_optimal_dispatch_size() { - const { maxComputeWorkgroupsPerDimension } = this.device.limits; - - this.dispatchSize = { - x: this.workgroup_count, - y: 1 - }; - - if (this.workgroup_count > maxComputeWorkgroupsPerDimension) { - const x = Math.floor(Math.sqrt(this.workgroup_count)); - const y = Math.ceil(this.workgroup_count / x); - - this.dispatchSize = { x, y }; - } + this.create_pipelines(keys, values); } create_shader_modules() { @@ -709,26 +980,149 @@ class RadixSortKernel { }; } - create_buffers(keys, values) { + create_pipelines(keys, values) { + // Block prefix sum kernel + const { prefixSumKernel, prefixBlockSumBuffer } = this.create_prefix_sum_kernel(); + + // Indirect dispatch buffers + const dispatchData = this.calculate_dispatch_sizes(prefixSumKernel); + + // GPU buffers + this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData); + + // Check sort kernels + this.create_check_sort_kernels(this.buffers.keys, dispatchData); + + // Radix sort passes for every 2 bits + for (let bit = 0; bit < this.bit_count; bit += 2) { + // Swap buffers every pass + const even = (bit % 4 == 0); + const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys; + const inValues = even ? this.buffers.values : this.buffers.tmpValues; + const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys; + const outValues = even ? this.buffers.tmpValues : this.buffers.values; + + // Compute local prefix sums and block sums + const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit); + + // Reorder keys and values + const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit); + + this.pipelines.push({ blockSumPipeline, reorderPipeline }); + } + } + + create_prefix_sum_kernel() { + // Prefix Block Sum buffer (4 element per workgroup) + const prefixBlockSumBuffer = this.device.createBuffer({ + label: 'radix-sort-prefix-block-sum', + size: this.prefix_block_workgroup_count * 4, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + }); + + // Create block prefix sum kernel + const prefixSumKernel = new PrefixSumKernel({ + device: this.device, + data: prefixBlockSumBuffer, + count: this.prefix_block_workgroup_count, + workgroup_size: this.workgroup_size, + avoid_bank_conflicts: this.avoid_bank_conflicts, + }); + + this.kernels.prefixSum = prefixSumKernel; + + return { prefixSumKernel, prefixBlockSumBuffer } + } + + calculate_dispatch_sizes(prefixSumKernel) { + // Prefix sum dispatch sizes + const prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain(); + + // Check sort element count (fast/full) + const check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4); + const check_sort_full_count = this.count - check_sort_fast_count; + const start_full = check_sort_fast_count - 1; + + // Check sort dispatch sizes + const dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size); + const dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size); + + // Initial dispatch sizes + const initialDispatch = [ + this.dispatchSize.x, this.dispatchSize.y, 1, // Radix Sort + Reorder + ...dispatchSizesFast.slice(0, 3), // Check sort fast + ...prefixSumDispatchSize // Prefix Sum + ]; + + // Dispatch offsets in main buffer + this.dispatchOffsets = { + radix_sort: 0, + check_sort_fast: 3 * 4, + prefix_sum: 6 * 4 + }; + + return { + initialDispatch, + dispatchSizesFull, + check_sort_fast_count, + check_sort_full_count, + start_full + } + } + + create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) { // Keys and values double buffering const tmpKeysBuffer = this.device.createBuffer({ + label: 'radix-sort-tmp-keys', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({ + label: 'radix-sort-tmp-values', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); // Local Prefix Sum buffer (1 element per item) const localPrefixSumBuffer = this.device.createBuffer({ + label: 'radix-sort-local-prefix-sum', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); - // Prefix Block Sum buffer (4 element per workgroup) - const prefixBlockSumBuffer = this.device.createBuffer({ - size: this.prefix_block_workgroup_count * 4, + // Dispatch sizes (radix sort, check sort, prefix sum) + const dispatchBuffer = create_buffer_from_data({ + device: this.device, + label: 'radix-sort-dispatch-size', + data: dispatchData.initialDispatch, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT + }); + const originalDispatchBuffer = create_buffer_from_data({ + device: this.device, + label: 'radix-sort-dispatch-size-original', + data: dispatchData.initialDispatch, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + }); + + // Dispatch sizes (full sort) + const checkSortFullDispatchBuffer = create_buffer_from_data({ + label: 'check-sort-full-dispatch-size', + device: this.device, + data: dispatchData.dispatchSizesFull, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT + }); + const checkSortFullOriginalDispatchBuffer = create_buffer_from_data({ + label: 'check-sort-full-dispatch-size-original', + device: this.device, + data: dispatchData.dispatchSizesFull, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + }); + + // Flag to tell if the data is sorted + const isSortedBuffer = create_buffer_from_data({ + label: 'is-sorted', + device: this.device, + data: new Uint32Array([0]), usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); @@ -739,36 +1133,56 @@ class RadixSortKernel { tmpValues: tmpValuesBuffer, localPrefixSum: localPrefixSumBuffer, prefixBlockSum: prefixBlockSumBuffer, + + dispatchSize: dispatchBuffer, + originalDispatchSize: originalDispatchBuffer, + checkSortFullDispatchSize: checkSortFullDispatchBuffer, + originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer, + isSorted: isSortedBuffer, }; } - // Create radix sort passes for every 2 bits - create_pipelines() { - for (let bit = 0; bit < this.bit_count; bit += 2) { - // Swap buffers every pass - const even = (bit % 4 == 0); - const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys; - const inValues = even ? this.buffers.values : this.buffers.tmpValues; - const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys; - const outValues = even ? this.buffers.tmpValues : this.buffers.values; + create_check_sort_kernels(inKeys, checkSortPartitionData) { + // Skip check sort if disabled + if (!this.check_order) { + return [ null, null ] + } - // Compute local prefix sums and block sums - const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit); + const { check_sort_fast_count, check_sort_full_count, start_full } = checkSortPartitionData; + + // Create the full pass + const checkSortFull = new CheckSortKernel({ + device: this.device, + data: inKeys, + result: this.buffers.dispatchSize, + original: this.buffers.originalDispatchSize, + is_sorted: this.buffers.isSorted, + count: check_sort_full_count, + start: start_full, + full_check: true, + workgroup_size: this.workgroup_size + }); - // Compute block sums prefix sums - const prefixSumKernel = new PrefixSumKernel({ - device: this.device, - data: this.buffers.prefixBlockSum, - count: this.prefix_block_workgroup_count, - workgroup_size: this.workgroup_size, - avoid_bank_conflicts: this.avoid_bank_conflicts, - }); - - // Reorder keys and values - const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit); + // Create the fast pass + const checkSortFast = new CheckSortKernel({ + device: this.device, + data: inKeys, + result: this.buffers.checkSortFullDispatchSize, + original: this.buffers.originalCheckSortFullDispatchSize, + is_sorted: this.buffers.isSorted, + count: check_sort_fast_count, + full_check: false, + workgroup_size: this.workgroup_size + }); - this.pipelines.push({ blockSumPipeline, prefixSumKernel, reorderPipeline }); + if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) { + console.warn(`Warning: workgroup size is too small to enable check sort optimization, disabling...`); + this.check_order = false; + return [ null, null ] } + + this.kernels.checkSortFast = checkSortFast; + this.kernels.checkSortFull = checkSortFull; } create_block_sum_pipeline(inKeys, inValues, bit) { @@ -951,17 +1365,24 @@ class RadixSortKernel { * * @param {GPUComputePassEncoder} pass */ - dispatch(pass) { - for (const { blockSumPipeline, prefixSumKernel, reorderPipeline } of this.pipelines) { + dispatch(pass) { + for (let i = 0; i < this.bit_count / 2; i++) { + const { blockSumPipeline, reorderPipeline } = this.pipelines[i]; + + if (this.check_order && i % 2 == 0) { + this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast); + this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize); + } + pass.setPipeline(blockSumPipeline.pipeline); pass.setBindGroup(0, blockSumPipeline.bindGroup); - pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1); + pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort); - prefixSumKernel.dispatch(pass); + this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum); pass.setPipeline(reorderPipeline.pipeline); pass.setBindGroup(0, reorderPipeline.bindGroup); - pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1); + pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort); } } } diff --git a/dist/cjs/radix-sort-cjs.js.map b/dist/cjs/radix-sort-cjs.js.map index d0b5aa0..76b8ea0 100644 --- a/dist/cjs/radix-sort-cjs.js.map +++ b/dist/cjs/radix-sort-cjs.js.map @@ -1 +1 @@ -{"version":3,"file":"radix-sort-cjs.js","sources":["../../src/shaders/prefix_sum.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/PrefixSumKernel.js","../../src/shaders/radix_sort.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort_reorder.js","../../src/RadixSortKernel.js"],"sourcesContent":["const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n find_optimal_dispatch_size(item_count) {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n let workgroup_count = Math.ceil(item_count / this.items_per_workgroup)\r\n let x = workgroup_count\r\n let y = 1\r\n\r\n if (workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n x = Math.floor(Math.sqrt(workgroup_count))\r\n y = Math.ceil(workgroup_count / x)\r\n workgroup_count = x * y\r\n }\r\n\r\n return { \r\n workgroup_count,\r\n dispatchSize: { x, y },\r\n }\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const { workgroup_count, dispatchSize } = this.find_optimal_dispatch_size(count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n dispatch(pass) {\r\n for (const { pipeline, bindGroup, dispatchSize } of this.pipelines) {\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.find_optimal_dispatch_size()\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n\r\n // Create GPU buffers\r\n this.create_buffers(keys, values)\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines()\r\n }\r\n\r\n find_optimal_dispatch_size() {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n this.dispatchSize = { \r\n x: this.workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (this.workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(this.workgroup_count))\r\n const y = Math.ceil(this.workgroup_count / x)\r\n \r\n this.dispatchSize = { x, y } \r\n }\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_buffers(keys, values) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n }\r\n }\r\n\r\n // Create radix sort passes for every 2 bits\r\n create_pipelines() {\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n\r\n // Compute block sums prefix sums\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: this.buffers.prefixBlockSum, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, prefixSumKernel, reorderPipeline })\r\n }\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) {\r\n for (const { blockSumPipeline, prefixSumKernel, reorderPipeline } of this.pipelines) { \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n\r\n prefixSumKernel.dispatch(pass)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel"],"names":["prefixSumSource_NoBankConflict","radixSortSource_LocalShuffle","reorderSource"],"mappings":";;;;AAAA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACzFD;AACA;AACA;AACA;AACA;AACA,MAAM,6BAA6B,cAAc,CAAC;AAClD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AC7GD,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,KAAK;AACb,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,EAAE;AACP,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,mBAAmB,GAAG,CAAC,GAAG,IAAI,CAAC,sBAAqB;AACjE;AACA,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC;AAC3D,YAAY,MAAM,IAAI,KAAK,CAAC,CAAC,sEAAsE,EAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;AACnI;AACA,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AAC3D,YAAY,KAAK,EAAE,YAAY;AAC/B,YAAY,IAAI,EAAE,oBAAoB,GAAGA,6BAA8B,GAAG,eAAe;AACzF,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAC;AAC/C,KAAK;AACL;AACA,IAAI,0BAA0B,CAAC,UAAU,EAAE;AAC3C,QAAQ,MAAM,EAAE,gCAAgC,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,OAAM;AACvE;AACA,QAAQ,IAAI,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,mBAAmB,EAAC;AAC9E,QAAQ,IAAI,CAAC,GAAG,gBAAe;AAC/B,QAAQ,IAAI,CAAC,GAAG,EAAC;AACjB;AACA,QAAQ,IAAI,eAAe,GAAG,gCAAgC,EAAE;AAChE,YAAY,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAC;AACtD,YAAY,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,EAAC;AAC9C,YAAY,eAAe,GAAG,CAAC,GAAG,EAAC;AACnC,SAAS;AACT;AACA,QAAQ,OAAO;AACf,YAAY,eAAe;AAC3B,YAAY,YAAY,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE;AAClC,SAAS;AACT,KAAK;AACL;AACA,IAAI,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAE;AACvC;AACA,QAAQ,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,0BAA0B,CAAC,KAAK,EAAC;AACxF;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACxD,YAAY,IAAI,EAAE,eAAe,GAAG,CAAC;AACrC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,KAAK,EAAE,uBAAuB;AAC1C,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;AAC9C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE;AACxD,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAC/D,YAAY,KAAK,EAAE,0BAA0B;AAC7C,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,YAAY;AACzC,gBAAgB,UAAU,EAAE,kBAAkB;AAC9C,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,qBAAqB,EAAE,IAAI,CAAC,mBAAmB;AACnE,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AAChF;AACA,QAAQ,IAAI,eAAe,GAAG,CAAC,EAAE;AACjC;AACA,YAAY,IAAI,CAAC,qBAAqB,CAAC,cAAc,EAAE,eAAe,EAAC;AACvE;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACvE,gBAAgB,KAAK,EAAE,+BAA+B;AACtD,gBAAgB,MAAM,EAAE,cAAc;AACtC,gBAAgB,OAAO,EAAE;AACzB,oBAAoB,MAAM,EAAE,IAAI,CAAC,YAAY;AAC7C,oBAAoB,UAAU,EAAE,gBAAgB;AAChD,oBAAoB,SAAS,EAAE;AAC/B,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AAC3E,qBAAqB;AACrB,iBAAiB;AACjB,aAAa,EAAC;AACd;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AACxF,SAAS;AACT,KAAK;AACL;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE;AACnB,QAAQ,KAAK,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,IAAI,IAAI,CAAC,SAAS,EAAE;AAC5E,YAAY,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAC;AACtC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,SAAS,EAAC;AAC3C,YAAY,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AACtE,SAAS;AACT,KAAK;AACL;;ACzJA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACtFD;AACA;AACA;AACA;AACA;AACA,MAAM,wBAAwB,cAAc,CAAC;AAC7C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AClHD,MAAM,sBAAsB,cAAc,CAAC;AAC3C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACrCD,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,MAAM;AACd,QAAQ,KAAK;AACb,QAAQ,SAAS,GAAG,EAAE;AACtB,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,aAAa,GAAG,KAAK;AAC7B,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,GAAG,EAAE,EAAE;AACZ,QAAQ,IAAI,MAAM,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC;AACjE,QAAQ,IAAI,IAAI,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AACpE,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AAC9F,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC;AAC1G,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC;AAC3I;AACA,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,SAAS,GAAG,UAAS;AAClC,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,aAAa,GAAG,cAAa;AAC1C,QAAQ,IAAI,CAAC,oBAAoB,GAAG,qBAAoB;AACxD;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,qBAAqB,EAAC;AAC5E,QAAQ,IAAI,CAAC,4BAA4B,GAAG,CAAC,GAAG,IAAI,CAAC,gBAAe;AACpE;AACA,QAAQ,IAAI,CAAC,UAAU,IAAI,MAAM,IAAI,IAAI,EAAC;AAC1C;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,GAAE;AAC9B,QAAQ,IAAI,CAAC,aAAa,GAAG,GAAE;AAC/B,QAAQ,IAAI,CAAC,OAAO,GAAG,GAAE;AACzB,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA;AACA,QAAQ,IAAI,CAAC,0BAA0B,GAAE;AACzC;AACA;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAE;AACpC;AACA;AACA,QAAQ,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,EAAC;AACzC;AACA;AACA,QAAQ,IAAI,CAAC,gBAAgB,GAAE;AAC/B,KAAK;AACL;AACA,IAAI,0BAA0B,GAAG;AACjC,QAAQ,MAAM,EAAE,gCAAgC,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,OAAM;AACvE;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG;AAC5B,YAAY,CAAC,EAAE,IAAI,CAAC,eAAe;AACnC,YAAY,CAAC,EAAE,CAAC;AAChB,UAAS;AACT;AACA,QAAQ,IAAI,IAAI,CAAC,eAAe,GAAG,gCAAgC,EAAE;AACrE,YAAY,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAC;AACjE,YAAY,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,EAAC;AACzD;AACA,YAAY,IAAI,CAAC,YAAY,GAAG,EAAE,CAAC,EAAE,CAAC,GAAE;AACxC,SAAS;AACT,KAAK;AACL;AACA,IAAI,qBAAqB,GAAG;AAC5B;AACA,QAAQ,MAAM,aAAa,GAAG,CAAC,MAAM,KAAK;AAC1C,YAAY,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;AACrC,0BAA0B,MAAM,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AAChF,0BAA0B,IAAI,CAAC,IAAI,CAAC;AACpC,UAAS;AACT;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,GAAGC,wBAA4B,GAAG,gBAAe;AAClG;AACA,QAAQ,IAAI,CAAC,aAAa,GAAG;AAC7B,YAAY,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACrD,gBAAgB,KAAK,EAAE,sBAAsB;AAC7C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAG,cAAc,GAAG,aAAa,CAAC,cAAc,CAAC;AACtF,aAAa,CAAC;AACd,YAAY,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACpD,gBAAgB,KAAK,EAAE,oBAAoB;AAC3C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAGC,sBAAa,GAAG,aAAa,CAACA,sBAAa,CAAC;AACpF,aAAa,CAAC;AACd,UAAS;AACT,KAAK;AACL;AACA,IAAI,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE;AACjC;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACvD,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,eAAe,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACnF,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,IAAI,EAAE,IAAI,CAAC,4BAA4B,GAAG,CAAC;AACvD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,GAAG;AACvB,YAAY,IAAI,EAAE,IAAI;AACtB,YAAY,MAAM,EAAE,MAAM;AAC1B,YAAY,OAAO,EAAE,aAAa;AAClC,YAAY,SAAS,EAAE,eAAe;AACtC,YAAY,cAAc,EAAE,oBAAoB;AAChD,YAAY,cAAc,EAAE,oBAAoB;AAChD,UAAS;AACT,KAAK;AACL;AACA;AACA,IAAI,gBAAgB,GAAG;AACvB,QAAQ,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,EAAE;AAC1D;AACA,YAAY,MAAM,IAAI,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC,EAAC;AAC5C,YAAY,MAAM,MAAM,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAO;AAC7E,YAAY,MAAM,QAAQ,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,UAAS;AACjF,YAAY,MAAM,OAAO,KAAK,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAI;AAC7E,YAAY,MAAM,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAM;AACjF;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAC;AAC1F;AACA;AACA,YAAY,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC;AACxD,gBAAgB,MAAM,EAAE,IAAI,CAAC,MAAM;AACnC,gBAAgB,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc;AACjD,gBAAgB,KAAK,EAAE,IAAI,CAAC,4BAA4B;AACxD,gBAAgB,cAAc,EAAE,IAAI,CAAC,cAAc;AACnD,gBAAgB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;AAC/D,aAAa,EAAC;AACd;AACA;AACA,YAAY,MAAM,eAAe,GAAG,IAAI,CAAC,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAC;AAC3G;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,gBAAgB,EAAE,eAAe,EAAE,eAAe,EAAE,EAAC;AACvF,SAAS;AACT,KAAK;AACL;AACA,IAAI,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE;AACrD,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,GAAG,SAAS,GAAG,mBAAmB,EAAE;AAC1F,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB;AACA,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AAClD,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACnE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ;AACnD,gBAAgB,UAAU,EAAE,YAAY;AACxC,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,gBAAgB;AACtC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA,IAAI,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE;AACvE,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AAC7D,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AACnD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;AACjD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AACtD,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE;AACvD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,OAAO;AAClD,gBAAgB,UAAU,EAAE,oBAAoB;AAChD,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,eAAe;AACrC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE;AACnB,QAAQ,KAAK,MAAM,EAAE,gBAAgB,EAAE,eAAe,EAAE,eAAe,EAAE,IAAI,IAAI,CAAC,SAAS,EAAE;AAC7F,YAAY,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,QAAQ,EAAC;AACvD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,gBAAgB,CAAC,SAAS,EAAC;AAC5D,YAAY,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AAChF;AACA,YAAY,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAC;AAC1C;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,QAAQ,EAAC;AACtD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,eAAe,CAAC,SAAS,EAAC;AAC3D,YAAY,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AAChF,SAAS;AACT,KAAK;AACL;;;;;"} \ No newline at end of file +{"version":3,"file":"radix-sort-cjs.js","sources":["../../src/shaders/prefix_sum.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/utils.js","../../src/PrefixSumKernel.js","../../src/shaders/radix_sort.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort_reorder.js","../../src/shaders/check_sort.js","../../src/CheckSortKernel.js","../../src/RadixSortKernel.js"],"sourcesContent":["const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","/**\r\n * Find the best dispatch size x and y dimensions to minimize unused workgroups\r\n * \r\n * @param {GPUDevice} device - The GPU device\r\n * @param {int} workgroup_count - Number of workgroups to dispatch\r\n * @returns \r\n */\r\nfunction find_optimal_dispatch_size(device, workgroup_count) {\r\n const dispatchSize = { \r\n x: workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(workgroup_count))\r\n const y = Math.ceil(workgroup_count / x)\r\n \r\n dispatchSize.x = x\r\n dispatchSize.y = y\r\n }\r\n\r\n return dispatchSize\r\n}\r\n\r\nfunction create_buffer_from_data({device, label, data, usage = 0}) {\r\n const dispatchSizes = device.createBuffer({\r\n label: label,\r\n usage: usage,\r\n size: data.length * 4,\r\n mappedAtCreation: true\r\n })\r\n\r\n const dispatchData = new Uint32Array(dispatchSizes.getMappedRange())\r\n dispatchData.set(data)\r\n dispatchSizes.unmap()\r\n\r\n return dispatchSizes\r\n}\r\n\r\nexport {\r\n find_optimal_dispatch_size,\r\n create_buffer_from_data,\r\n}","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const workgroup_count = Math.ceil(count / this.items_per_workgroup)\r\n const dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n label: 'prefix-sum-block-sum',\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n get_dispatch_chain() {\r\n return this.pipelines.flatMap(p => [ p.dispatchSize.x, p.dispatchSize.y, 1 ])\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n \r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","const checkSortSource = (isFirstPass = false, isLastPass = false, isFullCheck = false) => /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var output: array;\r\n@group(0) @binding(2) var original: array;\r\n@group(0) @binding(3) var is_sorted: u32;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ELEMENT_COUNT: u32;\r\noverride START_ELEMENT: u32;\r\n\r\nvar s_data: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn check_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT;\r\n let GID = TID + WID; // Global thread ID\r\n\r\n // Load data into shared memory\r\n ${ isFirstPass ? first_pass_load_data : \"s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);\" }\r\n\r\n // Perform parallel reduction\r\n for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { \r\n workgroupBarrier(); \r\n if (TID % (2u * d) == 0u) {\r\n s_data[TID] += s_data[TID + d];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Write reduction result\r\n ${ isLastPass ? last_pass(isFullCheck) : write_reduction_result }\r\n}`\r\n\r\nconst write_reduction_result = /* wgsl */ `\r\n if (TID == 0) {\r\n output[WORKGROUP_ID] = s_data[0];\r\n }\r\n`\r\n\r\nconst first_pass_load_data = /* wgsl */ `\r\n let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n\r\n // Load current element into shared memory\r\n // Also load next element for comparison\r\n let elm = select(0u, input[GID], GID < ELEMENT_COUNT);\r\n let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1);\r\n s_data[TID] = elm;\r\n workgroupBarrier();\r\n\r\n s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next);\r\n`\r\n\r\nconst last_pass = (isFullCheck) => /* wgsl */ `\r\n let fullDispatchLength = arrayLength(&output);\r\n let dispatchIndex = TID * 3;\r\n\r\n if (dispatchIndex >= fullDispatchLength) {\r\n return;\r\n }\r\n\r\n ${isFullCheck ? last_pass_full : last_pass_fast}\r\n`\r\n\r\nconst last_pass_fast = /* wgsl */ `\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u);\r\n`\r\n\r\nconst last_pass_full = /* wgsl */ `\r\n if (TID == 0 && s_data[0] == 0) {\r\n is_sorted = 1u;\r\n }\r\n\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0);\r\n`\r\nexport default checkSortSource","import checkSortSource from \"./shaders/check_sort\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass CheckSortKernel {\r\n /**\r\n * CheckSortKernel - Performs a parralel reduction to check if an array is sorted.\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - The buffer containing the data to check\r\n * @param {GPUBuffer} result - The result dispatch size buffer\r\n * @param {GPUBuffer} original - The original dispatch size buffer\r\n * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted\r\n * @param {number} count - The number of elements to check\r\n * @param {number} start - The index to start checking from\r\n * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check\r\n * @param {object} workgroup_size - The workgroup size in x and y dimensions\r\n */\r\n constructor({\r\n device,\r\n data,\r\n result,\r\n original,\r\n is_sorted,\r\n count,\r\n start = 0,\r\n full_check = true,\r\n workgroup_size = { x: 16, y: 16 },\r\n }) {\r\n this.device = device\r\n this.count = count\r\n this.start = start\r\n this.full_check = full_check\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n\r\n this.pipelines = []\r\n\r\n this.buffers = {\r\n data, \r\n result, \r\n original, \r\n is_sorted,\r\n outputs: []\r\n }\r\n\r\n this.create_passes_recursive(data, count)\r\n }\r\n\r\n // Find the best dispatch size for each pass to minimize unused workgroups\r\n static find_optimal_dispatch_chain(device, item_count, workgroup_size) {\r\n const threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n const sizes = []\r\n\r\n do {\r\n // Number of workgroups required to process all items\r\n const target_workgroup_count = Math.ceil(item_count / threads_per_workgroup)\r\n \r\n // Optimal dispatch size and updated workgroup count\r\n const dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count)\r\n \r\n sizes.push(dispatchSize.x, dispatchSize.y, 1)\r\n item_count = target_workgroup_count\r\n } while (item_count > 1)\r\n \r\n return sizes\r\n }\r\n\r\n create_passes_recursive(buffer, count, passIndex = 0) {\r\n const workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n\r\n const isFirstPass = passIndex === 0\r\n const isLastPass = workgroup_count <= 1\r\n\r\n const outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({\r\n label: `check-sort-${this.full_check ? 'full' : 'fast'}-${passIndex}`,\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n // Last pass bindings\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n }, {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: buffer }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outputBuffer }\r\n },\r\n // Last pass buffers\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n resource: { buffer: this.buffers.original }\r\n }, {\r\n binding: 3,\r\n resource: { buffer: this.buffers.is_sorted }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [bindGroupLayout]\r\n })\r\n\r\n const element_count = isFirstPass ? this.start + count : count\r\n const start_element = isFirstPass ? this.start : 0\r\n\r\n const checkSortPipeline = this.device.createComputePipeline({\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.device.createShaderModule({\r\n code: checkSortSource(isFirstPass, isLastPass, this.full_check),\r\n label: 'check-sort',\r\n }),\r\n entryPoint: 'check_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': element_count,\r\n 'START_ELEMENT': start_element,\r\n },\r\n }\r\n })\r\n\r\n this.buffers.outputs.push(outputBuffer)\r\n this.pipelines.push({ pipeline: checkSortPipeline, bindGroup })\r\n \r\n if (!isLastPass) {\r\n this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1)\r\n }\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n\r\n const dispatchIndirect = (this.full_check || i < this.pipelines.length - 1)\r\n\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n\r\n if (dispatchIndirect)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n else\r\n // Only the last dispatch of the fast check kernel is constant to (1, 1, 1)\r\n pass.dispatchWorkgroups(1, 1, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default CheckSortKernel","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\nimport CheckSortKernel from \"./CheckSortKernel\"\r\nimport { create_buffer_from_data, find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} check_order - Enable \"order checking\" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false)\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n check_order = false,\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.check_order = check_order\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n this.kernels = {}\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count)\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines(keys, values)\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_pipelines(keys, values) { \r\n // Block prefix sum kernel \r\n const { prefixSumKernel, prefixBlockSumBuffer } = this.create_prefix_sum_kernel()\r\n\r\n // Indirect dispatch buffers\r\n const dispatchData = this.calculate_dispatch_sizes(prefixSumKernel)\r\n\r\n // GPU buffers\r\n this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData)\r\n\r\n // Check sort kernels\r\n this.create_check_sort_kernels(this.buffers.keys, dispatchData)\r\n\r\n // Radix sort passes for every 2 bits\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, reorderPipeline })\r\n }\r\n }\r\n\r\n create_prefix_sum_kernel() {\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-prefix-block-sum',\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create block prefix sum kernel\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: prefixBlockSumBuffer, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n\r\n this.kernels.prefixSum = prefixSumKernel\r\n\r\n return { prefixSumKernel, prefixBlockSumBuffer }\r\n }\r\n\r\n calculate_dispatch_sizes(prefixSumKernel) {\r\n // Prefix sum dispatch sizes\r\n const prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain()\r\n\r\n // Check sort element count (fast/full)\r\n const check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4)\r\n const check_sort_full_count = this.count - check_sort_fast_count\r\n const start_full = check_sort_fast_count - 1\r\n\r\n // Check sort dispatch sizes\r\n const dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size)\r\n const dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size)\r\n\r\n // Initial dispatch sizes\r\n const initialDispatch = [\r\n this.dispatchSize.x, this.dispatchSize.y, 1, // Radix Sort + Reorder\r\n ...dispatchSizesFast.slice(0, 3), // Check sort fast\r\n ...prefixSumDispatchSize // Prefix Sum\r\n ]\r\n\r\n // Dispatch offsets in main buffer\r\n this.dispatchOffsets = {\r\n radix_sort: 0,\r\n check_sort_fast: 3 * 4,\r\n prefix_sum: 6 * 4\r\n }\r\n\r\n return {\r\n initialDispatch,\r\n dispatchSizesFull,\r\n check_sort_fast_count, \r\n check_sort_full_count, \r\n start_full \r\n }\r\n }\r\n\r\n create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n label: 'radix-sort-tmp-keys',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n label: 'radix-sort-tmp-values',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-local-prefix-sum',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Dispatch sizes (radix sort, check sort, prefix sum)\r\n const dispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const originalDispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size-original',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Dispatch sizes (full sort)\r\n const checkSortFullDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const checkSortFullOriginalDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size-original',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Flag to tell if the data is sorted\r\n const isSortedBuffer = create_buffer_from_data({\r\n label: 'is-sorted',\r\n device: this.device, \r\n data: new Uint32Array([0]), \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n \r\n dispatchSize: dispatchBuffer,\r\n originalDispatchSize: originalDispatchBuffer,\r\n checkSortFullDispatchSize: checkSortFullDispatchBuffer,\r\n originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer,\r\n isSorted: isSortedBuffer,\r\n }\r\n }\r\n\r\n create_check_sort_kernels(inKeys, checkSortPartitionData) {\r\n // Skip check sort if disabled\r\n if (!this.check_order) {\r\n return [ null, null ]\r\n }\r\n\r\n const { check_sort_fast_count, check_sort_full_count, start_full } = checkSortPartitionData\r\n\r\n // Create the full pass\r\n const checkSortFull = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.dispatchSize,\r\n original: this.buffers.originalDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_full_count,\r\n start: start_full,\r\n full_check: true,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n // Create the fast pass\r\n const checkSortFast = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.checkSortFullDispatchSize,\r\n original: this.buffers.originalCheckSortFullDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_fast_count,\r\n full_check: false,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) {\r\n console.warn(`Warning: workgroup size is too small to enable check sort optimization, disabling...`)\r\n this.check_order = false\r\n return [ null, null ]\r\n }\r\n\r\n this.kernels.checkSortFast = checkSortFast\r\n this.kernels.checkSortFull = checkSortFull\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) { \r\n for (let i = 0; i < this.bit_count / 2; i++) {\r\n const { blockSumPipeline, reorderPipeline } = this.pipelines[i]\r\n\r\n if (this.check_order && i % 2 == 0) {\r\n this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast)\r\n this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize)\r\n }\r\n \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n\r\n this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel"],"names":["prefixSumSource_NoBankConflict","radixSortSource_LocalShuffle","reorderSource"],"mappings":";;;;AAAA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACzFD;AACA;AACA;AACA;AACA;AACA,MAAM,6BAA6B,cAAc,CAAC;AAClD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AChHD;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS,0BAA0B,CAAC,MAAM,EAAE,eAAe,EAAE;AAC7D,IAAI,MAAM,YAAY,GAAG;AACzB,QAAQ,CAAC,EAAE,eAAe;AAC1B,QAAQ,CAAC,EAAE,CAAC;AACZ,MAAK;AACL;AACA,IAAI,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC,gCAAgC,EAAE;AAC1E,QAAQ,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAC;AACxD,QAAQ,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,EAAC;AAChD;AACA,QAAQ,YAAY,CAAC,CAAC,GAAG,EAAC;AAC1B,QAAQ,YAAY,CAAC,CAAC,GAAG,EAAC;AAC1B,KAAK;AACL;AACA,IAAI,OAAO,YAAY;AACvB,CAAC;AACD;AACA,SAAS,uBAAuB,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC,CAAC,EAAE;AACnE,IAAI,MAAM,aAAa,GAAG,MAAM,CAAC,YAAY,CAAC;AAC9C,QAAQ,KAAK,EAAE,KAAK;AACpB,QAAQ,KAAK,EAAE,KAAK;AACpB,QAAQ,IAAI,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC;AAC7B,QAAQ,gBAAgB,EAAE,IAAI;AAC9B,KAAK,EAAC;AACN;AACA,IAAI,MAAM,YAAY,GAAG,IAAI,WAAW,CAAC,aAAa,CAAC,cAAc,EAAE,EAAC;AACxE,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,EAAC;AAC1B,IAAI,aAAa,CAAC,KAAK,GAAE;AACzB;AACA,IAAI,OAAO,aAAa;AACxB;;ACjCA,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,KAAK;AACb,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,EAAE;AACP,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,mBAAmB,GAAG,CAAC,GAAG,IAAI,CAAC,sBAAqB;AACjE;AACA,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC;AAC3D,YAAY,MAAM,IAAI,KAAK,CAAC,CAAC,sEAAsE,EAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;AACnI;AACA,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AAC3D,YAAY,KAAK,EAAE,YAAY;AAC/B,YAAY,IAAI,EAAE,oBAAoB,GAAGA,6BAA8B,GAAG,eAAe;AACzF,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAC;AAC/C,KAAK;AACL;AACA,IAAI,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAE;AACvC;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,mBAAmB,EAAC;AAC3E,QAAQ,MAAM,YAAY,GAAG,0BAA0B,CAAC,IAAI,CAAC,MAAM,EAAE,eAAe,EAAC;AACrF;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACxD,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,IAAI,EAAE,eAAe,GAAG,CAAC;AACrC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,KAAK,EAAE,uBAAuB;AAC1C,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;AAC9C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE;AACxD,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAC/D,YAAY,KAAK,EAAE,0BAA0B;AAC7C,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,YAAY;AACzC,gBAAgB,UAAU,EAAE,kBAAkB;AAC9C,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,qBAAqB,EAAE,IAAI,CAAC,mBAAmB;AACnE,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AAChF;AACA,QAAQ,IAAI,eAAe,GAAG,CAAC,EAAE;AACjC;AACA,YAAY,IAAI,CAAC,qBAAqB,CAAC,cAAc,EAAE,eAAe,EAAC;AACvE;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACvE,gBAAgB,KAAK,EAAE,+BAA+B;AACtD,gBAAgB,MAAM,EAAE,cAAc;AACtC,gBAAgB,OAAO,EAAE;AACzB,oBAAoB,MAAM,EAAE,IAAI,CAAC,YAAY;AAC7C,oBAAoB,UAAU,EAAE,gBAAgB;AAChD,oBAAoB,SAAS,EAAE;AAC/B,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AAC3E,qBAAqB;AACrB,iBAAiB;AACjB,aAAa,EAAC;AACd;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AACxF,SAAS;AACT,KAAK;AACL;AACA,IAAI,kBAAkB,GAAG;AACzB,QAAQ,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC;AACrF,KAAK;AACL;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE,YAAY,EAAE,MAAM,GAAG,CAAC,EAAE;AAC7C,QAAQ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AACxD,YAAY,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAC;AAC7D;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAC;AACtC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,SAAS,EAAC;AAC3C,YAAY,IAAI,CAAC,0BAA0B,CAAC,YAAY,EAAE,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAC;AAC7E,SAAS;AACT,KAAK;AACL;;AC/IA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACtFD;AACA;AACA;AACA;AACA;AACA,MAAM,wBAAwB,cAAc,CAAC;AAC7C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AClHD,MAAM,sBAAsB,cAAc,CAAC;AAC3C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AC1CD,MAAM,eAAe,GAAG,CAAC,WAAW,GAAG,KAAK,EAAE,UAAU,GAAG,KAAK,EAAE,WAAW,GAAG,KAAK,gBAAgB,CAAC;AACtG;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,GAAG,WAAW,GAAG,oBAAoB,GAAG,4DAA4D,EAAE;AAC1G;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,GAAG,UAAU,GAAG,SAAS,CAAC,WAAW,CAAC,GAAG,sBAAsB,EAAE;AACrE,CAAC,EAAC;AACF;AACA,MAAM,sBAAsB,cAAc,CAAC;AAC3C;AACA;AACA;AACA,EAAC;AACD;AACA,MAAM,oBAAoB,cAAc,CAAC;AACzC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,EAAC;AACD;AACA,MAAM,SAAS,GAAG,CAAC,WAAW,gBAAgB,CAAC;AAC/C;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,EAAE,WAAW,GAAG,cAAc,GAAG,cAAc,CAAC;AACpD,EAAC;AACD;AACA,MAAM,cAAc,cAAc,CAAC;AACnC;AACA,EAAC;AACD;AACA,MAAM,cAAc,cAAc,CAAC;AACnC;AACA;AACA;AACA;AACA;AACA;;AC9EA,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,MAAM;AACd,QAAQ,QAAQ;AAChB,QAAQ,SAAS;AACjB,QAAQ,KAAK;AACb,QAAQ,KAAK,GAAG,CAAC;AACjB,QAAQ,UAAU,GAAG,IAAI;AACzB,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,KAAK,EAAE;AACP,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,UAAU,GAAG,WAAU;AACpC,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE;AACA,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA,QAAQ,IAAI,CAAC,OAAO,GAAG;AACvB,YAAY,IAAI;AAChB,YAAY,MAAM;AAClB,YAAY,QAAQ;AACpB,YAAY,SAAS;AACrB,YAAY,OAAO,EAAE,EAAE;AACvB,UAAS;AACT;AACA,QAAQ,IAAI,CAAC,uBAAuB,CAAC,IAAI,EAAE,KAAK,EAAC;AACjD,KAAK;AACL;AACA;AACA,IAAI,OAAO,2BAA2B,CAAC,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE;AAC3E,QAAQ,MAAM,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACzE,QAAQ,MAAM,KAAK,GAAG,GAAE;AACxB;AACA,QAAQ,GAAG;AACX;AACA,YAAY,MAAM,sBAAsB,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,qBAAqB,EAAC;AACxF;AACA;AACA,YAAY,MAAM,YAAY,GAAG,0BAA0B,CAAC,MAAM,EAAE,sBAAsB,EAAC;AAC3F;AACA,YAAY,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AACzD,YAAY,UAAU,GAAG,uBAAsB;AAC/C,SAAS,QAAQ,UAAU,GAAG,CAAC,CAAC;AAChC;AACA,QAAQ,OAAO,KAAK;AACpB,KAAK;AACL;AACA,IAAI,uBAAuB,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,GAAG,CAAC,EAAE;AAC1D,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,qBAAqB,EAAC;AAC7E;AACA,QAAQ,MAAM,WAAW,GAAG,SAAS,KAAK,EAAC;AAC3C,QAAQ,MAAM,UAAU,GAAG,eAAe,IAAI,EAAC;AAC/C;AACA,QAAQ,MAAM,YAAY,GAAG,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACzF,YAAY,KAAK,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;AACjF,YAAY,IAAI,EAAE,eAAe,GAAG,CAAC;AACrC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB;AACA,gBAAgB,IAAI,UAAU,GAAG,CAAC;AAClC,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB,EAAE;AACnB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE;AACtD,iBAAiB;AACjB;AACA,gBAAgB,IAAI,UAAU,GAAG,CAAC;AAClC,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE;AAC/D,iBAAiB,EAAE;AACnB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;AAChE,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,CAAC,eAAe,CAAC;AAC/C,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,aAAa,GAAG,WAAW,GAAG,IAAI,CAAC,KAAK,GAAG,KAAK,GAAG,MAAK;AACtE,QAAQ,MAAM,aAAa,GAAG,WAAW,GAAG,IAAI,CAAC,KAAK,GAAG,EAAC;AAC1D;AACA,QAAQ,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACpE,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACvD,oBAAoB,IAAI,EAAE,eAAe,CAAC,WAAW,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC;AACnF,oBAAoB,KAAK,EAAE,YAAY;AACvC,iBAAiB,CAAC;AAClB,gBAAgB,UAAU,EAAE,YAAY;AACxC,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,aAAa;AAClD,oBAAoB,eAAe,EAAE,aAAa;AAClD,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,YAAY,EAAC;AAC/C,QAAQ,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,iBAAiB,EAAE,SAAS,EAAE,EAAC;AACvE;AACA,QAAQ,IAAI,CAAC,UAAU,EAAE;AACzB,YAAY,IAAI,CAAC,uBAAuB,CAAC,YAAY,EAAE,eAAe,EAAE,SAAS,GAAG,CAAC,EAAC;AACtF,SAAS;AACT,KAAK;AACL;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE,YAAY,EAAE,MAAM,GAAG,CAAC,EAAE;AAC7C,QAAQ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AACxD,YAAY,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAC;AAC7D;AACA,YAAY,MAAM,gBAAgB,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAC;AACvF;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAC;AACtC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,SAAS,EAAC;AAC3C;AACA,YAAY,IAAI,gBAAgB;AAChC,gBAAgB,IAAI,CAAC,0BAA0B,CAAC,YAAY,EAAE,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAC;AACjF;AACA;AACA,gBAAgB,IAAI,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAC;AAChD,SAAS;AACT,KAAK;AACL;;ACxKA,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,MAAM;AACd,QAAQ,KAAK;AACb,QAAQ,SAAS,GAAG,EAAE;AACtB,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,WAAW,GAAG,KAAK;AAC3B,QAAQ,aAAa,GAAG,KAAK;AAC7B,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,GAAG,EAAE,EAAE;AACZ,QAAQ,IAAI,MAAM,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC;AACjE,QAAQ,IAAI,IAAI,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AACpE,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AAC9F,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,IAAI,SAAS,GAAG,EAAE,EAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC;AAC5H,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC;AAC3I,QAAQ,IAAI,SAAS,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC;AACpF;AACA,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,SAAS,GAAG,UAAS;AAClC,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,WAAW,GAAG,YAAW;AACtC,QAAQ,IAAI,CAAC,aAAa,GAAG,cAAa;AAC1C,QAAQ,IAAI,CAAC,oBAAoB,GAAG,qBAAoB;AACxD;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,qBAAqB,EAAC;AAC5E,QAAQ,IAAI,CAAC,4BAA4B,GAAG,CAAC,GAAG,IAAI,CAAC,gBAAe;AACpE;AACA,QAAQ,IAAI,CAAC,UAAU,IAAI,MAAM,IAAI,IAAI,EAAC;AAC1C;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,GAAE;AAC9B,QAAQ,IAAI,CAAC,aAAa,GAAG,GAAE;AAC/B,QAAQ,IAAI,CAAC,OAAO,GAAG,GAAE;AACzB,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B,QAAQ,IAAI,CAAC,OAAO,GAAG,GAAE;AACzB;AACA;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,0BAA0B,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,eAAe,EAAC;AACzF;AACA;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAE;AACpC;AACA;AACA,QAAQ,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAC;AAC3C,KAAK;AACL;AACA,IAAI,qBAAqB,GAAG;AAC5B;AACA,QAAQ,MAAM,aAAa,GAAG,CAAC,MAAM,KAAK;AAC1C,YAAY,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;AACrC,0BAA0B,MAAM,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AAChF,0BAA0B,IAAI,CAAC,IAAI,CAAC;AACpC,UAAS;AACT;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,GAAGC,wBAA4B,GAAG,gBAAe;AAClG;AACA,QAAQ,IAAI,CAAC,aAAa,GAAG;AAC7B,YAAY,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACrD,gBAAgB,KAAK,EAAE,sBAAsB;AAC7C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAG,cAAc,GAAG,aAAa,CAAC,cAAc,CAAC;AACtF,aAAa,CAAC;AACd,YAAY,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACpD,gBAAgB,KAAK,EAAE,oBAAoB;AAC3C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAGC,sBAAa,GAAG,aAAa,CAACA,sBAAa,CAAC;AACpF,aAAa,CAAC;AACd,UAAS;AACT,KAAK;AACL;AACA,IAAI,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE;AACnC;AACA,QAAQ,MAAM,EAAE,eAAe,EAAE,oBAAoB,EAAE,GAAG,IAAI,CAAC,wBAAwB,GAAE;AACzF;AACA;AACA,QAAQ,MAAM,YAAY,GAAG,IAAI,CAAC,wBAAwB,CAAC,eAAe,EAAC;AAC3E;AACA;AACA,QAAQ,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,oBAAoB,EAAE,YAAY,EAAC;AAC7E;AACA;AACA,QAAQ,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,YAAY,EAAC;AACvE;AACA;AACA,QAAQ,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,EAAE;AAC1D;AACA,YAAY,MAAM,IAAI,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC,EAAC;AAC5C,YAAY,MAAM,MAAM,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAO;AAC7E,YAAY,MAAM,QAAQ,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,UAAS;AACjF,YAAY,MAAM,OAAO,KAAK,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAI;AAC7E,YAAY,MAAM,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAM;AACjF;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAC;AAC1F;AACA;AACA,YAAY,MAAM,eAAe,GAAG,IAAI,CAAC,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAC;AAC3G;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,gBAAgB,EAAE,eAAe,EAAE,EAAC;AACtE,SAAS;AACT,KAAK;AACL;AACA,IAAI,wBAAwB,GAAG;AAC/B;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,KAAK,EAAE,6BAA6B;AAChD,YAAY,IAAI,EAAE,IAAI,CAAC,4BAA4B,GAAG,CAAC;AACvD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC;AACpD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,oBAAoB;AACtC,YAAY,KAAK,EAAE,IAAI,CAAC,4BAA4B;AACpD,YAAY,cAAc,EAAE,IAAI,CAAC,cAAc;AAC/C,YAAY,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;AAC3D,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,gBAAe;AAChD;AACA,QAAQ,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE;AACxD,KAAK;AACL;AACA,IAAI,wBAAwB,CAAC,eAAe,EAAE;AAC9C;AACA,QAAQ,MAAM,qBAAqB,GAAG,eAAe,CAAC,kBAAkB,GAAE;AAC1E;AACA;AACA,QAAQ,MAAM,qBAAqB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,qBAAqB,GAAG,CAAC,EAAC;AAC1F,QAAQ,MAAM,qBAAqB,GAAG,IAAI,CAAC,KAAK,GAAG,sBAAqB;AACxE,QAAQ,MAAM,UAAU,GAAG,qBAAqB,GAAG,EAAC;AACpD;AACA;AACA,QAAQ,MAAM,iBAAiB,GAAG,eAAe,CAAC,2BAA2B,CAAC,IAAI,CAAC,MAAM,EAAE,qBAAqB,EAAE,IAAI,CAAC,cAAc,EAAC;AACtI,QAAQ,MAAM,iBAAiB,GAAG,eAAe,CAAC,2BAA2B,CAAC,IAAI,CAAC,MAAM,EAAE,qBAAqB,EAAE,IAAI,CAAC,cAAc,EAAC;AACtI;AACA;AACA,QAAQ,MAAM,eAAe,GAAG;AAChC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;AACvD,YAAY,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;AAC5C,YAAY,GAAG,qBAAqB;AACpC,UAAS;AACT;AACA;AACA,QAAQ,IAAI,CAAC,eAAe,GAAG;AAC/B,YAAY,UAAU,EAAE,CAAC;AACzB,YAAY,eAAe,EAAE,CAAC,GAAG,CAAC;AAClC,YAAY,UAAU,EAAE,CAAC,GAAG,CAAC;AAC7B,UAAS;AACT;AACA,QAAQ,OAAO;AACf,YAAY,eAAe;AAC3B,YAAY,iBAAiB;AAC7B,YAAY,qBAAqB;AACjC,YAAY,qBAAqB;AACjC,YAAY,UAAU;AACtB,SAAS;AACT,KAAK;AACL;AACA,IAAI,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,oBAAoB,EAAE,YAAY,EAAE;AACrE;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACvD,YAAY,KAAK,EAAE,qBAAqB;AACxC,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,eAAe,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACnF,YAAY,KAAK,EAAE,uBAAuB;AAC1C,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,KAAK,EAAE,6BAA6B;AAChD,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,uBAAuB,CAAC;AACvD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,KAAK,EAAE,0BAA0B;AAC7C,YAAY,IAAI,EAAE,YAAY,CAAC,eAAe;AAC9C,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,sBAAsB,GAAG,uBAAuB,CAAC;AAC/D,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,KAAK,EAAE,mCAAmC;AACtD,YAAY,IAAI,EAAE,YAAY,CAAC,eAAe;AAC9C,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ;AACnE,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,2BAA2B,GAAG,uBAAuB,CAAC;AACpE,YAAY,KAAK,EAAE,+BAA+B;AAClD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,YAAY,CAAC,iBAAiB;AAChD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,mCAAmC,GAAG,uBAAuB,CAAC;AAC5E,YAAY,KAAK,EAAE,wCAAwC;AAC3D,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,YAAY,CAAC,iBAAiB;AAChD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ;AACnE,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,uBAAuB,CAAC;AACvD,YAAY,KAAK,EAAE,WAAW;AAC9B,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;AACtC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,GAAG;AACvB,YAAY,IAAI,EAAE,IAAI;AACtB,YAAY,MAAM,EAAE,MAAM;AAC1B,YAAY,OAAO,EAAE,aAAa;AAClC,YAAY,SAAS,EAAE,eAAe;AACtC,YAAY,cAAc,EAAE,oBAAoB;AAChD,YAAY,cAAc,EAAE,oBAAoB;AAChD;AACA,YAAY,YAAY,EAAE,cAAc;AACxC,YAAY,oBAAoB,EAAE,sBAAsB;AACxD,YAAY,yBAAyB,EAAE,2BAA2B;AAClE,YAAY,iCAAiC,EAAE,mCAAmC;AAClF,YAAY,QAAQ,EAAE,cAAc;AACpC,UAAS;AACT,KAAK;AACL;AACA,IAAI,yBAAyB,CAAC,MAAM,EAAE,sBAAsB,EAAE;AAC9D;AACA,QAAQ,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;AAC/B,YAAY,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE;AACjC,SAAS;AACT;AACA,QAAQ,MAAM,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,UAAU,EAAE,GAAG,uBAAsB;AACnG;AACA;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;AAClD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,MAAM;AACxB,YAAY,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY;AAC7C,YAAY,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,oBAAoB;AACvD,YAAY,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ;AAC5C,YAAY,KAAK,EAAE,qBAAqB;AACxC,YAAY,KAAK,EAAE,UAAU;AAC7B,YAAY,UAAU,EAAE,IAAI;AAC5B,YAAY,cAAc,EAAE,IAAI,CAAC,cAAc;AAC/C,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;AAClD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,MAAM;AACxB,YAAY,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,yBAAyB;AAC1D,YAAY,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,iCAAiC;AACpE,YAAY,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ;AAC5C,YAAY,KAAK,EAAE,qBAAqB;AACxC,YAAY,UAAU,EAAE,KAAK;AAC7B,YAAY,cAAc,EAAE,IAAI,CAAC,cAAc;AAC/C,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,aAAa,CAAC,qBAAqB,GAAG,aAAa,CAAC,SAAS,CAAC,MAAM,EAAE;AAClF,YAAY,OAAO,CAAC,IAAI,CAAC,CAAC,oFAAoF,CAAC,EAAC;AAChH,YAAY,IAAI,CAAC,WAAW,GAAG,MAAK;AACpC,YAAY,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE;AACjC,SAAS;AACT;AACA,QAAQ,IAAI,CAAC,OAAO,CAAC,aAAa,GAAG,cAAa;AAClD,QAAQ,IAAI,CAAC,OAAO,CAAC,aAAa,GAAG,cAAa;AAClD,KAAK;AACL;AACA,IAAI,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE;AACrD,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,GAAG,SAAS,GAAG,mBAAmB,EAAE;AAC1F,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB;AACA,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AAClD,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACnE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ;AACnD,gBAAgB,UAAU,EAAE,YAAY;AACxC,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,gBAAgB;AACtC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA,IAAI,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE;AACvE,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AAC7D,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AACnD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;AACjD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AACtD,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE;AACvD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,OAAO;AAClD,gBAAgB,UAAU,EAAE,oBAAoB;AAChD,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,eAAe;AACrC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE;AACnB,QAAQ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;AACrD,YAAY,MAAM,EAAE,gBAAgB,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAC;AAC3E;AACA,YAAY,IAAI,IAAI,CAAC,WAAW,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;AAChD,gBAAgB,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,eAAe,EAAC;AAC1H,gBAAgB,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,yBAAyB,EAAC;AACjG,aAAa;AACb;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,QAAQ,EAAC;AACvD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,gBAAgB,CAAC,SAAS,EAAC;AAC5D,YAAY,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,UAAU,EAAC;AACvG;AACA,YAAY,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,UAAU,EAAC;AAC7G;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,QAAQ,EAAC;AACtD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,eAAe,CAAC,SAAS,EAAC;AAC3D,YAAY,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,UAAU,EAAC;AACvG,SAAS;AACT,KAAK;AACL;;;;;"} \ No newline at end of file diff --git a/dist/esm/radix-sort-esm.js b/dist/esm/radix-sort-esm.js index 1418a80..a37e183 100644 --- a/dist/esm/radix-sort-esm.js +++ b/dist/esm/radix-sort-esm.js @@ -203,6 +203,45 @@ fn add_block_sums( items[ELM_ID + 1] += blockSum; }`; +/** + * Find the best dispatch size x and y dimensions to minimize unused workgroups + * + * @param {GPUDevice} device - The GPU device + * @param {int} workgroup_count - Number of workgroups to dispatch + * @returns + */ +function find_optimal_dispatch_size(device, workgroup_count) { + const dispatchSize = { + x: workgroup_count, + y: 1 + }; + + if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) { + const x = Math.floor(Math.sqrt(workgroup_count)); + const y = Math.ceil(workgroup_count / x); + + dispatchSize.x = x; + dispatchSize.y = y; + } + + return dispatchSize +} + +function create_buffer_from_data({device, label, data, usage = 0}) { + const dispatchSizes = device.createBuffer({ + label: label, + usage: usage, + size: data.length * 4, + mappedAtCreation: true + }); + + const dispatchData = new Uint32Array(dispatchSizes.getMappedRange()); + dispatchData.set(data); + dispatchSizes.unmap(); + + return dispatchSizes +} + class PrefixSumKernel { /** * Perform a parallel prefix sum on the given data buffer @@ -241,31 +280,14 @@ class PrefixSumKernel { this.create_pass_recursive(data, count); } - find_optimal_dispatch_size(item_count) { - const { maxComputeWorkgroupsPerDimension } = this.device.limits; - - let workgroup_count = Math.ceil(item_count / this.items_per_workgroup); - let x = workgroup_count; - let y = 1; - - if (workgroup_count > maxComputeWorkgroupsPerDimension) { - x = Math.floor(Math.sqrt(workgroup_count)); - y = Math.ceil(workgroup_count / x); - workgroup_count = x * y; - } - - return { - workgroup_count, - dispatchSize: { x, y }, - } - } - create_pass_recursive(data, count) { // Find best dispatch x and y dimensions to minimize unused threads - const { workgroup_count, dispatchSize } = this.find_optimal_dispatch_size(count); + const workgroup_count = Math.ceil(count / this.items_per_workgroup); + const dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count); // Create buffer for block sums const blockSumBuffer = this.device.createBuffer({ + label: 'prefix-sum-block-sum', size: workgroup_count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); @@ -346,11 +368,17 @@ class PrefixSumKernel { } } - dispatch(pass) { - for (const { pipeline, bindGroup, dispatchSize } of this.pipelines) { + get_dispatch_chain() { + return this.pipelines.flatMap(p => [ p.dispatchSize.x, p.dispatchSize.y, 1 ]) + } + + dispatch(pass, dispatchSize, offset = 0) { + for (let i = 0; i < this.pipelines.length; i++) { + const { pipeline, bindGroup } = this.pipelines[i]; + pass.setPipeline(pipeline); pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1); + pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4); } } } @@ -603,6 +631,263 @@ fn radix_sort_reorder( outputValues[sorted_position] = v; }`; +const checkSortSource = (isFirstPass = false, isLastPass = false, isFullCheck = false) => /* wgsl */ ` + +@group(0) @binding(0) var input: array; +@group(0) @binding(1) var output: array; +@group(0) @binding(2) var original: array; +@group(0) @binding(3) var is_sorted: u32; + +override WORKGROUP_SIZE_X: u32; +override WORKGROUP_SIZE_Y: u32; +override THREADS_PER_WORKGROUP: u32; +override ELEMENT_COUNT: u32; +override START_ELEMENT: u32; + +var s_data: array; + +@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1) +fn check_sort( + @builtin(workgroup_id) w_id: vec3, + @builtin(num_workgroups) w_dim: vec3, + @builtin(local_invocation_index) TID: u32, // Local thread ID +) { + let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x; + let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT; + let GID = TID + WID; // Global thread ID + + // Load data into shared memory + ${ isFirstPass ? first_pass_load_data : "s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);" } + + // Perform parallel reduction + for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { + workgroupBarrier(); + if (TID % (2u * d) == 0u) { + s_data[TID] += s_data[TID + d]; + } + } + workgroupBarrier(); + + // Write reduction result + ${ isLastPass ? last_pass(isFullCheck) : write_reduction_result } +}`; + +const write_reduction_result = /* wgsl */ ` + if (TID == 0) { + output[WORKGROUP_ID] = s_data[0]; + } +`; + +const first_pass_load_data = /* wgsl */ ` + let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1; + + // Load current element into shared memory + // Also load next element for comparison + let elm = select(0u, input[GID], GID < ELEMENT_COUNT); + let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1); + s_data[TID] = elm; + workgroupBarrier(); + + s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next); +`; + +const last_pass = (isFullCheck) => /* wgsl */ ` + let fullDispatchLength = arrayLength(&output); + let dispatchIndex = TID * 3; + + if (dispatchIndex >= fullDispatchLength) { + return; + } + + ${isFullCheck ? last_pass_full : last_pass_fast} +`; + +const last_pass_fast = /* wgsl */ ` + output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u); +`; + +const last_pass_full = /* wgsl */ ` + if (TID == 0 && s_data[0] == 0) { + is_sorted = 1u; + } + + output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0); +`; + +class CheckSortKernel { + /** + * CheckSortKernel - Performs a parralel reduction to check if an array is sorted. + * + * @param {GPUDevice} device + * @param {GPUBuffer} data - The buffer containing the data to check + * @param {GPUBuffer} result - The result dispatch size buffer + * @param {GPUBuffer} original - The original dispatch size buffer + * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted + * @param {number} count - The number of elements to check + * @param {number} start - The index to start checking from + * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check + * @param {object} workgroup_size - The workgroup size in x and y dimensions + */ + constructor({ + device, + data, + result, + original, + is_sorted, + count, + start = 0, + full_check = true, + workgroup_size = { x: 16, y: 16 }, + }) { + this.device = device; + this.count = count; + this.start = start; + this.full_check = full_check; + this.workgroup_size = workgroup_size; + this.threads_per_workgroup = workgroup_size.x * workgroup_size.y; + + this.pipelines = []; + + this.buffers = { + data, + result, + original, + is_sorted, + outputs: [] + }; + + this.create_passes_recursive(data, count); + } + + // Find the best dispatch size for each pass to minimize unused workgroups + static find_optimal_dispatch_chain(device, item_count, workgroup_size) { + const threads_per_workgroup = workgroup_size.x * workgroup_size.y; + const sizes = []; + + do { + // Number of workgroups required to process all items + const target_workgroup_count = Math.ceil(item_count / threads_per_workgroup); + + // Optimal dispatch size and updated workgroup count + const dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count); + + sizes.push(dispatchSize.x, dispatchSize.y, 1); + item_count = target_workgroup_count; + } while (item_count > 1) + + return sizes + } + + create_passes_recursive(buffer, count, passIndex = 0) { + const workgroup_count = Math.ceil(count / this.threads_per_workgroup); + + const isFirstPass = passIndex === 0; + const isLastPass = workgroup_count <= 1; + + const outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({ + label: `check-sort-${this.full_check ? 'full' : 'fast'}-${passIndex}`, + size: workgroup_count * 4, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + }); + + const bindGroupLayout = this.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' } + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' } + }, + // Last pass bindings + ...(isLastPass ? [{ + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' } + }] : []), + ] + }); + + const bindGroup = this.device.createBindGroup({ + layout: bindGroupLayout, + entries: [ + { + binding: 0, + resource: { buffer: buffer } + }, + { + binding: 1, + resource: { buffer: outputBuffer } + }, + // Last pass buffers + ...(isLastPass ? [{ + binding: 2, + resource: { buffer: this.buffers.original } + }, { + binding: 3, + resource: { buffer: this.buffers.is_sorted } + }] : []), + ] + }); + + const pipelineLayout = this.device.createPipelineLayout({ + bindGroupLayouts: [bindGroupLayout] + }); + + const element_count = isFirstPass ? this.start + count : count; + const start_element = isFirstPass ? this.start : 0; + + const checkSortPipeline = this.device.createComputePipeline({ + layout: pipelineLayout, + compute: { + module: this.device.createShaderModule({ + code: checkSortSource(isFirstPass, isLastPass, this.full_check), + label: 'check-sort', + }), + entryPoint: 'check_sort', + constants: { + 'WORKGROUP_SIZE_X': this.workgroup_size.x, + 'WORKGROUP_SIZE_Y': this.workgroup_size.y, + 'THREADS_PER_WORKGROUP': this.threads_per_workgroup, + 'ELEMENT_COUNT': element_count, + 'START_ELEMENT': start_element, + }, + } + }); + + this.buffers.outputs.push(outputBuffer); + this.pipelines.push({ pipeline: checkSortPipeline, bindGroup }); + + if (!isLastPass) { + this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1); + } + } + + dispatch(pass, dispatchSize, offset = 0) { + for (let i = 0; i < this.pipelines.length; i++) { + const { pipeline, bindGroup } = this.pipelines[i]; + + const dispatchIndirect = (this.full_check || i < this.pipelines.length - 1); + + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + + if (dispatchIndirect) + pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4); + else + // Only the last dispatch of the fast check kernel is constant to (1, 1, 1) + pass.dispatchWorkgroups(1, 1, 1); + } + } +} + class RadixSortKernel { /** * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values @@ -617,6 +902,7 @@ class RadixSortKernel { * @param {number} count - Number of elements to sort * @param {number} bit_count - Number of bits per element (default: 32) * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two + * @param {boolean} check_order - Enable "order checking" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false) * @param {boolean} local_shuffle - Enable "local shuffling" optimization for the radix sort kernel (default: false) * @param {boolean} avoid_bank_conflicts - Enable "avoiding bank conflicts" optimization for the prefix sum kernel (default: false) */ @@ -627,19 +913,22 @@ class RadixSortKernel { count, bit_count = 32, workgroup_size = { x: 16, y: 16 }, + check_order = false, local_shuffle = false, avoid_bank_conflicts = false, } = {}) { if (device == null) throw new Error('No device provided') if (keys == null) throw new Error('No keys buffer provided') if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter') - if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter') + if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter') if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter') + if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4') this.device = device; this.count = count; this.bit_count = bit_count; this.workgroup_size = workgroup_size; + this.check_order = check_order; this.local_shuffle = local_shuffle; this.avoid_bank_conflicts = avoid_bank_conflicts; @@ -653,34 +942,16 @@ class RadixSortKernel { this.shaderModules = {}; // GPUShaderModules this.buffers = {}; // GPUBuffers this.pipelines = []; // List of passes + this.kernels = {}; // Find best dispatch x and y dimensions to minimize unused threads - this.find_optimal_dispatch_size(); + this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count); // Create shader modules from wgsl code this.create_shader_modules(); - - // Create GPU buffers - this.create_buffers(keys, values); // Create multi-pass pipelines - this.create_pipelines(); - } - - find_optimal_dispatch_size() { - const { maxComputeWorkgroupsPerDimension } = this.device.limits; - - this.dispatchSize = { - x: this.workgroup_count, - y: 1 - }; - - if (this.workgroup_count > maxComputeWorkgroupsPerDimension) { - const x = Math.floor(Math.sqrt(this.workgroup_count)); - const y = Math.ceil(this.workgroup_count / x); - - this.dispatchSize = { x, y }; - } + this.create_pipelines(keys, values); } create_shader_modules() { @@ -705,26 +976,149 @@ class RadixSortKernel { }; } - create_buffers(keys, values) { + create_pipelines(keys, values) { + // Block prefix sum kernel + const { prefixSumKernel, prefixBlockSumBuffer } = this.create_prefix_sum_kernel(); + + // Indirect dispatch buffers + const dispatchData = this.calculate_dispatch_sizes(prefixSumKernel); + + // GPU buffers + this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData); + + // Check sort kernels + this.create_check_sort_kernels(this.buffers.keys, dispatchData); + + // Radix sort passes for every 2 bits + for (let bit = 0; bit < this.bit_count; bit += 2) { + // Swap buffers every pass + const even = (bit % 4 == 0); + const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys; + const inValues = even ? this.buffers.values : this.buffers.tmpValues; + const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys; + const outValues = even ? this.buffers.tmpValues : this.buffers.values; + + // Compute local prefix sums and block sums + const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit); + + // Reorder keys and values + const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit); + + this.pipelines.push({ blockSumPipeline, reorderPipeline }); + } + } + + create_prefix_sum_kernel() { + // Prefix Block Sum buffer (4 element per workgroup) + const prefixBlockSumBuffer = this.device.createBuffer({ + label: 'radix-sort-prefix-block-sum', + size: this.prefix_block_workgroup_count * 4, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + }); + + // Create block prefix sum kernel + const prefixSumKernel = new PrefixSumKernel({ + device: this.device, + data: prefixBlockSumBuffer, + count: this.prefix_block_workgroup_count, + workgroup_size: this.workgroup_size, + avoid_bank_conflicts: this.avoid_bank_conflicts, + }); + + this.kernels.prefixSum = prefixSumKernel; + + return { prefixSumKernel, prefixBlockSumBuffer } + } + + calculate_dispatch_sizes(prefixSumKernel) { + // Prefix sum dispatch sizes + const prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain(); + + // Check sort element count (fast/full) + const check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4); + const check_sort_full_count = this.count - check_sort_fast_count; + const start_full = check_sort_fast_count - 1; + + // Check sort dispatch sizes + const dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size); + const dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size); + + // Initial dispatch sizes + const initialDispatch = [ + this.dispatchSize.x, this.dispatchSize.y, 1, // Radix Sort + Reorder + ...dispatchSizesFast.slice(0, 3), // Check sort fast + ...prefixSumDispatchSize // Prefix Sum + ]; + + // Dispatch offsets in main buffer + this.dispatchOffsets = { + radix_sort: 0, + check_sort_fast: 3 * 4, + prefix_sum: 6 * 4 + }; + + return { + initialDispatch, + dispatchSizesFull, + check_sort_fast_count, + check_sort_full_count, + start_full + } + } + + create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) { // Keys and values double buffering const tmpKeysBuffer = this.device.createBuffer({ + label: 'radix-sort-tmp-keys', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({ + label: 'radix-sort-tmp-values', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); // Local Prefix Sum buffer (1 element per item) const localPrefixSumBuffer = this.device.createBuffer({ + label: 'radix-sort-local-prefix-sum', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); - // Prefix Block Sum buffer (4 element per workgroup) - const prefixBlockSumBuffer = this.device.createBuffer({ - size: this.prefix_block_workgroup_count * 4, + // Dispatch sizes (radix sort, check sort, prefix sum) + const dispatchBuffer = create_buffer_from_data({ + device: this.device, + label: 'radix-sort-dispatch-size', + data: dispatchData.initialDispatch, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT + }); + const originalDispatchBuffer = create_buffer_from_data({ + device: this.device, + label: 'radix-sort-dispatch-size-original', + data: dispatchData.initialDispatch, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + }); + + // Dispatch sizes (full sort) + const checkSortFullDispatchBuffer = create_buffer_from_data({ + label: 'check-sort-full-dispatch-size', + device: this.device, + data: dispatchData.dispatchSizesFull, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT + }); + const checkSortFullOriginalDispatchBuffer = create_buffer_from_data({ + label: 'check-sort-full-dispatch-size-original', + device: this.device, + data: dispatchData.dispatchSizesFull, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + }); + + // Flag to tell if the data is sorted + const isSortedBuffer = create_buffer_from_data({ + label: 'is-sorted', + device: this.device, + data: new Uint32Array([0]), usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); @@ -735,36 +1129,56 @@ class RadixSortKernel { tmpValues: tmpValuesBuffer, localPrefixSum: localPrefixSumBuffer, prefixBlockSum: prefixBlockSumBuffer, + + dispatchSize: dispatchBuffer, + originalDispatchSize: originalDispatchBuffer, + checkSortFullDispatchSize: checkSortFullDispatchBuffer, + originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer, + isSorted: isSortedBuffer, }; } - // Create radix sort passes for every 2 bits - create_pipelines() { - for (let bit = 0; bit < this.bit_count; bit += 2) { - // Swap buffers every pass - const even = (bit % 4 == 0); - const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys; - const inValues = even ? this.buffers.values : this.buffers.tmpValues; - const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys; - const outValues = even ? this.buffers.tmpValues : this.buffers.values; + create_check_sort_kernels(inKeys, checkSortPartitionData) { + // Skip check sort if disabled + if (!this.check_order) { + return [ null, null ] + } - // Compute local prefix sums and block sums - const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit); + const { check_sort_fast_count, check_sort_full_count, start_full } = checkSortPartitionData; + + // Create the full pass + const checkSortFull = new CheckSortKernel({ + device: this.device, + data: inKeys, + result: this.buffers.dispatchSize, + original: this.buffers.originalDispatchSize, + is_sorted: this.buffers.isSorted, + count: check_sort_full_count, + start: start_full, + full_check: true, + workgroup_size: this.workgroup_size + }); - // Compute block sums prefix sums - const prefixSumKernel = new PrefixSumKernel({ - device: this.device, - data: this.buffers.prefixBlockSum, - count: this.prefix_block_workgroup_count, - workgroup_size: this.workgroup_size, - avoid_bank_conflicts: this.avoid_bank_conflicts, - }); - - // Reorder keys and values - const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit); + // Create the fast pass + const checkSortFast = new CheckSortKernel({ + device: this.device, + data: inKeys, + result: this.buffers.checkSortFullDispatchSize, + original: this.buffers.originalCheckSortFullDispatchSize, + is_sorted: this.buffers.isSorted, + count: check_sort_fast_count, + full_check: false, + workgroup_size: this.workgroup_size + }); - this.pipelines.push({ blockSumPipeline, prefixSumKernel, reorderPipeline }); + if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) { + console.warn(`Warning: workgroup size is too small to enable check sort optimization, disabling...`); + this.check_order = false; + return [ null, null ] } + + this.kernels.checkSortFast = checkSortFast; + this.kernels.checkSortFull = checkSortFull; } create_block_sum_pipeline(inKeys, inValues, bit) { @@ -947,17 +1361,24 @@ class RadixSortKernel { * * @param {GPUComputePassEncoder} pass */ - dispatch(pass) { - for (const { blockSumPipeline, prefixSumKernel, reorderPipeline } of this.pipelines) { + dispatch(pass) { + for (let i = 0; i < this.bit_count / 2; i++) { + const { blockSumPipeline, reorderPipeline } = this.pipelines[i]; + + if (this.check_order && i % 2 == 0) { + this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast); + this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize); + } + pass.setPipeline(blockSumPipeline.pipeline); pass.setBindGroup(0, blockSumPipeline.bindGroup); - pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1); + pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort); - prefixSumKernel.dispatch(pass); + this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum); pass.setPipeline(reorderPipeline.pipeline); pass.setBindGroup(0, reorderPipeline.bindGroup); - pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1); + pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort); } } } diff --git a/dist/esm/radix-sort-esm.js.map b/dist/esm/radix-sort-esm.js.map index 175b94f..08237c9 100644 --- a/dist/esm/radix-sort-esm.js.map +++ b/dist/esm/radix-sort-esm.js.map @@ -1 +1 @@ -{"version":3,"file":"radix-sort-esm.js","sources":["../../src/shaders/prefix_sum.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/PrefixSumKernel.js","../../src/shaders/radix_sort.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort_reorder.js","../../src/RadixSortKernel.js"],"sourcesContent":["const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n find_optimal_dispatch_size(item_count) {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n let workgroup_count = Math.ceil(item_count / this.items_per_workgroup)\r\n let x = workgroup_count\r\n let y = 1\r\n\r\n if (workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n x = Math.floor(Math.sqrt(workgroup_count))\r\n y = Math.ceil(workgroup_count / x)\r\n workgroup_count = x * y\r\n }\r\n\r\n return { \r\n workgroup_count,\r\n dispatchSize: { x, y },\r\n }\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const { workgroup_count, dispatchSize } = this.find_optimal_dispatch_size(count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n dispatch(pass) {\r\n for (const { pipeline, bindGroup, dispatchSize } of this.pipelines) {\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.find_optimal_dispatch_size()\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n\r\n // Create GPU buffers\r\n this.create_buffers(keys, values)\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines()\r\n }\r\n\r\n find_optimal_dispatch_size() {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n this.dispatchSize = { \r\n x: this.workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (this.workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(this.workgroup_count))\r\n const y = Math.ceil(this.workgroup_count / x)\r\n \r\n this.dispatchSize = { x, y } \r\n }\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_buffers(keys, values) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n }\r\n }\r\n\r\n // Create radix sort passes for every 2 bits\r\n create_pipelines() {\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n\r\n // Compute block sums prefix sums\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: this.buffers.prefixBlockSum, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, prefixSumKernel, reorderPipeline })\r\n }\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) {\r\n for (const { blockSumPipeline, prefixSumKernel, reorderPipeline } of this.pipelines) { \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n\r\n prefixSumKernel.dispatch(pass)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel"],"names":["prefixSumSource_NoBankConflict","radixSortSource_LocalShuffle","reorderSource"],"mappings":"AAAA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACzFD;AACA;AACA;AACA;AACA;AACA,MAAM,6BAA6B,cAAc,CAAC;AAClD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AC7GD,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,KAAK;AACb,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,EAAE;AACP,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,mBAAmB,GAAG,CAAC,GAAG,IAAI,CAAC,sBAAqB;AACjE;AACA,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC;AAC3D,YAAY,MAAM,IAAI,KAAK,CAAC,CAAC,sEAAsE,EAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;AACnI;AACA,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AAC3D,YAAY,KAAK,EAAE,YAAY;AAC/B,YAAY,IAAI,EAAE,oBAAoB,GAAGA,6BAA8B,GAAG,eAAe;AACzF,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAC;AAC/C,KAAK;AACL;AACA,IAAI,0BAA0B,CAAC,UAAU,EAAE;AAC3C,QAAQ,MAAM,EAAE,gCAAgC,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,OAAM;AACvE;AACA,QAAQ,IAAI,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,mBAAmB,EAAC;AAC9E,QAAQ,IAAI,CAAC,GAAG,gBAAe;AAC/B,QAAQ,IAAI,CAAC,GAAG,EAAC;AACjB;AACA,QAAQ,IAAI,eAAe,GAAG,gCAAgC,EAAE;AAChE,YAAY,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAC;AACtD,YAAY,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,EAAC;AAC9C,YAAY,eAAe,GAAG,CAAC,GAAG,EAAC;AACnC,SAAS;AACT;AACA,QAAQ,OAAO;AACf,YAAY,eAAe;AAC3B,YAAY,YAAY,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE;AAClC,SAAS;AACT,KAAK;AACL;AACA,IAAI,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAE;AACvC;AACA,QAAQ,MAAM,EAAE,eAAe,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,0BAA0B,CAAC,KAAK,EAAC;AACxF;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACxD,YAAY,IAAI,EAAE,eAAe,GAAG,CAAC;AACrC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,KAAK,EAAE,uBAAuB;AAC1C,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;AAC9C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE;AACxD,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAC/D,YAAY,KAAK,EAAE,0BAA0B;AAC7C,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,YAAY;AACzC,gBAAgB,UAAU,EAAE,kBAAkB;AAC9C,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,qBAAqB,EAAE,IAAI,CAAC,mBAAmB;AACnE,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AAChF;AACA,QAAQ,IAAI,eAAe,GAAG,CAAC,EAAE;AACjC;AACA,YAAY,IAAI,CAAC,qBAAqB,CAAC,cAAc,EAAE,eAAe,EAAC;AACvE;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACvE,gBAAgB,KAAK,EAAE,+BAA+B;AACtD,gBAAgB,MAAM,EAAE,cAAc;AACtC,gBAAgB,OAAO,EAAE;AACzB,oBAAoB,MAAM,EAAE,IAAI,CAAC,YAAY;AAC7C,oBAAoB,UAAU,EAAE,gBAAgB;AAChD,oBAAoB,SAAS,EAAE;AAC/B,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AAC3E,qBAAqB;AACrB,iBAAiB;AACjB,aAAa,EAAC;AACd;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AACxF,SAAS;AACT,KAAK;AACL;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE;AACnB,QAAQ,KAAK,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,IAAI,IAAI,CAAC,SAAS,EAAE;AAC5E,YAAY,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAC;AACtC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,SAAS,EAAC;AAC3C,YAAY,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AACtE,SAAS;AACT,KAAK;AACL;;ACzJA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACtFD;AACA;AACA;AACA;AACA;AACA,MAAM,wBAAwB,cAAc,CAAC;AAC7C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AClHD,MAAM,sBAAsB,cAAc,CAAC;AAC3C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACrCD,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,MAAM;AACd,QAAQ,KAAK;AACb,QAAQ,SAAS,GAAG,EAAE;AACtB,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,aAAa,GAAG,KAAK;AAC7B,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,GAAG,EAAE,EAAE;AACZ,QAAQ,IAAI,MAAM,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC;AACjE,QAAQ,IAAI,IAAI,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AACpE,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AAC9F,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC;AAC1G,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC;AAC3I;AACA,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,SAAS,GAAG,UAAS;AAClC,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,aAAa,GAAG,cAAa;AAC1C,QAAQ,IAAI,CAAC,oBAAoB,GAAG,qBAAoB;AACxD;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,qBAAqB,EAAC;AAC5E,QAAQ,IAAI,CAAC,4BAA4B,GAAG,CAAC,GAAG,IAAI,CAAC,gBAAe;AACpE;AACA,QAAQ,IAAI,CAAC,UAAU,IAAI,MAAM,IAAI,IAAI,EAAC;AAC1C;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,GAAE;AAC9B,QAAQ,IAAI,CAAC,aAAa,GAAG,GAAE;AAC/B,QAAQ,IAAI,CAAC,OAAO,GAAG,GAAE;AACzB,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA;AACA,QAAQ,IAAI,CAAC,0BAA0B,GAAE;AACzC;AACA;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAE;AACpC;AACA;AACA,QAAQ,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,EAAC;AACzC;AACA;AACA,QAAQ,IAAI,CAAC,gBAAgB,GAAE;AAC/B,KAAK;AACL;AACA,IAAI,0BAA0B,GAAG;AACjC,QAAQ,MAAM,EAAE,gCAAgC,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,OAAM;AACvE;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG;AAC5B,YAAY,CAAC,EAAE,IAAI,CAAC,eAAe;AACnC,YAAY,CAAC,EAAE,CAAC;AAChB,UAAS;AACT;AACA,QAAQ,IAAI,IAAI,CAAC,eAAe,GAAG,gCAAgC,EAAE;AACrE,YAAY,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAC;AACjE,YAAY,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,EAAC;AACzD;AACA,YAAY,IAAI,CAAC,YAAY,GAAG,EAAE,CAAC,EAAE,CAAC,GAAE;AACxC,SAAS;AACT,KAAK;AACL;AACA,IAAI,qBAAqB,GAAG;AAC5B;AACA,QAAQ,MAAM,aAAa,GAAG,CAAC,MAAM,KAAK;AAC1C,YAAY,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;AACrC,0BAA0B,MAAM,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AAChF,0BAA0B,IAAI,CAAC,IAAI,CAAC;AACpC,UAAS;AACT;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,GAAGC,wBAA4B,GAAG,gBAAe;AAClG;AACA,QAAQ,IAAI,CAAC,aAAa,GAAG;AAC7B,YAAY,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACrD,gBAAgB,KAAK,EAAE,sBAAsB;AAC7C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAG,cAAc,GAAG,aAAa,CAAC,cAAc,CAAC;AACtF,aAAa,CAAC;AACd,YAAY,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACpD,gBAAgB,KAAK,EAAE,oBAAoB;AAC3C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAGC,sBAAa,GAAG,aAAa,CAACA,sBAAa,CAAC;AACpF,aAAa,CAAC;AACd,UAAS;AACT,KAAK;AACL;AACA,IAAI,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE;AACjC;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACvD,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,eAAe,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACnF,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,IAAI,EAAE,IAAI,CAAC,4BAA4B,GAAG,CAAC;AACvD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,GAAG;AACvB,YAAY,IAAI,EAAE,IAAI;AACtB,YAAY,MAAM,EAAE,MAAM;AAC1B,YAAY,OAAO,EAAE,aAAa;AAClC,YAAY,SAAS,EAAE,eAAe;AACtC,YAAY,cAAc,EAAE,oBAAoB;AAChD,YAAY,cAAc,EAAE,oBAAoB;AAChD,UAAS;AACT,KAAK;AACL;AACA;AACA,IAAI,gBAAgB,GAAG;AACvB,QAAQ,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,EAAE;AAC1D;AACA,YAAY,MAAM,IAAI,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC,EAAC;AAC5C,YAAY,MAAM,MAAM,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAO;AAC7E,YAAY,MAAM,QAAQ,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,UAAS;AACjF,YAAY,MAAM,OAAO,KAAK,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAI;AAC7E,YAAY,MAAM,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAM;AACjF;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAC;AAC1F;AACA;AACA,YAAY,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC;AACxD,gBAAgB,MAAM,EAAE,IAAI,CAAC,MAAM;AACnC,gBAAgB,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc;AACjD,gBAAgB,KAAK,EAAE,IAAI,CAAC,4BAA4B;AACxD,gBAAgB,cAAc,EAAE,IAAI,CAAC,cAAc;AACnD,gBAAgB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;AAC/D,aAAa,EAAC;AACd;AACA;AACA,YAAY,MAAM,eAAe,GAAG,IAAI,CAAC,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAC;AAC3G;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,gBAAgB,EAAE,eAAe,EAAE,eAAe,EAAE,EAAC;AACvF,SAAS;AACT,KAAK;AACL;AACA,IAAI,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE;AACrD,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,GAAG,SAAS,GAAG,mBAAmB,EAAE;AAC1F,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB;AACA,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AAClD,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACnE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ;AACnD,gBAAgB,UAAU,EAAE,YAAY;AACxC,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,gBAAgB;AACtC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA,IAAI,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE;AACvE,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AAC7D,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AACnD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;AACjD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AACtD,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE;AACvD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,OAAO;AAClD,gBAAgB,UAAU,EAAE,oBAAoB;AAChD,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,eAAe;AACrC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE;AACnB,QAAQ,KAAK,MAAM,EAAE,gBAAgB,EAAE,eAAe,EAAE,eAAe,EAAE,IAAI,IAAI,CAAC,SAAS,EAAE;AAC7F,YAAY,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,QAAQ,EAAC;AACvD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,gBAAgB,CAAC,SAAS,EAAC;AAC5D,YAAY,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AAChF;AACA,YAAY,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAC;AAC1C;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,QAAQ,EAAC;AACtD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,eAAe,CAAC,SAAS,EAAC;AAC3D,YAAY,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AAChF,SAAS;AACT,KAAK;AACL;;;;"} \ No newline at end of file +{"version":3,"file":"radix-sort-esm.js","sources":["../../src/shaders/prefix_sum.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/utils.js","../../src/PrefixSumKernel.js","../../src/shaders/radix_sort.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort_reorder.js","../../src/shaders/check_sort.js","../../src/CheckSortKernel.js","../../src/RadixSortKernel.js"],"sourcesContent":["const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","/**\r\n * Find the best dispatch size x and y dimensions to minimize unused workgroups\r\n * \r\n * @param {GPUDevice} device - The GPU device\r\n * @param {int} workgroup_count - Number of workgroups to dispatch\r\n * @returns \r\n */\r\nfunction find_optimal_dispatch_size(device, workgroup_count) {\r\n const dispatchSize = { \r\n x: workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(workgroup_count))\r\n const y = Math.ceil(workgroup_count / x)\r\n \r\n dispatchSize.x = x\r\n dispatchSize.y = y\r\n }\r\n\r\n return dispatchSize\r\n}\r\n\r\nfunction create_buffer_from_data({device, label, data, usage = 0}) {\r\n const dispatchSizes = device.createBuffer({\r\n label: label,\r\n usage: usage,\r\n size: data.length * 4,\r\n mappedAtCreation: true\r\n })\r\n\r\n const dispatchData = new Uint32Array(dispatchSizes.getMappedRange())\r\n dispatchData.set(data)\r\n dispatchSizes.unmap()\r\n\r\n return dispatchSizes\r\n}\r\n\r\nexport {\r\n find_optimal_dispatch_size,\r\n create_buffer_from_data,\r\n}","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const workgroup_count = Math.ceil(count / this.items_per_workgroup)\r\n const dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n label: 'prefix-sum-block-sum',\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n get_dispatch_chain() {\r\n return this.pipelines.flatMap(p => [ p.dispatchSize.x, p.dispatchSize.y, 1 ])\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n \r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","const checkSortSource = (isFirstPass = false, isLastPass = false, isFullCheck = false) => /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var output: array;\r\n@group(0) @binding(2) var original: array;\r\n@group(0) @binding(3) var is_sorted: u32;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ELEMENT_COUNT: u32;\r\noverride START_ELEMENT: u32;\r\n\r\nvar s_data: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn check_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT;\r\n let GID = TID + WID; // Global thread ID\r\n\r\n // Load data into shared memory\r\n ${ isFirstPass ? first_pass_load_data : \"s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);\" }\r\n\r\n // Perform parallel reduction\r\n for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { \r\n workgroupBarrier(); \r\n if (TID % (2u * d) == 0u) {\r\n s_data[TID] += s_data[TID + d];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Write reduction result\r\n ${ isLastPass ? last_pass(isFullCheck) : write_reduction_result }\r\n}`\r\n\r\nconst write_reduction_result = /* wgsl */ `\r\n if (TID == 0) {\r\n output[WORKGROUP_ID] = s_data[0];\r\n }\r\n`\r\n\r\nconst first_pass_load_data = /* wgsl */ `\r\n let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n\r\n // Load current element into shared memory\r\n // Also load next element for comparison\r\n let elm = select(0u, input[GID], GID < ELEMENT_COUNT);\r\n let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1);\r\n s_data[TID] = elm;\r\n workgroupBarrier();\r\n\r\n s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next);\r\n`\r\n\r\nconst last_pass = (isFullCheck) => /* wgsl */ `\r\n let fullDispatchLength = arrayLength(&output);\r\n let dispatchIndex = TID * 3;\r\n\r\n if (dispatchIndex >= fullDispatchLength) {\r\n return;\r\n }\r\n\r\n ${isFullCheck ? last_pass_full : last_pass_fast}\r\n`\r\n\r\nconst last_pass_fast = /* wgsl */ `\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u);\r\n`\r\n\r\nconst last_pass_full = /* wgsl */ `\r\n if (TID == 0 && s_data[0] == 0) {\r\n is_sorted = 1u;\r\n }\r\n\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0);\r\n`\r\nexport default checkSortSource","import checkSortSource from \"./shaders/check_sort\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass CheckSortKernel {\r\n /**\r\n * CheckSortKernel - Performs a parralel reduction to check if an array is sorted.\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - The buffer containing the data to check\r\n * @param {GPUBuffer} result - The result dispatch size buffer\r\n * @param {GPUBuffer} original - The original dispatch size buffer\r\n * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted\r\n * @param {number} count - The number of elements to check\r\n * @param {number} start - The index to start checking from\r\n * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check\r\n * @param {object} workgroup_size - The workgroup size in x and y dimensions\r\n */\r\n constructor({\r\n device,\r\n data,\r\n result,\r\n original,\r\n is_sorted,\r\n count,\r\n start = 0,\r\n full_check = true,\r\n workgroup_size = { x: 16, y: 16 },\r\n }) {\r\n this.device = device\r\n this.count = count\r\n this.start = start\r\n this.full_check = full_check\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n\r\n this.pipelines = []\r\n\r\n this.buffers = {\r\n data, \r\n result, \r\n original, \r\n is_sorted,\r\n outputs: []\r\n }\r\n\r\n this.create_passes_recursive(data, count)\r\n }\r\n\r\n // Find the best dispatch size for each pass to minimize unused workgroups\r\n static find_optimal_dispatch_chain(device, item_count, workgroup_size) {\r\n const threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n const sizes = []\r\n\r\n do {\r\n // Number of workgroups required to process all items\r\n const target_workgroup_count = Math.ceil(item_count / threads_per_workgroup)\r\n \r\n // Optimal dispatch size and updated workgroup count\r\n const dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count)\r\n \r\n sizes.push(dispatchSize.x, dispatchSize.y, 1)\r\n item_count = target_workgroup_count\r\n } while (item_count > 1)\r\n \r\n return sizes\r\n }\r\n\r\n create_passes_recursive(buffer, count, passIndex = 0) {\r\n const workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n\r\n const isFirstPass = passIndex === 0\r\n const isLastPass = workgroup_count <= 1\r\n\r\n const outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({\r\n label: `check-sort-${this.full_check ? 'full' : 'fast'}-${passIndex}`,\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n // Last pass bindings\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n }, {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: buffer }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outputBuffer }\r\n },\r\n // Last pass buffers\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n resource: { buffer: this.buffers.original }\r\n }, {\r\n binding: 3,\r\n resource: { buffer: this.buffers.is_sorted }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [bindGroupLayout]\r\n })\r\n\r\n const element_count = isFirstPass ? this.start + count : count\r\n const start_element = isFirstPass ? this.start : 0\r\n\r\n const checkSortPipeline = this.device.createComputePipeline({\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.device.createShaderModule({\r\n code: checkSortSource(isFirstPass, isLastPass, this.full_check),\r\n label: 'check-sort',\r\n }),\r\n entryPoint: 'check_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': element_count,\r\n 'START_ELEMENT': start_element,\r\n },\r\n }\r\n })\r\n\r\n this.buffers.outputs.push(outputBuffer)\r\n this.pipelines.push({ pipeline: checkSortPipeline, bindGroup })\r\n \r\n if (!isLastPass) {\r\n this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1)\r\n }\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n\r\n const dispatchIndirect = (this.full_check || i < this.pipelines.length - 1)\r\n\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n\r\n if (dispatchIndirect)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n else\r\n // Only the last dispatch of the fast check kernel is constant to (1, 1, 1)\r\n pass.dispatchWorkgroups(1, 1, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default CheckSortKernel","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\nimport CheckSortKernel from \"./CheckSortKernel\"\r\nimport { create_buffer_from_data, find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} check_order - Enable \"order checking\" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false)\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n check_order = false,\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.check_order = check_order\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n this.kernels = {}\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count)\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines(keys, values)\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_pipelines(keys, values) { \r\n // Block prefix sum kernel \r\n const { prefixSumKernel, prefixBlockSumBuffer } = this.create_prefix_sum_kernel()\r\n\r\n // Indirect dispatch buffers\r\n const dispatchData = this.calculate_dispatch_sizes(prefixSumKernel)\r\n\r\n // GPU buffers\r\n this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData)\r\n\r\n // Check sort kernels\r\n this.create_check_sort_kernels(this.buffers.keys, dispatchData)\r\n\r\n // Radix sort passes for every 2 bits\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, reorderPipeline })\r\n }\r\n }\r\n\r\n create_prefix_sum_kernel() {\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-prefix-block-sum',\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create block prefix sum kernel\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: prefixBlockSumBuffer, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n\r\n this.kernels.prefixSum = prefixSumKernel\r\n\r\n return { prefixSumKernel, prefixBlockSumBuffer }\r\n }\r\n\r\n calculate_dispatch_sizes(prefixSumKernel) {\r\n // Prefix sum dispatch sizes\r\n const prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain()\r\n\r\n // Check sort element count (fast/full)\r\n const check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4)\r\n const check_sort_full_count = this.count - check_sort_fast_count\r\n const start_full = check_sort_fast_count - 1\r\n\r\n // Check sort dispatch sizes\r\n const dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size)\r\n const dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size)\r\n\r\n // Initial dispatch sizes\r\n const initialDispatch = [\r\n this.dispatchSize.x, this.dispatchSize.y, 1, // Radix Sort + Reorder\r\n ...dispatchSizesFast.slice(0, 3), // Check sort fast\r\n ...prefixSumDispatchSize // Prefix Sum\r\n ]\r\n\r\n // Dispatch offsets in main buffer\r\n this.dispatchOffsets = {\r\n radix_sort: 0,\r\n check_sort_fast: 3 * 4,\r\n prefix_sum: 6 * 4\r\n }\r\n\r\n return {\r\n initialDispatch,\r\n dispatchSizesFull,\r\n check_sort_fast_count, \r\n check_sort_full_count, \r\n start_full \r\n }\r\n }\r\n\r\n create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n label: 'radix-sort-tmp-keys',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n label: 'radix-sort-tmp-values',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-local-prefix-sum',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Dispatch sizes (radix sort, check sort, prefix sum)\r\n const dispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const originalDispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size-original',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Dispatch sizes (full sort)\r\n const checkSortFullDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const checkSortFullOriginalDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size-original',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Flag to tell if the data is sorted\r\n const isSortedBuffer = create_buffer_from_data({\r\n label: 'is-sorted',\r\n device: this.device, \r\n data: new Uint32Array([0]), \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n \r\n dispatchSize: dispatchBuffer,\r\n originalDispatchSize: originalDispatchBuffer,\r\n checkSortFullDispatchSize: checkSortFullDispatchBuffer,\r\n originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer,\r\n isSorted: isSortedBuffer,\r\n }\r\n }\r\n\r\n create_check_sort_kernels(inKeys, checkSortPartitionData) {\r\n // Skip check sort if disabled\r\n if (!this.check_order) {\r\n return [ null, null ]\r\n }\r\n\r\n const { check_sort_fast_count, check_sort_full_count, start_full } = checkSortPartitionData\r\n\r\n // Create the full pass\r\n const checkSortFull = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.dispatchSize,\r\n original: this.buffers.originalDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_full_count,\r\n start: start_full,\r\n full_check: true,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n // Create the fast pass\r\n const checkSortFast = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.checkSortFullDispatchSize,\r\n original: this.buffers.originalCheckSortFullDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_fast_count,\r\n full_check: false,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) {\r\n console.warn(`Warning: workgroup size is too small to enable check sort optimization, disabling...`)\r\n this.check_order = false\r\n return [ null, null ]\r\n }\r\n\r\n this.kernels.checkSortFast = checkSortFast\r\n this.kernels.checkSortFull = checkSortFull\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) { \r\n for (let i = 0; i < this.bit_count / 2; i++) {\r\n const { blockSumPipeline, reorderPipeline } = this.pipelines[i]\r\n\r\n if (this.check_order && i % 2 == 0) {\r\n this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast)\r\n this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize)\r\n }\r\n \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n\r\n this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel"],"names":["prefixSumSource_NoBankConflict","radixSortSource_LocalShuffle","reorderSource"],"mappings":"AAAA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACzFD;AACA;AACA;AACA;AACA;AACA,MAAM,6BAA6B,cAAc,CAAC;AAClD;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AChHD;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAAS,0BAA0B,CAAC,MAAM,EAAE,eAAe,EAAE;AAC7D,IAAI,MAAM,YAAY,GAAG;AACzB,QAAQ,CAAC,EAAE,eAAe;AAC1B,QAAQ,CAAC,EAAE,CAAC;AACZ,MAAK;AACL;AACA,IAAI,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC,gCAAgC,EAAE;AAC1E,QAAQ,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAC;AACxD,QAAQ,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,EAAC;AAChD;AACA,QAAQ,YAAY,CAAC,CAAC,GAAG,EAAC;AAC1B,QAAQ,YAAY,CAAC,CAAC,GAAG,EAAC;AAC1B,KAAK;AACL;AACA,IAAI,OAAO,YAAY;AACvB,CAAC;AACD;AACA,SAAS,uBAAuB,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC,CAAC,EAAE;AACnE,IAAI,MAAM,aAAa,GAAG,MAAM,CAAC,YAAY,CAAC;AAC9C,QAAQ,KAAK,EAAE,KAAK;AACpB,QAAQ,KAAK,EAAE,KAAK;AACpB,QAAQ,IAAI,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC;AAC7B,QAAQ,gBAAgB,EAAE,IAAI;AAC9B,KAAK,EAAC;AACN;AACA,IAAI,MAAM,YAAY,GAAG,IAAI,WAAW,CAAC,aAAa,CAAC,cAAc,EAAE,EAAC;AACxE,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,EAAC;AAC1B,IAAI,aAAa,CAAC,KAAK,GAAE;AACzB;AACA,IAAI,OAAO,aAAa;AACxB;;ACjCA,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,KAAK;AACb,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,EAAE;AACP,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,mBAAmB,GAAG,CAAC,GAAG,IAAI,CAAC,sBAAqB;AACjE;AACA,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC;AAC3D,YAAY,MAAM,IAAI,KAAK,CAAC,CAAC,sEAAsE,EAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;AACnI;AACA,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AAC3D,YAAY,KAAK,EAAE,YAAY;AAC/B,YAAY,IAAI,EAAE,oBAAoB,GAAGA,6BAA8B,GAAG,eAAe;AACzF,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAC;AAC/C,KAAK;AACL;AACA,IAAI,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAE;AACvC;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,mBAAmB,EAAC;AAC3E,QAAQ,MAAM,YAAY,GAAG,0BAA0B,CAAC,IAAI,CAAC,MAAM,EAAE,eAAe,EAAC;AACrF;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACxD,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,IAAI,EAAE,eAAe,GAAG,CAAC;AACrC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,KAAK,EAAE,uBAAuB;AAC1C,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;AAC9C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE;AACxD,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAC/D,YAAY,KAAK,EAAE,0BAA0B;AAC7C,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,YAAY;AACzC,gBAAgB,UAAU,EAAE,kBAAkB;AAC9C,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,qBAAqB,EAAE,IAAI,CAAC,mBAAmB;AACnE,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AAChF;AACA,QAAQ,IAAI,eAAe,GAAG,CAAC,EAAE;AACjC;AACA,YAAY,IAAI,CAAC,qBAAqB,CAAC,cAAc,EAAE,eAAe,EAAC;AACvE;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACvE,gBAAgB,KAAK,EAAE,+BAA+B;AACtD,gBAAgB,MAAM,EAAE,cAAc;AACtC,gBAAgB,OAAO,EAAE;AACzB,oBAAoB,MAAM,EAAE,IAAI,CAAC,YAAY;AAC7C,oBAAoB,UAAU,EAAE,gBAAgB;AAChD,oBAAoB,SAAS,EAAE;AAC/B,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AACjE,wBAAwB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AAC3E,qBAAqB;AACrB,iBAAiB;AACjB,aAAa,EAAC;AACd;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,YAAY,EAAE,EAAC;AACxF,SAAS;AACT,KAAK;AACL;AACA,IAAI,kBAAkB,GAAG;AACzB,QAAQ,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC;AACrF,KAAK;AACL;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE,YAAY,EAAE,MAAM,GAAG,CAAC,EAAE;AAC7C,QAAQ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AACxD,YAAY,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAC;AAC7D;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAC;AACtC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,SAAS,EAAC;AAC3C,YAAY,IAAI,CAAC,0BAA0B,CAAC,YAAY,EAAE,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAC;AAC7E,SAAS;AACT,KAAK;AACL;;AC/IA,MAAM,eAAe,cAAc,CAAC;AACpC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;ACtFD;AACA;AACA;AACA;AACA;AACA,MAAM,wBAAwB,cAAc,CAAC;AAC7C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AClHD,MAAM,sBAAsB,cAAc,CAAC;AAC3C;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;;AC1CD,MAAM,eAAe,GAAG,CAAC,WAAW,GAAG,KAAK,EAAE,UAAU,GAAG,KAAK,EAAE,WAAW,GAAG,KAAK,gBAAgB,CAAC;AACtG;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,GAAG,WAAW,GAAG,oBAAoB,GAAG,4DAA4D,EAAE;AAC1G;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,GAAG,UAAU,GAAG,SAAS,CAAC,WAAW,CAAC,GAAG,sBAAsB,EAAE;AACrE,CAAC,EAAC;AACF;AACA,MAAM,sBAAsB,cAAc,CAAC;AAC3C;AACA;AACA;AACA,EAAC;AACD;AACA,MAAM,oBAAoB,cAAc,CAAC;AACzC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,EAAC;AACD;AACA,MAAM,SAAS,GAAG,CAAC,WAAW,gBAAgB,CAAC;AAC/C;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,EAAE,WAAW,GAAG,cAAc,GAAG,cAAc,CAAC;AACpD,EAAC;AACD;AACA,MAAM,cAAc,cAAc,CAAC;AACnC;AACA,EAAC;AACD;AACA,MAAM,cAAc,cAAc,CAAC;AACnC;AACA;AACA;AACA;AACA;AACA;;AC9EA,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,MAAM;AACd,QAAQ,QAAQ;AAChB,QAAQ,SAAS;AACjB,QAAQ,KAAK;AACb,QAAQ,KAAK,GAAG,CAAC;AACjB,QAAQ,UAAU,GAAG,IAAI;AACzB,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,KAAK,EAAE;AACP,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,UAAU,GAAG,WAAU;AACpC,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE;AACA,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B;AACA,QAAQ,IAAI,CAAC,OAAO,GAAG;AACvB,YAAY,IAAI;AAChB,YAAY,MAAM;AAClB,YAAY,QAAQ;AACpB,YAAY,SAAS;AACrB,YAAY,OAAO,EAAE,EAAE;AACvB,UAAS;AACT;AACA,QAAQ,IAAI,CAAC,uBAAuB,CAAC,IAAI,EAAE,KAAK,EAAC;AACjD,KAAK;AACL;AACA;AACA,IAAI,OAAO,2BAA2B,CAAC,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE;AAC3E,QAAQ,MAAM,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACzE,QAAQ,MAAM,KAAK,GAAG,GAAE;AACxB;AACA,QAAQ,GAAG;AACX;AACA,YAAY,MAAM,sBAAsB,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,qBAAqB,EAAC;AACxF;AACA;AACA,YAAY,MAAM,YAAY,GAAG,0BAA0B,CAAC,MAAM,EAAE,sBAAsB,EAAC;AAC3F;AACA,YAAY,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EAAE,CAAC,EAAC;AACzD,YAAY,UAAU,GAAG,uBAAsB;AAC/C,SAAS,QAAQ,UAAU,GAAG,CAAC,CAAC;AAChC;AACA,QAAQ,OAAO,KAAK;AACpB,KAAK;AACL;AACA,IAAI,uBAAuB,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,GAAG,CAAC,EAAE;AAC1D,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,qBAAqB,EAAC;AAC7E;AACA,QAAQ,MAAM,WAAW,GAAG,SAAS,KAAK,EAAC;AAC3C,QAAQ,MAAM,UAAU,GAAG,eAAe,IAAI,EAAC;AAC/C;AACA,QAAQ,MAAM,YAAY,GAAG,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACzF,YAAY,KAAK,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;AACjF,YAAY,IAAI,EAAE,eAAe,GAAG,CAAC;AACrC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB;AACA,gBAAgB,IAAI,UAAU,GAAG,CAAC;AAClC,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB,EAAE;AACnB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE;AACtD,iBAAiB;AACjB;AACA,gBAAgB,IAAI,UAAU,GAAG,CAAC;AAClC,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE;AAC/D,iBAAiB,EAAE;AACnB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE;AAChE,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,CAAC,eAAe,CAAC;AAC/C,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,aAAa,GAAG,WAAW,GAAG,IAAI,CAAC,KAAK,GAAG,KAAK,GAAG,MAAK;AACtE,QAAQ,MAAM,aAAa,GAAG,WAAW,GAAG,IAAI,CAAC,KAAK,GAAG,EAAC;AAC1D;AACA,QAAQ,MAAM,iBAAiB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACpE,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACvD,oBAAoB,IAAI,EAAE,eAAe,CAAC,WAAW,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC;AACnF,oBAAoB,KAAK,EAAE,YAAY;AACvC,iBAAiB,CAAC;AAClB,gBAAgB,UAAU,EAAE,YAAY;AACxC,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,aAAa;AAClD,oBAAoB,eAAe,EAAE,aAAa;AAClD,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,YAAY,EAAC;AAC/C,QAAQ,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,iBAAiB,EAAE,SAAS,EAAE,EAAC;AACvE;AACA,QAAQ,IAAI,CAAC,UAAU,EAAE;AACzB,YAAY,IAAI,CAAC,uBAAuB,CAAC,YAAY,EAAE,eAAe,EAAE,SAAS,GAAG,CAAC,EAAC;AACtF,SAAS;AACT,KAAK;AACL;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE,YAAY,EAAE,MAAM,GAAG,CAAC,EAAE;AAC7C,QAAQ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AACxD,YAAY,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAC;AAC7D;AACA,YAAY,MAAM,gBAAgB,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAC;AACvF;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAC;AACtC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,SAAS,EAAC;AAC3C;AACA,YAAY,IAAI,gBAAgB;AAChC,gBAAgB,IAAI,CAAC,0BAA0B,CAAC,YAAY,EAAE,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAC;AACjF;AACA;AACA,gBAAgB,IAAI,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAC;AAChD,SAAS;AACT,KAAK;AACL;;ACxKA,MAAM,eAAe,CAAC;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,WAAW,CAAC;AAChB,QAAQ,MAAM;AACd,QAAQ,IAAI;AACZ,QAAQ,MAAM;AACd,QAAQ,KAAK;AACb,QAAQ,SAAS,GAAG,EAAE;AACtB,QAAQ,cAAc,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE;AACzC,QAAQ,WAAW,GAAG,KAAK;AAC3B,QAAQ,aAAa,GAAG,KAAK;AAC7B,QAAQ,oBAAoB,GAAG,KAAK;AACpC,KAAK,GAAG,EAAE,EAAE;AACZ,QAAQ,IAAI,MAAM,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC;AACjE,QAAQ,IAAI,IAAI,IAAI,IAAI,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AACpE,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC;AAC9F,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,IAAI,SAAS,GAAG,EAAE,EAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC;AAC5H,QAAQ,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC;AAC3I,QAAQ,IAAI,SAAS,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC;AACpF;AACA,QAAQ,IAAI,CAAC,MAAM,GAAG,OAAM;AAC5B,QAAQ,IAAI,CAAC,KAAK,GAAG,MAAK;AAC1B,QAAQ,IAAI,CAAC,SAAS,GAAG,UAAS;AAClC,QAAQ,IAAI,CAAC,cAAc,GAAG,eAAc;AAC5C,QAAQ,IAAI,CAAC,WAAW,GAAG,YAAW;AACtC,QAAQ,IAAI,CAAC,aAAa,GAAG,cAAa;AAC1C,QAAQ,IAAI,CAAC,oBAAoB,GAAG,qBAAoB;AACxD;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAG,cAAc,CAAC,CAAC,GAAG,cAAc,CAAC,EAAC;AACxE,QAAQ,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,qBAAqB,EAAC;AAC5E,QAAQ,IAAI,CAAC,4BAA4B,GAAG,CAAC,GAAG,IAAI,CAAC,gBAAe;AACpE;AACA,QAAQ,IAAI,CAAC,UAAU,IAAI,MAAM,IAAI,IAAI,EAAC;AAC1C;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,GAAE;AAC9B,QAAQ,IAAI,CAAC,aAAa,GAAG,GAAE;AAC/B,QAAQ,IAAI,CAAC,OAAO,GAAG,GAAE;AACzB,QAAQ,IAAI,CAAC,SAAS,GAAG,GAAE;AAC3B,QAAQ,IAAI,CAAC,OAAO,GAAG,GAAE;AACzB;AACA;AACA,QAAQ,IAAI,CAAC,YAAY,GAAG,0BAA0B,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,eAAe,EAAC;AACzF;AACA;AACA,QAAQ,IAAI,CAAC,qBAAqB,GAAE;AACpC;AACA;AACA,QAAQ,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAC;AAC3C,KAAK;AACL;AACA,IAAI,qBAAqB,GAAG;AAC5B;AACA,QAAQ,MAAM,aAAa,GAAG,CAAC,MAAM,KAAK;AAC1C,YAAY,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;AACrC,0BAA0B,MAAM,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AAChF,0BAA0B,IAAI,CAAC,IAAI,CAAC;AACpC,UAAS;AACT;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,aAAa,GAAGC,wBAA4B,GAAG,gBAAe;AAClG;AACA,QAAQ,IAAI,CAAC,aAAa,GAAG;AAC7B,YAAY,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACrD,gBAAgB,KAAK,EAAE,sBAAsB;AAC7C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAG,cAAc,GAAG,aAAa,CAAC,cAAc,CAAC;AACtF,aAAa,CAAC;AACd,YAAY,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC;AACpD,gBAAgB,KAAK,EAAE,oBAAoB;AAC3C,gBAAgB,IAAI,EAAE,IAAI,CAAC,UAAU,GAAGC,sBAAa,GAAG,aAAa,CAACA,sBAAa,CAAC;AACpF,aAAa,CAAC;AACd,UAAS;AACT,KAAK;AACL;AACA,IAAI,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE;AACnC;AACA,QAAQ,MAAM,EAAE,eAAe,EAAE,oBAAoB,EAAE,GAAG,IAAI,CAAC,wBAAwB,GAAE;AACzF;AACA;AACA,QAAQ,MAAM,YAAY,GAAG,IAAI,CAAC,wBAAwB,CAAC,eAAe,EAAC;AAC3E;AACA;AACA,QAAQ,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,oBAAoB,EAAE,YAAY,EAAC;AAC7E;AACA;AACA,QAAQ,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,YAAY,EAAC;AACvE;AACA;AACA,QAAQ,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,EAAE;AAC1D;AACA,YAAY,MAAM,IAAI,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC,EAAC;AAC5C,YAAY,MAAM,MAAM,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAO;AAC7E,YAAY,MAAM,QAAQ,IAAI,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,UAAS;AACjF,YAAY,MAAM,OAAO,KAAK,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAI;AAC7E,YAAY,MAAM,SAAS,GAAG,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAM;AACjF;AACA;AACA,YAAY,MAAM,gBAAgB,GAAG,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAC;AAC1F;AACA;AACA,YAAY,MAAM,eAAe,GAAG,IAAI,CAAC,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAC;AAC3G;AACA,YAAY,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,gBAAgB,EAAE,eAAe,EAAE,EAAC;AACtE,SAAS;AACT,KAAK;AACL;AACA,IAAI,wBAAwB,GAAG;AAC/B;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,KAAK,EAAE,6BAA6B;AAChD,YAAY,IAAI,EAAE,IAAI,CAAC,4BAA4B,GAAG,CAAC;AACvD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC;AACpD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,oBAAoB;AACtC,YAAY,KAAK,EAAE,IAAI,CAAC,4BAA4B;AACpD,YAAY,cAAc,EAAE,IAAI,CAAC,cAAc;AAC/C,YAAY,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;AAC3D,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,gBAAe;AAChD;AACA,QAAQ,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAE;AACxD,KAAK;AACL;AACA,IAAI,wBAAwB,CAAC,eAAe,EAAE;AAC9C;AACA,QAAQ,MAAM,qBAAqB,GAAG,eAAe,CAAC,kBAAkB,GAAE;AAC1E;AACA;AACA,QAAQ,MAAM,qBAAqB,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,qBAAqB,GAAG,CAAC,EAAC;AAC1F,QAAQ,MAAM,qBAAqB,GAAG,IAAI,CAAC,KAAK,GAAG,sBAAqB;AACxE,QAAQ,MAAM,UAAU,GAAG,qBAAqB,GAAG,EAAC;AACpD;AACA;AACA,QAAQ,MAAM,iBAAiB,GAAG,eAAe,CAAC,2BAA2B,CAAC,IAAI,CAAC,MAAM,EAAE,qBAAqB,EAAE,IAAI,CAAC,cAAc,EAAC;AACtI,QAAQ,MAAM,iBAAiB,GAAG,eAAe,CAAC,2BAA2B,CAAC,IAAI,CAAC,MAAM,EAAE,qBAAqB,EAAE,IAAI,CAAC,cAAc,EAAC;AACtI;AACA;AACA,QAAQ,MAAM,eAAe,GAAG;AAChC,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;AACvD,YAAY,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;AAC5C,YAAY,GAAG,qBAAqB;AACpC,UAAS;AACT;AACA;AACA,QAAQ,IAAI,CAAC,eAAe,GAAG;AAC/B,YAAY,UAAU,EAAE,CAAC;AACzB,YAAY,eAAe,EAAE,CAAC,GAAG,CAAC;AAClC,YAAY,UAAU,EAAE,CAAC,GAAG,CAAC;AAC7B,UAAS;AACT;AACA,QAAQ,OAAO;AACf,YAAY,eAAe;AAC3B,YAAY,iBAAiB;AAC7B,YAAY,qBAAqB;AACjC,YAAY,qBAAqB;AACjC,YAAY,UAAU;AACtB,SAAS;AACT,KAAK;AACL;AACA,IAAI,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,oBAAoB,EAAE,YAAY,EAAE;AACrE;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACvD,YAAY,KAAK,EAAE,qBAAqB;AACxC,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,eAAe,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AACnF,YAAY,KAAK,EAAE,uBAAuB;AAC1C,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,oBAAoB,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;AAC9D,YAAY,KAAK,EAAE,6BAA6B;AAChD,YAAY,IAAI,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC;AAChC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,uBAAuB,CAAC;AACvD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,KAAK,EAAE,0BAA0B;AAC7C,YAAY,IAAI,EAAE,YAAY,CAAC,eAAe;AAC9C,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,sBAAsB,GAAG,uBAAuB,CAAC;AAC/D,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,KAAK,EAAE,mCAAmC;AACtD,YAAY,IAAI,EAAE,YAAY,CAAC,eAAe;AAC9C,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ;AACnE,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,2BAA2B,GAAG,uBAAuB,CAAC;AACpE,YAAY,KAAK,EAAE,+BAA+B;AAClD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,YAAY,CAAC,iBAAiB;AAChD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV,QAAQ,MAAM,mCAAmC,GAAG,uBAAuB,CAAC;AAC5E,YAAY,KAAK,EAAE,wCAAwC;AAC3D,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,YAAY,CAAC,iBAAiB;AAChD,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ;AACnE,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,cAAc,GAAG,uBAAuB,CAAC;AACvD,YAAY,KAAK,EAAE,WAAW;AAC9B,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;AACtC,YAAY,KAAK,EAAE,cAAc,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ;AAC7F,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,CAAC,OAAO,GAAG;AACvB,YAAY,IAAI,EAAE,IAAI;AACtB,YAAY,MAAM,EAAE,MAAM;AAC1B,YAAY,OAAO,EAAE,aAAa;AAClC,YAAY,SAAS,EAAE,eAAe;AACtC,YAAY,cAAc,EAAE,oBAAoB;AAChD,YAAY,cAAc,EAAE,oBAAoB;AAChD;AACA,YAAY,YAAY,EAAE,cAAc;AACxC,YAAY,oBAAoB,EAAE,sBAAsB;AACxD,YAAY,yBAAyB,EAAE,2BAA2B;AAClE,YAAY,iCAAiC,EAAE,mCAAmC;AAClF,YAAY,QAAQ,EAAE,cAAc;AACpC,UAAS;AACT,KAAK;AACL;AACA,IAAI,yBAAyB,CAAC,MAAM,EAAE,sBAAsB,EAAE;AAC9D;AACA,QAAQ,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;AAC/B,YAAY,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE;AACjC,SAAS;AACT;AACA,QAAQ,MAAM,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,UAAU,EAAE,GAAG,uBAAsB;AACnG;AACA;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;AAClD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,MAAM;AACxB,YAAY,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY;AAC7C,YAAY,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,oBAAoB;AACvD,YAAY,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ;AAC5C,YAAY,KAAK,EAAE,qBAAqB;AACxC,YAAY,KAAK,EAAE,UAAU;AAC7B,YAAY,UAAU,EAAE,IAAI;AAC5B,YAAY,cAAc,EAAE,IAAI,CAAC,cAAc;AAC/C,SAAS,EAAC;AACV;AACA;AACA,QAAQ,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;AAClD,YAAY,MAAM,EAAE,IAAI,CAAC,MAAM;AAC/B,YAAY,IAAI,EAAE,MAAM;AACxB,YAAY,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,yBAAyB;AAC1D,YAAY,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,iCAAiC;AACpE,YAAY,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ;AAC5C,YAAY,KAAK,EAAE,qBAAqB;AACxC,YAAY,UAAU,EAAE,KAAK;AAC7B,YAAY,cAAc,EAAE,IAAI,CAAC,cAAc;AAC/C,SAAS,EAAC;AACV;AACA,QAAQ,IAAI,aAAa,CAAC,qBAAqB,GAAG,aAAa,CAAC,SAAS,CAAC,MAAM,EAAE;AAClF,YAAY,OAAO,CAAC,IAAI,CAAC,CAAC,oFAAoF,CAAC,EAAC;AAChH,YAAY,IAAI,CAAC,WAAW,GAAG,MAAK;AACpC,YAAY,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE;AACjC,SAAS;AACT;AACA,QAAQ,IAAI,CAAC,OAAO,CAAC,aAAa,GAAG,cAAa;AAClD,QAAQ,IAAI,CAAC,OAAO,CAAC,aAAa,GAAG,cAAa;AAClD,KAAK;AACL;AACA,IAAI,yBAAyB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE;AACrD,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,aAAa,GAAG,SAAS,GAAG,mBAAmB,EAAE;AAC1F,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB;AACA,gBAAgB,IAAI,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC;AAC7D,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AAClD,iBAAiB,CAAC,GAAG,EAAE,CAAC;AACxB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AACnE,YAAY,KAAK,EAAE,sBAAsB;AACzC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ;AACnD,gBAAgB,UAAU,EAAE,YAAY;AACxC,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,gBAAgB;AACtC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA,IAAI,uBAAuB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,EAAE;AACvE,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AAC/C,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,UAAU,EAAE,cAAc,CAAC,OAAO;AACtD,oBAAoB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AACzD,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,mBAAmB,EAAE;AAC7D,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,UAAU,EAAE,cAAc,CAAC,OAAO;AAC1D,wBAAwB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;AACnD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;AACtD,YAAY,MAAM,EAAE,eAAe;AACnC,YAAY,OAAO,EAAE;AACrB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE;AAChD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE;AACjD,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB,OAAO,EAAE,CAAC;AAC9B,oBAAoB,QAAQ,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE;AACrE,iBAAiB;AACjB,gBAAgB,IAAI,IAAI,CAAC,UAAU,GAAG;AACtC,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE;AACtD,qBAAqB;AACrB,oBAAoB;AACpB,wBAAwB,OAAO,EAAE,CAAC;AAClC,wBAAwB,QAAQ,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE;AACvD,qBAAqB;AACrB,iBAAiB,GAAG,EAAE,CAAC;AACvB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;AAChE,YAAY,gBAAgB,EAAE,EAAE,eAAe,EAAE;AACjD,SAAS,EAAC;AACV;AACA,QAAQ,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;AAClE,YAAY,KAAK,EAAE,oBAAoB;AACvC,YAAY,MAAM,EAAE,cAAc;AAClC,YAAY,OAAO,EAAE;AACrB,gBAAgB,MAAM,EAAE,IAAI,CAAC,aAAa,CAAC,OAAO;AAClD,gBAAgB,UAAU,EAAE,oBAAoB;AAChD,gBAAgB,SAAS,EAAE;AAC3B,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,kBAAkB,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;AAC7D,oBAAoB,iBAAiB,EAAE,IAAI,CAAC,eAAe;AAC3D,oBAAoB,uBAAuB,EAAE,IAAI,CAAC,qBAAqB;AACvE,oBAAoB,eAAe,EAAE,IAAI,CAAC,KAAK;AAC/C,oBAAoB,aAAa,EAAE,GAAG;AACtC,iBAAiB;AACjB,aAAa;AACb,SAAS,EAAC;AACV;AACA,QAAQ,OAAO;AACf,YAAY,QAAQ,EAAE,eAAe;AACrC,YAAY,SAAS;AACrB,SAAS;AACT,KAAK;AACL;AACA;AACA;AACA;AACA;AACA;AACA,IAAI,QAAQ,CAAC,IAAI,EAAE;AACnB,QAAQ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;AACrD,YAAY,MAAM,EAAE,gBAAgB,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAC;AAC3E;AACA,YAAY,IAAI,IAAI,CAAC,WAAW,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;AAChD,gBAAgB,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,eAAe,EAAC;AAC1H,gBAAgB,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,yBAAyB,EAAC;AACjG,aAAa;AACb;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,QAAQ,EAAC;AACvD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,gBAAgB,CAAC,SAAS,EAAC;AAC5D,YAAY,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,UAAU,EAAC;AACvG;AACA,YAAY,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,UAAU,EAAC;AAC7G;AACA,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC,QAAQ,EAAC;AACtD,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,eAAe,CAAC,SAAS,EAAC;AAC3D,YAAY,IAAI,CAAC,0BAA0B,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,eAAe,CAAC,UAAU,EAAC;AACvG,SAAS;AACT,KAAK;AACL;;;;"} \ No newline at end of file diff --git a/dist/umd/radix-sort-umd.js b/dist/umd/radix-sort-umd.js index 169c0a5..da60cbb 100644 --- a/dist/umd/radix-sort-umd.js +++ b/dist/umd/radix-sort-umd.js @@ -26,54 +26,6 @@ writable: !1 }), e; } - function _createForOfIteratorHelper(r, e) { - var t = "undefined" != typeof Symbol && r[Symbol.iterator] || r["@@iterator"]; - if (!t) { - if (Array.isArray(r) || (t = _unsupportedIterableToArray(r)) || e && r && "number" == typeof r.length) { - t && (r = t); - var n = 0, - F = function () {}; - return { - s: F, - n: function () { - return n >= r.length ? { - done: !0 - } : { - done: !1, - value: r[n++] - }; - }, - e: function (r) { - throw r; - }, - f: F - }; - } - throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); - } - var o, - a = !0, - u = !1; - return { - s: function () { - t = t.call(r); - }, - n: function () { - var r = t.next(); - return a = r.done, r; - }, - e: function (r) { - u = !0, o = r; - }, - f: function () { - try { - a || null == t.return || t.return(); - } finally { - if (u) throw o; - } - } - }; - } function _iterableToArray(r) { if ("undefined" != typeof Symbol && null != r[Symbol.iterator] || null != r["@@iterator"]) return Array.from(r); } @@ -114,6 +66,44 @@ */ var prefixSumNoBankConflictSource = /* wgsl */"\n\n@group(0) @binding(0) var items: array;\n@group(0) @binding(1) var blockSums: array;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ITEMS_PER_WORKGROUP: u32;\n\nconst NUM_BANKS: u32 = 32;\nconst LOG_NUM_BANKS: u32 = 5;\n\nfn get_offset(offset: u32) -> u32 {\n // return offset >> LOG_NUM_BANKS; // Conflict-free\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\n}\n\nvar temp: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn reduce_downsweep(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n let ELM_TID = TID * 2; // Element pair local ID\n let ELM_GID = GID * 2; // Element pair global ID\n \n // Load input to shared memory\n let ai: u32 = TID;\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\n let s_ai = ai + get_offset(ai);\n let s_bi = bi + get_offset(bi);\n let g_ai = ai + WID * 2;\n let g_bi = bi + WID * 2;\n temp[s_ai] = items[g_ai];\n temp[s_bi] = items[g_bi];\n\n var offset: u32 = 1;\n\n // Up-sweep (reduce) phase\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n ai += get_offset(ai);\n bi += get_offset(bi);\n temp[bi] += temp[ai];\n }\n\n offset *= 2;\n }\n\n // Save workgroup sum and clear last element\n if (TID == 0) {\n var last_offset = ITEMS_PER_WORKGROUP - 1;\n last_offset += get_offset(last_offset);\n\n blockSums[WORKGROUP_ID] = temp[last_offset];\n temp[last_offset] = 0;\n }\n\n // Down-sweep phase\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\n offset >>= 1;\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n ai += get_offset(ai);\n bi += get_offset(bi);\n\n let t: u32 = temp[ai];\n temp[ai] = temp[bi];\n temp[bi] += t;\n }\n }\n workgroupBarrier();\n\n // Copy result from shared memory to global memory\n items[g_ai] = temp[s_ai];\n items[g_bi] = temp[s_bi];\n}\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn add_block_sums(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n let ELM_ID = GID * 2;\n let blockSum = blockSums[WORKGROUP_ID];\n\n items[ELM_ID] += blockSum;\n items[ELM_ID + 1] += blockSum;\n}"; + /** + * Find the best dispatch size x and y dimensions to minimize unused workgroups + * + * @param {GPUDevice} device - The GPU device + * @param {int} workgroup_count - Number of workgroups to dispatch + * @returns + */ + function find_optimal_dispatch_size(device, workgroup_count) { + var dispatchSize = { + x: workgroup_count, + y: 1 + }; + if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) { + var x = Math.floor(Math.sqrt(workgroup_count)); + var y = Math.ceil(workgroup_count / x); + dispatchSize.x = x; + dispatchSize.y = y; + } + return dispatchSize; + } + function create_buffer_from_data(_ref) { + var device = _ref.device, + label = _ref.label, + data = _ref.data, + _ref$usage = _ref.usage, + usage = _ref$usage === void 0 ? 0 : _ref$usage; + var dispatchSizes = device.createBuffer({ + label: label, + usage: usage, + size: data.length * 4, + mappedAtCreation: true + }); + var dispatchData = new Uint32Array(dispatchSizes.getMappedRange()); + dispatchData.set(data); + dispatchSizes.unmap(); + return dispatchSizes; + } + var PrefixSumKernel = /*#__PURE__*/function () { /** * Perform a parallel prefix sum on the given data buffer @@ -153,35 +143,15 @@ this.create_pass_recursive(data, count); } return _createClass(PrefixSumKernel, [{ - key: "find_optimal_dispatch_size", - value: function find_optimal_dispatch_size(item_count) { - var maxComputeWorkgroupsPerDimension = this.device.limits.maxComputeWorkgroupsPerDimension; - var workgroup_count = Math.ceil(item_count / this.items_per_workgroup); - var x = workgroup_count; - var y = 1; - if (workgroup_count > maxComputeWorkgroupsPerDimension) { - x = Math.floor(Math.sqrt(workgroup_count)); - y = Math.ceil(workgroup_count / x); - workgroup_count = x * y; - } - return { - workgroup_count: workgroup_count, - dispatchSize: { - x: x, - y: y - } - }; - } - }, { key: "create_pass_recursive", value: function create_pass_recursive(data, count) { // Find best dispatch x and y dimensions to minimize unused threads - var _this$find_optimal_di = this.find_optimal_dispatch_size(count), - workgroup_count = _this$find_optimal_di.workgroup_count, - dispatchSize = _this$find_optimal_di.dispatchSize; + var workgroup_count = Math.ceil(count / this.items_per_workgroup); + var dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count); // Create buffer for block sums var blockSumBuffer = this.device.createBuffer({ + label: 'prefix-sum-block-sum', size: workgroup_count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); @@ -266,25 +236,24 @@ }); } } + }, { + key: "get_dispatch_chain", + value: function get_dispatch_chain() { + return this.pipelines.flatMap(function (p) { + return [p.dispatchSize.x, p.dispatchSize.y, 1]; + }); + } }, { key: "dispatch", - value: function dispatch(pass) { - var _iterator = _createForOfIteratorHelper(this.pipelines), - _step; - try { - for (_iterator.s(); !(_step = _iterator.n()).done;) { - var _step$value = _step.value, - pipeline = _step$value.pipeline, - bindGroup = _step$value.bindGroup, - dispatchSize = _step$value.dispatchSize; - pass.setPipeline(pipeline); - pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1); - } - } catch (err) { - _iterator.e(err); - } finally { - _iterator.f(); + value: function dispatch(pass, dispatchSize) { + var offset = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + for (var i = 0; i < this.pipelines.length; i++) { + var _this$pipelines$i = this.pipelines[i], + pipeline = _this$pipelines$i.pipeline, + bindGroup = _this$pipelines$i.bindGroup; + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4); } } }]); @@ -301,6 +270,198 @@ var radixSortReorderSource = /* wgsl */"\n\n@group(0) @binding(0) var inputKeys: array;\n@group(0) @binding(1) var outputKeys: array;\n@group(0) @binding(2) var local_prefix_sum: array;\n@group(0) @binding(3) var prefix_block_sum: array;\n@group(0) @binding(4) var inputValues: array;\n@group(0) @binding(5) var outputValues: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort_reorder(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) { \n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n if (GID >= ELEMENT_COUNT) {\n return;\n }\n\n let k = inputKeys[GID];\n let v = inputValues[GID];\n\n let local_prefix = local_prefix_sum[GID];\n\n // Calculate new position\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\n let sorted_position = prefix_block_sum[pid] + local_prefix;\n \n outputKeys[sorted_position] = k;\n outputValues[sorted_position] = v;\n}"; + var checkSortSource = function checkSortSource() { + var isFirstPass = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : false; + var isLastPass = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; + var isFullCheck = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; + return /* wgsl */"\n\n@group(0) @binding(0) var input: array;\n@group(0) @binding(1) var output: array;\n@group(0) @binding(2) var original: array;\n@group(0) @binding(3) var is_sorted: u32;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ELEMENT_COUNT: u32;\noverride START_ELEMENT: u32;\n\nvar s_data: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn check_sort(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT;\n let GID = TID + WID; // Global thread ID\n\n // Load data into shared memory\n ".concat(isFirstPass ? first_pass_load_data : "s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);", "\n\n // Perform parallel reduction\n for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { \n workgroupBarrier(); \n if (TID % (2u * d) == 0u) {\n s_data[TID] += s_data[TID + d];\n }\n }\n workgroupBarrier();\n\n // Write reduction result\n ").concat(isLastPass ? last_pass(isFullCheck) : write_reduction_result, "\n}"); + }; + var write_reduction_result = /* wgsl */"\n if (TID == 0) {\n output[WORKGROUP_ID] = s_data[0];\n }\n"; + var first_pass_load_data = /* wgsl */"\n let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\n\n // Load current element into shared memory\n // Also load next element for comparison\n let elm = select(0u, input[GID], GID < ELEMENT_COUNT);\n let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1);\n s_data[TID] = elm;\n workgroupBarrier();\n\n s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next);\n"; + var last_pass = function last_pass(isFullCheck) { + return /* wgsl */"\n let fullDispatchLength = arrayLength(&output);\n let dispatchIndex = TID * 3;\n\n if (dispatchIndex >= fullDispatchLength) {\n return;\n }\n\n ".concat(isFullCheck ? last_pass_full : last_pass_fast, "\n"); + }; + var last_pass_fast = /* wgsl */"\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u);\n"; + var last_pass_full = /* wgsl */"\n if (TID == 0 && s_data[0] == 0) {\n is_sorted = 1u;\n }\n\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0);\n"; + + var CheckSortKernel = /*#__PURE__*/function () { + /** + * CheckSortKernel - Performs a parralel reduction to check if an array is sorted. + * + * @param {GPUDevice} device + * @param {GPUBuffer} data - The buffer containing the data to check + * @param {GPUBuffer} result - The result dispatch size buffer + * @param {GPUBuffer} original - The original dispatch size buffer + * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted + * @param {number} count - The number of elements to check + * @param {number} start - The index to start checking from + * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check + * @param {object} workgroup_size - The workgroup size in x and y dimensions + */ + function CheckSortKernel(_ref) { + var device = _ref.device, + data = _ref.data, + result = _ref.result, + original = _ref.original, + is_sorted = _ref.is_sorted, + count = _ref.count, + _ref$start = _ref.start, + start = _ref$start === void 0 ? 0 : _ref$start, + _ref$full_check = _ref.full_check, + full_check = _ref$full_check === void 0 ? true : _ref$full_check, + _ref$workgroup_size = _ref.workgroup_size, + workgroup_size = _ref$workgroup_size === void 0 ? { + x: 16, + y: 16 + } : _ref$workgroup_size; + _classCallCheck(this, CheckSortKernel); + this.device = device; + this.count = count; + this.start = start; + this.full_check = full_check; + this.workgroup_size = workgroup_size; + this.threads_per_workgroup = workgroup_size.x * workgroup_size.y; + this.pipelines = []; + this.buffers = { + data: data, + result: result, + original: original, + is_sorted: is_sorted, + outputs: [] + }; + this.create_passes_recursive(data, count); + } + + // Find the best dispatch size for each pass to minimize unused workgroups + return _createClass(CheckSortKernel, [{ + key: "create_passes_recursive", + value: function create_passes_recursive(buffer, count) { + var passIndex = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + var workgroup_count = Math.ceil(count / this.threads_per_workgroup); + var isFirstPass = passIndex === 0; + var isLastPass = workgroup_count <= 1; + var outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({ + label: "check-sort-".concat(this.full_check ? 'full' : 'fast', "-").concat(passIndex), + size: workgroup_count * 4, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + }); + var bindGroupLayout = this.device.createBindGroupLayout({ + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }].concat(_toConsumableArray(isLastPass ? [{ + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] : [])) + }); + var bindGroup = this.device.createBindGroup({ + layout: bindGroupLayout, + entries: [{ + binding: 0, + resource: { + buffer: buffer + } + }, { + binding: 1, + resource: { + buffer: outputBuffer + } + }].concat(_toConsumableArray(isLastPass ? [{ + binding: 2, + resource: { + buffer: this.buffers.original + } + }, { + binding: 3, + resource: { + buffer: this.buffers.is_sorted + } + }] : [])) + }); + var pipelineLayout = this.device.createPipelineLayout({ + bindGroupLayouts: [bindGroupLayout] + }); + var element_count = isFirstPass ? this.start + count : count; + var start_element = isFirstPass ? this.start : 0; + var checkSortPipeline = this.device.createComputePipeline({ + layout: pipelineLayout, + compute: { + module: this.device.createShaderModule({ + code: checkSortSource(isFirstPass, isLastPass, this.full_check), + label: 'check-sort' + }), + entryPoint: 'check_sort', + constants: { + 'WORKGROUP_SIZE_X': this.workgroup_size.x, + 'WORKGROUP_SIZE_Y': this.workgroup_size.y, + 'THREADS_PER_WORKGROUP': this.threads_per_workgroup, + 'ELEMENT_COUNT': element_count, + 'START_ELEMENT': start_element + } + } + }); + this.buffers.outputs.push(outputBuffer); + this.pipelines.push({ + pipeline: checkSortPipeline, + bindGroup: bindGroup + }); + if (!isLastPass) { + this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1); + } + } + }, { + key: "dispatch", + value: function dispatch(pass, dispatchSize) { + var offset = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + for (var i = 0; i < this.pipelines.length; i++) { + var _this$pipelines$i = this.pipelines[i], + pipeline = _this$pipelines$i.pipeline, + bindGroup = _this$pipelines$i.bindGroup; + var dispatchIndirect = this.full_check || i < this.pipelines.length - 1; + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + if (dispatchIndirect) pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4);else + // Only the last dispatch of the fast check kernel is constant to (1, 1, 1) + pass.dispatchWorkgroups(1, 1, 1); + } + } + }], [{ + key: "find_optimal_dispatch_chain", + value: function find_optimal_dispatch_chain(device, item_count, workgroup_size) { + var threads_per_workgroup = workgroup_size.x * workgroup_size.y; + var sizes = []; + do { + // Number of workgroups required to process all items + var target_workgroup_count = Math.ceil(item_count / threads_per_workgroup); + + // Optimal dispatch size and updated workgroup count + var dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count); + sizes.push(dispatchSize.x, dispatchSize.y, 1); + item_count = target_workgroup_count; + } while (item_count > 1); + return sizes; + } + }]); + }(); + var RadixSortKernel = /*#__PURE__*/function () { /** * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values @@ -315,6 +476,7 @@ * @param {number} count - Number of elements to sort * @param {number} bit_count - Number of bits per element (default: 32) * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two + * @param {boolean} check_order - Enable "order checking" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false) * @param {boolean} local_shuffle - Enable "local shuffling" optimization for the radix sort kernel (default: false) * @param {boolean} avoid_bank_conflicts - Enable "avoiding bank conflicts" optimization for the prefix sum kernel (default: false) */ @@ -331,6 +493,8 @@ x: 16, y: 16 } : _ref$workgroup_size, + _ref$check_order = _ref.check_order, + check_order = _ref$check_order === void 0 ? false : _ref$check_order, _ref$local_shuffle = _ref.local_shuffle, local_shuffle = _ref$local_shuffle === void 0 ? false : _ref$local_shuffle, _ref$avoid_bank_confl = _ref.avoid_bank_conflicts, @@ -339,12 +503,14 @@ if (device == null) throw new Error('No device provided'); if (keys == null) throw new Error('No keys buffer provided'); if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter'); - if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter'); + if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter'); if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter'); + if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4'); this.device = device; this.count = count; this.bit_count = bit_count; this.workgroup_size = workgroup_size; + this.check_order = check_order; this.local_shuffle = local_shuffle; this.avoid_bank_conflicts = avoid_bank_conflicts; this.threads_per_workgroup = workgroup_size.x * workgroup_size.y; @@ -356,37 +522,18 @@ this.shaderModules = {}; // GPUShaderModules this.buffers = {}; // GPUBuffers this.pipelines = []; // List of passes + this.kernels = {}; // Find best dispatch x and y dimensions to minimize unused threads - this.find_optimal_dispatch_size(); + this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count); // Create shader modules from wgsl code this.create_shader_modules(); - // Create GPU buffers - this.create_buffers(keys, values); - // Create multi-pass pipelines - this.create_pipelines(); + this.create_pipelines(keys, values); } return _createClass(RadixSortKernel, [{ - key: "find_optimal_dispatch_size", - value: function find_optimal_dispatch_size() { - var maxComputeWorkgroupsPerDimension = this.device.limits.maxComputeWorkgroupsPerDimension; - this.dispatchSize = { - x: this.workgroup_count, - y: 1 - }; - if (this.workgroup_count > maxComputeWorkgroupsPerDimension) { - var x = Math.floor(Math.sqrt(this.workgroup_count)); - var y = Math.ceil(this.workgroup_count / x); - this.dispatchSize = { - x: x, - y: y - }; - } - } - }, { key: "create_shader_modules", value: function create_shader_modules() { // Remove every occurence of "values" in the shader code if values buffer is not provided @@ -407,28 +554,154 @@ }) }; } + }, { + key: "create_pipelines", + value: function create_pipelines(keys, values) { + // Block prefix sum kernel + var _this$create_prefix_s = this.create_prefix_sum_kernel(), + prefixSumKernel = _this$create_prefix_s.prefixSumKernel, + prefixBlockSumBuffer = _this$create_prefix_s.prefixBlockSumBuffer; + + // Indirect dispatch buffers + var dispatchData = this.calculate_dispatch_sizes(prefixSumKernel); + + // GPU buffers + this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData); + + // Check sort kernels + this.create_check_sort_kernels(this.buffers.keys, dispatchData); + + // Radix sort passes for every 2 bits + for (var bit = 0; bit < this.bit_count; bit += 2) { + // Swap buffers every pass + var even = bit % 4 == 0; + var inKeys = even ? this.buffers.keys : this.buffers.tmpKeys; + var inValues = even ? this.buffers.values : this.buffers.tmpValues; + var outKeys = even ? this.buffers.tmpKeys : this.buffers.keys; + var outValues = even ? this.buffers.tmpValues : this.buffers.values; + + // Compute local prefix sums and block sums + var blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit); + + // Reorder keys and values + var reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit); + this.pipelines.push({ + blockSumPipeline: blockSumPipeline, + reorderPipeline: reorderPipeline + }); + } + } + }, { + key: "create_prefix_sum_kernel", + value: function create_prefix_sum_kernel() { + // Prefix Block Sum buffer (4 element per workgroup) + var prefixBlockSumBuffer = this.device.createBuffer({ + label: 'radix-sort-prefix-block-sum', + size: this.prefix_block_workgroup_count * 4, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST + }); + + // Create block prefix sum kernel + var prefixSumKernel = new PrefixSumKernel({ + device: this.device, + data: prefixBlockSumBuffer, + count: this.prefix_block_workgroup_count, + workgroup_size: this.workgroup_size, + avoid_bank_conflicts: this.avoid_bank_conflicts + }); + this.kernels.prefixSum = prefixSumKernel; + return { + prefixSumKernel: prefixSumKernel, + prefixBlockSumBuffer: prefixBlockSumBuffer + }; + } + }, { + key: "calculate_dispatch_sizes", + value: function calculate_dispatch_sizes(prefixSumKernel) { + // Prefix sum dispatch sizes + var prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain(); + + // Check sort element count (fast/full) + var check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4); + var check_sort_full_count = this.count - check_sort_fast_count; + var start_full = check_sort_fast_count - 1; + + // Check sort dispatch sizes + var dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size); + var dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size); + + // Initial dispatch sizes + var initialDispatch = [this.dispatchSize.x, this.dispatchSize.y, 1].concat(_toConsumableArray(dispatchSizesFast.slice(0, 3)), _toConsumableArray(prefixSumDispatchSize)); + + // Dispatch offsets in main buffer + this.dispatchOffsets = { + radix_sort: 0, + check_sort_fast: 3 * 4, + prefix_sum: 6 * 4 + }; + return { + initialDispatch: initialDispatch, + dispatchSizesFull: dispatchSizesFull, + check_sort_fast_count: check_sort_fast_count, + check_sort_full_count: check_sort_full_count, + start_full: start_full + }; + } }, { key: "create_buffers", - value: function create_buffers(keys, values) { + value: function create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) { // Keys and values double buffering var tmpKeysBuffer = this.device.createBuffer({ + label: 'radix-sort-tmp-keys', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); var tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({ + label: 'radix-sort-tmp-values', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); // Local Prefix Sum buffer (1 element per item) var localPrefixSumBuffer = this.device.createBuffer({ + label: 'radix-sort-local-prefix-sum', size: this.count * 4, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); - // Prefix Block Sum buffer (4 element per workgroup) - var prefixBlockSumBuffer = this.device.createBuffer({ - size: this.prefix_block_workgroup_count * 4, + // Dispatch sizes (radix sort, check sort, prefix sum) + var dispatchBuffer = create_buffer_from_data({ + device: this.device, + label: 'radix-sort-dispatch-size', + data: dispatchData.initialDispatch, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT + }); + var originalDispatchBuffer = create_buffer_from_data({ + device: this.device, + label: 'radix-sort-dispatch-size-original', + data: dispatchData.initialDispatch, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + }); + + // Dispatch sizes (full sort) + var checkSortFullDispatchBuffer = create_buffer_from_data({ + label: 'check-sort-full-dispatch-size', + device: this.device, + data: dispatchData.dispatchSizesFull, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT + }); + var checkSortFullOriginalDispatchBuffer = create_buffer_from_data({ + label: 'check-sort-full-dispatch-size-original', + device: this.device, + data: dispatchData.dispatchSizesFull, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC + }); + + // Flag to tell if the data is sorted + var isSortedBuffer = create_buffer_from_data({ + label: 'is-sorted', + device: this.device, + data: new Uint32Array([0]), usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }); this.buffers = { @@ -437,42 +710,56 @@ tmpKeys: tmpKeysBuffer, tmpValues: tmpValuesBuffer, localPrefixSum: localPrefixSumBuffer, - prefixBlockSum: prefixBlockSumBuffer + prefixBlockSum: prefixBlockSumBuffer, + dispatchSize: dispatchBuffer, + originalDispatchSize: originalDispatchBuffer, + checkSortFullDispatchSize: checkSortFullDispatchBuffer, + originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer, + isSorted: isSortedBuffer }; } - - // Create radix sort passes for every 2 bits }, { - key: "create_pipelines", - value: function create_pipelines() { - for (var bit = 0; bit < this.bit_count; bit += 2) { - // Swap buffers every pass - var even = bit % 4 == 0; - var inKeys = even ? this.buffers.keys : this.buffers.tmpKeys; - var inValues = even ? this.buffers.values : this.buffers.tmpValues; - var outKeys = even ? this.buffers.tmpKeys : this.buffers.keys; - var outValues = even ? this.buffers.tmpValues : this.buffers.values; - - // Compute local prefix sums and block sums - var blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit); + key: "create_check_sort_kernels", + value: function create_check_sort_kernels(inKeys, checkSortPartitionData) { + // Skip check sort if disabled + if (!this.check_order) { + return [null, null]; + } + var check_sort_fast_count = checkSortPartitionData.check_sort_fast_count, + check_sort_full_count = checkSortPartitionData.check_sort_full_count, + start_full = checkSortPartitionData.start_full; - // Compute block sums prefix sums - var prefixSumKernel = new PrefixSumKernel({ - device: this.device, - data: this.buffers.prefixBlockSum, - count: this.prefix_block_workgroup_count, - workgroup_size: this.workgroup_size, - avoid_bank_conflicts: this.avoid_bank_conflicts - }); + // Create the full pass + var checkSortFull = new CheckSortKernel({ + device: this.device, + data: inKeys, + result: this.buffers.dispatchSize, + original: this.buffers.originalDispatchSize, + is_sorted: this.buffers.isSorted, + count: check_sort_full_count, + start: start_full, + full_check: true, + workgroup_size: this.workgroup_size + }); - // Reorder keys and values - var reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit); - this.pipelines.push({ - blockSumPipeline: blockSumPipeline, - prefixSumKernel: prefixSumKernel, - reorderPipeline: reorderPipeline - }); + // Create the fast pass + var checkSortFast = new CheckSortKernel({ + device: this.device, + data: inKeys, + result: this.buffers.checkSortFullDispatchSize, + original: this.buffers.originalCheckSortFullDispatchSize, + is_sorted: this.buffers.isSorted, + count: check_sort_fast_count, + full_check: false, + workgroup_size: this.workgroup_size + }); + if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) { + console.warn("Warning: workgroup size is too small to enable check sort optimization, disabling..."); + this.check_order = false; + return [null, null]; } + this.kernels.checkSortFast = checkSortFast; + this.kernels.checkSortFull = checkSortFull; } }, { key: "create_block_sum_pipeline", @@ -663,26 +950,21 @@ }, { key: "dispatch", value: function dispatch(pass) { - var _iterator = _createForOfIteratorHelper(this.pipelines), - _step; - try { - for (_iterator.s(); !(_step = _iterator.n()).done;) { - var _step$value = _step.value, - blockSumPipeline = _step$value.blockSumPipeline, - prefixSumKernel = _step$value.prefixSumKernel, - reorderPipeline = _step$value.reorderPipeline; - pass.setPipeline(blockSumPipeline.pipeline); - pass.setBindGroup(0, blockSumPipeline.bindGroup); - pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1); - prefixSumKernel.dispatch(pass); - pass.setPipeline(reorderPipeline.pipeline); - pass.setBindGroup(0, reorderPipeline.bindGroup); - pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1); + for (var i = 0; i < this.bit_count / 2; i++) { + var _this$pipelines$i = this.pipelines[i], + blockSumPipeline = _this$pipelines$i.blockSumPipeline, + reorderPipeline = _this$pipelines$i.reorderPipeline; + if (this.check_order && i % 2 == 0) { + this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast); + this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize); } - } catch (err) { - _iterator.e(err); - } finally { - _iterator.f(); + pass.setPipeline(blockSumPipeline.pipeline); + pass.setBindGroup(0, blockSumPipeline.bindGroup); + pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort); + this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum); + pass.setPipeline(reorderPipeline.pipeline); + pass.setBindGroup(0, reorderPipeline.bindGroup); + pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort); } } }]); diff --git a/dist/umd/radix-sort-umd.js.map b/dist/umd/radix-sort-umd.js.map index a4f145c..1eb5dbf 100644 --- a/dist/umd/radix-sort-umd.js.map +++ b/dist/umd/radix-sort-umd.js.map @@ -1 +1 @@ -{"version":3,"file":"radix-sort-umd.js","sources":["../../src/shaders/prefix_sum.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/PrefixSumKernel.js","../../src/shaders/radix_sort.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort_reorder.js","../../src/RadixSortKernel.js"],"sourcesContent":["const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n find_optimal_dispatch_size(item_count) {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n let workgroup_count = Math.ceil(item_count / this.items_per_workgroup)\r\n let x = workgroup_count\r\n let y = 1\r\n\r\n if (workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n x = Math.floor(Math.sqrt(workgroup_count))\r\n y = Math.ceil(workgroup_count / x)\r\n workgroup_count = x * y\r\n }\r\n\r\n return { \r\n workgroup_count,\r\n dispatchSize: { x, y },\r\n }\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const { workgroup_count, dispatchSize } = this.find_optimal_dispatch_size(count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n dispatch(pass) {\r\n for (const { pipeline, bindGroup, dispatchSize } of this.pipelines) {\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.find_optimal_dispatch_size()\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n\r\n // Create GPU buffers\r\n this.create_buffers(keys, values)\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines()\r\n }\r\n\r\n find_optimal_dispatch_size() {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n this.dispatchSize = { \r\n x: this.workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (this.workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(this.workgroup_count))\r\n const y = Math.ceil(this.workgroup_count / x)\r\n \r\n this.dispatchSize = { x, y } \r\n }\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_buffers(keys, values) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n }\r\n }\r\n\r\n // Create radix sort passes for every 2 bits\r\n create_pipelines() {\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n\r\n // Compute block sums prefix sums\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: this.buffers.prefixBlockSum, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, prefixSumKernel, reorderPipeline })\r\n }\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) {\r\n for (const { blockSumPipeline, prefixSumKernel, reorderPipeline } of this.pipelines) { \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n\r\n prefixSumKernel.dispatch(pass)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel"],"names":["prefixSumSource","prefixSumNoBankConflictSource","PrefixSumKernel","_ref","device","data","count","_ref$workgroup_size","workgroup_size","x","y","_ref$avoid_bank_confl","avoid_bank_conflicts","_classCallCheck","threads_per_workgroup","items_per_workgroup","Math","log2","Error","concat","pipelines","shaderModule","createShaderModule","label","code","prefixSumSource_NoBankConflict","create_pass_recursive","_createClass","key","value","find_optimal_dispatch_size","item_count","maxComputeWorkgroupsPerDimension","limits","workgroup_count","ceil","floor","sqrt","dispatchSize","_this$find_optimal_di","blockSumBuffer","createBuffer","size","usage","GPUBufferUsage","STORAGE","COPY_SRC","COPY_DST","bindGroupLayout","createBindGroupLayout","entries","binding","visibility","GPUShaderStage","COMPUTE","buffer","type","bindGroup","createBindGroup","layout","resource","pipelineLayout","createPipelineLayout","bindGroupLayouts","scanPipeline","createComputePipeline","compute","module","entryPoint","constants","push","pipeline","blockSumPipeline","dispatch","pass","_iterator","_createForOfIteratorHelper","_step","s","n","done","_step$value","setPipeline","setBindGroup","dispatchWorkgroups","err","e","f","radixSortSource","radixSortCoalescedSource","radixSortReorderSource","RadixSortKernel","arguments","length","undefined","keys","values","_ref$bit_count","bit_count","_ref$local_shuffle","local_shuffle","Number","isInteger","prefix_block_workgroup_count","has_values","shaderModules","buffers","create_shader_modules","create_buffers","create_pipelines","remove_values","source","split","filter","line","toLowerCase","includes","join","blockSumSource","radixSortSource_LocalShuffle","blockSum","reorder","reorderSource","tmpKeysBuffer","tmpValuesBuffer","localPrefixSumBuffer","prefixBlockSumBuffer","tmpKeys","tmpValues","localPrefixSum","prefixBlockSum","bit","even","inKeys","inValues","outKeys","outValues","create_block_sum_pipeline","prefixSumKernel","reorderPipeline","create_reorder_pipeline","_toConsumableArray"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAAA,IAAMA,eAAe,aAyFnB,4oFAAA;;ECzFF;EACA;EACA;EACA;EACA;EACA,IAAMC,6BAA6B,aA2GjC,iuGAAA;;AC/G8F,MAE1FC,eAAe,gBAAA,YAAA;EACjB;EACJ;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;IACI,SAAAA,eAAAA,CAAAC,IAAA,EAMG;EAAA,IAAA,IALCC,MAAM,GAAAD,IAAA,CAANC,MAAM;QACNC,IAAI,GAAAF,IAAA,CAAJE,IAAI;QACJC,KAAK,GAAAH,IAAA,CAALG,KAAK;QAAAC,mBAAA,GAAAJ,IAAA,CACLK,cAAc;QAAdA,cAAc,GAAAD,mBAAA,KAAG,KAAA,CAAA,GAAA;EAAEE,QAAAA,CAAC,EAAE,EAAE;EAAEC,QAAAA,CAAC,EAAE,EAAA;EAAG,OAAC,GAAAH,mBAAA;QAAAI,qBAAA,GAAAR,IAAA,CACjCS,oBAAoB;EAApBA,MAAAA,oBAAoB,GAAAD,qBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,qBAAA,CAAA;EAAAE,IAAAA,eAAA,OAAAX,eAAA,CAAA,CAAA;MAE5B,IAAI,CAACE,MAAM,GAAGA,MAAM,CAAA;MACpB,IAAI,CAACI,cAAc,GAAGA,cAAc,CAAA;MACpC,IAAI,CAACM,qBAAqB,GAAGN,cAAc,CAACC,CAAC,GAAGD,cAAc,CAACE,CAAC,CAAA;MAChE,IAAI,CAACK,mBAAmB,GAAG,CAAC,GAAG,IAAI,CAACD,qBAAqB,CAAC;;MAE1D,IAAIE,IAAI,CAACC,IAAI,CAAC,IAAI,CAACH,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC,EAC/C,MAAM,IAAII,KAAK,CAAAC,wEAAAA,CAAAA,MAAA,CAA0E,IAAI,CAACL,qBAAqB,EAAA,GAAA,CAAG,CAAC,CAAA;MAE3H,IAAI,CAACM,SAAS,GAAG,EAAE,CAAA;MAEnB,IAAI,CAACC,YAAY,GAAG,IAAI,CAACjB,MAAM,CAACkB,kBAAkB,CAAC;EAC/CC,MAAAA,KAAK,EAAE,YAAY;EACnBC,MAAAA,IAAI,EAAEZ,oBAAoB,GAAGa,6BAA8B,GAAGzB,eAAAA;EAClE,KAAC,CAAC,CAAA;EAEF,IAAA,IAAI,CAAC0B,qBAAqB,CAACrB,IAAI,EAAEC,KAAK,CAAC,CAAA;EAC3C,GAAA;IAAC,OAAAqB,YAAA,CAAAzB,eAAA,EAAA,CAAA;MAAA0B,GAAA,EAAA,4BAAA;EAAAC,IAAAA,KAAA,EAED,SAAAC,0BAA2BC,CAAAA,UAAU,EAAE;QACnC,IAAQC,gCAAgC,GAAK,IAAI,CAAC5B,MAAM,CAAC6B,MAAM,CAAvDD,gCAAgC,CAAA;QAExC,IAAIE,eAAe,GAAGlB,IAAI,CAACmB,IAAI,CAACJ,UAAU,GAAG,IAAI,CAAChB,mBAAmB,CAAC,CAAA;QACtE,IAAIN,CAAC,GAAGyB,eAAe,CAAA;QACvB,IAAIxB,CAAC,GAAG,CAAC,CAAA;QAET,IAAIwB,eAAe,GAAGF,gCAAgC,EAAE;UACpDvB,CAAC,GAAGO,IAAI,CAACoB,KAAK,CAACpB,IAAI,CAACqB,IAAI,CAACH,eAAe,CAAC,CAAC,CAAA;UAC1CxB,CAAC,GAAGM,IAAI,CAACmB,IAAI,CAACD,eAAe,GAAGzB,CAAC,CAAC,CAAA;UAClCyB,eAAe,GAAGzB,CAAC,GAAGC,CAAC,CAAA;EAC3B,OAAA;QAEA,OAAO;EACHwB,QAAAA,eAAe,EAAfA,eAAe;EACfI,QAAAA,YAAY,EAAE;EAAE7B,UAAAA,CAAC,EAADA,CAAC;EAAEC,UAAAA,CAAC,EAADA,CAAAA;EAAE,SAAA;SACxB,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAAkB,GAAA,EAAA,uBAAA;EAAAC,IAAAA,KAAA,EAED,SAAAH,qBAAAA,CAAsBrB,IAAI,EAAEC,KAAK,EAAE;EAC/B;EACA,MAAA,IAAAiC,qBAAA,GAA0C,IAAI,CAACT,0BAA0B,CAACxB,KAAK,CAAC;UAAxE4B,eAAe,GAAAK,qBAAA,CAAfL,eAAe;UAAEI,YAAY,GAAAC,qBAAA,CAAZD,YAAY,CAAA;;EAErC;EACA,MAAA,IAAME,cAAc,GAAG,IAAI,CAACpC,MAAM,CAACqC,YAAY,CAAC;UAC5CC,IAAI,EAAER,eAAe,GAAG,CAAC;UACzBS,KAAK,EAAEC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAMC,eAAe,GAAG,IAAI,CAAC5C,MAAM,CAAC6C,qBAAqB,CAAC;EACtDC,QAAAA,OAAO,EAAE,CACL;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CAAA;EAET,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAACrD,MAAM,CAACsD,eAAe,CAAC;EAC1CnC,QAAAA,KAAK,EAAE,uBAAuB;EAC9BoC,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EAAE,CACL;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAElD,IAAAA;EAAK,WAAA;EAC7B,SAAC,EACD;EACI8C,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEf,cAAAA;EAAe,WAAA;WACtC,CAAA;EAET,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMqB,cAAc,GAAG,IAAI,CAACzD,MAAM,CAAC0D,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAAEf,eAAe,CAAA;EACvC,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAMgB,YAAY,GAAG,IAAI,CAAC5D,MAAM,CAAC6D,qBAAqB,CAAC;EACnD1C,QAAAA,KAAK,EAAE,0BAA0B;EACjCoC,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;YACLC,MAAM,EAAE,IAAI,CAAC9C,YAAY;EACzB+C,UAAAA,UAAU,EAAE,kBAAkB;EAC9BC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC7D,cAAc,CAACC,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAACD,cAAc,CAACE,CAAC;cACzC,uBAAuB,EAAE,IAAI,CAACI,qBAAqB;cACnD,qBAAqB,EAAE,IAAI,CAACC,mBAAAA;EAChC,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;EAEF,MAAA,IAAI,CAACK,SAAS,CAACkD,IAAI,CAAC;EAAEC,QAAAA,QAAQ,EAAEP,YAAY;EAAEP,QAAAA,SAAS,EAATA,SAAS;EAAEnB,QAAAA,YAAY,EAAZA,YAAAA;EAAa,OAAC,CAAC,CAAA;QAExE,IAAIJ,eAAe,GAAG,CAAC,EAAE;EACrB;EACA,QAAA,IAAI,CAACR,qBAAqB,CAACc,cAAc,EAAEN,eAAe,CAAC,CAAA;;EAE3D;EACA,QAAA,IAAMsC,gBAAgB,GAAG,IAAI,CAACpE,MAAM,CAAC6D,qBAAqB,CAAC;EACvD1C,UAAAA,KAAK,EAAE,+BAA+B;EACtCoC,UAAAA,MAAM,EAAEE,cAAc;EACtBK,UAAAA,OAAO,EAAE;cACLC,MAAM,EAAE,IAAI,CAAC9C,YAAY;EACzB+C,YAAAA,UAAU,EAAE,gBAAgB;EAC5BC,YAAAA,SAAS,EAAE;EACP,cAAA,kBAAkB,EAAE,IAAI,CAAC7D,cAAc,CAACC,CAAC;EACzC,cAAA,kBAAkB,EAAE,IAAI,CAACD,cAAc,CAACE,CAAC;gBACzC,uBAAuB,EAAE,IAAI,CAACI,qBAAAA;EAClC,aAAA;EACJ,WAAA;EACJ,SAAC,CAAC,CAAA;EAEF,QAAA,IAAI,CAACM,SAAS,CAACkD,IAAI,CAAC;EAAEC,UAAAA,QAAQ,EAAEC,gBAAgB;EAAEf,UAAAA,SAAS,EAATA,SAAS;EAAEnB,UAAAA,YAAY,EAAZA,YAAAA;EAAa,SAAC,CAAC,CAAA;EAChF,OAAA;EACJ,KAAA;EAAC,GAAA,EAAA;MAAAV,GAAA,EAAA,UAAA;EAAAC,IAAAA,KAAA,EAED,SAAA4C,QAASC,CAAAA,IAAI,EAAE;EAAA,MAAA,IAAAC,SAAA,GAAAC,0BAAA,CACyC,IAAI,CAACxD,SAAS,CAAA;UAAAyD,KAAA,CAAA;EAAA,MAAA,IAAA;UAAlE,KAAAF,SAAA,CAAAG,CAAA,EAAAD,EAAAA,CAAAA,CAAAA,KAAA,GAAAF,SAAA,CAAAI,CAAA,EAAAC,EAAAA,IAAA,GAAoE;EAAA,UAAA,IAAAC,WAAA,GAAAJ,KAAA,CAAAhD,KAAA;cAAvD0C,QAAQ,GAAAU,WAAA,CAARV,QAAQ;cAAEd,SAAS,GAAAwB,WAAA,CAATxB,SAAS;cAAEnB,YAAY,GAAA2C,WAAA,CAAZ3C,YAAY,CAAA;EAC1CoC,UAAAA,IAAI,CAACQ,WAAW,CAACX,QAAQ,CAAC,CAAA;EAC1BG,UAAAA,IAAI,CAACS,YAAY,CAAC,CAAC,EAAE1B,SAAS,CAAC,CAAA;EAC/BiB,UAAAA,IAAI,CAACU,kBAAkB,CAAC9C,YAAY,CAAC7B,CAAC,EAAE6B,YAAY,CAAC5B,CAAC,EAAE,CAAC,CAAC,CAAA;EAC9D,SAAA;EAAC,OAAA,CAAA,OAAA2E,GAAA,EAAA;UAAAV,SAAA,CAAAW,CAAA,CAAAD,GAAA,CAAA,CAAA;EAAA,OAAA,SAAA;EAAAV,QAAAA,SAAA,CAAAY,CAAA,EAAA,CAAA;EAAA,OAAA;EACL,KAAA;EAAC,GAAA,CAAA,CAAA,CAAA;EAAA,CAAA;;ECxJL,IAAMC,eAAe,aAsFnB,m8FAAA;;ECtFF;EACA;EACA;EACA;EACA;EACA,IAAMC,wBAAwB,aA6G5B,kpHAAA;;EClHF,IAAMC,sBAAsB,aA0C1B,w9CAAA;;ACvCsD,MAElDC,eAAe,gBAAA,YAAA;EACjB;EACJ;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACI,EAAA,SAAAA,kBASQ;EAAA,IAAA,IAAAxF,IAAA,GAAAyF,SAAA,CAAAC,MAAA,GAAA,CAAA,IAAAD,SAAA,CAAA,CAAA,CAAA,KAAAE,SAAA,GAAAF,SAAA,CAAA,CAAA,CAAA,GAAJ,EAAE;QARFxF,MAAM,GAAAD,IAAA,CAANC,MAAM;QACN2F,IAAI,GAAA5F,IAAA,CAAJ4F,IAAI;QACJC,MAAM,GAAA7F,IAAA,CAAN6F,MAAM;QACN1F,KAAK,GAAAH,IAAA,CAALG,KAAK;QAAA2F,cAAA,GAAA9F,IAAA,CACL+F,SAAS;EAATA,MAAAA,SAAS,GAAAD,cAAA,KAAG,KAAA,CAAA,GAAA,EAAE,GAAAA,cAAA;QAAA1F,mBAAA,GAAAJ,IAAA,CACdK,cAAc;QAAdA,cAAc,GAAAD,mBAAA,KAAG,KAAA,CAAA,GAAA;EAAEE,QAAAA,CAAC,EAAE,EAAE;EAAEC,QAAAA,CAAC,EAAE,EAAA;EAAG,OAAC,GAAAH,mBAAA;QAAA4F,kBAAA,GAAAhG,IAAA,CACjCiG,aAAa;EAAbA,MAAAA,aAAa,GAAAD,kBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,kBAAA;QAAAxF,qBAAA,GAAAR,IAAA,CACrBS,oBAAoB;EAApBA,MAAAA,oBAAoB,GAAAD,qBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,qBAAA,CAAA;EAAAE,IAAAA,eAAA,OAAA8E,eAAA,CAAA,CAAA;MAE5B,IAAIvF,MAAM,IAAI,IAAI,EAAE,MAAM,IAAIc,KAAK,CAAC,oBAAoB,CAAC,CAAA;MACzD,IAAI6E,IAAI,IAAI,IAAI,EAAE,MAAM,IAAI7E,KAAK,CAAC,yBAAyB,CAAC,CAAA;EAC5D,IAAA,IAAI,CAACmF,MAAM,CAACC,SAAS,CAAChG,KAAK,CAAC,IAAIA,KAAK,IAAI,CAAC,EAAE,MAAM,IAAIY,KAAK,CAAC,yBAAyB,CAAC,CAAA;EACtF,IAAA,IAAI,CAACmF,MAAM,CAACC,SAAS,CAACJ,SAAS,CAAC,IAAIA,SAAS,IAAI,CAAC,EAAE,MAAM,IAAIhF,KAAK,CAAC,6BAA6B,CAAC,CAAA;MAClG,IAAI,CAACmF,MAAM,CAACC,SAAS,CAAC9F,cAAc,CAACC,CAAC,CAAC,IAAI,CAAC4F,MAAM,CAACC,SAAS,CAAC9F,cAAc,CAACE,CAAC,CAAC,EAAE,MAAM,IAAIQ,KAAK,CAAC,kCAAkC,CAAC,CAAA;MAEnI,IAAI,CAACd,MAAM,GAAGA,MAAM,CAAA;MACpB,IAAI,CAACE,KAAK,GAAGA,KAAK,CAAA;MAClB,IAAI,CAAC4F,SAAS,GAAGA,SAAS,CAAA;MAC1B,IAAI,CAAC1F,cAAc,GAAGA,cAAc,CAAA;MACpC,IAAI,CAAC4F,aAAa,GAAGA,aAAa,CAAA;MAClC,IAAI,CAACxF,oBAAoB,GAAGA,oBAAoB,CAAA;MAEhD,IAAI,CAACE,qBAAqB,GAAGN,cAAc,CAACC,CAAC,GAAGD,cAAc,CAACE,CAAC,CAAA;EAChE,IAAA,IAAI,CAACwB,eAAe,GAAGlB,IAAI,CAACmB,IAAI,CAAC7B,KAAK,GAAG,IAAI,CAACQ,qBAAqB,CAAC,CAAA;EACpE,IAAA,IAAI,CAACyF,4BAA4B,GAAG,CAAC,GAAG,IAAI,CAACrE,eAAe,CAAA;EAE5D,IAAA,IAAI,CAACsE,UAAU,GAAIR,MAAM,IAAI,IAAK,CAAC;;EAEnC,IAAA,IAAI,CAAC1D,YAAY,GAAG,EAAE,CAAE;EACxB,IAAA,IAAI,CAACmE,aAAa,GAAG,EAAE,CAAC;EACxB,IAAA,IAAI,CAACC,OAAO,GAAG,EAAE,CAAO;EACxB,IAAA,IAAI,CAACtF,SAAS,GAAG,EAAE,CAAK;;EAExB;MACA,IAAI,CAACU,0BAA0B,EAAE,CAAA;;EAEjC;MACA,IAAI,CAAC6E,qBAAqB,EAAE,CAAA;;EAE5B;EACA,IAAA,IAAI,CAACC,cAAc,CAACb,IAAI,EAAEC,MAAM,CAAC,CAAA;;EAEjC;MACA,IAAI,CAACa,gBAAgB,EAAE,CAAA;EAC3B,GAAA;IAAC,OAAAlF,YAAA,CAAAgE,eAAA,EAAA,CAAA;MAAA/D,GAAA,EAAA,4BAAA;MAAAC,KAAA,EAED,SAAAC,0BAAAA,GAA6B;QACzB,IAAQE,gCAAgC,GAAK,IAAI,CAAC5B,MAAM,CAAC6B,MAAM,CAAvDD,gCAAgC,CAAA;QAExC,IAAI,CAACM,YAAY,GAAG;UAChB7B,CAAC,EAAE,IAAI,CAACyB,eAAe;EACvBxB,QAAAA,CAAC,EAAE,CAAA;SACN,CAAA;EAED,MAAA,IAAI,IAAI,CAACwB,eAAe,GAAGF,gCAAgC,EAAE;EACzD,QAAA,IAAMvB,CAAC,GAAGO,IAAI,CAACoB,KAAK,CAACpB,IAAI,CAACqB,IAAI,CAAC,IAAI,CAACH,eAAe,CAAC,CAAC,CAAA;UACrD,IAAMxB,CAAC,GAAGM,IAAI,CAACmB,IAAI,CAAC,IAAI,CAACD,eAAe,GAAGzB,CAAC,CAAC,CAAA;UAE7C,IAAI,CAAC6B,YAAY,GAAG;EAAE7B,UAAAA,CAAC,EAADA,CAAC;EAAEC,UAAAA,CAAC,EAADA,CAAAA;WAAG,CAAA;EAChC,OAAA;EACJ,KAAA;EAAC,GAAA,EAAA;MAAAkB,GAAA,EAAA,uBAAA;MAAAC,KAAA,EAED,SAAA8E,qBAAAA,GAAwB;EACpB;EACA,MAAA,IAAMG,aAAa,GAAG,SAAhBA,aAAaA,CAAIC,MAAM,EAAK;UAC9B,OAAOA,MAAM,CAACC,KAAK,CAAC,IAAI,CAAC,CACXC,MAAM,CAAC,UAAAC,IAAI,EAAA;YAAA,OAAI,CAACA,IAAI,CAACC,WAAW,EAAE,CAACC,QAAQ,CAAC,QAAQ,CAAC,CAAA;EAAA,SAAA,CAAC,CACtDC,IAAI,CAAC,IAAI,CAAC,CAAA;SAC3B,CAAA;QAED,IAAMC,cAAc,GAAG,IAAI,CAAClB,aAAa,GAAGmB,wBAA4B,GAAG/B,eAAe,CAAA;QAE1F,IAAI,CAACiB,aAAa,GAAG;EACjBe,QAAAA,QAAQ,EAAE,IAAI,CAACpH,MAAM,CAACkB,kBAAkB,CAAC;EACrCC,UAAAA,KAAK,EAAE,sBAAsB;YAC7BC,IAAI,EAAE,IAAI,CAACgF,UAAU,GAAGc,cAAc,GAAGR,aAAa,CAACQ,cAAc,CAAA;EACzE,SAAC,CAAC;EACFG,QAAAA,OAAO,EAAE,IAAI,CAACrH,MAAM,CAACkB,kBAAkB,CAAC;EACpCC,UAAAA,KAAK,EAAE,oBAAoB;YAC3BC,IAAI,EAAE,IAAI,CAACgF,UAAU,GAAGkB,sBAAa,GAAGZ,aAAa,CAACY,sBAAa,CAAA;WACtE,CAAA;SACJ,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAA9F,GAAA,EAAA,gBAAA;EAAAC,IAAAA,KAAA,EAED,SAAA+E,cAAAA,CAAeb,IAAI,EAAEC,MAAM,EAAE;EACzB;EACA,MAAA,IAAM2B,aAAa,GAAG,IAAI,CAACvH,MAAM,CAACqC,YAAY,CAAC;EAC3CC,QAAAA,IAAI,EAAE,IAAI,CAACpC,KAAK,GAAG,CAAC;UACpBqC,KAAK,EAAEC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;EACF,MAAA,IAAM6E,eAAe,GAAG,CAAC,IAAI,CAACpB,UAAU,GAAG,IAAI,GAAG,IAAI,CAACpG,MAAM,CAACqC,YAAY,CAAC;EACvEC,QAAAA,IAAI,EAAE,IAAI,CAACpC,KAAK,GAAG,CAAC;UACpBqC,KAAK,EAAEC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAM8E,oBAAoB,GAAG,IAAI,CAACzH,MAAM,CAACqC,YAAY,CAAC;EAClDC,QAAAA,IAAI,EAAE,IAAI,CAACpC,KAAK,GAAG,CAAC;UACpBqC,KAAK,EAAEC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAM+E,oBAAoB,GAAG,IAAI,CAAC1H,MAAM,CAACqC,YAAY,CAAC;EAClDC,QAAAA,IAAI,EAAE,IAAI,CAAC6D,4BAA4B,GAAG,CAAC;UAC3C5D,KAAK,EAAEC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;QAEF,IAAI,CAAC2D,OAAO,GAAG;EACXX,QAAAA,IAAI,EAAEA,IAAI;EACVC,QAAAA,MAAM,EAAEA,MAAM;EACd+B,QAAAA,OAAO,EAAEJ,aAAa;EACtBK,QAAAA,SAAS,EAAEJ,eAAe;EAC1BK,QAAAA,cAAc,EAAEJ,oBAAoB;EACpCK,QAAAA,cAAc,EAAEJ,oBAAAA;SACnB,CAAA;EACL,KAAA;;EAEA;EAAA,GAAA,EAAA;MAAAlG,GAAA,EAAA,kBAAA;MAAAC,KAAA,EACA,SAAAgF,gBAAAA,GAAmB;EACf,MAAA,KAAK,IAAIsB,GAAG,GAAG,CAAC,EAAEA,GAAG,GAAG,IAAI,CAACjC,SAAS,EAAEiC,GAAG,IAAI,CAAC,EAAE;EAC9C;EACA,QAAA,IAAMC,IAAI,GAASD,GAAG,GAAG,CAAC,IAAI,CAAE,CAAA;EAChC,QAAA,IAAME,MAAM,GAAMD,IAAI,GAAG,IAAI,CAAC1B,OAAO,CAACX,IAAI,GAAG,IAAI,CAACW,OAAO,CAACqB,OAAO,CAAA;EACjE,QAAA,IAAMO,QAAQ,GAAIF,IAAI,GAAG,IAAI,CAAC1B,OAAO,CAACV,MAAM,GAAG,IAAI,CAACU,OAAO,CAACsB,SAAS,CAAA;EACrE,QAAA,IAAMO,OAAO,GAAKH,IAAI,GAAG,IAAI,CAAC1B,OAAO,CAACqB,OAAO,GAAG,IAAI,CAACrB,OAAO,CAACX,IAAI,CAAA;EACjE,QAAA,IAAMyC,SAAS,GAAGJ,IAAI,GAAG,IAAI,CAAC1B,OAAO,CAACsB,SAAS,GAAG,IAAI,CAACtB,OAAO,CAACV,MAAM,CAAA;;EAErE;UACA,IAAMxB,gBAAgB,GAAG,IAAI,CAACiE,yBAAyB,CAACJ,MAAM,EAAEC,QAAQ,EAAEH,GAAG,CAAC,CAAA;;EAE9E;EACA,QAAA,IAAMO,eAAe,GAAG,IAAIxI,eAAe,CAAC;YACxCE,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBC,UAAAA,IAAI,EAAE,IAAI,CAACqG,OAAO,CAACwB,cAAc;YACjC5H,KAAK,EAAE,IAAI,CAACiG,4BAA4B;YACxC/F,cAAc,EAAE,IAAI,CAACA,cAAc;YACnCI,oBAAoB,EAAE,IAAI,CAACA,oBAAAA;EAC/B,SAAC,CAAC,CAAA;;EAEF;EACA,QAAA,IAAM+H,eAAe,GAAG,IAAI,CAACC,uBAAuB,CAACP,MAAM,EAAEC,QAAQ,EAAEC,OAAO,EAAEC,SAAS,EAAEL,GAAG,CAAC,CAAA;EAE/F,QAAA,IAAI,CAAC/G,SAAS,CAACkD,IAAI,CAAC;EAAEE,UAAAA,gBAAgB,EAAhBA,gBAAgB;EAAEkE,UAAAA,eAAe,EAAfA,eAAe;EAAEC,UAAAA,eAAe,EAAfA,eAAAA;EAAgB,SAAC,CAAC,CAAA;EAC/E,OAAA;EACJ,KAAA;EAAC,GAAA,EAAA;MAAA/G,GAAA,EAAA,2BAAA;MAAAC,KAAA,EAED,SAAA4G,yBAA0BJ,CAAAA,MAAM,EAAEC,QAAQ,EAAEH,GAAG,EAAE;EAC7C,MAAA,IAAMnF,eAAe,GAAG,IAAI,CAAC5C,MAAM,CAAC6C,qBAAqB,CAAC;EACtD1B,QAAAA,KAAK,EAAE,sBAAsB;EAC7B2B,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,IAAI,CAAC4C,aAAa,GAAG,SAAS,GAAG,mBAAA;EAAoB,WAAA;EACzE,SAAC,EACD;EACIjD,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,CAAArC,CAAAA,MAAA,CAAA0H,kBAAA,CACG,IAAI,CAACzC,aAAa,IAAI,IAAI,CAACI,UAAU,GAAG,CAAC;EACzCrD,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CAAC,GAAG,EAAE,CAAA,CAAA;EAEf,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAACrD,MAAM,CAACsD,eAAe,CAAC;EAC1CC,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE8E,MAAAA;EAAO,WAAA;EAC/B,SAAC,EACD;EACIlF,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACuB,cAAAA;EAAe,WAAA;EACpD,SAAC,EACD;EACI9E,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACwB,cAAAA;EAAe,WAAA;EACpD,SAAC,CAAA/G,CAAAA,MAAA,CAAA0H,kBAAA,CAEG,IAAI,CAACzC,aAAa,IAAI,IAAI,CAACI,UAAU,GAAG,CAAC;EACzCrD,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE+E,QAAAA;EAAS,WAAA;WAChC,CAAC,GAAG,EAAE,CAAA,CAAA;EAEf,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMzE,cAAc,GAAG,IAAI,CAACzD,MAAM,CAAC0D,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAAEf,eAAe,CAAA;EACvC,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMwB,gBAAgB,GAAG,IAAI,CAACpE,MAAM,CAAC6D,qBAAqB,CAAC;EACvD1C,QAAAA,KAAK,EAAE,sBAAsB;EAC7BoC,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;EACLC,UAAAA,MAAM,EAAE,IAAI,CAACsC,aAAa,CAACe,QAAQ;EACnCpD,UAAAA,UAAU,EAAE,YAAY;EACxBC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC7D,cAAc,CAACC,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAACD,cAAc,CAACE,CAAC;cACzC,iBAAiB,EAAE,IAAI,CAACwB,eAAe;cACvC,uBAAuB,EAAE,IAAI,CAACpB,qBAAqB;cACnD,eAAe,EAAE,IAAI,CAACR,KAAK;EAC3B,YAAA,aAAa,EAAE6H,GAAAA;EACnB,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;QAEF,OAAO;EACH5D,QAAAA,QAAQ,EAAEC,gBAAgB;EAC1Bf,QAAAA,SAAS,EAATA,SAAAA;SACH,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAA7B,GAAA,EAAA,yBAAA;EAAAC,IAAAA,KAAA,EAED,SAAA+G,uBAAwBP,CAAAA,MAAM,EAAEC,QAAQ,EAAEC,OAAO,EAAEC,SAAS,EAAEL,GAAG,EAAE;EAC/D,MAAA,IAAMnF,eAAe,GAAG,IAAI,CAAC5C,MAAM,CAAC6C,qBAAqB,CAAC;EACtD1B,QAAAA,KAAK,EAAE,oBAAoB;EAC3B2B,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;WACvC,CAAA,CAAArC,MAAA,CAAA0H,kBAAA,CACG,IAAI,CAACrC,UAAU,GAAG,CAClB;EACIrD,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CACJ,GAAG,EAAE,CAAA,CAAA;EAEd,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAACrD,MAAM,CAACsD,eAAe,CAAC;EAC1CC,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE8E,MAAAA;EAAO,WAAA;EAC/B,SAAC,EACD;EACIlF,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEgF,OAAAA;EAAQ,WAAA;EAChC,SAAC,EACD;EACIpF,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACuB,cAAAA;EAAe,WAAA;EACpD,SAAC,EACD;EACI9E,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACwB,cAAAA;EAAe,WAAA;WACnD,CAAA,CAAA/G,MAAA,CAAA0H,kBAAA,CACG,IAAI,CAACrC,UAAU,GAAG,CAClB;EACIrD,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE+E,QAAAA;EAAS,WAAA;EACjC,SAAC,EACD;EACInF,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEiF,SAAAA;EAAU,WAAA;WACjC,CACJ,GAAG,EAAE,CAAA,CAAA;EAEd,OAAC,CAAC,CAAA;EAEF,MAAA,IAAM3E,cAAc,GAAG,IAAI,CAACzD,MAAM,CAAC0D,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAAEf,eAAe,CAAA;EACvC,OAAC,CAAC,CAAA;EAEF,MAAA,IAAM2F,eAAe,GAAG,IAAI,CAACvI,MAAM,CAAC6D,qBAAqB,CAAC;EACtD1C,QAAAA,KAAK,EAAE,oBAAoB;EAC3BoC,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;EACLC,UAAAA,MAAM,EAAE,IAAI,CAACsC,aAAa,CAACgB,OAAO;EAClCrD,UAAAA,UAAU,EAAE,oBAAoB;EAChCC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC7D,cAAc,CAACC,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAACD,cAAc,CAACE,CAAC;cACzC,iBAAiB,EAAE,IAAI,CAACwB,eAAe;cACvC,uBAAuB,EAAE,IAAI,CAACpB,qBAAqB;cACnD,eAAe,EAAE,IAAI,CAACR,KAAK;EAC3B,YAAA,aAAa,EAAE6H,GAAAA;EACnB,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;QAEF,OAAO;EACH5D,QAAAA,QAAQ,EAAEoE,eAAe;EACzBlF,QAAAA,SAAS,EAATA,SAAAA;SACH,CAAA;EACL,KAAA;;EAEA;EACJ;EACA;EACA;EACA;EAJI,GAAA,EAAA;MAAA7B,GAAA,EAAA,UAAA;EAAAC,IAAAA,KAAA,EAKA,SAAA4C,QAASC,CAAAA,IAAI,EAAE;EAAA,MAAA,IAAAC,SAAA,GAAAC,0BAAA,CAC0D,IAAI,CAACxD,SAAS,CAAA;UAAAyD,KAAA,CAAA;EAAA,MAAA,IAAA;UAAnF,KAAAF,SAAA,CAAAG,CAAA,EAAAD,EAAAA,CAAAA,CAAAA,KAAA,GAAAF,SAAA,CAAAI,CAAA,EAAAC,EAAAA,IAAA,GAAqF;EAAA,UAAA,IAAAC,WAAA,GAAAJ,KAAA,CAAAhD,KAAA;cAAxE2C,gBAAgB,GAAAS,WAAA,CAAhBT,gBAAgB;cAAEkE,eAAe,GAAAzD,WAAA,CAAfyD,eAAe;cAAEC,eAAe,GAAA1D,WAAA,CAAf0D,eAAe,CAAA;EAC3DjE,UAAAA,IAAI,CAACQ,WAAW,CAACV,gBAAgB,CAACD,QAAQ,CAAC,CAAA;YAC3CG,IAAI,CAACS,YAAY,CAAC,CAAC,EAAEX,gBAAgB,CAACf,SAAS,CAAC,CAAA;EAChDiB,UAAAA,IAAI,CAACU,kBAAkB,CAAC,IAAI,CAAC9C,YAAY,CAAC7B,CAAC,EAAE,IAAI,CAAC6B,YAAY,CAAC5B,CAAC,EAAE,CAAC,CAAC,CAAA;EAEpEgI,UAAAA,eAAe,CAACjE,QAAQ,CAACC,IAAI,CAAC,CAAA;EAE9BA,UAAAA,IAAI,CAACQ,WAAW,CAACyD,eAAe,CAACpE,QAAQ,CAAC,CAAA;YAC1CG,IAAI,CAACS,YAAY,CAAC,CAAC,EAAEwD,eAAe,CAAClF,SAAS,CAAC,CAAA;EAC/CiB,UAAAA,IAAI,CAACU,kBAAkB,CAAC,IAAI,CAAC9C,YAAY,CAAC7B,CAAC,EAAE,IAAI,CAAC6B,YAAY,CAAC5B,CAAC,EAAE,CAAC,CAAC,CAAA;EACxE,SAAA;EAAC,OAAA,CAAA,OAAA2E,GAAA,EAAA;UAAAV,SAAA,CAAAW,CAAA,CAAAD,GAAA,CAAA,CAAA;EAAA,OAAA,SAAA;EAAAV,QAAAA,SAAA,CAAAY,CAAA,EAAA,CAAA;EAAA,OAAA;EACL,KAAA;EAAC,GAAA,CAAA,CAAA,CAAA;EAAA,CAAA;;;;;;;;;"} \ No newline at end of file +{"version":3,"file":"radix-sort-umd.js","sources":["../../src/shaders/prefix_sum.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/utils.js","../../src/PrefixSumKernel.js","../../src/shaders/radix_sort.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort_reorder.js","../../src/shaders/check_sort.js","../../src/CheckSortKernel.js","../../src/RadixSortKernel.js"],"sourcesContent":["const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","/**\r\n * Find the best dispatch size x and y dimensions to minimize unused workgroups\r\n * \r\n * @param {GPUDevice} device - The GPU device\r\n * @param {int} workgroup_count - Number of workgroups to dispatch\r\n * @returns \r\n */\r\nfunction find_optimal_dispatch_size(device, workgroup_count) {\r\n const dispatchSize = { \r\n x: workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(workgroup_count))\r\n const y = Math.ceil(workgroup_count / x)\r\n \r\n dispatchSize.x = x\r\n dispatchSize.y = y\r\n }\r\n\r\n return dispatchSize\r\n}\r\n\r\nfunction create_buffer_from_data({device, label, data, usage = 0}) {\r\n const dispatchSizes = device.createBuffer({\r\n label: label,\r\n usage: usage,\r\n size: data.length * 4,\r\n mappedAtCreation: true\r\n })\r\n\r\n const dispatchData = new Uint32Array(dispatchSizes.getMappedRange())\r\n dispatchData.set(data)\r\n dispatchSizes.unmap()\r\n\r\n return dispatchSizes\r\n}\r\n\r\nexport {\r\n find_optimal_dispatch_size,\r\n create_buffer_from_data,\r\n}","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const workgroup_count = Math.ceil(count / this.items_per_workgroup)\r\n const dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n label: 'prefix-sum-block-sum',\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n get_dispatch_chain() {\r\n return this.pipelines.flatMap(p => [ p.dispatchSize.x, p.dispatchSize.y, 1 ])\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n \r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","const checkSortSource = (isFirstPass = false, isLastPass = false, isFullCheck = false) => /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var output: array;\r\n@group(0) @binding(2) var original: array;\r\n@group(0) @binding(3) var is_sorted: u32;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ELEMENT_COUNT: u32;\r\noverride START_ELEMENT: u32;\r\n\r\nvar s_data: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn check_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT;\r\n let GID = TID + WID; // Global thread ID\r\n\r\n // Load data into shared memory\r\n ${ isFirstPass ? first_pass_load_data : \"s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);\" }\r\n\r\n // Perform parallel reduction\r\n for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { \r\n workgroupBarrier(); \r\n if (TID % (2u * d) == 0u) {\r\n s_data[TID] += s_data[TID + d];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Write reduction result\r\n ${ isLastPass ? last_pass(isFullCheck) : write_reduction_result }\r\n}`\r\n\r\nconst write_reduction_result = /* wgsl */ `\r\n if (TID == 0) {\r\n output[WORKGROUP_ID] = s_data[0];\r\n }\r\n`\r\n\r\nconst first_pass_load_data = /* wgsl */ `\r\n let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n\r\n // Load current element into shared memory\r\n // Also load next element for comparison\r\n let elm = select(0u, input[GID], GID < ELEMENT_COUNT);\r\n let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1);\r\n s_data[TID] = elm;\r\n workgroupBarrier();\r\n\r\n s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next);\r\n`\r\n\r\nconst last_pass = (isFullCheck) => /* wgsl */ `\r\n let fullDispatchLength = arrayLength(&output);\r\n let dispatchIndex = TID * 3;\r\n\r\n if (dispatchIndex >= fullDispatchLength) {\r\n return;\r\n }\r\n\r\n ${isFullCheck ? last_pass_full : last_pass_fast}\r\n`\r\n\r\nconst last_pass_fast = /* wgsl */ `\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u);\r\n`\r\n\r\nconst last_pass_full = /* wgsl */ `\r\n if (TID == 0 && s_data[0] == 0) {\r\n is_sorted = 1u;\r\n }\r\n\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0);\r\n`\r\nexport default checkSortSource","import checkSortSource from \"./shaders/check_sort\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass CheckSortKernel {\r\n /**\r\n * CheckSortKernel - Performs a parralel reduction to check if an array is sorted.\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - The buffer containing the data to check\r\n * @param {GPUBuffer} result - The result dispatch size buffer\r\n * @param {GPUBuffer} original - The original dispatch size buffer\r\n * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted\r\n * @param {number} count - The number of elements to check\r\n * @param {number} start - The index to start checking from\r\n * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check\r\n * @param {object} workgroup_size - The workgroup size in x and y dimensions\r\n */\r\n constructor({\r\n device,\r\n data,\r\n result,\r\n original,\r\n is_sorted,\r\n count,\r\n start = 0,\r\n full_check = true,\r\n workgroup_size = { x: 16, y: 16 },\r\n }) {\r\n this.device = device\r\n this.count = count\r\n this.start = start\r\n this.full_check = full_check\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n\r\n this.pipelines = []\r\n\r\n this.buffers = {\r\n data, \r\n result, \r\n original, \r\n is_sorted,\r\n outputs: []\r\n }\r\n\r\n this.create_passes_recursive(data, count)\r\n }\r\n\r\n // Find the best dispatch size for each pass to minimize unused workgroups\r\n static find_optimal_dispatch_chain(device, item_count, workgroup_size) {\r\n const threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n const sizes = []\r\n\r\n do {\r\n // Number of workgroups required to process all items\r\n const target_workgroup_count = Math.ceil(item_count / threads_per_workgroup)\r\n \r\n // Optimal dispatch size and updated workgroup count\r\n const dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count)\r\n \r\n sizes.push(dispatchSize.x, dispatchSize.y, 1)\r\n item_count = target_workgroup_count\r\n } while (item_count > 1)\r\n \r\n return sizes\r\n }\r\n\r\n create_passes_recursive(buffer, count, passIndex = 0) {\r\n const workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n\r\n const isFirstPass = passIndex === 0\r\n const isLastPass = workgroup_count <= 1\r\n\r\n const outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({\r\n label: `check-sort-${this.full_check ? 'full' : 'fast'}-${passIndex}`,\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n // Last pass bindings\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n }, {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: buffer }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outputBuffer }\r\n },\r\n // Last pass buffers\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n resource: { buffer: this.buffers.original }\r\n }, {\r\n binding: 3,\r\n resource: { buffer: this.buffers.is_sorted }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [bindGroupLayout]\r\n })\r\n\r\n const element_count = isFirstPass ? this.start + count : count\r\n const start_element = isFirstPass ? this.start : 0\r\n\r\n const checkSortPipeline = this.device.createComputePipeline({\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.device.createShaderModule({\r\n code: checkSortSource(isFirstPass, isLastPass, this.full_check),\r\n label: 'check-sort',\r\n }),\r\n entryPoint: 'check_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': element_count,\r\n 'START_ELEMENT': start_element,\r\n },\r\n }\r\n })\r\n\r\n this.buffers.outputs.push(outputBuffer)\r\n this.pipelines.push({ pipeline: checkSortPipeline, bindGroup })\r\n \r\n if (!isLastPass) {\r\n this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1)\r\n }\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n\r\n const dispatchIndirect = (this.full_check || i < this.pipelines.length - 1)\r\n\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n\r\n if (dispatchIndirect)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n else\r\n // Only the last dispatch of the fast check kernel is constant to (1, 1, 1)\r\n pass.dispatchWorkgroups(1, 1, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default CheckSortKernel","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\nimport CheckSortKernel from \"./CheckSortKernel\"\r\nimport { create_buffer_from_data, find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} check_order - Enable \"order checking\" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false)\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n check_order = false,\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.check_order = check_order\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n this.kernels = {}\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count)\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines(keys, values)\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_pipelines(keys, values) { \r\n // Block prefix sum kernel \r\n const { prefixSumKernel, prefixBlockSumBuffer } = this.create_prefix_sum_kernel()\r\n\r\n // Indirect dispatch buffers\r\n const dispatchData = this.calculate_dispatch_sizes(prefixSumKernel)\r\n\r\n // GPU buffers\r\n this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData)\r\n\r\n // Check sort kernels\r\n this.create_check_sort_kernels(this.buffers.keys, dispatchData)\r\n\r\n // Radix sort passes for every 2 bits\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, reorderPipeline })\r\n }\r\n }\r\n\r\n create_prefix_sum_kernel() {\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-prefix-block-sum',\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create block prefix sum kernel\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: prefixBlockSumBuffer, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n\r\n this.kernels.prefixSum = prefixSumKernel\r\n\r\n return { prefixSumKernel, prefixBlockSumBuffer }\r\n }\r\n\r\n calculate_dispatch_sizes(prefixSumKernel) {\r\n // Prefix sum dispatch sizes\r\n const prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain()\r\n\r\n // Check sort element count (fast/full)\r\n const check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4)\r\n const check_sort_full_count = this.count - check_sort_fast_count\r\n const start_full = check_sort_fast_count - 1\r\n\r\n // Check sort dispatch sizes\r\n const dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size)\r\n const dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size)\r\n\r\n // Initial dispatch sizes\r\n const initialDispatch = [\r\n this.dispatchSize.x, this.dispatchSize.y, 1, // Radix Sort + Reorder\r\n ...dispatchSizesFast.slice(0, 3), // Check sort fast\r\n ...prefixSumDispatchSize // Prefix Sum\r\n ]\r\n\r\n // Dispatch offsets in main buffer\r\n this.dispatchOffsets = {\r\n radix_sort: 0,\r\n check_sort_fast: 3 * 4,\r\n prefix_sum: 6 * 4\r\n }\r\n\r\n return {\r\n initialDispatch,\r\n dispatchSizesFull,\r\n check_sort_fast_count, \r\n check_sort_full_count, \r\n start_full \r\n }\r\n }\r\n\r\n create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n label: 'radix-sort-tmp-keys',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n label: 'radix-sort-tmp-values',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-local-prefix-sum',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Dispatch sizes (radix sort, check sort, prefix sum)\r\n const dispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const originalDispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size-original',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Dispatch sizes (full sort)\r\n const checkSortFullDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const checkSortFullOriginalDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size-original',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Flag to tell if the data is sorted\r\n const isSortedBuffer = create_buffer_from_data({\r\n label: 'is-sorted',\r\n device: this.device, \r\n data: new Uint32Array([0]), \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n \r\n dispatchSize: dispatchBuffer,\r\n originalDispatchSize: originalDispatchBuffer,\r\n checkSortFullDispatchSize: checkSortFullDispatchBuffer,\r\n originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer,\r\n isSorted: isSortedBuffer,\r\n }\r\n }\r\n\r\n create_check_sort_kernels(inKeys, checkSortPartitionData) {\r\n // Skip check sort if disabled\r\n if (!this.check_order) {\r\n return [ null, null ]\r\n }\r\n\r\n const { check_sort_fast_count, check_sort_full_count, start_full } = checkSortPartitionData\r\n\r\n // Create the full pass\r\n const checkSortFull = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.dispatchSize,\r\n original: this.buffers.originalDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_full_count,\r\n start: start_full,\r\n full_check: true,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n // Create the fast pass\r\n const checkSortFast = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.checkSortFullDispatchSize,\r\n original: this.buffers.originalCheckSortFullDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_fast_count,\r\n full_check: false,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) {\r\n console.warn(`Warning: workgroup size is too small to enable check sort optimization, disabling...`)\r\n this.check_order = false\r\n return [ null, null ]\r\n }\r\n\r\n this.kernels.checkSortFast = checkSortFast\r\n this.kernels.checkSortFull = checkSortFull\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) { \r\n for (let i = 0; i < this.bit_count / 2; i++) {\r\n const { blockSumPipeline, reorderPipeline } = this.pipelines[i]\r\n\r\n if (this.check_order && i % 2 == 0) {\r\n this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast)\r\n this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize)\r\n }\r\n \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n\r\n this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel"],"names":["prefixSumSource","prefixSumNoBankConflictSource","find_optimal_dispatch_size","device","workgroup_count","dispatchSize","x","y","limits","maxComputeWorkgroupsPerDimension","Math","floor","sqrt","ceil","create_buffer_from_data","_ref","label","data","_ref$usage","usage","dispatchSizes","createBuffer","size","length","mappedAtCreation","dispatchData","Uint32Array","getMappedRange","set","unmap","PrefixSumKernel","count","_ref$workgroup_size","workgroup_size","_ref$avoid_bank_confl","avoid_bank_conflicts","_classCallCheck","threads_per_workgroup","items_per_workgroup","log2","Error","concat","pipelines","shaderModule","createShaderModule","code","prefixSumSource_NoBankConflict","create_pass_recursive","_createClass","key","value","blockSumBuffer","GPUBufferUsage","STORAGE","COPY_SRC","COPY_DST","bindGroupLayout","createBindGroupLayout","entries","binding","visibility","GPUShaderStage","COMPUTE","buffer","type","bindGroup","createBindGroup","layout","resource","pipelineLayout","createPipelineLayout","bindGroupLayouts","scanPipeline","createComputePipeline","compute","module","entryPoint","constants","push","pipeline","blockSumPipeline","get_dispatch_chain","flatMap","p","dispatch","pass","offset","arguments","undefined","i","_this$pipelines$i","setPipeline","setBindGroup","dispatchWorkgroupsIndirect","radixSortSource","radixSortCoalescedSource","radixSortReorderSource","checkSortSource","isFirstPass","isLastPass","isFullCheck","first_pass_load_data","last_pass","write_reduction_result","last_pass_full","last_pass_fast","CheckSortKernel","result","original","is_sorted","_ref$start","start","_ref$full_check","full_check","buffers","outputs","create_passes_recursive","passIndex","outputBuffer","_toConsumableArray","element_count","start_element","checkSortPipeline","dispatchIndirect","dispatchWorkgroups","find_optimal_dispatch_chain","item_count","sizes","target_workgroup_count","RadixSortKernel","keys","values","_ref$bit_count","bit_count","_ref$check_order","check_order","_ref$local_shuffle","local_shuffle","Number","isInteger","prefix_block_workgroup_count","has_values","shaderModules","kernels","create_shader_modules","create_pipelines","remove_values","source","split","filter","line","toLowerCase","includes","join","blockSumSource","radixSortSource_LocalShuffle","blockSum","reorder","reorderSource","_this$create_prefix_s","create_prefix_sum_kernel","prefixSumKernel","prefixBlockSumBuffer","calculate_dispatch_sizes","create_buffers","create_check_sort_kernels","bit","even","inKeys","tmpKeys","inValues","tmpValues","outKeys","outValues","create_block_sum_pipeline","reorderPipeline","create_reorder_pipeline","prefixSum","prefixSumDispatchSize","check_sort_fast_count","min","check_sort_full_count","start_full","dispatchSizesFast","dispatchSizesFull","initialDispatch","slice","dispatchOffsets","radix_sort","check_sort_fast","prefix_sum","tmpKeysBuffer","tmpValuesBuffer","localPrefixSumBuffer","dispatchBuffer","INDIRECT","originalDispatchBuffer","checkSortFullDispatchBuffer","checkSortFullOriginalDispatchBuffer","isSortedBuffer","localPrefixSum","prefixBlockSum","originalDispatchSize","checkSortFullDispatchSize","originalCheckSortFullDispatchSize","isSorted","checkSortPartitionData","checkSortFull","checkSortFast","console","warn"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAAA,IAAMA,eAAe,aAyFnB,4oFAAA;;ECzFF;EACA;EACA;EACA;EACA;EACA,IAAMC,6BAA6B,aA2GjC,iuGAAA;;EChHF;EACA;EACA;EACA;EACA;EACA;EACA;EACA,SAASC,0BAA0BA,CAACC,MAAM,EAAEC,eAAe,EAAE;EACzD,EAAA,IAAMC,YAAY,GAAG;EACjBC,IAAAA,CAAC,EAAEF,eAAe;EAClBG,IAAAA,CAAC,EAAE,CAAA;KACN,CAAA;EAED,EAAA,IAAIH,eAAe,GAAGD,MAAM,CAACK,MAAM,CAACC,gCAAgC,EAAE;EAClE,IAAA,IAAMH,CAAC,GAAGI,IAAI,CAACC,KAAK,CAACD,IAAI,CAACE,IAAI,CAACR,eAAe,CAAC,CAAC,CAAA;MAChD,IAAMG,CAAC,GAAGG,IAAI,CAACG,IAAI,CAACT,eAAe,GAAGE,CAAC,CAAC,CAAA;MAExCD,YAAY,CAACC,CAAC,GAAGA,CAAC,CAAA;MAClBD,YAAY,CAACE,CAAC,GAAGA,CAAC,CAAA;EACtB,GAAA;EAEA,EAAA,OAAOF,YAAY,CAAA;EACvB,CAAA;EAEA,SAASS,uBAAuBA,CAAAC,IAAA,EAAmC;EAAA,EAAA,IAAjCZ,MAAM,GAAAY,IAAA,CAANZ,MAAM;MAAEa,KAAK,GAAAD,IAAA,CAALC,KAAK;MAAEC,IAAI,GAAAF,IAAA,CAAJE,IAAI;MAAAC,UAAA,GAAAH,IAAA,CAAEI,KAAK;EAALA,IAAAA,KAAK,GAAAD,UAAA,KAAG,KAAA,CAAA,GAAA,CAAC,GAAAA,UAAA,CAAA;EAC5D,EAAA,IAAME,aAAa,GAAGjB,MAAM,CAACkB,YAAY,CAAC;EACtCL,IAAAA,KAAK,EAAEA,KAAK;EACZG,IAAAA,KAAK,EAAEA,KAAK;EACZG,IAAAA,IAAI,EAAEL,IAAI,CAACM,MAAM,GAAG,CAAC;EACrBC,IAAAA,gBAAgB,EAAE,IAAA;EACtB,GAAC,CAAC,CAAA;IAEF,IAAMC,YAAY,GAAG,IAAIC,WAAW,CAACN,aAAa,CAACO,cAAc,EAAE,CAAC,CAAA;EACpEF,EAAAA,YAAY,CAACG,GAAG,CAACX,IAAI,CAAC,CAAA;IACtBG,aAAa,CAACS,KAAK,EAAE,CAAA;EAErB,EAAA,OAAOT,aAAa,CAAA;EACxB;;ACnCoD,MAE9CU,eAAe,gBAAA,YAAA;EACjB;EACJ;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;IACI,SAAAA,eAAAA,CAAAf,IAAA,EAMG;EAAA,IAAA,IALCZ,MAAM,GAAAY,IAAA,CAANZ,MAAM;QACNc,IAAI,GAAAF,IAAA,CAAJE,IAAI;QACJc,KAAK,GAAAhB,IAAA,CAALgB,KAAK;QAAAC,mBAAA,GAAAjB,IAAA,CACLkB,cAAc;QAAdA,cAAc,GAAAD,mBAAA,KAAG,KAAA,CAAA,GAAA;EAAE1B,QAAAA,CAAC,EAAE,EAAE;EAAEC,QAAAA,CAAC,EAAE,EAAA;EAAG,OAAC,GAAAyB,mBAAA;QAAAE,qBAAA,GAAAnB,IAAA,CACjCoB,oBAAoB;EAApBA,MAAAA,oBAAoB,GAAAD,qBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,qBAAA,CAAA;EAAAE,IAAAA,eAAA,OAAAN,eAAA,CAAA,CAAA;MAE5B,IAAI,CAAC3B,MAAM,GAAGA,MAAM,CAAA;MACpB,IAAI,CAAC8B,cAAc,GAAGA,cAAc,CAAA;MACpC,IAAI,CAACI,qBAAqB,GAAGJ,cAAc,CAAC3B,CAAC,GAAG2B,cAAc,CAAC1B,CAAC,CAAA;MAChE,IAAI,CAAC+B,mBAAmB,GAAG,CAAC,GAAG,IAAI,CAACD,qBAAqB,CAAC;;MAE1D,IAAI3B,IAAI,CAAC6B,IAAI,CAAC,IAAI,CAACF,qBAAqB,CAAC,GAAG,CAAC,KAAK,CAAC,EAC/C,MAAM,IAAIG,KAAK,CAAAC,wEAAAA,CAAAA,MAAA,CAA0E,IAAI,CAACJ,qBAAqB,EAAA,GAAA,CAAG,CAAC,CAAA;MAE3H,IAAI,CAACK,SAAS,GAAG,EAAE,CAAA;MAEnB,IAAI,CAACC,YAAY,GAAG,IAAI,CAACxC,MAAM,CAACyC,kBAAkB,CAAC;EAC/C5B,MAAAA,KAAK,EAAE,YAAY;EACnB6B,MAAAA,IAAI,EAAEV,oBAAoB,GAAGW,6BAA8B,GAAG9C,eAAAA;EAClE,KAAC,CAAC,CAAA;EAEF,IAAA,IAAI,CAAC+C,qBAAqB,CAAC9B,IAAI,EAAEc,KAAK,CAAC,CAAA;EAC3C,GAAA;IAAC,OAAAiB,YAAA,CAAAlB,eAAA,EAAA,CAAA;MAAAmB,GAAA,EAAA,uBAAA;EAAAC,IAAAA,KAAA,EAED,SAAAH,qBAAAA,CAAsB9B,IAAI,EAAEc,KAAK,EAAE;EAC/B;QACA,IAAM3B,eAAe,GAAGM,IAAI,CAACG,IAAI,CAACkB,KAAK,GAAG,IAAI,CAACO,mBAAmB,CAAC,CAAA;QACnE,IAAMjC,YAAY,GAAGH,0BAA0B,CAAC,IAAI,CAACC,MAAM,EAAEC,eAAe,CAAC,CAAA;;EAE7E;EACA,MAAA,IAAM+C,cAAc,GAAG,IAAI,CAAChD,MAAM,CAACkB,YAAY,CAAC;EAC5CL,QAAAA,KAAK,EAAE,sBAAsB;UAC7BM,IAAI,EAAElB,eAAe,GAAG,CAAC;UACzBe,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAMC,eAAe,GAAG,IAAI,CAACrD,MAAM,CAACsD,qBAAqB,CAAC;EACtDC,QAAAA,OAAO,EAAE,CACL;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CAAA;EAET,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAAC9D,MAAM,CAAC+D,eAAe,CAAC;EAC1ClD,QAAAA,KAAK,EAAE,uBAAuB;EAC9BmD,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EAAE,CACL;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE9C,IAAAA;EAAK,WAAA;EAC7B,SAAC,EACD;EACI0C,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEZ,cAAAA;EAAe,WAAA;WACtC,CAAA;EAET,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMkB,cAAc,GAAG,IAAI,CAAClE,MAAM,CAACmE,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAAEf,eAAe,CAAA;EACvC,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAMgB,YAAY,GAAG,IAAI,CAACrE,MAAM,CAACsE,qBAAqB,CAAC;EACnDzD,QAAAA,KAAK,EAAE,0BAA0B;EACjCmD,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;YACLC,MAAM,EAAE,IAAI,CAAChC,YAAY;EACzBiC,UAAAA,UAAU,EAAE,kBAAkB;EAC9BC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC5C,cAAc,CAAC3B,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAAC2B,cAAc,CAAC1B,CAAC;cACzC,uBAAuB,EAAE,IAAI,CAAC8B,qBAAqB;cACnD,qBAAqB,EAAE,IAAI,CAACC,mBAAAA;EAChC,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;EAEF,MAAA,IAAI,CAACI,SAAS,CAACoC,IAAI,CAAC;EAAEC,QAAAA,QAAQ,EAAEP,YAAY;EAAEP,QAAAA,SAAS,EAATA,SAAS;EAAE5D,QAAAA,YAAY,EAAZA,YAAAA;EAAa,OAAC,CAAC,CAAA;QAExE,IAAID,eAAe,GAAG,CAAC,EAAE;EACrB;EACA,QAAA,IAAI,CAAC2C,qBAAqB,CAACI,cAAc,EAAE/C,eAAe,CAAC,CAAA;;EAE3D;EACA,QAAA,IAAM4E,gBAAgB,GAAG,IAAI,CAAC7E,MAAM,CAACsE,qBAAqB,CAAC;EACvDzD,UAAAA,KAAK,EAAE,+BAA+B;EACtCmD,UAAAA,MAAM,EAAEE,cAAc;EACtBK,UAAAA,OAAO,EAAE;cACLC,MAAM,EAAE,IAAI,CAAChC,YAAY;EACzBiC,YAAAA,UAAU,EAAE,gBAAgB;EAC5BC,YAAAA,SAAS,EAAE;EACP,cAAA,kBAAkB,EAAE,IAAI,CAAC5C,cAAc,CAAC3B,CAAC;EACzC,cAAA,kBAAkB,EAAE,IAAI,CAAC2B,cAAc,CAAC1B,CAAC;gBACzC,uBAAuB,EAAE,IAAI,CAAC8B,qBAAAA;EAClC,aAAA;EACJ,WAAA;EACJ,SAAC,CAAC,CAAA;EAEF,QAAA,IAAI,CAACK,SAAS,CAACoC,IAAI,CAAC;EAAEC,UAAAA,QAAQ,EAAEC,gBAAgB;EAAEf,UAAAA,SAAS,EAATA,SAAS;EAAE5D,UAAAA,YAAY,EAAZA,YAAAA;EAAa,SAAC,CAAC,CAAA;EAChF,OAAA;EACJ,KAAA;EAAC,GAAA,EAAA;MAAA4C,GAAA,EAAA,oBAAA;MAAAC,KAAA,EAED,SAAA+B,kBAAAA,GAAqB;EACjB,MAAA,OAAO,IAAI,CAACvC,SAAS,CAACwC,OAAO,CAAC,UAAAC,CAAC,EAAA;EAAA,QAAA,OAAI,CAAEA,CAAC,CAAC9E,YAAY,CAACC,CAAC,EAAE6E,CAAC,CAAC9E,YAAY,CAACE,CAAC,EAAE,CAAC,CAAE,CAAA;SAAC,CAAA,CAAA;EACjF,KAAA;EAAC,GAAA,EAAA;MAAA0C,GAAA,EAAA,UAAA;EAAAC,IAAAA,KAAA,EAED,SAAAkC,QAAAA,CAASC,IAAI,EAAEhF,YAAY,EAAc;EAAA,MAAA,IAAZiF,MAAM,GAAAC,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAG,CAAC,CAAA;EACnC,MAAA,KAAK,IAAIE,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG,IAAI,CAAC/C,SAAS,CAACnB,MAAM,EAAEkE,CAAC,EAAE,EAAE;EAC5C,QAAA,IAAAC,iBAAA,GAAgC,IAAI,CAAChD,SAAS,CAAC+C,CAAC,CAAC;YAAzCV,QAAQ,GAAAW,iBAAA,CAARX,QAAQ;YAAEd,SAAS,GAAAyB,iBAAA,CAATzB,SAAS,CAAA;EAE3BoB,QAAAA,IAAI,CAACM,WAAW,CAACZ,QAAQ,CAAC,CAAA;EAC1BM,QAAAA,IAAI,CAACO,YAAY,CAAC,CAAC,EAAE3B,SAAS,CAAC,CAAA;EAC/BoB,QAAAA,IAAI,CAACQ,0BAA0B,CAACxF,YAAY,EAAEiF,MAAM,GAAGG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAA;EACrE,OAAA;EACJ,KAAA;EAAC,GAAA,CAAA,CAAA,CAAA;EAAA,CAAA;;EC9IL,IAAMK,eAAe,aAsFnB,m8FAAA;;ECtFF;EACA;EACA;EACA;EACA;EACA,IAAMC,wBAAwB,aA6G5B,kpHAAA;;EClHF,IAAMC,sBAAsB,aA0C1B,w9CAAA;;EC1CF,IAAMC,eAAe,GAAG,SAAlBA,eAAeA,GAAA;EAAA,EAAA,IAAIC,WAAW,GAAAX,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAG,KAAK,CAAA;EAAA,EAAA,IAAEY,UAAU,GAAAZ,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAG,KAAK,CAAA;EAAA,EAAA,IAAEa,WAAW,GAAAb,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAG,KAAK,CAAA;EAAA,EAAA,q8BAAK9C,MAAA,CA0BnFyD,WAAW,GAAGG,oBAAoB,GAAG,4DAA4D,EAAA,4SAAA,CAAA,CAAA5D,MAAA,CAYjG0D,UAAU,GAAGG,SAAS,CAACF,WAAW,CAAC,GAAGG,sBAAsB,EAAA,KAAA,CAAA,CAAA;EAAA,CACjE,CAAA;EAEF,IAAMA,sBAAsB,aAI3B,2EAAA,CAAA;EAED,IAAMF,oBAAoB,aAWzB,0aAAA,CAAA;EAED,IAAMC,SAAS,GAAG,SAAZA,SAASA,CAAIF,WAAW,EAAA;EAAA,EAAA,4LAAK3D,MAAA,CAQ7B2D,WAAW,GAAGI,cAAc,GAAGC,cAAc,EAAA,IAAA,CAAA,CAAA;EAAA,CAClD,CAAA;EAED,IAAMA,cAAc,aAEnB,wGAAA,CAAA;EAED,IAAMD,cAAc,aAMnB,8JAAA;;EChFmD,IAE9CE,eAAe,gBAAA,YAAA;EACjB;EACJ;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;IACI,SAAAA,eAAAA,CAAA3F,IAAA,EAUG;EAAA,IAAA,IATCZ,MAAM,GAAAY,IAAA,CAANZ,MAAM;QACNc,IAAI,GAAAF,IAAA,CAAJE,IAAI;QACJ0F,MAAM,GAAA5F,IAAA,CAAN4F,MAAM;QACNC,QAAQ,GAAA7F,IAAA,CAAR6F,QAAQ;QACRC,SAAS,GAAA9F,IAAA,CAAT8F,SAAS;QACT9E,KAAK,GAAAhB,IAAA,CAALgB,KAAK;QAAA+E,UAAA,GAAA/F,IAAA,CACLgG,KAAK;EAALA,MAAAA,KAAK,GAAAD,UAAA,KAAG,KAAA,CAAA,GAAA,CAAC,GAAAA,UAAA;QAAAE,eAAA,GAAAjG,IAAA,CACTkG,UAAU;EAAVA,MAAAA,UAAU,GAAAD,eAAA,KAAG,KAAA,CAAA,GAAA,IAAI,GAAAA,eAAA;QAAAhF,mBAAA,GAAAjB,IAAA,CACjBkB,cAAc;QAAdA,cAAc,GAAAD,mBAAA,KAAG,KAAA,CAAA,GAAA;EAAE1B,QAAAA,CAAC,EAAE,EAAE;EAAEC,QAAAA,CAAC,EAAE,EAAA;EAAG,OAAC,GAAAyB,mBAAA,CAAA;EAAAI,IAAAA,eAAA,OAAAsE,eAAA,CAAA,CAAA;MAEjC,IAAI,CAACvG,MAAM,GAAGA,MAAM,CAAA;MACpB,IAAI,CAAC4B,KAAK,GAAGA,KAAK,CAAA;MAClB,IAAI,CAACgF,KAAK,GAAGA,KAAK,CAAA;MAClB,IAAI,CAACE,UAAU,GAAGA,UAAU,CAAA;MAC5B,IAAI,CAAChF,cAAc,GAAGA,cAAc,CAAA;MACpC,IAAI,CAACI,qBAAqB,GAAGJ,cAAc,CAAC3B,CAAC,GAAG2B,cAAc,CAAC1B,CAAC,CAAA;MAEhE,IAAI,CAACmC,SAAS,GAAG,EAAE,CAAA;MAEnB,IAAI,CAACwE,OAAO,GAAG;EACXjG,MAAAA,IAAI,EAAJA,IAAI;EACJ0F,MAAAA,MAAM,EAANA,MAAM;EACNC,MAAAA,QAAQ,EAARA,QAAQ;EACRC,MAAAA,SAAS,EAATA,SAAS;EACTM,MAAAA,OAAO,EAAE,EAAA;OACZ,CAAA;EAED,IAAA,IAAI,CAACC,uBAAuB,CAACnG,IAAI,EAAEc,KAAK,CAAC,CAAA;EAC7C,GAAA;;EAEA;IAAA,OAAAiB,YAAA,CAAA0D,eAAA,EAAA,CAAA;MAAAzD,GAAA,EAAA,yBAAA;EAAAC,IAAAA,KAAA,EAmBA,SAAAkE,uBAAAA,CAAwBrD,MAAM,EAAEhC,KAAK,EAAiB;EAAA,MAAA,IAAfsF,SAAS,GAAA9B,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAG,CAAC,CAAA;QAChD,IAAMnF,eAAe,GAAGM,IAAI,CAACG,IAAI,CAACkB,KAAK,GAAG,IAAI,CAACM,qBAAqB,CAAC,CAAA;EAErE,MAAA,IAAM6D,WAAW,GAAGmB,SAAS,KAAK,CAAC,CAAA;EACnC,MAAA,IAAMlB,UAAU,GAAG/F,eAAe,IAAI,CAAC,CAAA;EAEvC,MAAA,IAAMkH,YAAY,GAAGnB,UAAU,GAAG,IAAI,CAACe,OAAO,CAACP,MAAM,GAAG,IAAI,CAACxG,MAAM,CAACkB,YAAY,CAAC;EAC7EL,QAAAA,KAAK,EAAAyB,aAAAA,CAAAA,MAAA,CAAgB,IAAI,CAACwE,UAAU,GAAG,MAAM,GAAG,MAAM,EAAA,GAAA,CAAA,CAAAxE,MAAA,CAAI4E,SAAS,CAAE;UACrE/F,IAAI,EAAElB,eAAe,GAAG,CAAC;UACzBe,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,eAAe,GAAG,IAAI,CAACrD,MAAM,CAACsD,qBAAqB,CAAC;EACtDC,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EAAAvB,MAAA,CAAA8E,kBAAA,CAEGpB,UAAU,GAAG,CAAC;EACdxC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EAAE;EACCL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CAAC,GAAG,EAAE,CAAA,CAAA;EAEf,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAAC9D,MAAM,CAAC+D,eAAe,CAAC;EAC1CC,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEA,MAAAA;EAAO,WAAA;EAC/B,SAAC,EACD;EACIJ,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEuD,YAAAA;EAAa,WAAA;EACrC,SAAC,EAAA7E,MAAA,CAAA8E,kBAAA,CAEGpB,UAAU,GAAG,CAAC;EACdxC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACN,QAAAA;EAAS,WAAA;EAC9C,SAAC,EAAE;EACCjD,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACL,SAAAA;EAAU,WAAA;WAC9C,CAAC,GAAG,EAAE,CAAA,CAAA;EAEf,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMxC,cAAc,GAAG,IAAI,CAAClE,MAAM,CAACmE,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAACf,eAAe,CAAA;EACtC,OAAC,CAAC,CAAA;QAEF,IAAMgE,aAAa,GAAGtB,WAAW,GAAG,IAAI,CAACa,KAAK,GAAGhF,KAAK,GAAGA,KAAK,CAAA;QAC9D,IAAM0F,aAAa,GAAGvB,WAAW,GAAG,IAAI,CAACa,KAAK,GAAG,CAAC,CAAA;EAElD,MAAA,IAAMW,iBAAiB,GAAG,IAAI,CAACvH,MAAM,CAACsE,qBAAqB,CAAC;EACxDN,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;EACLC,UAAAA,MAAM,EAAE,IAAI,CAACxE,MAAM,CAACyC,kBAAkB,CAAC;cACnCC,IAAI,EAAEoD,eAAe,CAACC,WAAW,EAAEC,UAAU,EAAE,IAAI,CAACc,UAAU,CAAC;EAC/DjG,YAAAA,KAAK,EAAE,YAAA;EACX,WAAC,CAAC;EACF4D,UAAAA,UAAU,EAAE,YAAY;EACxBC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC5C,cAAc,CAAC3B,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAAC2B,cAAc,CAAC1B,CAAC;cACzC,uBAAuB,EAAE,IAAI,CAAC8B,qBAAqB;EACnD,YAAA,eAAe,EAAEmF,aAAa;EAC9B,YAAA,eAAe,EAAEC,aAAAA;EACrB,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;QAEF,IAAI,CAACP,OAAO,CAACC,OAAO,CAACrC,IAAI,CAACwC,YAAY,CAAC,CAAA;EACvC,MAAA,IAAI,CAAC5E,SAAS,CAACoC,IAAI,CAAC;EAAEC,QAAAA,QAAQ,EAAE2C,iBAAiB;EAAEzD,QAAAA,SAAS,EAATA,SAAAA;EAAU,OAAC,CAAC,CAAA;QAE/D,IAAI,CAACkC,UAAU,EAAE;UACb,IAAI,CAACiB,uBAAuB,CAACE,YAAY,EAAElH,eAAe,EAAEiH,SAAS,GAAG,CAAC,CAAC,CAAA;EAC9E,OAAA;EACJ,KAAA;EAAC,GAAA,EAAA;MAAApE,GAAA,EAAA,UAAA;EAAAC,IAAAA,KAAA,EAED,SAAAkC,QAAAA,CAASC,IAAI,EAAEhF,YAAY,EAAc;EAAA,MAAA,IAAZiF,MAAM,GAAAC,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAG,CAAC,CAAA;EACnC,MAAA,KAAK,IAAIE,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG,IAAI,CAAC/C,SAAS,CAACnB,MAAM,EAAEkE,CAAC,EAAE,EAAE;EAC5C,QAAA,IAAAC,iBAAA,GAAgC,IAAI,CAAChD,SAAS,CAAC+C,CAAC,CAAC;YAAzCV,QAAQ,GAAAW,iBAAA,CAARX,QAAQ;YAAEd,SAAS,GAAAyB,iBAAA,CAATzB,SAAS,CAAA;EAE3B,QAAA,IAAM0D,gBAAgB,GAAI,IAAI,CAACV,UAAU,IAAIxB,CAAC,GAAG,IAAI,CAAC/C,SAAS,CAACnB,MAAM,GAAG,CAAE,CAAA;EAE3E8D,QAAAA,IAAI,CAACM,WAAW,CAACZ,QAAQ,CAAC,CAAA;EAC1BM,QAAAA,IAAI,CAACO,YAAY,CAAC,CAAC,EAAE3B,SAAS,CAAC,CAAA;EAE/B,QAAA,IAAI0D,gBAAgB,EAChBtC,IAAI,CAACQ,0BAA0B,CAACxF,YAAY,EAAEiF,MAAM,GAAGG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAA;EAEjE;YACAJ,IAAI,CAACuC,kBAAkB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;EACxC,OAAA;EACJ,KAAA;EAAC,GAAA,CAAA,EAAA,CAAA;MAAA3E,GAAA,EAAA,6BAAA;MAAAC,KAAA,EA7HD,SAAA2E,2BAAmC1H,CAAAA,MAAM,EAAE2H,UAAU,EAAE7F,cAAc,EAAE;QACnE,IAAMI,qBAAqB,GAAGJ,cAAc,CAAC3B,CAAC,GAAG2B,cAAc,CAAC1B,CAAC,CAAA;QACjE,IAAMwH,KAAK,GAAG,EAAE,CAAA;QAEhB,GAAG;EACC;UACA,IAAMC,sBAAsB,GAAGtH,IAAI,CAACG,IAAI,CAACiH,UAAU,GAAGzF,qBAAqB,CAAC,CAAA;;EAE5E;EACA,QAAA,IAAMhC,YAAY,GAAGH,0BAA0B,CAACC,MAAM,EAAE6H,sBAAsB,CAAC,CAAA;EAE/ED,QAAAA,KAAK,CAACjD,IAAI,CAACzE,YAAY,CAACC,CAAC,EAAED,YAAY,CAACE,CAAC,EAAE,CAAC,CAAC,CAAA;EAC7CuH,QAAAA,UAAU,GAAGE,sBAAsB,CAAA;SACtC,QAAQF,UAAU,GAAG,CAAC,EAAA;EAEvB,MAAA,OAAOC,KAAK,CAAA;EAChB,KAAA;EAAC,GAAA,CAAA,CAAA,CAAA;EAAA,CAAA,EAAA;;AC5DwE,MAEvEE,eAAe,gBAAA,YAAA;EACjB;EACJ;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACI,EAAA,SAAAA,kBAUQ;EAAA,IAAA,IAAAlH,IAAA,GAAAwE,SAAA,CAAAhE,MAAA,GAAA,CAAA,IAAAgE,SAAA,CAAA,CAAA,CAAA,KAAAC,SAAA,GAAAD,SAAA,CAAA,CAAA,CAAA,GAAJ,EAAE;QATFpF,MAAM,GAAAY,IAAA,CAANZ,MAAM;QACN+H,IAAI,GAAAnH,IAAA,CAAJmH,IAAI;QACJC,MAAM,GAAApH,IAAA,CAANoH,MAAM;QACNpG,KAAK,GAAAhB,IAAA,CAALgB,KAAK;QAAAqG,cAAA,GAAArH,IAAA,CACLsH,SAAS;EAATA,MAAAA,SAAS,GAAAD,cAAA,KAAG,KAAA,CAAA,GAAA,EAAE,GAAAA,cAAA;QAAApG,mBAAA,GAAAjB,IAAA,CACdkB,cAAc;QAAdA,cAAc,GAAAD,mBAAA,KAAG,KAAA,CAAA,GAAA;EAAE1B,QAAAA,CAAC,EAAE,EAAE;EAAEC,QAAAA,CAAC,EAAE,EAAA;EAAG,OAAC,GAAAyB,mBAAA;QAAAsG,gBAAA,GAAAvH,IAAA,CACjCwH,WAAW;EAAXA,MAAAA,WAAW,GAAAD,gBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,gBAAA;QAAAE,kBAAA,GAAAzH,IAAA,CACnB0H,aAAa;EAAbA,MAAAA,aAAa,GAAAD,kBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,kBAAA;QAAAtG,qBAAA,GAAAnB,IAAA,CACrBoB,oBAAoB;EAApBA,MAAAA,oBAAoB,GAAAD,qBAAA,KAAG,KAAA,CAAA,GAAA,KAAK,GAAAA,qBAAA,CAAA;EAAAE,IAAAA,eAAA,OAAA6F,eAAA,CAAA,CAAA;MAE5B,IAAI9H,MAAM,IAAI,IAAI,EAAE,MAAM,IAAIqC,KAAK,CAAC,oBAAoB,CAAC,CAAA;MACzD,IAAI0F,IAAI,IAAI,IAAI,EAAE,MAAM,IAAI1F,KAAK,CAAC,yBAAyB,CAAC,CAAA;EAC5D,IAAA,IAAI,CAACkG,MAAM,CAACC,SAAS,CAAC5G,KAAK,CAAC,IAAIA,KAAK,IAAI,CAAC,EAAE,MAAM,IAAIS,KAAK,CAAC,yBAAyB,CAAC,CAAA;MACtF,IAAI,CAACkG,MAAM,CAACC,SAAS,CAACN,SAAS,CAAC,IAAIA,SAAS,IAAI,CAAC,IAAIA,SAAS,GAAG,EAAE,EAAE,MAAM,IAAI7F,KAAK,CAAC,6BAA6B,CAAC,CAAA;MACpH,IAAI,CAACkG,MAAM,CAACC,SAAS,CAAC1G,cAAc,CAAC3B,CAAC,CAAC,IAAI,CAACoI,MAAM,CAACC,SAAS,CAAC1G,cAAc,CAAC1B,CAAC,CAAC,EAAE,MAAM,IAAIiC,KAAK,CAAC,kCAAkC,CAAC,CAAA;MACnI,IAAI6F,SAAS,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,IAAI7F,KAAK,CAAC,mCAAmC,CAAC,CAAA;MAE5E,IAAI,CAACrC,MAAM,GAAGA,MAAM,CAAA;MACpB,IAAI,CAAC4B,KAAK,GAAGA,KAAK,CAAA;MAClB,IAAI,CAACsG,SAAS,GAAGA,SAAS,CAAA;MAC1B,IAAI,CAACpG,cAAc,GAAGA,cAAc,CAAA;MACpC,IAAI,CAACsG,WAAW,GAAGA,WAAW,CAAA;MAC9B,IAAI,CAACE,aAAa,GAAGA,aAAa,CAAA;MAClC,IAAI,CAACtG,oBAAoB,GAAGA,oBAAoB,CAAA;MAEhD,IAAI,CAACE,qBAAqB,GAAGJ,cAAc,CAAC3B,CAAC,GAAG2B,cAAc,CAAC1B,CAAC,CAAA;EAChE,IAAA,IAAI,CAACH,eAAe,GAAGM,IAAI,CAACG,IAAI,CAACkB,KAAK,GAAG,IAAI,CAACM,qBAAqB,CAAC,CAAA;EACpE,IAAA,IAAI,CAACuG,4BAA4B,GAAG,CAAC,GAAG,IAAI,CAACxI,eAAe,CAAA;EAE5D,IAAA,IAAI,CAACyI,UAAU,GAAIV,MAAM,IAAI,IAAK,CAAC;;EAEnC,IAAA,IAAI,CAAC9H,YAAY,GAAG,EAAE,CAAE;EACxB,IAAA,IAAI,CAACyI,aAAa,GAAG,EAAE,CAAC;EACxB,IAAA,IAAI,CAAC5B,OAAO,GAAG,EAAE,CAAO;EACxB,IAAA,IAAI,CAACxE,SAAS,GAAG,EAAE,CAAK;EACxB,IAAA,IAAI,CAACqG,OAAO,GAAG,EAAE,CAAA;;EAEjB;EACA,IAAA,IAAI,CAAC1I,YAAY,GAAGH,0BAA0B,CAAC,IAAI,CAACC,MAAM,EAAE,IAAI,CAACC,eAAe,CAAC,CAAA;;EAEjF;MACA,IAAI,CAAC4I,qBAAqB,EAAE,CAAA;;EAE5B;EACA,IAAA,IAAI,CAACC,gBAAgB,CAACf,IAAI,EAAEC,MAAM,CAAC,CAAA;EACvC,GAAA;IAAC,OAAAnF,YAAA,CAAAiF,eAAA,EAAA,CAAA;MAAAhF,GAAA,EAAA,uBAAA;MAAAC,KAAA,EAED,SAAA8F,qBAAAA,GAAwB;EACpB;EACA,MAAA,IAAME,aAAa,GAAG,SAAhBA,aAAaA,CAAIC,MAAM,EAAK;UAC9B,OAAOA,MAAM,CAACC,KAAK,CAAC,IAAI,CAAC,CACXC,MAAM,CAAC,UAAAC,IAAI,EAAA;YAAA,OAAI,CAACA,IAAI,CAACC,WAAW,EAAE,CAACC,QAAQ,CAAC,QAAQ,CAAC,CAAA;EAAA,SAAA,CAAC,CACtDC,IAAI,CAAC,IAAI,CAAC,CAAA;SAC3B,CAAA;QAED,IAAMC,cAAc,GAAG,IAAI,CAACjB,aAAa,GAAGkB,wBAA4B,GAAG7D,eAAe,CAAA;QAE1F,IAAI,CAACgD,aAAa,GAAG;EACjBc,QAAAA,QAAQ,EAAE,IAAI,CAACzJ,MAAM,CAACyC,kBAAkB,CAAC;EACrC5B,UAAAA,KAAK,EAAE,sBAAsB;YAC7B6B,IAAI,EAAE,IAAI,CAACgG,UAAU,GAAGa,cAAc,GAAGR,aAAa,CAACQ,cAAc,CAAA;EACzE,SAAC,CAAC;EACFG,QAAAA,OAAO,EAAE,IAAI,CAAC1J,MAAM,CAACyC,kBAAkB,CAAC;EACpC5B,UAAAA,KAAK,EAAE,oBAAoB;YAC3B6B,IAAI,EAAE,IAAI,CAACgG,UAAU,GAAGiB,sBAAa,GAAGZ,aAAa,CAACY,sBAAa,CAAA;WACtE,CAAA;SACJ,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAA7G,GAAA,EAAA,kBAAA;EAAAC,IAAAA,KAAA,EAED,SAAA+F,gBAAAA,CAAiBf,IAAI,EAAEC,MAAM,EAAE;EAC3B;EACA,MAAA,IAAA4B,qBAAA,GAAkD,IAAI,CAACC,wBAAwB,EAAE;UAAzEC,eAAe,GAAAF,qBAAA,CAAfE,eAAe;UAAEC,oBAAoB,GAAAH,qBAAA,CAApBG,oBAAoB,CAAA;;EAE7C;EACA,MAAA,IAAMzI,YAAY,GAAG,IAAI,CAAC0I,wBAAwB,CAACF,eAAe,CAAC,CAAA;;EAEnE;QACA,IAAI,CAACG,cAAc,CAAClC,IAAI,EAAEC,MAAM,EAAE+B,oBAAoB,EAAEzI,YAAY,CAAC,CAAA;;EAErE;QACA,IAAI,CAAC4I,yBAAyB,CAAC,IAAI,CAACnD,OAAO,CAACgB,IAAI,EAAEzG,YAAY,CAAC,CAAA;;EAE/D;EACA,MAAA,KAAK,IAAI6I,GAAG,GAAG,CAAC,EAAEA,GAAG,GAAG,IAAI,CAACjC,SAAS,EAAEiC,GAAG,IAAI,CAAC,EAAE;EAC9C;EACA,QAAA,IAAMC,IAAI,GAASD,GAAG,GAAG,CAAC,IAAI,CAAE,CAAA;EAChC,QAAA,IAAME,MAAM,GAAMD,IAAI,GAAG,IAAI,CAACrD,OAAO,CAACgB,IAAI,GAAG,IAAI,CAAChB,OAAO,CAACuD,OAAO,CAAA;EACjE,QAAA,IAAMC,QAAQ,GAAIH,IAAI,GAAG,IAAI,CAACrD,OAAO,CAACiB,MAAM,GAAG,IAAI,CAACjB,OAAO,CAACyD,SAAS,CAAA;EACrE,QAAA,IAAMC,OAAO,GAAKL,IAAI,GAAG,IAAI,CAACrD,OAAO,CAACuD,OAAO,GAAG,IAAI,CAACvD,OAAO,CAACgB,IAAI,CAAA;EACjE,QAAA,IAAM2C,SAAS,GAAGN,IAAI,GAAG,IAAI,CAACrD,OAAO,CAACyD,SAAS,GAAG,IAAI,CAACzD,OAAO,CAACiB,MAAM,CAAA;;EAErE;UACA,IAAMnD,gBAAgB,GAAG,IAAI,CAAC8F,yBAAyB,CAACN,MAAM,EAAEE,QAAQ,EAAEJ,GAAG,CAAC,CAAA;;EAE9E;EACA,QAAA,IAAMS,eAAe,GAAG,IAAI,CAACC,uBAAuB,CAACR,MAAM,EAAEE,QAAQ,EAAEE,OAAO,EAAEC,SAAS,EAAEP,GAAG,CAAC,CAAA;EAE/F,QAAA,IAAI,CAAC5H,SAAS,CAACoC,IAAI,CAAC;EAAEE,UAAAA,gBAAgB,EAAhBA,gBAAgB;EAAE+F,UAAAA,eAAe,EAAfA,eAAAA;EAAgB,SAAC,CAAC,CAAA;EAC9D,OAAA;EACJ,KAAA;EAAC,GAAA,EAAA;MAAA9H,GAAA,EAAA,0BAAA;MAAAC,KAAA,EAED,SAAA8G,wBAAAA,GAA2B;EACvB;EACA,MAAA,IAAME,oBAAoB,GAAG,IAAI,CAAC/J,MAAM,CAACkB,YAAY,CAAC;EAClDL,QAAAA,KAAK,EAAE,6BAA6B;EACpCM,QAAAA,IAAI,EAAE,IAAI,CAACsH,4BAA4B,GAAG,CAAC;UAC3CzH,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAM0G,eAAe,GAAG,IAAInI,eAAe,CAAC;UACxC3B,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBc,QAAAA,IAAI,EAAEiJ,oBAAoB;UAC1BnI,KAAK,EAAE,IAAI,CAAC6G,4BAA4B;UACxC3G,cAAc,EAAE,IAAI,CAACA,cAAc;UACnCE,oBAAoB,EAAE,IAAI,CAACA,oBAAAA;EAC/B,OAAC,CAAC,CAAA;EAEF,MAAA,IAAI,CAAC4G,OAAO,CAACkC,SAAS,GAAGhB,eAAe,CAAA;QAExC,OAAO;EAAEA,QAAAA,eAAe,EAAfA,eAAe;EAAEC,QAAAA,oBAAoB,EAApBA,oBAAAA;SAAsB,CAAA;EACpD,KAAA;EAAC,GAAA,EAAA;MAAAjH,GAAA,EAAA,0BAAA;EAAAC,IAAAA,KAAA,EAED,SAAAiH,wBAAyBF,CAAAA,eAAe,EAAE;EACtC;EACA,MAAA,IAAMiB,qBAAqB,GAAGjB,eAAe,CAAChF,kBAAkB,EAAE,CAAA;;EAElE;EACA,MAAA,IAAMkG,qBAAqB,GAAGzK,IAAI,CAAC0K,GAAG,CAAC,IAAI,CAACrJ,KAAK,EAAE,IAAI,CAACM,qBAAqB,GAAG,CAAC,CAAC,CAAA;EAClF,MAAA,IAAMgJ,qBAAqB,GAAG,IAAI,CAACtJ,KAAK,GAAGoJ,qBAAqB,CAAA;EAChE,MAAA,IAAMG,UAAU,GAAGH,qBAAqB,GAAG,CAAC,CAAA;;EAE5C;EACA,MAAA,IAAMI,iBAAiB,GAAG7E,eAAe,CAACmB,2BAA2B,CAAC,IAAI,CAAC1H,MAAM,EAAEgL,qBAAqB,EAAE,IAAI,CAAClJ,cAAc,CAAC,CAAA;EAC9H,MAAA,IAAMuJ,iBAAiB,GAAG9E,eAAe,CAACmB,2BAA2B,CAAC,IAAI,CAAC1H,MAAM,EAAEkL,qBAAqB,EAAE,IAAI,CAACpJ,cAAc,CAAC,CAAA;;EAE9H;EACA,MAAA,IAAMwJ,eAAe,GAAA,CACjB,IAAI,CAACpL,YAAY,CAACC,CAAC,EAAE,IAAI,CAACD,YAAY,CAACE,CAAC,EAAE,CAAC,CAAA,CAAAkC,MAAA,CAAA8E,kBAAA,CACxCgE,iBAAiB,CAACG,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA,EAAAnE,kBAAA,CAC7B2D,qBAAqB,CAC3B,CAAA,CAAA;;EAED;QACA,IAAI,CAACS,eAAe,GAAG;EACnBC,QAAAA,UAAU,EAAE,CAAC;UACbC,eAAe,EAAE,CAAC,GAAG,CAAC;UACtBC,UAAU,EAAE,CAAC,GAAG,CAAA;SACnB,CAAA;QAED,OAAO;EACHL,QAAAA,eAAe,EAAfA,eAAe;EACfD,QAAAA,iBAAiB,EAAjBA,iBAAiB;EACjBL,QAAAA,qBAAqB,EAArBA,qBAAqB;EACrBE,QAAAA,qBAAqB,EAArBA,qBAAqB;EACrBC,QAAAA,UAAU,EAAVA,UAAAA;SACH,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAArI,GAAA,EAAA,gBAAA;MAAAC,KAAA,EAED,SAAAkH,cAAAA,CAAelC,IAAI,EAAEC,MAAM,EAAE+B,oBAAoB,EAAEzI,YAAY,EAAE;EAC7D;EACA,MAAA,IAAMsK,aAAa,GAAG,IAAI,CAAC5L,MAAM,CAACkB,YAAY,CAAC;EAC3CL,QAAAA,KAAK,EAAE,qBAAqB;EAC5BM,QAAAA,IAAI,EAAE,IAAI,CAACS,KAAK,GAAG,CAAC;UACpBZ,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;EACF,MAAA,IAAMyI,eAAe,GAAG,CAAC,IAAI,CAACnD,UAAU,GAAG,IAAI,GAAG,IAAI,CAAC1I,MAAM,CAACkB,YAAY,CAAC;EACvEL,QAAAA,KAAK,EAAE,uBAAuB;EAC9BM,QAAAA,IAAI,EAAE,IAAI,CAACS,KAAK,GAAG,CAAC;UACpBZ,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAM0I,oBAAoB,GAAG,IAAI,CAAC9L,MAAM,CAACkB,YAAY,CAAC;EAClDL,QAAAA,KAAK,EAAE,6BAA6B;EACpCM,QAAAA,IAAI,EAAE,IAAI,CAACS,KAAK,GAAG,CAAC;UACpBZ,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;;EAEF;QACA,IAAM2I,cAAc,GAAGpL,uBAAuB,CAAC;UAC3CX,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBa,QAAAA,KAAK,EAAE,0BAA0B;UACjCC,IAAI,EAAEQ,YAAY,CAACgK,eAAe;UAClCtK,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAAC+I,QAAAA;EAC7E,OAAC,CAAC,CAAA;QACF,IAAMC,sBAAsB,GAAGtL,uBAAuB,CAAC;UACnDX,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBa,QAAAA,KAAK,EAAE,mCAAmC;UAC1CC,IAAI,EAAEQ,YAAY,CAACgK,eAAe;EAClCtK,QAAAA,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAAA;EACnD,OAAC,CAAC,CAAA;;EAEF;QACA,IAAM+I,2BAA2B,GAAGvL,uBAAuB,CAAC;EACxDE,QAAAA,KAAK,EAAE,+BAA+B;UACtCb,MAAM,EAAE,IAAI,CAACA,MAAM;UACnBc,IAAI,EAAEQ,YAAY,CAAC+J,iBAAiB;UACpCrK,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAAC+I,QAAAA;EAC7E,OAAC,CAAC,CAAA;QACF,IAAMG,mCAAmC,GAAGxL,uBAAuB,CAAC;EAChEE,QAAAA,KAAK,EAAE,wCAAwC;UAC/Cb,MAAM,EAAE,IAAI,CAACA,MAAM;UACnBc,IAAI,EAAEQ,YAAY,CAAC+J,iBAAiB;EACpCrK,QAAAA,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAAA;EACnD,OAAC,CAAC,CAAA;;EAEF;QACA,IAAMiJ,cAAc,GAAGzL,uBAAuB,CAAC;EAC3CE,QAAAA,KAAK,EAAE,WAAW;UAClBb,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBc,QAAAA,IAAI,EAAE,IAAIS,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;UAC1BP,KAAK,EAAEiC,cAAc,CAACC,OAAO,GAAGD,cAAc,CAACE,QAAQ,GAAGF,cAAc,CAACG,QAAAA;EAC7E,OAAC,CAAC,CAAA;QAEF,IAAI,CAAC2D,OAAO,GAAG;EACXgB,QAAAA,IAAI,EAAEA,IAAI;EACVC,QAAAA,MAAM,EAAEA,MAAM;EACdsC,QAAAA,OAAO,EAAEsB,aAAa;EACtBpB,QAAAA,SAAS,EAAEqB,eAAe;EAC1BQ,QAAAA,cAAc,EAAEP,oBAAoB;EACpCQ,QAAAA,cAAc,EAAEvC,oBAAoB;EAEpC7J,QAAAA,YAAY,EAAE6L,cAAc;EAC5BQ,QAAAA,oBAAoB,EAAEN,sBAAsB;EAC5CO,QAAAA,yBAAyB,EAAEN,2BAA2B;EACtDO,QAAAA,iCAAiC,EAAEN,mCAAmC;EACtEO,QAAAA,QAAQ,EAAEN,cAAAA;SACb,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAAtJ,GAAA,EAAA,2BAAA;EAAAC,IAAAA,KAAA,EAED,SAAAmH,yBAAAA,CAA0BG,MAAM,EAAEsC,sBAAsB,EAAE;EACtD;EACA,MAAA,IAAI,CAAC,IAAI,CAACvE,WAAW,EAAE;EACnB,QAAA,OAAO,CAAE,IAAI,EAAE,IAAI,CAAE,CAAA;EACzB,OAAA;EAEA,MAAA,IAAQ4C,qBAAqB,GAAwC2B,sBAAsB,CAAnF3B,qBAAqB;UAAEE,qBAAqB,GAAiByB,sBAAsB,CAA5DzB,qBAAqB;UAAEC,UAAU,GAAKwB,sBAAsB,CAArCxB,UAAU,CAAA;;EAEhE;EACA,MAAA,IAAMyB,aAAa,GAAG,IAAIrG,eAAe,CAAC;UACtCvG,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBc,QAAAA,IAAI,EAAEuJ,MAAM;EACZ7D,QAAAA,MAAM,EAAE,IAAI,CAACO,OAAO,CAAC7G,YAAY;EACjCuG,QAAAA,QAAQ,EAAE,IAAI,CAACM,OAAO,CAACwF,oBAAoB;EAC3C7F,QAAAA,SAAS,EAAE,IAAI,CAACK,OAAO,CAAC2F,QAAQ;EAChC9K,QAAAA,KAAK,EAAEsJ,qBAAqB;EAC5BtE,QAAAA,KAAK,EAAEuE,UAAU;EACjBrE,QAAAA,UAAU,EAAE,IAAI;UAChBhF,cAAc,EAAE,IAAI,CAACA,cAAAA;EACzB,OAAC,CAAC,CAAA;;EAEF;EACA,MAAA,IAAM+K,aAAa,GAAG,IAAItG,eAAe,CAAC;UACtCvG,MAAM,EAAE,IAAI,CAACA,MAAM;EACnBc,QAAAA,IAAI,EAAEuJ,MAAM;EACZ7D,QAAAA,MAAM,EAAE,IAAI,CAACO,OAAO,CAACyF,yBAAyB;EAC9C/F,QAAAA,QAAQ,EAAE,IAAI,CAACM,OAAO,CAAC0F,iCAAiC;EACxD/F,QAAAA,SAAS,EAAE,IAAI,CAACK,OAAO,CAAC2F,QAAQ;EAChC9K,QAAAA,KAAK,EAAEoJ,qBAAqB;EAC5BlE,QAAAA,UAAU,EAAE,KAAK;UACjBhF,cAAc,EAAE,IAAI,CAACA,cAAAA;EACzB,OAAC,CAAC,CAAA;QAEF,IAAI+K,aAAa,CAAC3K,qBAAqB,GAAG0K,aAAa,CAACrK,SAAS,CAACnB,MAAM,EAAE;UACtE0L,OAAO,CAACC,IAAI,CAAA,sFAAuF,CAAC,CAAA;UACpG,IAAI,CAAC3E,WAAW,GAAG,KAAK,CAAA;EACxB,QAAA,OAAO,CAAE,IAAI,EAAE,IAAI,CAAE,CAAA;EACzB,OAAA;EAEA,MAAA,IAAI,CAACQ,OAAO,CAACiE,aAAa,GAAGA,aAAa,CAAA;EAC1C,MAAA,IAAI,CAACjE,OAAO,CAACgE,aAAa,GAAGA,aAAa,CAAA;EAC9C,KAAA;EAAC,GAAA,EAAA;MAAA9J,GAAA,EAAA,2BAAA;MAAAC,KAAA,EAED,SAAA4H,yBAA0BN,CAAAA,MAAM,EAAEE,QAAQ,EAAEJ,GAAG,EAAE;EAC7C,MAAA,IAAM9G,eAAe,GAAG,IAAI,CAACrD,MAAM,CAACsD,qBAAqB,CAAC;EACtDzC,QAAAA,KAAK,EAAE,sBAAsB;EAC7B0C,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,IAAI,CAACyE,aAAa,GAAG,SAAS,GAAG,mBAAA;EAAoB,WAAA;EACzE,SAAC,EACD;EACI9E,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,CAAAvB,CAAAA,MAAA,CAAA8E,kBAAA,CACG,IAAI,CAACkB,aAAa,IAAI,IAAI,CAACI,UAAU,GAAG,CAAC;EACzClF,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CAAC,GAAG,EAAE,CAAA,CAAA;EAEf,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAAC9D,MAAM,CAAC+D,eAAe,CAAC;EAC1CC,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEyG,MAAAA;EAAO,WAAA;EAC/B,SAAC,EACD;EACI7G,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACsF,cAAAA;EAAe,WAAA;EACpD,SAAC,EACD;EACI7I,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACuF,cAAAA;EAAe,WAAA;EACpD,SAAC,CAAAhK,CAAAA,MAAA,CAAA8E,kBAAA,CAEG,IAAI,CAACkB,aAAa,IAAI,IAAI,CAACI,UAAU,GAAG,CAAC;EACzClF,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE2G,QAAAA;EAAS,WAAA;WAChC,CAAC,GAAG,EAAE,CAAA,CAAA;EAEf,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMrG,cAAc,GAAG,IAAI,CAAClE,MAAM,CAACmE,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAAEf,eAAe,CAAA;EACvC,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMwB,gBAAgB,GAAG,IAAI,CAAC7E,MAAM,CAACsE,qBAAqB,CAAC;EACvDzD,QAAAA,KAAK,EAAE,sBAAsB;EAC7BmD,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;EACLC,UAAAA,MAAM,EAAE,IAAI,CAACmE,aAAa,CAACc,QAAQ;EACnChF,UAAAA,UAAU,EAAE,YAAY;EACxBC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC5C,cAAc,CAAC3B,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAAC2B,cAAc,CAAC1B,CAAC;cACzC,iBAAiB,EAAE,IAAI,CAACH,eAAe;cACvC,uBAAuB,EAAE,IAAI,CAACiC,qBAAqB;cACnD,eAAe,EAAE,IAAI,CAACN,KAAK;EAC3B,YAAA,aAAa,EAAEuI,GAAAA;EACnB,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;QAEF,OAAO;EACHvF,QAAAA,QAAQ,EAAEC,gBAAgB;EAC1Bf,QAAAA,SAAS,EAATA,SAAAA;SACH,CAAA;EACL,KAAA;EAAC,GAAA,EAAA;MAAAhB,GAAA,EAAA,yBAAA;EAAAC,IAAAA,KAAA,EAED,SAAA8H,uBAAwBR,CAAAA,MAAM,EAAEE,QAAQ,EAAEE,OAAO,EAAEC,SAAS,EAAEP,GAAG,EAAE;EAC/D,MAAA,IAAM9G,eAAe,GAAG,IAAI,CAACrD,MAAM,CAACsD,qBAAqB,CAAC;EACtDzC,QAAAA,KAAK,EAAE,oBAAoB;EAC3B0C,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;EAC9B,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;WACvC,CAAA,CAAAvB,MAAA,CAAA8E,kBAAA,CACG,IAAI,CAACsB,UAAU,GAAG,CAClB;EACIlF,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,mBAAA;EAAoB,WAAA;EACxC,SAAC,EACD;EACIL,UAAAA,OAAO,EAAE,CAAC;YACVC,UAAU,EAAEC,cAAc,CAACC,OAAO;EAClCC,UAAAA,MAAM,EAAE;EAAEC,YAAAA,IAAI,EAAE,SAAA;EAAU,WAAA;WAC7B,CACJ,GAAG,EAAE,CAAA,CAAA;EAEd,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMC,SAAS,GAAG,IAAI,CAAC9D,MAAM,CAAC+D,eAAe,CAAC;EAC1CC,QAAAA,MAAM,EAAEX,eAAe;EACvBE,QAAAA,OAAO,EACH,CAAA;EACIC,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAEyG,MAAAA;EAAO,WAAA;EAC/B,SAAC,EACD;EACI7G,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE6G,OAAAA;EAAQ,WAAA;EAChC,SAAC,EACD;EACIjH,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACsF,cAAAA;EAAe,WAAA;EACpD,SAAC,EACD;EACI7I,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE,IAAI,CAACmD,OAAO,CAACuF,cAAAA;EAAe,WAAA;WACnD,CAAA,CAAAhK,MAAA,CAAA8E,kBAAA,CACG,IAAI,CAACsB,UAAU,GAAG,CAClB;EACIlF,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE2G,QAAAA;EAAS,WAAA;EACjC,SAAC,EACD;EACI/G,UAAAA,OAAO,EAAE,CAAC;EACVS,UAAAA,QAAQ,EAAE;EAAEL,YAAAA,MAAM,EAAE8G,SAAAA;EAAU,WAAA;WACjC,CACJ,GAAG,EAAE,CAAA,CAAA;EAEd,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMxG,cAAc,GAAG,IAAI,CAAClE,MAAM,CAACmE,oBAAoB,CAAC;UACpDC,gBAAgB,EAAE,CAAEf,eAAe,CAAA;EACvC,OAAC,CAAC,CAAA;EAEF,MAAA,IAAMuH,eAAe,GAAG,IAAI,CAAC5K,MAAM,CAACsE,qBAAqB,CAAC;EACtDzD,QAAAA,KAAK,EAAE,oBAAoB;EAC3BmD,QAAAA,MAAM,EAAEE,cAAc;EACtBK,QAAAA,OAAO,EAAE;EACLC,UAAAA,MAAM,EAAE,IAAI,CAACmE,aAAa,CAACe,OAAO;EAClCjF,UAAAA,UAAU,EAAE,oBAAoB;EAChCC,UAAAA,SAAS,EAAE;EACP,YAAA,kBAAkB,EAAE,IAAI,CAAC5C,cAAc,CAAC3B,CAAC;EACzC,YAAA,kBAAkB,EAAE,IAAI,CAAC2B,cAAc,CAAC1B,CAAC;cACzC,iBAAiB,EAAE,IAAI,CAACH,eAAe;cACvC,uBAAuB,EAAE,IAAI,CAACiC,qBAAqB;cACnD,eAAe,EAAE,IAAI,CAACN,KAAK;EAC3B,YAAA,aAAa,EAAEuI,GAAAA;EACnB,WAAA;EACJ,SAAA;EACJ,OAAC,CAAC,CAAA;QAEF,OAAO;EACHvF,QAAAA,QAAQ,EAAEgG,eAAe;EACzB9G,QAAAA,SAAS,EAATA,SAAAA;SACH,CAAA;EACL,KAAA;;EAEA;EACJ;EACA;EACA;EACA;EAJI,GAAA,EAAA;MAAAhB,GAAA,EAAA,UAAA;EAAAC,IAAAA,KAAA,EAKA,SAAAkC,QAASC,CAAAA,IAAI,EAAE;EACX,MAAA,KAAK,IAAII,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG,IAAI,CAAC4C,SAAS,GAAG,CAAC,EAAE5C,CAAC,EAAE,EAAE;EACzC,QAAA,IAAAC,iBAAA,GAA8C,IAAI,CAAChD,SAAS,CAAC+C,CAAC,CAAC;YAAvDT,gBAAgB,GAAAU,iBAAA,CAAhBV,gBAAgB;YAAE+F,eAAe,GAAArF,iBAAA,CAAfqF,eAAe,CAAA;UAEzC,IAAI,IAAI,CAACxC,WAAW,IAAI9C,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;YAChC,IAAI,CAACsD,OAAO,CAACiE,aAAa,CAAC5H,QAAQ,CAACC,IAAI,EAAE,IAAI,CAAC6B,OAAO,CAAC7G,YAAY,EAAE,IAAI,CAACsL,eAAe,CAACE,eAAe,CAAC,CAAA;EAC1G,UAAA,IAAI,CAAC9C,OAAO,CAACgE,aAAa,CAAC3H,QAAQ,CAACC,IAAI,EAAE,IAAI,CAAC6B,OAAO,CAACyF,yBAAyB,CAAC,CAAA;EACrF,SAAA;EAEAtH,QAAAA,IAAI,CAACM,WAAW,CAACX,gBAAgB,CAACD,QAAQ,CAAC,CAAA;UAC3CM,IAAI,CAACO,YAAY,CAAC,CAAC,EAAEZ,gBAAgB,CAACf,SAAS,CAAC,CAAA;EAChDoB,QAAAA,IAAI,CAACQ,0BAA0B,CAAC,IAAI,CAACqB,OAAO,CAAC7G,YAAY,EAAE,IAAI,CAACsL,eAAe,CAACC,UAAU,CAAC,CAAA;UAE3F,IAAI,CAAC7C,OAAO,CAACkC,SAAS,CAAC7F,QAAQ,CAACC,IAAI,EAAE,IAAI,CAAC6B,OAAO,CAAC7G,YAAY,EAAE,IAAI,CAACsL,eAAe,CAACG,UAAU,CAAC,CAAA;EAEjGzG,QAAAA,IAAI,CAACM,WAAW,CAACoF,eAAe,CAAChG,QAAQ,CAAC,CAAA;UAC1CM,IAAI,CAACO,YAAY,CAAC,CAAC,EAAEmF,eAAe,CAAC9G,SAAS,CAAC,CAAA;EAC/CoB,QAAAA,IAAI,CAACQ,0BAA0B,CAAC,IAAI,CAACqB,OAAO,CAAC7G,YAAY,EAAE,IAAI,CAACsL,eAAe,CAACC,UAAU,CAAC,CAAA;EAC/F,OAAA;EACJ,KAAA;EAAC,GAAA,CAAA,CAAA,CAAA;EAAA,CAAA;;;;;;;;;"} \ No newline at end of file diff --git a/dist/umd/radix-sort-umd.min.js b/dist/umd/radix-sort-umd.min.js index 99acf4a..28f01fd 100644 --- a/dist/umd/radix-sort-umd.min.js +++ b/dist/umd/radix-sort-umd.min.js @@ -1,2 +1,2 @@ -!function(e,r){"object"==typeof exports&&"undefined"!=typeof module?r(exports):"function"==typeof define&&define.amd?define(["exports"],r):r((e="undefined"!=typeof globalThis?globalThis:e||self).RadixSort={})}(this,(function(e){"use strict";function r(e,r){(null==r||r>e.length)&&(r=e.length);for(var i=0,t=Array(r);i=e.length?{done:!0}:{done:!1,value:e[t++]}},e:function(e){throw e},f:n}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var o,s=!0,u=!1;return{s:function(){i=i.call(e)},n:function(){var e=i.next();return s=e.done,e},e:function(e){u=!0,o=e},f:function(){try{s||null==i.return||i.return()}finally{if(u)throw o}}}}function s(e){return function(e){if(Array.isArray(e))return r(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||a(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function u(e){var r=function(e,r){if("object"!=typeof e||!e)return e;var i=e[Symbol.toPrimitive];if(void 0!==i){var t=i.call(e,r||"default");if("object"!=typeof t)return t;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===r?String:Number)(e)}(e,"string");return"symbol"==typeof r?r:r+""}function a(e,i){if(e){if("string"==typeof e)return r(e,i);var t={}.toString.call(e).slice(8,-1);return"Object"===t&&e.constructor&&(t=e.constructor.name),"Map"===t||"Set"===t?Array.from(e):"Arguments"===t||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t)?r(e,i):void 0}}var _=function(){return n((function e(r){var t=r.device,n=r.data,o=r.count,s=r.workgroup_size,u=void 0===s?{x:16,y:16}:s,a=r.avoid_bank_conflicts,_=void 0!==a&&a;if(i(this,e),this.device=t,this.workgroup_size=u,this.threads_per_workgroup=u.x*u.y,this.items_per_workgroup=2*this.threads_per_workgroup,Math.log2(this.threads_per_workgroup)%1!=0)throw new Error("workgroup_size.x * workgroup_size.y must be a power of two. (current: ".concat(this.threads_per_workgroup,")"));this.pipelines=[],this.shaderModule=this.device.createShaderModule({label:"prefix-sum",code:_?"\n\n@group(0) @binding(0) var items: array;\n@group(0) @binding(1) var blockSums: array;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ITEMS_PER_WORKGROUP: u32;\n\nconst NUM_BANKS: u32 = 32;\nconst LOG_NUM_BANKS: u32 = 5;\n\nfn get_offset(offset: u32) -> u32 {\n // return offset >> LOG_NUM_BANKS; // Conflict-free\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\n}\n\nvar temp: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn reduce_downsweep(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n let ELM_TID = TID * 2; // Element pair local ID\n let ELM_GID = GID * 2; // Element pair global ID\n \n // Load input to shared memory\n let ai: u32 = TID;\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\n let s_ai = ai + get_offset(ai);\n let s_bi = bi + get_offset(bi);\n let g_ai = ai + WID * 2;\n let g_bi = bi + WID * 2;\n temp[s_ai] = items[g_ai];\n temp[s_bi] = items[g_bi];\n\n var offset: u32 = 1;\n\n // Up-sweep (reduce) phase\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n ai += get_offset(ai);\n bi += get_offset(bi);\n temp[bi] += temp[ai];\n }\n\n offset *= 2;\n }\n\n // Save workgroup sum and clear last element\n if (TID == 0) {\n var last_offset = ITEMS_PER_WORKGROUP - 1;\n last_offset += get_offset(last_offset);\n\n blockSums[WORKGROUP_ID] = temp[last_offset];\n temp[last_offset] = 0;\n }\n\n // Down-sweep phase\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\n offset >>= 1;\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n ai += get_offset(ai);\n bi += get_offset(bi);\n\n let t: u32 = temp[ai];\n temp[ai] = temp[bi];\n temp[bi] += t;\n }\n }\n workgroupBarrier();\n\n // Copy result from shared memory to global memory\n items[g_ai] = temp[s_ai];\n items[g_bi] = temp[s_bi];\n}\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn add_block_sums(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n let ELM_ID = GID * 2;\n let blockSum = blockSums[WORKGROUP_ID];\n\n items[ELM_ID] += blockSum;\n items[ELM_ID + 1] += blockSum;\n}":"\n\n@group(0) @binding(0) var items: array;\n@group(0) @binding(1) var blockSums: array;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ITEMS_PER_WORKGROUP: u32;\n\nvar temp: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn reduce_downsweep(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n let ELM_TID = TID * 2; // Element pair local ID\n let ELM_GID = GID * 2; // Element pair global ID\n \n // Load input to shared memory\n temp[ELM_TID] = items[ELM_GID];\n temp[ELM_TID + 1] = items[ELM_GID + 1];\n\n var offset: u32 = 1;\n\n // Up-sweep (reduce) phase\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n temp[bi] += temp[ai];\n }\n\n offset *= 2;\n }\n\n // Save workgroup sum and clear last element\n if (TID == 0) {\n let last_offset = ITEMS_PER_WORKGROUP - 1;\n\n blockSums[WORKGROUP_ID] = temp[last_offset];\n temp[last_offset] = 0;\n }\n\n // Down-sweep phase\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\n offset >>= 1;\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n\n let t: u32 = temp[ai];\n temp[ai] = temp[bi];\n temp[bi] += t;\n }\n }\n workgroupBarrier();\n\n // Copy result from shared memory to global memory\n items[ELM_GID] = temp[ELM_TID];\n items[ELM_GID + 1] = temp[ELM_TID + 1];\n}\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn add_block_sums(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n\n let ELM_ID = GID * 2;\n let blockSum = blockSums[WORKGROUP_ID];\n\n items[ELM_ID] += blockSum;\n items[ELM_ID + 1] += blockSum;\n}"}),this.create_pass_recursive(n,o)}),[{key:"find_optimal_dispatch_size",value:function(e){var r=this.device.limits.maxComputeWorkgroupsPerDimension,i=Math.ceil(e/this.items_per_workgroup),t=i,n=1;return i>r&&(i=(t=Math.floor(Math.sqrt(i)))*(n=Math.ceil(i/t))),{workgroup_count:i,dispatchSize:{x:t,y:n}}}},{key:"create_pass_recursive",value:function(e,r){var i=this.find_optimal_dispatch_size(r),t=i.workgroup_count,n=i.dispatchSize,o=this.device.createBuffer({size:4*t,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}),s=this.device.createBindGroupLayout({entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),u=this.device.createBindGroup({label:"prefix-sum-bind-group",layout:s,entries:[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:o}}]}),a=this.device.createPipelineLayout({bindGroupLayouts:[s]}),_=this.device.createComputePipeline({label:"prefix-sum-scan-pipeline",layout:a,compute:{module:this.shaderModule,entryPoint:"reduce_downsweep",constants:{WORKGROUP_SIZE_X:this.workgroup_size.x,WORKGROUP_SIZE_Y:this.workgroup_size.y,THREADS_PER_WORKGROUP:this.threads_per_workgroup,ITEMS_PER_WORKGROUP:this.items_per_workgroup}}});if(this.pipelines.push({pipeline:_,bindGroup:u,dispatchSize:n}),t>1){this.create_pass_recursive(o,t);var f=this.device.createComputePipeline({label:"prefix-sum-add-block-pipeline",layout:a,compute:{module:this.shaderModule,entryPoint:"add_block_sums",constants:{WORKGROUP_SIZE_X:this.workgroup_size.x,WORKGROUP_SIZE_Y:this.workgroup_size.y,THREADS_PER_WORKGROUP:this.threads_per_workgroup}}});this.pipelines.push({pipeline:f,bindGroup:u,dispatchSize:n})}}},{key:"dispatch",value:function(e){var r,i=o(this.pipelines);try{for(i.s();!(r=i.n()).done;){var t=r.value,n=t.pipeline,s=t.bindGroup,u=t.dispatchSize;e.setPipeline(n),e.setBindGroup(0,s),e.dispatchWorkgroups(u.x,u.y,1)}}catch(e){i.e(e)}finally{i.f()}}}])}(),f="\n\n@group(0) @binding(0) var inputKeys: array;\n@group(0) @binding(1) var outputKeys: array;\n@group(0) @binding(2) var local_prefix_sum: array;\n@group(0) @binding(3) var prefix_block_sum: array;\n@group(0) @binding(4) var inputValues: array;\n@group(0) @binding(5) var outputValues: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort_reorder(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) { \n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n if (GID >= ELEMENT_COUNT) {\n return;\n }\n\n let k = inputKeys[GID];\n let v = inputValues[GID];\n\n let local_prefix = local_prefix_sum[GID];\n\n // Calculate new position\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\n let sorted_position = prefix_block_sum[pid] + local_prefix;\n \n outputKeys[sorted_position] = k;\n outputValues[sorted_position] = v;\n}",l=function(){return n((function e(){var r=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},t=r.device,n=r.keys,o=r.values,s=r.count,u=r.bit_count,a=void 0===u?32:u,_=r.workgroup_size,f=void 0===_?{x:16,y:16}:_,l=r.local_shuffle,p=void 0!==l&&l,d=r.avoid_bank_conflicts,c=void 0!==d&&d;if(i(this,e),null==t)throw new Error("No device provided");if(null==n)throw new Error("No keys buffer provided");if(!Number.isInteger(s)||s<=0)throw new Error("Invalid count parameter");if(!Number.isInteger(a)||a<=0)throw new Error("Invalid bit_count parameter");if(!Number.isInteger(f.x)||!Number.isInteger(f.y))throw new Error("Invalid workgroup_size parameter");this.device=t,this.count=s,this.bit_count=a,this.workgroup_size=f,this.local_shuffle=p,this.avoid_bank_conflicts=c,this.threads_per_workgroup=f.x*f.y,this.workgroup_count=Math.ceil(s/this.threads_per_workgroup),this.prefix_block_workgroup_count=4*this.workgroup_count,this.has_values=null!=o,this.dispatchSize={},this.shaderModules={},this.buffers={},this.pipelines=[],this.find_optimal_dispatch_size(),this.create_shader_modules(),this.create_buffers(n,o),this.create_pipelines()}),[{key:"find_optimal_dispatch_size",value:function(){var e=this.device.limits.maxComputeWorkgroupsPerDimension;if(this.dispatchSize={x:this.workgroup_count,y:1},this.workgroup_count>e){var r=Math.floor(Math.sqrt(this.workgroup_count)),i=Math.ceil(this.workgroup_count/r);this.dispatchSize={x:r,y:i}}}},{key:"create_shader_modules",value:function(){var e=function(e){return e.split("\n").filter((function(e){return!e.toLowerCase().includes("values")})).join("\n")},r=this.local_shuffle?"\n\n@group(0) @binding(0) var input: array;\n@group(0) @binding(1) var local_prefix_sums: array;\n@group(0) @binding(2) var block_sums: array;\n@group(0) @binding(3) var values: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\nvar s_prefix_sum: array;\nvar s_prefix_sum_scan: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n // Extract 2 bits from the input\n let elm = input[GID];\n let val = values[GID];\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\n\n var bit_prefix_sums = array(0, 0, 0, 0);\n\n // If the workgroup is inactive, prevent block_sums buffer update\n var LAST_THREAD: u32 = 0xffffffff; \n\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\n // Otherwise store the index of the last active thread in the workgroup\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\n }\n\n // Initialize parameters for double-buffering\n let TPW = THREADS_PER_WORKGROUP + 1;\n var swapOffset: u32 = 0;\n var inOffset: u32 = TID;\n var outOffset: u32 = TID + TPW;\n\n // 4-way prefix sum\n for (var b: u32 = 0; b < 4; b++) {\n // Initialize local prefix with bitmask\n let bitmask = select(0u, 1u, extract_bits == b);\n s_prefix_sum[inOffset + 1] = bitmask;\n workgroupBarrier();\n\n // Prefix sum\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\n if (TID >= offset) {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\n } else {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n \n workgroupBarrier();\n }\n\n // Store prefix sum for current bit\n let prefix_sum = s_prefix_sum[inOffset];\n bit_prefix_sums[b] = prefix_sum;\n\n if (TID == LAST_THREAD) {\n // Store block sum to global memory\n let total_sum: u32 = prefix_sum + bitmask;\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n }\n\n let prefix_sum = bit_prefix_sums[extract_bits]; \n\n // Scan bit prefix sums\n if (TID == LAST_THREAD) {\n var sum: u32 = 0;\n bit_prefix_sums[extract_bits] += 1;\n for (var i: u32 = 0; i < 4; i++) {\n s_prefix_sum_scan[i] = sum;\n sum += bit_prefix_sums[i];\n }\n }\n workgroupBarrier();\n\n if (GID < ELEMENT_COUNT) {\n // Compute new position\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\n\n // Shuffle elements locally\n input[WID + new_pos] = elm;\n values[WID + new_pos] = val;\n local_prefix_sums[WID + new_pos] = prefix_sum;\n }\n}":"\n\n@group(0) @binding(0) var input: array;\n@group(0) @binding(1) var local_prefix_sums: array;\n@group(0) @binding(2) var block_sums: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\nvar s_prefix_sum: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n // Extract 2 bits from the input\n let elm = input[GID];\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\n\n var bit_prefix_sums = array(0, 0, 0, 0);\n\n // If the workgroup is inactive, prevent block_sums buffer update\n var LAST_THREAD: u32 = 0xffffffff; \n\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\n // Otherwise store the index of the last active thread in the workgroup\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\n }\n\n // Initialize parameters for double-buffering\n let TPW = THREADS_PER_WORKGROUP + 1;\n var swapOffset: u32 = 0;\n var inOffset: u32 = TID;\n var outOffset: u32 = TID + TPW;\n\n // 4-way prefix sum\n for (var b: u32 = 0; b < 4; b++) {\n // Initialize local prefix with bitmask\n let bitmask = select(0u, 1u, extract_bits == b);\n s_prefix_sum[inOffset + 1] = bitmask;\n workgroupBarrier();\n\n // Prefix sum\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\n if (TID >= offset) {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\n } else {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n \n workgroupBarrier();\n }\n\n // Store prefix sum for current bit\n let prefix_sum = s_prefix_sum[inOffset];\n bit_prefix_sums[b] = prefix_sum;\n\n if (TID == LAST_THREAD) {\n // Store block sum to global memory\n let total_sum: u32 = prefix_sum + bitmask;\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n }\n\n // Store local prefix sum to global memory\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\n}";this.shaderModules={blockSum:this.device.createShaderModule({label:"radix-sort-block-sum",code:this.has_values?r:e(r)}),reorder:this.device.createShaderModule({label:"radix-sort-reorder",code:this.has_values?f:e(f)})}}},{key:"create_buffers",value:function(e,r){var i=this.device.createBuffer({size:4*this.count,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}),t=this.has_values?this.device.createBuffer({size:4*this.count,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}):null,n=this.device.createBuffer({size:4*this.count,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}),o=this.device.createBuffer({size:4*this.prefix_block_workgroup_count,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST});this.buffers={keys:e,values:r,tmpKeys:i,tmpValues:t,localPrefixSum:n,prefixBlockSum:o}}},{key:"create_pipelines",value:function(){for(var e=0;ee.length)&&(r=e.length);for(var i=0,t=Array(r);ie.limits.maxComputeWorkgroupsPerDimension){var t=Math.floor(Math.sqrt(r)),n=Math.ceil(r/t);i.x=t,i.y=n}return i}function u(e){var r=e.device,i=e.label,t=e.data,n=e.usage,s=void 0===n?0:n,o=r.createBuffer({label:i,usage:s,size:4*t.length,mappedAtCreation:!0});return new Uint32Array(o.getMappedRange()).set(t),o.unmap(),o}var _=function(){return n((function e(r){var t=r.device,n=r.data,s=r.count,o=r.workgroup_size,a=void 0===o?{x:16,y:16}:o,u=r.avoid_bank_conflicts,_=void 0!==u&&u;if(i(this,e),this.device=t,this.workgroup_size=a,this.threads_per_workgroup=a.x*a.y,this.items_per_workgroup=2*this.threads_per_workgroup,Math.log2(this.threads_per_workgroup)%1!=0)throw new Error("workgroup_size.x * workgroup_size.y must be a power of two. (current: ".concat(this.threads_per_workgroup,")"));this.pipelines=[],this.shaderModule=this.device.createShaderModule({label:"prefix-sum",code:_?"\n\n@group(0) @binding(0) var items: array;\n@group(0) @binding(1) var blockSums: array;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ITEMS_PER_WORKGROUP: u32;\n\nconst NUM_BANKS: u32 = 32;\nconst LOG_NUM_BANKS: u32 = 5;\n\nfn get_offset(offset: u32) -> u32 {\n // return offset >> LOG_NUM_BANKS; // Conflict-free\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\n}\n\nvar temp: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn reduce_downsweep(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n let ELM_TID = TID * 2; // Element pair local ID\n let ELM_GID = GID * 2; // Element pair global ID\n \n // Load input to shared memory\n let ai: u32 = TID;\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\n let s_ai = ai + get_offset(ai);\n let s_bi = bi + get_offset(bi);\n let g_ai = ai + WID * 2;\n let g_bi = bi + WID * 2;\n temp[s_ai] = items[g_ai];\n temp[s_bi] = items[g_bi];\n\n var offset: u32 = 1;\n\n // Up-sweep (reduce) phase\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n ai += get_offset(ai);\n bi += get_offset(bi);\n temp[bi] += temp[ai];\n }\n\n offset *= 2;\n }\n\n // Save workgroup sum and clear last element\n if (TID == 0) {\n var last_offset = ITEMS_PER_WORKGROUP - 1;\n last_offset += get_offset(last_offset);\n\n blockSums[WORKGROUP_ID] = temp[last_offset];\n temp[last_offset] = 0;\n }\n\n // Down-sweep phase\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\n offset >>= 1;\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n ai += get_offset(ai);\n bi += get_offset(bi);\n\n let t: u32 = temp[ai];\n temp[ai] = temp[bi];\n temp[bi] += t;\n }\n }\n workgroupBarrier();\n\n // Copy result from shared memory to global memory\n items[g_ai] = temp[s_ai];\n items[g_bi] = temp[s_bi];\n}\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn add_block_sums(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n let ELM_ID = GID * 2;\n let blockSum = blockSums[WORKGROUP_ID];\n\n items[ELM_ID] += blockSum;\n items[ELM_ID + 1] += blockSum;\n}":"\n\n@group(0) @binding(0) var items: array;\n@group(0) @binding(1) var blockSums: array;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ITEMS_PER_WORKGROUP: u32;\n\nvar temp: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn reduce_downsweep(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n let ELM_TID = TID * 2; // Element pair local ID\n let ELM_GID = GID * 2; // Element pair global ID\n \n // Load input to shared memory\n temp[ELM_TID] = items[ELM_GID];\n temp[ELM_TID + 1] = items[ELM_GID + 1];\n\n var offset: u32 = 1;\n\n // Up-sweep (reduce) phase\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n temp[bi] += temp[ai];\n }\n\n offset *= 2;\n }\n\n // Save workgroup sum and clear last element\n if (TID == 0) {\n let last_offset = ITEMS_PER_WORKGROUP - 1;\n\n blockSums[WORKGROUP_ID] = temp[last_offset];\n temp[last_offset] = 0;\n }\n\n // Down-sweep phase\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\n offset >>= 1;\n workgroupBarrier();\n\n if (TID < d) {\n var ai: u32 = offset * (ELM_TID + 1) - 1;\n var bi: u32 = offset * (ELM_TID + 2) - 1;\n\n let t: u32 = temp[ai];\n temp[ai] = temp[bi];\n temp[bi] += t;\n }\n }\n workgroupBarrier();\n\n // Copy result from shared memory to global memory\n items[ELM_GID] = temp[ELM_TID];\n items[ELM_GID + 1] = temp[ELM_TID + 1];\n}\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn add_block_sums(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n \n\n let ELM_ID = GID * 2;\n let blockSum = blockSums[WORKGROUP_ID];\n\n items[ELM_ID] += blockSum;\n items[ELM_ID + 1] += blockSum;\n}"}),this.create_pass_recursive(n,s)}),[{key:"create_pass_recursive",value:function(e,r){var i=Math.ceil(r/this.items_per_workgroup),t=a(this.device,i),n=this.device.createBuffer({label:"prefix-sum-block-sum",size:4*i,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}),s=this.device.createBindGroupLayout({entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),o=this.device.createBindGroup({label:"prefix-sum-bind-group",layout:s,entries:[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:n}}]}),u=this.device.createPipelineLayout({bindGroupLayouts:[s]}),_=this.device.createComputePipeline({label:"prefix-sum-scan-pipeline",layout:u,compute:{module:this.shaderModule,entryPoint:"reduce_downsweep",constants:{WORKGROUP_SIZE_X:this.workgroup_size.x,WORKGROUP_SIZE_Y:this.workgroup_size.y,THREADS_PER_WORKGROUP:this.threads_per_workgroup,ITEMS_PER_WORKGROUP:this.items_per_workgroup}}});if(this.pipelines.push({pipeline:_,bindGroup:o,dispatchSize:t}),i>1){this.create_pass_recursive(n,i);var f=this.device.createComputePipeline({label:"prefix-sum-add-block-pipeline",layout:u,compute:{module:this.shaderModule,entryPoint:"add_block_sums",constants:{WORKGROUP_SIZE_X:this.workgroup_size.x,WORKGROUP_SIZE_Y:this.workgroup_size.y,THREADS_PER_WORKGROUP:this.threads_per_workgroup}}});this.pipelines.push({pipeline:f,bindGroup:o,dispatchSize:t})}}},{key:"get_dispatch_chain",value:function(){return this.pipelines.flatMap((function(e){return[e.dispatchSize.x,e.dispatchSize.y,1]}))}},{key:"dispatch",value:function(e,r){for(var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:0,t=0;t inputKeys: array;\n@group(0) @binding(1) var outputKeys: array;\n@group(0) @binding(2) var local_prefix_sum: array;\n@group(0) @binding(3) var prefix_block_sum: array;\n@group(0) @binding(4) var inputValues: array;\n@group(0) @binding(5) var outputValues: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort_reorder(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) { \n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n if (GID >= ELEMENT_COUNT) {\n return;\n }\n\n let k = inputKeys[GID];\n let v = inputValues[GID];\n\n let local_prefix = local_prefix_sum[GID];\n\n // Calculate new position\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\n let sorted_position = prefix_block_sum[pid] + local_prefix;\n \n outputKeys[sorted_position] = k;\n outputValues[sorted_position] = v;\n}",l=function(){var e=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];return"\n\n@group(0) @binding(0) var input: array;\n@group(0) @binding(1) var output: array;\n@group(0) @binding(2) var original: array;\n@group(0) @binding(3) var is_sorted: u32;\n\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride ELEMENT_COUNT: u32;\noverride START_ELEMENT: u32;\n\nvar s_data: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn check_sort(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT;\n let GID = TID + WID; // Global thread ID\n\n // Load data into shared memory\n ".concat(arguments.length>0&&void 0!==arguments[0]&&arguments[0]?c:"s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);","\n\n // Perform parallel reduction\n for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { \n workgroupBarrier(); \n if (TID % (2u * d) == 0u) {\n s_data[TID] += s_data[TID + d];\n }\n }\n workgroupBarrier();\n\n // Write reduction result\n ").concat(e?d(r):p,"\n}")},p="\n if (TID == 0) {\n output[WORKGROUP_ID] = s_data[0];\n }\n",c="\n let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\n\n // Load current element into shared memory\n // Also load next element for comparison\n let elm = select(0u, input[GID], GID < ELEMENT_COUNT);\n let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1);\n s_data[TID] = elm;\n workgroupBarrier();\n\n s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next);\n",d=function(e){return"\n let fullDispatchLength = arrayLength(&output);\n let dispatchIndex = TID * 3;\n\n if (dispatchIndex >= fullDispatchLength) {\n return;\n }\n\n ".concat(e?b:h,"\n")},h="\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u);\n",b="\n if (TID == 0 && s_data[0] == 0) {\n is_sorted = 1u;\n }\n\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0);\n",O=function(){return n((function e(r){var t=r.device,n=r.data,s=r.result,o=r.original,a=r.is_sorted,u=r.count,_=r.start,f=void 0===_?0:_,l=r.full_check,p=void 0===l||l,c=r.workgroup_size,d=void 0===c?{x:16,y:16}:c;i(this,e),this.device=t,this.count=u,this.start=f,this.full_check=p,this.workgroup_size=d,this.threads_per_workgroup=d.x*d.y,this.pipelines=[],this.buffers={data:n,result:s,original:o,is_sorted:a,outputs:[]},this.create_passes_recursive(n,u)}),[{key:"create_passes_recursive",value:function(e,r){var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:0,t=Math.ceil(r/this.threads_per_workgroup),n=0===i,o=t<=1,a=o?this.buffers.result:this.device.createBuffer({label:"check-sort-".concat(this.full_check?"full":"fast","-").concat(i),size:4*t,usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_SRC|GPUBufferUsage.COPY_DST}),u=this.device.createBindGroupLayout({entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}].concat(s(o?[{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]:[]))}),_=this.device.createBindGroup({layout:u,entries:[{binding:0,resource:{buffer:e}},{binding:1,resource:{buffer:a}}].concat(s(o?[{binding:2,resource:{buffer:this.buffers.original}},{binding:3,resource:{buffer:this.buffers.is_sorted}}]:[]))}),f=this.device.createPipelineLayout({bindGroupLayouts:[u]}),p=n?this.start+r:r,c=n?this.start:0,d=this.device.createComputePipeline({layout:f,compute:{module:this.device.createShaderModule({code:l(n,o,this.full_check),label:"check-sort"}),entryPoint:"check_sort",constants:{WORKGROUP_SIZE_X:this.workgroup_size.x,WORKGROUP_SIZE_Y:this.workgroup_size.y,THREADS_PER_WORKGROUP:this.threads_per_workgroup,ELEMENT_COUNT:p,START_ELEMENT:c}}});this.buffers.outputs.push(a),this.pipelines.push({pipeline:d,bindGroup:_}),o||this.create_passes_recursive(a,t,i+1)}},{key:"dispatch",value:function(e,r){for(var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:0,t=0;t1);return n}}])}(),g=function(){return n((function e(){var r=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},t=r.device,n=r.keys,s=r.values,o=r.count,u=r.bit_count,_=void 0===u?32:u,f=r.workgroup_size,l=void 0===f?{x:16,y:16}:f,p=r.check_order,c=void 0!==p&&p,d=r.local_shuffle,h=void 0!==d&&d,b=r.avoid_bank_conflicts,O=void 0!==b&&b;if(i(this,e),null==t)throw new Error("No device provided");if(null==n)throw new Error("No keys buffer provided");if(!Number.isInteger(o)||o<=0)throw new Error("Invalid count parameter");if(!Number.isInteger(_)||_<=0||_>32)throw new Error("Invalid bit_count parameter");if(!Number.isInteger(l.x)||!Number.isInteger(l.y))throw new Error("Invalid workgroup_size parameter");if(_%4!=0)throw new Error("bit_count must be a multiple of 4");this.device=t,this.count=o,this.bit_count=_,this.workgroup_size=l,this.check_order=c,this.local_shuffle=h,this.avoid_bank_conflicts=O,this.threads_per_workgroup=l.x*l.y,this.workgroup_count=Math.ceil(o/this.threads_per_workgroup),this.prefix_block_workgroup_count=4*this.workgroup_count,this.has_values=null!=s,this.dispatchSize={},this.shaderModules={},this.buffers={},this.pipelines=[],this.kernels={},this.dispatchSize=a(this.device,this.workgroup_count),this.create_shader_modules(),this.create_pipelines(n,s)}),[{key:"create_shader_modules",value:function(){var e=function(e){return e.split("\n").filter((function(e){return!e.toLowerCase().includes("values")})).join("\n")},r=this.local_shuffle?"\n\n@group(0) @binding(0) var input: array;\n@group(0) @binding(1) var local_prefix_sums: array;\n@group(0) @binding(2) var block_sums: array;\n@group(0) @binding(3) var values: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\nvar s_prefix_sum: array;\nvar s_prefix_sum_scan: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n // Extract 2 bits from the input\n let elm = input[GID];\n let val = values[GID];\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\n\n var bit_prefix_sums = array(0, 0, 0, 0);\n\n // If the workgroup is inactive, prevent block_sums buffer update\n var LAST_THREAD: u32 = 0xffffffff; \n\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\n // Otherwise store the index of the last active thread in the workgroup\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\n }\n\n // Initialize parameters for double-buffering\n let TPW = THREADS_PER_WORKGROUP + 1;\n var swapOffset: u32 = 0;\n var inOffset: u32 = TID;\n var outOffset: u32 = TID + TPW;\n\n // 4-way prefix sum\n for (var b: u32 = 0; b < 4; b++) {\n // Initialize local prefix with bitmask\n let bitmask = select(0u, 1u, extract_bits == b);\n s_prefix_sum[inOffset + 1] = bitmask;\n workgroupBarrier();\n\n // Prefix sum\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\n if (TID >= offset) {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\n } else {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n \n workgroupBarrier();\n }\n\n // Store prefix sum for current bit\n let prefix_sum = s_prefix_sum[inOffset];\n bit_prefix_sums[b] = prefix_sum;\n\n if (TID == LAST_THREAD) {\n // Store block sum to global memory\n let total_sum: u32 = prefix_sum + bitmask;\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n }\n\n let prefix_sum = bit_prefix_sums[extract_bits]; \n\n // Scan bit prefix sums\n if (TID == LAST_THREAD) {\n var sum: u32 = 0;\n bit_prefix_sums[extract_bits] += 1;\n for (var i: u32 = 0; i < 4; i++) {\n s_prefix_sum_scan[i] = sum;\n sum += bit_prefix_sums[i];\n }\n }\n workgroupBarrier();\n\n if (GID < ELEMENT_COUNT) {\n // Compute new position\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\n\n // Shuffle elements locally\n input[WID + new_pos] = elm;\n values[WID + new_pos] = val;\n local_prefix_sums[WID + new_pos] = prefix_sum;\n }\n}":"\n\n@group(0) @binding(0) var input: array;\n@group(0) @binding(1) var local_prefix_sums: array;\n@group(0) @binding(2) var block_sums: array;\n\noverride WORKGROUP_COUNT: u32;\noverride THREADS_PER_WORKGROUP: u32;\noverride WORKGROUP_SIZE_X: u32;\noverride WORKGROUP_SIZE_Y: u32;\noverride CURRENT_BIT: u32;\noverride ELEMENT_COUNT: u32;\n\nvar s_prefix_sum: array;\n\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\nfn radix_sort(\n @builtin(workgroup_id) w_id: vec3,\n @builtin(num_workgroups) w_dim: vec3,\n @builtin(local_invocation_index) TID: u32, // Local thread ID\n) {\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\n let GID = WID + TID; // Global thread ID\n\n // Extract 2 bits from the input\n let elm = input[GID];\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\n\n var bit_prefix_sums = array(0, 0, 0, 0);\n\n // If the workgroup is inactive, prevent block_sums buffer update\n var LAST_THREAD: u32 = 0xffffffff; \n\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\n // Otherwise store the index of the last active thread in the workgroup\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\n }\n\n // Initialize parameters for double-buffering\n let TPW = THREADS_PER_WORKGROUP + 1;\n var swapOffset: u32 = 0;\n var inOffset: u32 = TID;\n var outOffset: u32 = TID + TPW;\n\n // 4-way prefix sum\n for (var b: u32 = 0; b < 4; b++) {\n // Initialize local prefix with bitmask\n let bitmask = select(0u, 1u, extract_bits == b);\n s_prefix_sum[inOffset + 1] = bitmask;\n workgroupBarrier();\n\n // Prefix sum\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\n if (TID >= offset) {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\n } else {\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n \n workgroupBarrier();\n }\n\n // Store prefix sum for current bit\n let prefix_sum = s_prefix_sum[inOffset];\n bit_prefix_sums[b] = prefix_sum;\n\n if (TID == LAST_THREAD) {\n // Store block sum to global memory\n let total_sum: u32 = prefix_sum + bitmask;\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\n }\n\n // Swap buffers\n outOffset = inOffset;\n swapOffset = TPW - swapOffset;\n inOffset = TID + swapOffset;\n }\n\n // Store local prefix sum to global memory\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\n}";this.shaderModules={blockSum:this.device.createShaderModule({label:"radix-sort-block-sum",code:this.has_values?r:e(r)}),reorder:this.device.createShaderModule({label:"radix-sort-reorder",code:this.has_values?f:e(f)})}}},{key:"create_pipelines",value:function(e,r){var i=this.create_prefix_sum_kernel(),t=i.prefixSumKernel,n=i.prefixBlockSumBuffer,s=this.calculate_dispatch_sizes(t);this.create_buffers(e,r,n,s),this.create_check_sort_kernels(this.buffers.keys,s);for(var o=0;o items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n find_optimal_dispatch_size(item_count) {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n let workgroup_count = Math.ceil(item_count / this.items_per_workgroup)\r\n let x = workgroup_count\r\n let y = 1\r\n\r\n if (workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n x = Math.floor(Math.sqrt(workgroup_count))\r\n y = Math.ceil(workgroup_count / x)\r\n workgroup_count = x * y\r\n }\r\n\r\n return { \r\n workgroup_count,\r\n dispatchSize: { x, y },\r\n }\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const { workgroup_count, dispatchSize } = this.find_optimal_dispatch_size(count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n dispatch(pass) {\r\n for (const { pipeline, bindGroup, dispatchSize } of this.pipelines) {\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroups(dispatchSize.x, dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.find_optimal_dispatch_size()\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n\r\n // Create GPU buffers\r\n this.create_buffers(keys, values)\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines()\r\n }\r\n\r\n find_optimal_dispatch_size() {\r\n const { maxComputeWorkgroupsPerDimension } = this.device.limits\r\n\r\n this.dispatchSize = { \r\n x: this.workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (this.workgroup_count > maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(this.workgroup_count))\r\n const y = Math.ceil(this.workgroup_count / x)\r\n \r\n this.dispatchSize = { x, y } \r\n }\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_buffers(keys, values) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n }\r\n }\r\n\r\n // Create radix sort passes for every 2 bits\r\n create_pipelines() {\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n\r\n // Compute block sums prefix sums\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: this.buffers.prefixBlockSum, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, prefixSumKernel, reorderPipeline })\r\n }\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) {\r\n for (const { blockSumPipeline, prefixSumKernel, reorderPipeline } of this.pipelines) { \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n\r\n prefixSumKernel.dispatch(pass)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroups(this.dispatchSize.x, this.dispatchSize.y, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;"],"names":["PrefixSumKernel","_createClass","_ref","device","data","count","_ref$workgroup_size","workgroup_size","x","y","_ref$avoid_bank_confl","avoid_bank_conflicts","_classCallCheck","this","threads_per_workgroup","items_per_workgroup","Math","log2","Error","concat","pipelines","shaderModule","createShaderModule","label","code","create_pass_recursive","key","value","item_count","maxComputeWorkgroupsPerDimension","limits","workgroup_count","ceil","floor","sqrt","dispatchSize","_this$find_optimal_di","find_optimal_dispatch_size","blockSumBuffer","createBuffer","size","usage","GPUBufferUsage","STORAGE","COPY_SRC","COPY_DST","bindGroupLayout","createBindGroupLayout","entries","binding","visibility","GPUShaderStage","COMPUTE","buffer","type","bindGroup","createBindGroup","layout","resource","pipelineLayout","createPipelineLayout","bindGroupLayouts","scanPipeline","createComputePipeline","compute","module","entryPoint","constants","WORKGROUP_SIZE_X","WORKGROUP_SIZE_Y","THREADS_PER_WORKGROUP","ITEMS_PER_WORKGROUP","push","pipeline","blockSumPipeline","pass","_step","_iterator","_createForOfIteratorHelper","s","n","done","_step$value","setPipeline","setBindGroup","dispatchWorkgroups","err","e","f","radixSortReorderSource","RadixSortKernel","arguments","length","undefined","keys","values","_ref$bit_count","bit_count","_ref$local_shuffle","local_shuffle","Number","isInteger","prefix_block_workgroup_count","has_values","shaderModules","buffers","create_shader_modules","create_buffers","create_pipelines","remove_values","source","split","filter","line","toLowerCase","includes","join","blockSumSource","blockSum","reorder","reorderSource","tmpKeysBuffer","tmpValuesBuffer","localPrefixSumBuffer","prefixBlockSumBuffer","tmpKeys","tmpValues","localPrefixSum","prefixBlockSum","bit","even","inKeys","inValues","outKeys","outValues","create_block_sum_pipeline","prefixSumKernel","reorderPipeline","create_reorder_pipeline","_toConsumableArray","WORKGROUP_COUNT","ELEMENT_COUNT","CURRENT_BIT","dispatch"],"mappings":"0wEAAA,ICGMA,EAAe,WAoChB,OAAAC,GAvBD,SAAAD,EAAAE,GAMG,IALCC,EAAMD,EAANC,OACAC,EAAIF,EAAJE,KACAC,EAAKH,EAALG,MAAKC,EAAAJ,EACLK,eAAAA,OAAiB,IAAHD,EAAG,CAAEE,EAAG,GAAIC,EAAG,IAAIH,EAAAI,EAAAR,EACjCS,qBAAAA,OAAuB,IAAHD,GAAQA,EAO5B,GAP4BE,OAAAZ,GAE5Ba,KAAKV,OAASA,EACdU,KAAKN,eAAiBA,EACtBM,KAAKC,sBAAwBP,EAAeC,EAAID,EAAeE,EAC/DI,KAAKE,oBAAsB,EAAIF,KAAKC,sBAEhCE,KAAKC,KAAKJ,KAAKC,uBAAyB,GAAM,EAC9C,MAAM,IAAII,MAAKC,yEAAAA,OAA0EN,KAAKC,sBAAqB,MAEvHD,KAAKO,UAAY,GAEjBP,KAAKQ,aAAeR,KAAKV,OAAOmB,mBAAmB,CAC/CC,MAAO,aACPC,KAAMb,EC6EhB,kuGFvBA,+oFCnDME,KAAKY,sBAAsBrB,EAAMC,EACrC,GAAC,CAAA,CAAAqB,IAAA,6BAAAC,MAED,SAA2BC,GACvB,IAAQC,EAAqChB,KAAKV,OAAO2B,OAAjDD,iCAEJE,EAAkBf,KAAKgB,KAAKJ,EAAaf,KAAKE,qBAC9CP,EAAIuB,EACJtB,EAAI,EAQR,OANIsB,EAAkBF,IAGlBE,GAFAvB,EAAIQ,KAAKiB,MAAMjB,KAAKkB,KAAKH,MACzBtB,EAAIO,KAAKgB,KAAKD,EAAkBvB,KAI7B,CACHuB,gBAAAA,EACAI,aAAc,CAAE3B,EAAAA,EAAGC,EAAAA,GAE3B,GAAC,CAAAiB,IAAA,wBAAAC,MAED,SAAsBvB,EAAMC,GAExB,IAAA+B,EAA0CvB,KAAKwB,2BAA2BhC,GAAlE0B,EAAeK,EAAfL,gBAAiBI,EAAYC,EAAZD,aAGnBG,EAAiBzB,KAAKV,OAAOoC,aAAa,CAC5CC,KAAwB,EAAlBT,EACNU,MAAOC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAIvEC,EAAkBjC,KAAKV,OAAO4C,sBAAsB,CACtDC,QAAS,CACL,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,YAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,eAKtBC,EAAY1C,KAAKV,OAAOqD,gBAAgB,CAC1CjC,MAAO,wBACPkC,OAAQX,EACRE,QAAS,CACL,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQjD,IAExB,CACI6C,QAAS,EACTS,SAAU,CAAEL,OAAQf,OAK1BqB,EAAiB9C,KAAKV,OAAOyD,qBAAqB,CACpDC,iBAAkB,CAAEf,KAIlBgB,EAAejD,KAAKV,OAAO4D,sBAAsB,CACnDxC,MAAO,2BACPkC,OAAQE,EACRK,QAAS,CACLC,OAAQpD,KAAKQ,aACb6C,WAAY,mBACZC,UAAW,CACPC,iBAAoBvD,KAAKN,eAAeC,EACxC6D,iBAAoBxD,KAAKN,eAAeE,EACxC6D,sBAAyBzD,KAAKC,sBAC9ByD,oBAAuB1D,KAAKE,wBAOxC,GAFAF,KAAKO,UAAUoD,KAAK,CAAEC,SAAUX,EAAcP,UAAAA,EAAWpB,aAAAA,IAErDJ,EAAkB,EAAG,CAErBlB,KAAKY,sBAAsBa,EAAgBP,GAG3C,IAAM2C,EAAmB7D,KAAKV,OAAO4D,sBAAsB,CACvDxC,MAAO,gCACPkC,OAAQE,EACRK,QAAS,CACLC,OAAQpD,KAAKQ,aACb6C,WAAY,iBACZC,UAAW,CACPC,iBAAoBvD,KAAKN,eAAeC,EACxC6D,iBAAoBxD,KAAKN,eAAeE,EACxC6D,sBAAyBzD,KAAKC,0BAK1CD,KAAKO,UAAUoD,KAAK,CAAEC,SAAUC,EAAkBnB,UAAAA,EAAWpB,aAAAA,GACjE,CACJ,GAAC,CAAAT,IAAA,WAAAC,MAED,SAASgD,GAAM,IACuDC,EADvDC,EAAAC,EACyCjE,KAAKO,WAAS,IAAlE,IAAAyD,EAAAE,MAAAH,EAAAC,EAAAG,KAAAC,MAAoE,CAAA,IAAAC,EAAAN,EAAAjD,MAAvD8C,EAAQS,EAART,SAAUlB,EAAS2B,EAAT3B,UAAWpB,EAAY+C,EAAZ/C,aAC9BwC,EAAKQ,YAAYV,GACjBE,EAAKS,aAAa,EAAG7B,GACrBoB,EAAKU,mBAAmBlD,EAAa3B,EAAG2B,EAAa1B,EAAG,EAC5D,CAAC,CAAA,MAAA6E,GAAAT,EAAAU,EAAAD,EAAA,CAAA,QAAAT,EAAAW,GAAA,CACL,IAAC,CArJgB,GEHfC,EA0CJ,y9CCrCIC,EAAe,WA8DhB,OAAAzF,GA7CD,SAAAyF,IASQ,IAAAxF,EAAAyF,UAAAC,OAAA,QAAAC,IAAAF,UAAA,GAAAA,UAAA,GAAJ,CAAE,EARFxF,EAAMD,EAANC,OACA2F,EAAI5F,EAAJ4F,KACAC,EAAM7F,EAAN6F,OACA1F,EAAKH,EAALG,MAAK2F,EAAA9F,EACL+F,UAAAA,OAAY,IAAHD,EAAG,GAAEA,EAAA1F,EAAAJ,EACdK,eAAAA,OAAiB,IAAHD,EAAG,CAAEE,EAAG,GAAIC,EAAG,IAAIH,EAAA4F,EAAAhG,EACjCiG,cAAAA,OAAgB,IAAHD,GAAQA,EAAAxF,EAAAR,EACrBS,qBAAAA,OAAuB,IAAHD,GAAQA,EAE5B,GAF4BE,OAAA8E,GAEd,MAAVvF,EAAgB,MAAM,IAAIe,MAAM,sBACpC,GAAY,MAAR4E,EAAc,MAAM,IAAI5E,MAAM,2BAClC,IAAKkF,OAAOC,UAAUhG,IAAUA,GAAS,EAAG,MAAM,IAAIa,MAAM,2BAC5D,IAAKkF,OAAOC,UAAUJ,IAAcA,GAAa,EAAG,MAAM,IAAI/E,MAAM,+BACpE,IAAKkF,OAAOC,UAAU9F,EAAeC,KAAO4F,OAAOC,UAAU9F,EAAeE,GAAI,MAAM,IAAIS,MAAM,oCAEhGL,KAAKV,OAASA,EACdU,KAAKR,MAAQA,EACbQ,KAAKoF,UAAYA,EACjBpF,KAAKN,eAAiBA,EACtBM,KAAKsF,cAAgBA,EACrBtF,KAAKF,qBAAuBA,EAE5BE,KAAKC,sBAAwBP,EAAeC,EAAID,EAAeE,EAC/DI,KAAKkB,gBAAkBf,KAAKgB,KAAK3B,EAAQQ,KAAKC,uBAC9CD,KAAKyF,6BAA+B,EAAIzF,KAAKkB,gBAE7ClB,KAAK0F,WAAwB,MAAVR,EAEnBlF,KAAKsB,aAAe,GACpBtB,KAAK2F,cAAgB,GACrB3F,KAAK4F,QAAU,GACf5F,KAAKO,UAAY,GAGjBP,KAAKwB,6BAGLxB,KAAK6F,wBAGL7F,KAAK8F,eAAeb,EAAMC,GAG1BlF,KAAK+F,kBACT,GAAC,CAAA,CAAAlF,IAAA,6BAAAC,MAED,WACI,IAAQE,EAAqChB,KAAKV,OAAO2B,OAAjDD,iCAOR,GALAhB,KAAKsB,aAAe,CAChB3B,EAAGK,KAAKkB,gBACRtB,EAAG,GAGHI,KAAKkB,gBAAkBF,EAAkC,CACzD,IAAMrB,EAAIQ,KAAKiB,MAAMjB,KAAKkB,KAAKrB,KAAKkB,kBAC9BtB,EAAIO,KAAKgB,KAAKnB,KAAKkB,gBAAkBvB,GAE3CK,KAAKsB,aAAe,CAAE3B,EAAAA,EAAGC,EAAAA,EAC7B,CACJ,GAAC,CAAAiB,IAAA,wBAAAC,MAED,WAEI,IAAMkF,EAAgB,SAACC,GACnB,OAAOA,EAAOC,MAAM,MACNC,QAAO,SAAAC,GAAI,OAAKA,EAAKC,cAAcC,SAAS,SAAS,IACrDC,KAAK,OAGjBC,EAAiBxG,KAAKsF,cCqBlC,mpHC5BA,o8FFSMtF,KAAK2F,cAAgB,CACjBc,SAAUzG,KAAKV,OAAOmB,mBAAmB,CACrCC,MAAO,uBACPC,KAAMX,KAAK0F,WAAac,EAAiBR,EAAcQ,KAE3DE,QAAS1G,KAAKV,OAAOmB,mBAAmB,CACpCC,MAAO,qBACPC,KAAMX,KAAK0F,WAAaiB,EAAgBX,EAAcW,KAGlE,GAAC,CAAA9F,IAAA,iBAAAC,MAED,SAAemE,EAAMC,GAEjB,IAAM0B,EAAgB5G,KAAKV,OAAOoC,aAAa,CAC3CC,KAAmB,EAAb3B,KAAKR,MACXoC,MAAOC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAEvE6E,EAAmB7G,KAAK0F,WAAoB1F,KAAKV,OAAOoC,aAAa,CACvEC,KAAmB,EAAb3B,KAAKR,MACXoC,MAAOC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAFlC,KAMrC8E,EAAuB9G,KAAKV,OAAOoC,aAAa,CAClDC,KAAmB,EAAb3B,KAAKR,MACXoC,MAAOC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAIvE+E,EAAuB/G,KAAKV,OAAOoC,aAAa,CAClDC,KAA0C,EAApC3B,KAAKyF,6BACX7D,MAAOC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAG7EhC,KAAK4F,QAAU,CACXX,KAAMA,EACNC,OAAQA,EACR8B,QAASJ,EACTK,UAAWJ,EACXK,eAAgBJ,EAChBK,eAAgBJ,EAExB,GAEA,CAAAlG,IAAA,mBAAAC,MACA,WACI,IAAK,IAAIsG,EAAM,EAAGA,EAAMpH,KAAKoF,UAAWgC,GAAO,EAAG,CAE9C,IAAMC,EAAaD,EAAM,GAAK,EACxBE,EAAYD,EAAOrH,KAAK4F,QAAQX,KAAOjF,KAAK4F,QAAQoB,QACpDO,EAAYF,EAAOrH,KAAK4F,QAAQV,OAASlF,KAAK4F,QAAQqB,UACtDO,EAAYH,EAAOrH,KAAK4F,QAAQoB,QAAUhH,KAAK4F,QAAQX,KACvDwC,EAAYJ,EAAOrH,KAAK4F,QAAQqB,UAAYjH,KAAK4F,QAAQV,OAGzDrB,EAAmB7D,KAAK0H,0BAA0BJ,EAAQC,EAAUH,GAGpEO,EAAkB,IAAIxI,EAAgB,CACxCG,OAAQU,KAAKV,OACbC,KAAMS,KAAK4F,QAAQuB,eACnB3H,MAAOQ,KAAKyF,6BACZ/F,eAAgBM,KAAKN,eACrBI,qBAAsBE,KAAKF,uBAIzB8H,EAAkB5H,KAAK6H,wBAAwBP,EAAQC,EAAUC,EAASC,EAAWL,GAE3FpH,KAAKO,UAAUoD,KAAK,CAAEE,iBAAAA,EAAkB8D,gBAAAA,EAAiBC,gBAAAA,GAC7D,CACJ,GAAC,CAAA/G,IAAA,4BAAAC,MAED,SAA0BwG,EAAQC,EAAUH,GACxC,IAAMnF,EAAkBjC,KAAKV,OAAO4C,sBAAsB,CACtDxB,MAAO,uBACPyB,QACI,CAAA,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAMzC,KAAKsF,cAAgB,UAAY,sBAErD,CACIlD,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,YAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aACnBnC,OAAAwH,EACG9H,KAAKsF,eAAiBtF,KAAK0F,WAAa,CAAC,CACzCtD,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aACf,OAIPC,EAAY1C,KAAKV,OAAOqD,gBAAgB,CAC1CC,OAAQX,EACRE,QACI,CAAA,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQ8E,IAExB,CACIlF,QAAS,EACTS,SAAU,CAAEL,OAAQxC,KAAK4F,QAAQsB,iBAErC,CACI9E,QAAS,EACTS,SAAU,CAAEL,OAAQxC,KAAK4F,QAAQuB,kBACpC7G,OAAAwH,EAEG9H,KAAKsF,eAAiBtF,KAAK0F,WAAa,CAAC,CACzCtD,QAAS,EACTS,SAAU,CAAEL,OAAQ+E,KACnB,OAIPzE,EAAiB9C,KAAKV,OAAOyD,qBAAqB,CACpDC,iBAAkB,CAAEf,KAoBxB,MAAO,CACH2B,SAlBqB5D,KAAKV,OAAO4D,sBAAsB,CACvDxC,MAAO,uBACPkC,OAAQE,EACRK,QAAS,CACLC,OAAQpD,KAAK2F,cAAcc,SAC3BpD,WAAY,aACZC,UAAW,CACPC,iBAAoBvD,KAAKN,eAAeC,EACxC6D,iBAAoBxD,KAAKN,eAAeE,EACxCmI,gBAAmB/H,KAAKkB,gBACxBuC,sBAAyBzD,KAAKC,sBAC9B+H,cAAiBhI,KAAKR,MACtByI,YAAeb,MAOvB1E,UAAAA,EAER,GAAC,CAAA7B,IAAA,0BAAAC,MAED,SAAwBwG,EAAQC,EAAUC,EAASC,EAAWL,GAC1D,IAAMnF,EAAkBjC,KAAKV,OAAO4C,sBAAsB,CACtDxB,MAAO,qBACPyB,QACI,CAAA,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,YAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,uBACnBnC,OAAAwH,EACG9H,KAAK0F,WAAa,CAClB,CACItD,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aAEpB,OAINC,EAAY1C,KAAKV,OAAOqD,gBAAgB,CAC1CC,OAAQX,EACRE,QACI,CAAA,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQ8E,IAExB,CACIlF,QAAS,EACTS,SAAU,CAAEL,OAAQgF,IAExB,CACIpF,QAAS,EACTS,SAAU,CAAEL,OAAQxC,KAAK4F,QAAQsB,iBAErC,CACI9E,QAAS,EACTS,SAAU,CAAEL,OAAQxC,KAAK4F,QAAQuB,kBACpC7G,OAAAwH,EACG9H,KAAK0F,WAAa,CAClB,CACItD,QAAS,EACTS,SAAU,CAAEL,OAAQ+E,IAExB,CACInF,QAAS,EACTS,SAAU,CAAEL,OAAQiF,KAExB,OAIN3E,EAAiB9C,KAAKV,OAAOyD,qBAAqB,CACpDC,iBAAkB,CAAEf,KAoBxB,MAAO,CACH2B,SAlBoB5D,KAAKV,OAAO4D,sBAAsB,CACtDxC,MAAO,qBACPkC,OAAQE,EACRK,QAAS,CACLC,OAAQpD,KAAK2F,cAAce,QAC3BrD,WAAY,qBACZC,UAAW,CACPC,iBAAoBvD,KAAKN,eAAeC,EACxC6D,iBAAoBxD,KAAKN,eAAeE,EACxCmI,gBAAmB/H,KAAKkB,gBACxBuC,sBAAyBzD,KAAKC,sBAC9B+H,cAAiBhI,KAAKR,MACtByI,YAAeb,MAOvB1E,UAAAA,EAER,GAEA,CAAA7B,IAAA,WAAAC,MAKA,SAASgD,GAAM,IACwEC,EADxEC,EAAAC,EAC0DjE,KAAKO,WAAS,IAAnF,IAAAyD,EAAAE,MAAAH,EAAAC,EAAAG,KAAAC,MAAqF,CAAA,IAAAC,EAAAN,EAAAjD,MAAxE+C,EAAgBQ,EAAhBR,iBAAkB8D,EAAetD,EAAfsD,gBAAiBC,EAAevD,EAAfuD,gBAC5C9D,EAAKQ,YAAYT,EAAiBD,UAClCE,EAAKS,aAAa,EAAGV,EAAiBnB,WACtCoB,EAAKU,mBAAmBxE,KAAKsB,aAAa3B,EAAGK,KAAKsB,aAAa1B,EAAG,GAElE+H,EAAgBO,SAASpE,GAEzBA,EAAKQ,YAAYsD,EAAgBhE,UACjCE,EAAKS,aAAa,EAAGqD,EAAgBlF,WACrCoB,EAAKU,mBAAmBxE,KAAKsB,aAAa3B,EAAGK,KAAKsB,aAAa1B,EAAG,EACtE,CAAC,CAAA,MAAA6E,GAAAT,EAAAU,EAAAD,EAAA,CAAA,QAAAT,EAAAW,GAAA,CACL,IAAC,CApWgB"} \ No newline at end of file +{"version":3,"file":"radix-sort-umd.min.js","sources":["../../src/utils.js","../../src/PrefixSumKernel.js","../../src/shaders/optimizations/prefix_sum_no_bank_conflict.js","../../src/shaders/prefix_sum.js","../../src/shaders/radix_sort_reorder.js","../../src/shaders/check_sort.js","../../src/CheckSortKernel.js","../../src/RadixSortKernel.js","../../src/shaders/optimizations/radix_sort_local_shuffle.js","../../src/shaders/radix_sort.js"],"sourcesContent":["/**\r\n * Find the best dispatch size x and y dimensions to minimize unused workgroups\r\n * \r\n * @param {GPUDevice} device - The GPU device\r\n * @param {int} workgroup_count - Number of workgroups to dispatch\r\n * @returns \r\n */\r\nfunction find_optimal_dispatch_size(device, workgroup_count) {\r\n const dispatchSize = { \r\n x: workgroup_count, \r\n y: 1\r\n }\r\n\r\n if (workgroup_count > device.limits.maxComputeWorkgroupsPerDimension) {\r\n const x = Math.floor(Math.sqrt(workgroup_count))\r\n const y = Math.ceil(workgroup_count / x)\r\n \r\n dispatchSize.x = x\r\n dispatchSize.y = y\r\n }\r\n\r\n return dispatchSize\r\n}\r\n\r\nfunction create_buffer_from_data({device, label, data, usage = 0}) {\r\n const dispatchSizes = device.createBuffer({\r\n label: label,\r\n usage: usage,\r\n size: data.length * 4,\r\n mappedAtCreation: true\r\n })\r\n\r\n const dispatchData = new Uint32Array(dispatchSizes.getMappedRange())\r\n dispatchData.set(data)\r\n dispatchSizes.unmap()\r\n\r\n return dispatchSizes\r\n}\r\n\r\nexport {\r\n find_optimal_dispatch_size,\r\n create_buffer_from_data,\r\n}","import prefixSumSource from \"./shaders/prefix_sum\"\r\nimport prefixSumSource_NoBankConflict from \"./shaders/optimizations/prefix_sum_no_bank_conflict\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass PrefixSumKernel {\r\n /**\r\n * Perform a parallel prefix sum on the given data buffer\r\n * \r\n * Based on \"Parallel Prefix Sum (Scan) with CUDA\"\r\n * https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - Buffer containing the data to process\r\n * @param {number} count - Max number of elements to process\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} avoid_bank_conflicts - Use the \"Avoid bank conflicts\" optimization from the original publication\r\n */\r\n constructor({\r\n device,\r\n data,\r\n count,\r\n workgroup_size = { x: 16, y: 16 },\r\n avoid_bank_conflicts = false\r\n }) {\r\n this.device = device\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.items_per_workgroup = 2 * this.threads_per_workgroup // 2 items are processed per thread\r\n\r\n if (Math.log2(this.threads_per_workgroup) % 1 !== 0) \r\n throw new Error(`workgroup_size.x * workgroup_size.y must be a power of two. (current: ${this.threads_per_workgroup})`)\r\n\r\n this.pipelines = []\r\n\r\n this.shaderModule = this.device.createShaderModule({\r\n label: 'prefix-sum',\r\n code: avoid_bank_conflicts ? prefixSumSource_NoBankConflict : prefixSumSource,\r\n })\r\n\r\n this.create_pass_recursive(data, count)\r\n }\r\n\r\n create_pass_recursive(data, count) {\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n const workgroup_count = Math.ceil(count / this.items_per_workgroup)\r\n const dispatchSize = find_optimal_dispatch_size(this.device, workgroup_count)\r\n \r\n // Create buffer for block sums \r\n const blockSumBuffer = this.device.createBuffer({\r\n label: 'prefix-sum-block-sum',\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create bind group and pipeline layout\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n label: 'prefix-sum-bind-group',\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: data }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: blockSumBuffer }\r\n }\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n // Per-workgroup (block) prefix sum\r\n const scanPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-scan-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'reduce_downsweep',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ITEMS_PER_WORKGROUP': this.items_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: scanPipeline, bindGroup, dispatchSize })\r\n\r\n if (workgroup_count > 1) {\r\n // Prefix sum on block sums\r\n this.create_pass_recursive(blockSumBuffer, workgroup_count)\r\n\r\n // Add block sums to local prefix sums\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'prefix-sum-add-block-pipeline',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModule,\r\n entryPoint: 'add_block_sums',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup\r\n }\r\n }\r\n })\r\n\r\n this.pipelines.push({ pipeline: blockSumPipeline, bindGroup, dispatchSize })\r\n }\r\n }\r\n\r\n get_dispatch_chain() {\r\n return this.pipelines.flatMap(p => [ p.dispatchSize.x, p.dispatchSize.y, 1 ])\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n \r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n }\r\n }\r\n}\r\n\r\nexport default PrefixSumKernel","/**\r\n * Prefix sum with optimization to avoid bank conflicts\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst prefixSumNoBankConflictSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nconst NUM_BANKS: u32 = 32;\r\nconst LOG_NUM_BANKS: u32 = 5;\r\n\r\nfn get_offset(offset: u32) -> u32 {\r\n // return offset >> LOG_NUM_BANKS; // Conflict-free\r\n return (offset >> NUM_BANKS) + (offset >> (2 * LOG_NUM_BANKS)); // Zero bank conflict\r\n}\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n let ai: u32 = TID;\r\n let bi: u32 = TID + (ITEMS_PER_WORKGROUP >> 1);\r\n let s_ai = ai + get_offset(ai);\r\n let s_bi = bi + get_offset(bi);\r\n let g_ai = ai + WID * 2;\r\n let g_bi = bi + WID * 2;\r\n temp[s_ai] = items[g_ai];\r\n temp[s_bi] = items[g_bi];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n var last_offset = ITEMS_PER_WORKGROUP - 1;\r\n last_offset += get_offset(last_offset);\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n ai += get_offset(ai);\r\n bi += get_offset(bi);\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[g_ai] = temp[s_ai];\r\n items[g_bi] = temp[s_bi];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumNoBankConflictSource","const prefixSumSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var items: array;\r\n@group(0) @binding(1) var blockSums: array;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ITEMS_PER_WORKGROUP: u32;\r\n\r\nvar temp: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn reduce_downsweep(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n let ELM_TID = TID * 2; // Element pair local ID\r\n let ELM_GID = GID * 2; // Element pair global ID\r\n \r\n // Load input to shared memory\r\n temp[ELM_TID] = items[ELM_GID];\r\n temp[ELM_TID + 1] = items[ELM_GID + 1];\r\n\r\n var offset: u32 = 1;\r\n\r\n // Up-sweep (reduce) phase\r\n for (var d: u32 = ITEMS_PER_WORKGROUP >> 1; d > 0; d >>= 1) {\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n temp[bi] += temp[ai];\r\n }\r\n\r\n offset *= 2;\r\n }\r\n\r\n // Save workgroup sum and clear last element\r\n if (TID == 0) {\r\n let last_offset = ITEMS_PER_WORKGROUP - 1;\r\n\r\n blockSums[WORKGROUP_ID] = temp[last_offset];\r\n temp[last_offset] = 0;\r\n }\r\n\r\n // Down-sweep phase\r\n for (var d: u32 = 1; d < ITEMS_PER_WORKGROUP; d *= 2) {\r\n offset >>= 1;\r\n workgroupBarrier();\r\n\r\n if (TID < d) {\r\n var ai: u32 = offset * (ELM_TID + 1) - 1;\r\n var bi: u32 = offset * (ELM_TID + 2) - 1;\r\n\r\n let t: u32 = temp[ai];\r\n temp[ai] = temp[bi];\r\n temp[bi] += t;\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Copy result from shared memory to global memory\r\n items[ELM_GID] = temp[ELM_TID];\r\n items[ELM_GID + 1] = temp[ELM_TID + 1];\r\n}\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn add_block_sums(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n \r\n\r\n let ELM_ID = GID * 2;\r\n let blockSum = blockSums[WORKGROUP_ID];\r\n\r\n items[ELM_ID] += blockSum;\r\n items[ELM_ID + 1] += blockSum;\r\n}`\r\n\r\nexport default prefixSumSource","const radixSortReorderSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var inputKeys: array;\r\n@group(0) @binding(1) var outputKeys: array;\r\n@group(0) @binding(2) var local_prefix_sum: array;\r\n@group(0) @binding(3) var prefix_block_sum: array;\r\n@group(0) @binding(4) var inputValues: array;\r\n@group(0) @binding(5) var outputValues: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort_reorder(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) { \r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n if (GID >= ELEMENT_COUNT) {\r\n return;\r\n }\r\n\r\n let k = inputKeys[GID];\r\n let v = inputValues[GID];\r\n\r\n let local_prefix = local_prefix_sum[GID];\r\n\r\n // Calculate new position\r\n let extract_bits = (k >> CURRENT_BIT) & 0x3;\r\n let pid = extract_bits * WORKGROUP_COUNT + WORKGROUP_ID;\r\n let sorted_position = prefix_block_sum[pid] + local_prefix;\r\n \r\n outputKeys[sorted_position] = k;\r\n outputValues[sorted_position] = v;\r\n}`\r\n\r\nexport default radixSortReorderSource;","const checkSortSource = (isFirstPass = false, isLastPass = false, isFullCheck = false) => /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var output: array;\r\n@group(0) @binding(2) var original: array;\r\n@group(0) @binding(3) var is_sorted: u32;\r\n\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride ELEMENT_COUNT: u32;\r\noverride START_ELEMENT: u32;\r\n\r\nvar s_data: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn check_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP + START_ELEMENT;\r\n let GID = TID + WID; // Global thread ID\r\n\r\n // Load data into shared memory\r\n ${ isFirstPass ? first_pass_load_data : \"s_data[TID] = select(0u, input[GID], GID < ELEMENT_COUNT);\" }\r\n\r\n // Perform parallel reduction\r\n for (var d = 1u; d < THREADS_PER_WORKGROUP; d *= 2u) { \r\n workgroupBarrier(); \r\n if (TID % (2u * d) == 0u) {\r\n s_data[TID] += s_data[TID + d];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n // Write reduction result\r\n ${ isLastPass ? last_pass(isFullCheck) : write_reduction_result }\r\n}`\r\n\r\nconst write_reduction_result = /* wgsl */ `\r\n if (TID == 0) {\r\n output[WORKGROUP_ID] = s_data[0];\r\n }\r\n`\r\n\r\nconst first_pass_load_data = /* wgsl */ `\r\n let LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n\r\n // Load current element into shared memory\r\n // Also load next element for comparison\r\n let elm = select(0u, input[GID], GID < ELEMENT_COUNT);\r\n let next = select(0u, input[GID + 1], GID < ELEMENT_COUNT-1);\r\n s_data[TID] = elm;\r\n workgroupBarrier();\r\n\r\n s_data[TID] = select(0u, 1u, GID < ELEMENT_COUNT-1 && elm > next);\r\n`\r\n\r\nconst last_pass = (isFullCheck) => /* wgsl */ `\r\n let fullDispatchLength = arrayLength(&output);\r\n let dispatchIndex = TID * 3;\r\n\r\n if (dispatchIndex >= fullDispatchLength) {\r\n return;\r\n }\r\n\r\n ${isFullCheck ? last_pass_full : last_pass_fast}\r\n`\r\n\r\nconst last_pass_fast = /* wgsl */ `\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] == 0 && is_sorted == 0u);\r\n`\r\n\r\nconst last_pass_full = /* wgsl */ `\r\n if (TID == 0 && s_data[0] == 0) {\r\n is_sorted = 1u;\r\n }\r\n\r\n output[dispatchIndex] = select(0, original[dispatchIndex], s_data[0] != 0);\r\n`\r\nexport default checkSortSource","import checkSortSource from \"./shaders/check_sort\"\r\nimport { find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass CheckSortKernel {\r\n /**\r\n * CheckSortKernel - Performs a parralel reduction to check if an array is sorted.\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} data - The buffer containing the data to check\r\n * @param {GPUBuffer} result - The result dispatch size buffer\r\n * @param {GPUBuffer} original - The original dispatch size buffer\r\n * @param {GPUBuffer} is_sorted - 1-element buffer to store whether the array is sorted\r\n * @param {number} count - The number of elements to check\r\n * @param {number} start - The index to start checking from\r\n * @param {boolean} full_check - Whether this kernel is performing a full check or a fast check\r\n * @param {object} workgroup_size - The workgroup size in x and y dimensions\r\n */\r\n constructor({\r\n device,\r\n data,\r\n result,\r\n original,\r\n is_sorted,\r\n count,\r\n start = 0,\r\n full_check = true,\r\n workgroup_size = { x: 16, y: 16 },\r\n }) {\r\n this.device = device\r\n this.count = count\r\n this.start = start\r\n this.full_check = full_check\r\n this.workgroup_size = workgroup_size\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n\r\n this.pipelines = []\r\n\r\n this.buffers = {\r\n data, \r\n result, \r\n original, \r\n is_sorted,\r\n outputs: []\r\n }\r\n\r\n this.create_passes_recursive(data, count)\r\n }\r\n\r\n // Find the best dispatch size for each pass to minimize unused workgroups\r\n static find_optimal_dispatch_chain(device, item_count, workgroup_size) {\r\n const threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n const sizes = []\r\n\r\n do {\r\n // Number of workgroups required to process all items\r\n const target_workgroup_count = Math.ceil(item_count / threads_per_workgroup)\r\n \r\n // Optimal dispatch size and updated workgroup count\r\n const dispatchSize = find_optimal_dispatch_size(device, target_workgroup_count)\r\n \r\n sizes.push(dispatchSize.x, dispatchSize.y, 1)\r\n item_count = target_workgroup_count\r\n } while (item_count > 1)\r\n \r\n return sizes\r\n }\r\n\r\n create_passes_recursive(buffer, count, passIndex = 0) {\r\n const workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n\r\n const isFirstPass = passIndex === 0\r\n const isLastPass = workgroup_count <= 1\r\n\r\n const outputBuffer = isLastPass ? this.buffers.result : this.device.createBuffer({\r\n label: `check-sort-${this.full_check ? 'full' : 'fast'}-${passIndex}`,\r\n size: workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n // Last pass bindings\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n }, {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: buffer }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outputBuffer }\r\n },\r\n // Last pass buffers\r\n ...(isLastPass ? [{\r\n binding: 2,\r\n resource: { buffer: this.buffers.original }\r\n }, {\r\n binding: 3,\r\n resource: { buffer: this.buffers.is_sorted }\r\n }] : []),\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [bindGroupLayout]\r\n })\r\n\r\n const element_count = isFirstPass ? this.start + count : count\r\n const start_element = isFirstPass ? this.start : 0\r\n\r\n const checkSortPipeline = this.device.createComputePipeline({\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.device.createShaderModule({\r\n code: checkSortSource(isFirstPass, isLastPass, this.full_check),\r\n label: 'check-sort',\r\n }),\r\n entryPoint: 'check_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': element_count,\r\n 'START_ELEMENT': start_element,\r\n },\r\n }\r\n })\r\n\r\n this.buffers.outputs.push(outputBuffer)\r\n this.pipelines.push({ pipeline: checkSortPipeline, bindGroup })\r\n \r\n if (!isLastPass) {\r\n this.create_passes_recursive(outputBuffer, workgroup_count, passIndex + 1)\r\n }\r\n }\r\n\r\n dispatch(pass, dispatchSize, offset = 0) {\r\n for (let i = 0; i < this.pipelines.length; i++) {\r\n const { pipeline, bindGroup } = this.pipelines[i]\r\n\r\n const dispatchIndirect = (this.full_check || i < this.pipelines.length - 1)\r\n\r\n pass.setPipeline(pipeline)\r\n pass.setBindGroup(0, bindGroup)\r\n\r\n if (dispatchIndirect)\r\n pass.dispatchWorkgroupsIndirect(dispatchSize, offset + i * 3 * 4)\r\n else\r\n // Only the last dispatch of the fast check kernel is constant to (1, 1, 1)\r\n pass.dispatchWorkgroups(1, 1, 1)\r\n }\r\n }\r\n}\r\n\r\nexport default CheckSortKernel","import PrefixSumKernel from \"./PrefixSumKernel\"\r\nimport radixSortSource from \"./shaders/radix_sort\"\r\nimport radixSortSource_LocalShuffle from \"./shaders/optimizations/radix_sort_local_shuffle\"\r\nimport reorderSource from \"./shaders/radix_sort_reorder\"\r\nimport CheckSortKernel from \"./CheckSortKernel\"\r\nimport { create_buffer_from_data, find_optimal_dispatch_size } from \"./utils\"\r\n\r\nclass RadixSortKernel {\r\n /**\r\n * Perform a parallel radix sort on the GPU given a buffer of keys and (optionnaly) values\r\n * Note: The buffers are sorted in-place.\r\n * \r\n * Based on \"Fast 4-way parallel radix sorting on GPUs\"\r\n * https://www.sci.utah.edu/~csilva/papers/cgf.pdf]\r\n * \r\n * @param {GPUDevice} device\r\n * @param {GPUBuffer} keys - Buffer containing the keys to sort\r\n * @param {GPUBuffer} values - (optional) Buffer containing the associated values\r\n * @param {number} count - Number of elements to sort\r\n * @param {number} bit_count - Number of bits per element (default: 32)\r\n * @param {object} workgroup_size - Workgroup size in x and y dimensions. (x * y) must be a power of two\r\n * @param {boolean} check_order - Enable \"order checking\" optimization. Useful if the data needs to be sorted in real-time and doesn't change much. (default: false)\r\n * @param {boolean} local_shuffle - Enable \"local shuffling\" optimization for the radix sort kernel (default: false)\r\n * @param {boolean} avoid_bank_conflicts - Enable \"avoiding bank conflicts\" optimization for the prefix sum kernel (default: false)\r\n */\r\n constructor({\r\n device,\r\n keys,\r\n values,\r\n count,\r\n bit_count = 32,\r\n workgroup_size = { x: 16, y: 16 },\r\n check_order = false,\r\n local_shuffle = false,\r\n avoid_bank_conflicts = false,\r\n } = {}) {\r\n if (device == null) throw new Error('No device provided')\r\n if (keys == null) throw new Error('No keys buffer provided')\r\n if (!Number.isInteger(count) || count <= 0) throw new Error('Invalid count parameter')\r\n if (!Number.isInteger(bit_count) || bit_count <= 0 || bit_count > 32) throw new Error('Invalid bit_count parameter')\r\n if (!Number.isInteger(workgroup_size.x) || !Number.isInteger(workgroup_size.y)) throw new Error('Invalid workgroup_size parameter')\r\n if (bit_count % 4 != 0) throw new Error('bit_count must be a multiple of 4')\r\n\r\n this.device = device\r\n this.count = count\r\n this.bit_count = bit_count\r\n this.workgroup_size = workgroup_size\r\n this.check_order = check_order\r\n this.local_shuffle = local_shuffle\r\n this.avoid_bank_conflicts = avoid_bank_conflicts\r\n\r\n this.threads_per_workgroup = workgroup_size.x * workgroup_size.y\r\n this.workgroup_count = Math.ceil(count / this.threads_per_workgroup)\r\n this.prefix_block_workgroup_count = 4 * this.workgroup_count\r\n\r\n this.has_values = (values != null) // Is the values buffer provided ?\r\n\r\n this.dispatchSize = {} // Dispatch dimension x and y\r\n this.shaderModules = {} // GPUShaderModules\r\n this.buffers = {} // GPUBuffers\r\n this.pipelines = [] // List of passes\r\n this.kernels = {}\r\n\r\n // Find best dispatch x and y dimensions to minimize unused threads\r\n this.dispatchSize = find_optimal_dispatch_size(this.device, this.workgroup_count)\r\n\r\n // Create shader modules from wgsl code\r\n this.create_shader_modules()\r\n \r\n // Create multi-pass pipelines\r\n this.create_pipelines(keys, values)\r\n }\r\n\r\n create_shader_modules() {\r\n // Remove every occurence of \"values\" in the shader code if values buffer is not provided\r\n const remove_values = (source) => {\r\n return source.split('\\n')\r\n .filter(line => !line.toLowerCase().includes('values'))\r\n .join('\\n')\r\n }\r\n\r\n const blockSumSource = this.local_shuffle ? radixSortSource_LocalShuffle : radixSortSource\r\n \r\n this.shaderModules = {\r\n blockSum: this.device.createShaderModule({\r\n label: 'radix-sort-block-sum',\r\n code: this.has_values ? blockSumSource : remove_values(blockSumSource),\r\n }),\r\n reorder: this.device.createShaderModule({\r\n label: 'radix-sort-reorder',\r\n code: this.has_values ? reorderSource : remove_values(reorderSource),\r\n })\r\n }\r\n }\r\n\r\n create_pipelines(keys, values) { \r\n // Block prefix sum kernel \r\n const { prefixSumKernel, prefixBlockSumBuffer } = this.create_prefix_sum_kernel()\r\n\r\n // Indirect dispatch buffers\r\n const dispatchData = this.calculate_dispatch_sizes(prefixSumKernel)\r\n\r\n // GPU buffers\r\n this.create_buffers(keys, values, prefixBlockSumBuffer, dispatchData)\r\n\r\n // Check sort kernels\r\n this.create_check_sort_kernels(this.buffers.keys, dispatchData)\r\n\r\n // Radix sort passes for every 2 bits\r\n for (let bit = 0; bit < this.bit_count; bit += 2) {\r\n // Swap buffers every pass\r\n const even = (bit % 4 == 0)\r\n const inKeys = even ? this.buffers.keys : this.buffers.tmpKeys\r\n const inValues = even ? this.buffers.values : this.buffers.tmpValues\r\n const outKeys = even ? this.buffers.tmpKeys : this.buffers.keys\r\n const outValues = even ? this.buffers.tmpValues : this.buffers.values\r\n\r\n // Compute local prefix sums and block sums\r\n const blockSumPipeline = this.create_block_sum_pipeline(inKeys, inValues, bit)\r\n \r\n // Reorder keys and values\r\n const reorderPipeline = this.create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit)\r\n\r\n this.pipelines.push({ blockSumPipeline, reorderPipeline })\r\n }\r\n }\r\n\r\n create_prefix_sum_kernel() {\r\n // Prefix Block Sum buffer (4 element per workgroup)\r\n const prefixBlockSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-prefix-block-sum',\r\n size: this.prefix_block_workgroup_count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Create block prefix sum kernel\r\n const prefixSumKernel = new PrefixSumKernel({ \r\n device: this.device,\r\n data: prefixBlockSumBuffer, \r\n count: this.prefix_block_workgroup_count,\r\n workgroup_size: this.workgroup_size,\r\n avoid_bank_conflicts: this.avoid_bank_conflicts,\r\n })\r\n\r\n this.kernels.prefixSum = prefixSumKernel\r\n\r\n return { prefixSumKernel, prefixBlockSumBuffer }\r\n }\r\n\r\n calculate_dispatch_sizes(prefixSumKernel) {\r\n // Prefix sum dispatch sizes\r\n const prefixSumDispatchSize = prefixSumKernel.get_dispatch_chain()\r\n\r\n // Check sort element count (fast/full)\r\n const check_sort_fast_count = Math.min(this.count, this.threads_per_workgroup * 4)\r\n const check_sort_full_count = this.count - check_sort_fast_count\r\n const start_full = check_sort_fast_count - 1\r\n\r\n // Check sort dispatch sizes\r\n const dispatchSizesFast = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_fast_count, this.workgroup_size)\r\n const dispatchSizesFull = CheckSortKernel.find_optimal_dispatch_chain(this.device, check_sort_full_count, this.workgroup_size)\r\n\r\n // Initial dispatch sizes\r\n const initialDispatch = [\r\n this.dispatchSize.x, this.dispatchSize.y, 1, // Radix Sort + Reorder\r\n ...dispatchSizesFast.slice(0, 3), // Check sort fast\r\n ...prefixSumDispatchSize // Prefix Sum\r\n ]\r\n\r\n // Dispatch offsets in main buffer\r\n this.dispatchOffsets = {\r\n radix_sort: 0,\r\n check_sort_fast: 3 * 4,\r\n prefix_sum: 6 * 4\r\n }\r\n\r\n return {\r\n initialDispatch,\r\n dispatchSizesFull,\r\n check_sort_fast_count, \r\n check_sort_full_count, \r\n start_full \r\n }\r\n }\r\n\r\n create_buffers(keys, values, prefixBlockSumBuffer, dispatchData) {\r\n // Keys and values double buffering\r\n const tmpKeysBuffer = this.device.createBuffer({\r\n label: 'radix-sort-tmp-keys',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n const tmpValuesBuffer = !this.has_values ? null : this.device.createBuffer({\r\n label: 'radix-sort-tmp-values',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Local Prefix Sum buffer (1 element per item)\r\n const localPrefixSumBuffer = this.device.createBuffer({\r\n label: 'radix-sort-local-prefix-sum',\r\n size: this.count * 4,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n\r\n // Dispatch sizes (radix sort, check sort, prefix sum)\r\n const dispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const originalDispatchBuffer = create_buffer_from_data({\r\n device: this.device, \r\n label: 'radix-sort-dispatch-size-original',\r\n data: dispatchData.initialDispatch, \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Dispatch sizes (full sort)\r\n const checkSortFullDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.INDIRECT\r\n })\r\n const checkSortFullOriginalDispatchBuffer = create_buffer_from_data({\r\n label: 'check-sort-full-dispatch-size-original',\r\n device: this.device, \r\n data: dispatchData.dispatchSizesFull,\r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC\r\n })\r\n\r\n // Flag to tell if the data is sorted\r\n const isSortedBuffer = create_buffer_from_data({\r\n label: 'is-sorted',\r\n device: this.device, \r\n data: new Uint32Array([0]), \r\n usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST\r\n })\r\n \r\n this.buffers = {\r\n keys: keys,\r\n values: values,\r\n tmpKeys: tmpKeysBuffer,\r\n tmpValues: tmpValuesBuffer,\r\n localPrefixSum: localPrefixSumBuffer,\r\n prefixBlockSum: prefixBlockSumBuffer,\r\n \r\n dispatchSize: dispatchBuffer,\r\n originalDispatchSize: originalDispatchBuffer,\r\n checkSortFullDispatchSize: checkSortFullDispatchBuffer,\r\n originalCheckSortFullDispatchSize: checkSortFullOriginalDispatchBuffer,\r\n isSorted: isSortedBuffer,\r\n }\r\n }\r\n\r\n create_check_sort_kernels(inKeys, checkSortPartitionData) {\r\n // Skip check sort if disabled\r\n if (!this.check_order) {\r\n return [ null, null ]\r\n }\r\n\r\n const { check_sort_fast_count, check_sort_full_count, start_full } = checkSortPartitionData\r\n\r\n // Create the full pass\r\n const checkSortFull = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.dispatchSize,\r\n original: this.buffers.originalDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_full_count,\r\n start: start_full,\r\n full_check: true,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n // Create the fast pass\r\n const checkSortFast = new CheckSortKernel({\r\n device: this.device,\r\n data: inKeys,\r\n result: this.buffers.checkSortFullDispatchSize,\r\n original: this.buffers.originalCheckSortFullDispatchSize,\r\n is_sorted: this.buffers.isSorted,\r\n count: check_sort_fast_count,\r\n full_check: false,\r\n workgroup_size: this.workgroup_size\r\n })\r\n\r\n if (checkSortFast.threads_per_workgroup < checkSortFull.pipelines.length) {\r\n console.warn(`Warning: workgroup size is too small to enable check sort optimization, disabling...`)\r\n this.check_order = false\r\n return [ null, null ]\r\n }\r\n\r\n this.kernels.checkSortFast = checkSortFast\r\n this.kernels.checkSortFull = checkSortFull\r\n }\r\n\r\n create_block_sum_pipeline(inKeys, inValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-block-sum',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: this.local_shuffle ? 'storage' : 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n // \"Local shuffle\" optimization needs access to the values buffer\r\n ...(this.local_shuffle && this.has_values ? [{\r\n binding: 3,\r\n resource: { buffer: inValues }\r\n }] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const blockSumPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-block-sum',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.blockSum,\r\n entryPoint: 'radix_sort',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: blockSumPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n create_reorder_pipeline(inKeys, inValues, outKeys, outValues, bit) {\r\n const bindGroupLayout = this.device.createBindGroupLayout({\r\n label: 'radix-sort-reorder',\r\n entries: [\r\n {\r\n binding: 0,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 1,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n },\r\n {\r\n binding: 2,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 3,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'read-only-storage' }\r\n },\r\n {\r\n binding: 5,\r\n visibility: GPUShaderStage.COMPUTE,\r\n buffer: { type: 'storage' }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const bindGroup = this.device.createBindGroup({\r\n layout: bindGroupLayout,\r\n entries: [\r\n {\r\n binding: 0,\r\n resource: { buffer: inKeys }\r\n },\r\n {\r\n binding: 1,\r\n resource: { buffer: outKeys }\r\n },\r\n {\r\n binding: 2,\r\n resource: { buffer: this.buffers.localPrefixSum }\r\n },\r\n {\r\n binding: 3,\r\n resource: { buffer: this.buffers.prefixBlockSum }\r\n },\r\n ...(this.has_values ? [\r\n {\r\n binding: 4,\r\n resource: { buffer: inValues }\r\n },\r\n {\r\n binding: 5,\r\n resource: { buffer: outValues }\r\n }\r\n ] : [])\r\n ]\r\n })\r\n\r\n const pipelineLayout = this.device.createPipelineLayout({\r\n bindGroupLayouts: [ bindGroupLayout ]\r\n })\r\n\r\n const reorderPipeline = this.device.createComputePipeline({\r\n label: 'radix-sort-reorder',\r\n layout: pipelineLayout,\r\n compute: {\r\n module: this.shaderModules.reorder,\r\n entryPoint: 'radix_sort_reorder',\r\n constants: {\r\n 'WORKGROUP_SIZE_X': this.workgroup_size.x,\r\n 'WORKGROUP_SIZE_Y': this.workgroup_size.y,\r\n 'WORKGROUP_COUNT': this.workgroup_count,\r\n 'THREADS_PER_WORKGROUP': this.threads_per_workgroup,\r\n 'ELEMENT_COUNT': this.count,\r\n 'CURRENT_BIT': bit,\r\n }\r\n }\r\n })\r\n\r\n return {\r\n pipeline: reorderPipeline,\r\n bindGroup\r\n }\r\n }\r\n\r\n /**\r\n * Encode all pipelines into the current pass\r\n * \r\n * @param {GPUComputePassEncoder} pass \r\n */\r\n dispatch(pass) { \r\n for (let i = 0; i < this.bit_count / 2; i++) {\r\n const { blockSumPipeline, reorderPipeline } = this.pipelines[i]\r\n\r\n if (this.check_order && i % 2 == 0) {\r\n this.kernels.checkSortFast.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.check_sort_fast)\r\n this.kernels.checkSortFull.dispatch(pass, this.buffers.checkSortFullDispatchSize)\r\n }\r\n \r\n pass.setPipeline(blockSumPipeline.pipeline)\r\n pass.setBindGroup(0, blockSumPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n\r\n this.kernels.prefixSum.dispatch(pass, this.buffers.dispatchSize, this.dispatchOffsets.prefix_sum)\r\n\r\n pass.setPipeline(reorderPipeline.pipeline)\r\n pass.setBindGroup(0, reorderPipeline.bindGroup)\r\n pass.dispatchWorkgroupsIndirect(this.buffers.dispatchSize, this.dispatchOffsets.radix_sort)\r\n }\r\n }\r\n}\r\n\r\nexport default RadixSortKernel","/**\r\n * Radix sort with \"local shuffle and coalesced mapping\" optimization\r\n * \r\n * (see Implementation section in README for details)\r\n */\r\nconst radixSortCoalescedSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n@group(0) @binding(3) var values: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\nvar s_prefix_sum_scan: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let val = values[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n let prefix_sum = bit_prefix_sums[extract_bits]; \r\n\r\n // Scan bit prefix sums\r\n if (TID == LAST_THREAD) {\r\n var sum: u32 = 0;\r\n bit_prefix_sums[extract_bits] += 1;\r\n for (var i: u32 = 0; i < 4; i++) {\r\n s_prefix_sum_scan[i] = sum;\r\n sum += bit_prefix_sums[i];\r\n }\r\n }\r\n workgroupBarrier();\r\n\r\n if (GID < ELEMENT_COUNT) {\r\n // Compute new position\r\n let new_pos: u32 = prefix_sum + s_prefix_sum_scan[extract_bits];\r\n\r\n // Shuffle elements locally\r\n input[WID + new_pos] = elm;\r\n values[WID + new_pos] = val;\r\n local_prefix_sums[WID + new_pos] = prefix_sum;\r\n }\r\n}`\r\n\r\nexport default radixSortCoalescedSource;","const radixSortSource = /* wgsl */ `\r\n\r\n@group(0) @binding(0) var input: array;\r\n@group(0) @binding(1) var local_prefix_sums: array;\r\n@group(0) @binding(2) var block_sums: array;\r\n\r\noverride WORKGROUP_COUNT: u32;\r\noverride THREADS_PER_WORKGROUP: u32;\r\noverride WORKGROUP_SIZE_X: u32;\r\noverride WORKGROUP_SIZE_Y: u32;\r\noverride CURRENT_BIT: u32;\r\noverride ELEMENT_COUNT: u32;\r\n\r\nvar s_prefix_sum: array;\r\n\r\n@compute @workgroup_size(WORKGROUP_SIZE_X, WORKGROUP_SIZE_Y, 1)\r\nfn radix_sort(\r\n @builtin(workgroup_id) w_id: vec3,\r\n @builtin(num_workgroups) w_dim: vec3,\r\n @builtin(local_invocation_index) TID: u32, // Local thread ID\r\n) {\r\n let WORKGROUP_ID = w_id.x + w_id.y * w_dim.x;\r\n let WID = WORKGROUP_ID * THREADS_PER_WORKGROUP;\r\n let GID = WID + TID; // Global thread ID\r\n\r\n // Extract 2 bits from the input\r\n let elm = input[GID];\r\n let extract_bits: u32 = (elm >> CURRENT_BIT) & 0x3;\r\n\r\n var bit_prefix_sums = array(0, 0, 0, 0);\r\n\r\n // If the workgroup is inactive, prevent block_sums buffer update\r\n var LAST_THREAD: u32 = 0xffffffff; \r\n\r\n if (WORKGROUP_ID < WORKGROUP_COUNT) {\r\n // Otherwise store the index of the last active thread in the workgroup\r\n LAST_THREAD = min(THREADS_PER_WORKGROUP, ELEMENT_COUNT - WID) - 1;\r\n }\r\n\r\n // Initialize parameters for double-buffering\r\n let TPW = THREADS_PER_WORKGROUP + 1;\r\n var swapOffset: u32 = 0;\r\n var inOffset: u32 = TID;\r\n var outOffset: u32 = TID + TPW;\r\n\r\n // 4-way prefix sum\r\n for (var b: u32 = 0; b < 4; b++) {\r\n // Initialize local prefix with bitmask\r\n let bitmask = select(0u, 1u, extract_bits == b);\r\n s_prefix_sum[inOffset + 1] = bitmask;\r\n workgroupBarrier();\r\n\r\n // Prefix sum\r\n for (var offset: u32 = 1; offset < THREADS_PER_WORKGROUP; offset *= 2) {\r\n if (TID >= offset) {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset] + s_prefix_sum[inOffset - offset];\r\n } else {\r\n s_prefix_sum[outOffset] = s_prefix_sum[inOffset];\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n \r\n workgroupBarrier();\r\n }\r\n\r\n // Store prefix sum for current bit\r\n let prefix_sum = s_prefix_sum[inOffset];\r\n bit_prefix_sums[b] = prefix_sum;\r\n\r\n if (TID == LAST_THREAD) {\r\n // Store block sum to global memory\r\n let total_sum: u32 = prefix_sum + bitmask;\r\n block_sums[b * WORKGROUP_COUNT + WORKGROUP_ID] = total_sum;\r\n }\r\n\r\n // Swap buffers\r\n outOffset = inOffset;\r\n swapOffset = TPW - swapOffset;\r\n inOffset = TID + swapOffset;\r\n }\r\n\r\n // Store local prefix sum to global memory\r\n local_prefix_sums[GID] = bit_prefix_sums[extract_bits];\r\n}`\r\n\r\nexport default radixSortSource;"],"names":["find_optimal_dispatch_size","device","workgroup_count","dispatchSize","x","y","limits","maxComputeWorkgroupsPerDimension","Math","floor","sqrt","ceil","create_buffer_from_data","_ref","label","data","_ref$usage","usage","dispatchSizes","createBuffer","size","length","mappedAtCreation","Uint32Array","getMappedRange","set","unmap","PrefixSumKernel","_createClass","count","_ref$workgroup_size","workgroup_size","_ref$avoid_bank_confl","avoid_bank_conflicts","_classCallCheck","this","threads_per_workgroup","items_per_workgroup","log2","Error","concat","pipelines","shaderModule","createShaderModule","code","create_pass_recursive","key","value","blockSumBuffer","GPUBufferUsage","STORAGE","COPY_SRC","COPY_DST","bindGroupLayout","createBindGroupLayout","entries","binding","visibility","GPUShaderStage","COMPUTE","buffer","type","bindGroup","createBindGroup","layout","resource","pipelineLayout","createPipelineLayout","bindGroupLayouts","scanPipeline","createComputePipeline","compute","module","entryPoint","constants","WORKGROUP_SIZE_X","WORKGROUP_SIZE_Y","THREADS_PER_WORKGROUP","ITEMS_PER_WORKGROUP","push","pipeline","blockSumPipeline","flatMap","p","pass","offset","arguments","undefined","i","_this$pipelines$i","setPipeline","setBindGroup","dispatchWorkgroupsIndirect","radixSortReorderSource","checkSortSource","isLastPass","isFullCheck","first_pass_load_data","last_pass","write_reduction_result","last_pass_full","last_pass_fast","CheckSortKernel","result","original","is_sorted","_ref$start","start","_ref$full_check","full_check","buffers","outputs","create_passes_recursive","passIndex","isFirstPass","outputBuffer","_toConsumableArray","element_count","start_element","checkSortPipeline","ELEMENT_COUNT","START_ELEMENT","dispatchIndirect","dispatchWorkgroups","item_count","sizes","target_workgroup_count","RadixSortKernel","keys","values","_ref$bit_count","bit_count","_ref$check_order","check_order","_ref$local_shuffle","local_shuffle","Number","isInteger","prefix_block_workgroup_count","has_values","shaderModules","kernels","create_shader_modules","create_pipelines","remove_values","source","split","filter","line","toLowerCase","includes","join","blockSumSource","blockSum","reorder","reorderSource","_this$create_prefix_s","create_prefix_sum_kernel","prefixSumKernel","prefixBlockSumBuffer","dispatchData","calculate_dispatch_sizes","create_buffers","create_check_sort_kernels","bit","even","inKeys","tmpKeys","inValues","tmpValues","outKeys","outValues","create_block_sum_pipeline","reorderPipeline","create_reorder_pipeline","prefixSum","prefixSumDispatchSize","get_dispatch_chain","check_sort_fast_count","min","check_sort_full_count","start_full","dispatchSizesFast","find_optimal_dispatch_chain","dispatchSizesFull","initialDispatch","slice","dispatchOffsets","radix_sort","check_sort_fast","prefix_sum","tmpKeysBuffer","tmpValuesBuffer","localPrefixSumBuffer","dispatchBuffer","INDIRECT","originalDispatchBuffer","checkSortFullDispatchBuffer","checkSortFullOriginalDispatchBuffer","isSortedBuffer","localPrefixSum","prefixBlockSum","originalDispatchSize","checkSortFullDispatchSize","originalCheckSortFullDispatchSize","isSorted","checkSortPartitionData","checkSortFull","checkSortFast","console","warn","WORKGROUP_COUNT","CURRENT_BIT","dispatch"],"mappings":"qoDAOA,SAASA,EAA2BC,EAAQC,GACxC,IAAMC,EAAe,CACjBC,EAAGF,EACHG,EAAG,GAGP,GAAIH,EAAkBD,EAAOK,OAAOC,iCAAkC,CAClE,IAAMH,EAAII,KAAKC,MAAMD,KAAKE,KAAKR,IACzBG,EAAIG,KAAKG,KAAKT,EAAkBE,GAEtCD,EAAaC,EAAIA,EACjBD,EAAaE,EAAIA,CACrB,CAEA,OAAOF,CACX,CAEA,SAASS,EAAuBC,GAAmC,IAAjCZ,EAAMY,EAANZ,OAAQa,EAAKD,EAALC,MAAOC,EAAIF,EAAJE,KAAIC,EAAAH,EAAEI,MAAAA,OAAQ,IAAHD,EAAG,EAACA,EACtDE,EAAgBjB,EAAOkB,aAAa,CACtCL,MAAOA,EACPG,MAAOA,EACPG,KAAoB,EAAdL,EAAKM,OACXC,kBAAkB,IAOtB,OAJqB,IAAIC,YAAYL,EAAcM,kBACtCC,IAAIV,GACjBG,EAAcQ,QAEPR,CACX,CCjCMS,IAAAA,EAAe,WAoChB,OAAAC,GAvBD,SAAAD,EAAAd,GAMG,IALCZ,EAAMY,EAANZ,OACAc,EAAIF,EAAJE,KACAc,EAAKhB,EAALgB,MAAKC,EAAAjB,EACLkB,eAAAA,OAAiB,IAAHD,EAAG,CAAE1B,EAAG,GAAIC,EAAG,IAAIyB,EAAAE,EAAAnB,EACjCoB,qBAAAA,OAAuB,IAAHD,GAAQA,EAO5B,GAP4BE,OAAAP,GAE5BQ,KAAKlC,OAASA,EACdkC,KAAKJ,eAAiBA,EACtBI,KAAKC,sBAAwBL,EAAe3B,EAAI2B,EAAe1B,EAC/D8B,KAAKE,oBAAsB,EAAIF,KAAKC,sBAEhC5B,KAAK8B,KAAKH,KAAKC,uBAAyB,GAAM,EAC9C,MAAM,IAAIG,MAAKC,yEAAAA,OAA0EL,KAAKC,sBAAqB,MAEvHD,KAAKM,UAAY,GAEjBN,KAAKO,aAAeP,KAAKlC,OAAO0C,mBAAmB,CAC/C7B,MAAO,aACP8B,KAAMX,EC4EhB,kuGCvBA,+oFFlDME,KAAKU,sBAAsB9B,EAAMc,EACrC,GAAC,CAAA,CAAAiB,IAAA,wBAAAC,MAED,SAAsBhC,EAAMc,GAExB,IAAM3B,EAAkBM,KAAKG,KAAKkB,EAAQM,KAAKE,qBACzClC,EAAeH,EAA2BmC,KAAKlC,OAAQC,GAGvD8C,EAAiBb,KAAKlC,OAAOkB,aAAa,CAC5CL,MAAO,uBACPM,KAAwB,EAAlBlB,EACNe,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAIvEC,EAAkBlB,KAAKlC,OAAOqD,sBAAsB,CACtDC,QAAS,CACL,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,YAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,eAKtBC,EAAY3B,KAAKlC,OAAO8D,gBAAgB,CAC1CjD,MAAO,wBACPkD,OAAQX,EACRE,QAAS,CACL,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQ7C,IAExB,CACIyC,QAAS,EACTS,SAAU,CAAEL,OAAQZ,OAK1BkB,EAAiB/B,KAAKlC,OAAOkE,qBAAqB,CACpDC,iBAAkB,CAAEf,KAIlBgB,EAAelC,KAAKlC,OAAOqE,sBAAsB,CACnDxD,MAAO,2BACPkD,OAAQE,EACRK,QAAS,CACLC,OAAQrC,KAAKO,aACb+B,WAAY,mBACZC,UAAW,CACPC,iBAAoBxC,KAAKJ,eAAe3B,EACxCwE,iBAAoBzC,KAAKJ,eAAe1B,EACxCwE,sBAAyB1C,KAAKC,sBAC9B0C,oBAAuB3C,KAAKE,wBAOxC,GAFAF,KAAKM,UAAUsC,KAAK,CAAEC,SAAUX,EAAcP,UAAAA,EAAW3D,aAAAA,IAErDD,EAAkB,EAAG,CAErBiC,KAAKU,sBAAsBG,EAAgB9C,GAG3C,IAAM+E,EAAmB9C,KAAKlC,OAAOqE,sBAAsB,CACvDxD,MAAO,gCACPkD,OAAQE,EACRK,QAAS,CACLC,OAAQrC,KAAKO,aACb+B,WAAY,iBACZC,UAAW,CACPC,iBAAoBxC,KAAKJ,eAAe3B,EACxCwE,iBAAoBzC,KAAKJ,eAAe1B,EACxCwE,sBAAyB1C,KAAKC,0BAK1CD,KAAKM,UAAUsC,KAAK,CAAEC,SAAUC,EAAkBnB,UAAAA,EAAW3D,aAAAA,GACjE,CACJ,GAAC,CAAA2C,IAAA,qBAAAC,MAED,WACI,OAAOZ,KAAKM,UAAUyC,SAAQ,SAAAC,GAAC,MAAI,CAAEA,EAAEhF,aAAaC,EAAG+E,EAAEhF,aAAaE,EAAG,KAC7E,GAAC,CAAAyC,IAAA,WAAAC,MAED,SAASqC,EAAMjF,GACX,IADqC,IAAZkF,EAAMC,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,GAAAA,UAAA,GAAG,EACzBE,EAAI,EAAGA,EAAIrD,KAAKM,UAAUpB,OAAQmE,IAAK,CAC5C,IAAAC,EAAgCtD,KAAKM,UAAU+C,GAAvCR,EAAQS,EAART,SAAUlB,EAAS2B,EAAT3B,UAElBsB,EAAKM,YAAYV,GACjBI,EAAKO,aAAa,EAAG7B,GACrBsB,EAAKQ,2BAA2BzF,EAAckF,EAAa,EAAJG,EAAQ,EACnE,CACJ,IAAC,CA1IgB,GGJfK,EA0CJ,y9CC1CIC,EAAkB,WAAH,IAAyBC,EAAUT,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,IAAAA,UAAA,GAAUU,EAAWV,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,IAAAA,UAAA,GAAQ,07BAAK9C,OAAtD8C,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,IAAAA,UAAA,GA0BfW,EAAuB,6DAA4D,8SAAAzD,OAYjGuD,EAAaG,EAAUF,GAAeG,EAAsB,MAAA,EAG7DA,EAIL,4EAEKF,EAWL,2aAEKC,EAAY,SAACF,GAAW,iLAAKxD,OAQ7BwD,EAAcI,EAAiBC,EAAc,KAAA,EAG7CA,EAEL,yGAEKD,EAML,+JC9EKE,EAAe,WA6CjB,OAAA1E,GA/BA,SAAA0E,EAAAzF,GAUG,IATCZ,EAAMY,EAANZ,OACAc,EAAIF,EAAJE,KACAwF,EAAM1F,EAAN0F,OACAC,EAAQ3F,EAAR2F,SACAC,EAAS5F,EAAT4F,UACA5E,EAAKhB,EAALgB,MAAK6E,EAAA7F,EACL8F,MAAAA,OAAQ,IAAHD,EAAG,EAACA,EAAAE,EAAA/F,EACTgG,WAAAA,OAAa,IAAHD,GAAOA,EAAA9E,EAAAjB,EACjBkB,eAAAA,OAAiB,IAAHD,EAAG,CAAE1B,EAAG,GAAIC,EAAG,IAAIyB,EAAAI,OAAAoE,GAEjCnE,KAAKlC,OAASA,EACdkC,KAAKN,MAAQA,EACbM,KAAKwE,MAAQA,EACbxE,KAAK0E,WAAaA,EAClB1E,KAAKJ,eAAiBA,EACtBI,KAAKC,sBAAwBL,EAAe3B,EAAI2B,EAAe1B,EAE/D8B,KAAKM,UAAY,GAEjBN,KAAK2E,QAAU,CACX/F,KAAAA,EACAwF,OAAAA,EACAC,SAAAA,EACAC,UAAAA,EACAM,QAAS,IAGb5E,KAAK6E,wBAAwBjG,EAAMc,EACvC,GAEA,CAAA,CAAAiB,IAAA,0BAAAC,MAmBA,SAAwBa,EAAQ/B,GAAsB,IAAfoF,EAAS3B,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,GAAAA,UAAA,GAAG,EACzCpF,EAAkBM,KAAKG,KAAKkB,EAAQM,KAAKC,uBAEzC8E,EAA4B,IAAdD,EACdlB,EAAa7F,GAAmB,EAEhCiH,EAAepB,EAAa5D,KAAK2E,QAAQP,OAASpE,KAAKlC,OAAOkB,aAAa,CAC7EL,MAAK0B,cAAAA,OAAgBL,KAAK0E,WAAa,OAAS,OAAM,KAAArE,OAAIyE,GAC1D7F,KAAwB,EAAlBlB,EACNe,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAGvEC,EAAkBlB,KAAKlC,OAAOqD,sBAAsB,CACtDC,QACI,CAAA,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aACnBrB,OAAA4E,EAEGrB,EAAa,CAAC,CACdvC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBACjB,CACCL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aACf,OAIPC,EAAY3B,KAAKlC,OAAO8D,gBAAgB,CAC1CC,OAAQX,EACRE,QACI,CAAA,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQA,IAExB,CACIJ,QAAS,EACTS,SAAU,CAAEL,OAAQuD,KACvB3E,OAAA4E,EAEGrB,EAAa,CAAC,CACdvC,QAAS,EACTS,SAAU,CAAEL,OAAQzB,KAAK2E,QAAQN,WAClC,CACChD,QAAS,EACTS,SAAU,CAAEL,OAAQzB,KAAK2E,QAAQL,aAChC,OAIPvC,EAAiB/B,KAAKlC,OAAOkE,qBAAqB,CACpDC,iBAAkB,CAACf,KAGjBgE,EAAgBH,EAAc/E,KAAKwE,MAAQ9E,EAAQA,EACnDyF,EAAgBJ,EAAc/E,KAAKwE,MAAQ,EAE3CY,EAAoBpF,KAAKlC,OAAOqE,sBAAsB,CACxDN,OAAQE,EACRK,QAAS,CACLC,OAAQrC,KAAKlC,OAAO0C,mBAAmB,CACnCC,KAAMkD,EAAgBoB,EAAanB,EAAY5D,KAAK0E,YACpD/F,MAAO,eAEX2D,WAAY,aACZC,UAAW,CACPC,iBAAoBxC,KAAKJ,eAAe3B,EACxCwE,iBAAoBzC,KAAKJ,eAAe1B,EACxCwE,sBAAyB1C,KAAKC,sBAC9BoF,cAAiBH,EACjBI,cAAiBH,MAK7BnF,KAAK2E,QAAQC,QAAQhC,KAAKoC,GAC1BhF,KAAKM,UAAUsC,KAAK,CAAEC,SAAUuC,EAAmBzD,UAAAA,IAE9CiC,GACD5D,KAAK6E,wBAAwBG,EAAcjH,EAAiB+G,EAAY,EAEhF,GAAC,CAAAnE,IAAA,WAAAC,MAED,SAASqC,EAAMjF,GACX,IADqC,IAAZkF,EAAMC,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,GAAAA,UAAA,GAAG,EACzBE,EAAI,EAAGA,EAAIrD,KAAKM,UAAUpB,OAAQmE,IAAK,CAC5C,IAAAC,EAAgCtD,KAAKM,UAAU+C,GAAvCR,EAAQS,EAART,SAAUlB,EAAS2B,EAAT3B,UAEZ4D,EAAoBvF,KAAK0E,YAAcrB,EAAIrD,KAAKM,UAAUpB,OAAS,EAEzE+D,EAAKM,YAAYV,GACjBI,EAAKO,aAAa,EAAG7B,GAEjB4D,EACAtC,EAAKQ,2BAA2BzF,EAAckF,EAAa,EAAJG,EAAQ,GAG/DJ,EAAKuC,mBAAmB,EAAG,EAAG,EACtC,CACJ,IAAC,CAAA,CAAA7E,IAAA,8BAAAC,MA7HD,SAAmC9C,EAAQ2H,EAAY7F,GACnD,IAAMK,EAAwBL,EAAe3B,EAAI2B,EAAe1B,EAC1DwH,EAAQ,GAEd,EAAG,CAEC,IAAMC,EAAyBtH,KAAKG,KAAKiH,EAAaxF,GAGhDjC,EAAeH,EAA2BC,EAAQ6H,GAExDD,EAAM9C,KAAK5E,EAAaC,EAAGD,EAAaE,EAAG,GAC3CuH,EAAaE,QACRF,EAAa,GAEtB,OAAOC,CACX,IAAC,CA9DgB,GCIfE,EAAe,WAgEhB,OAAAnG,GA9CD,SAAAmG,IAUQ,IAAAlH,EAAAyE,UAAAjE,OAAA,QAAAkE,IAAAD,UAAA,GAAAA,UAAA,GAAJ,CAAE,EATFrF,EAAMY,EAANZ,OACA+H,EAAInH,EAAJmH,KACAC,EAAMpH,EAANoH,OACApG,EAAKhB,EAALgB,MAAKqG,EAAArH,EACLsH,UAAAA,OAAY,IAAHD,EAAG,GAAEA,EAAApG,EAAAjB,EACdkB,eAAAA,OAAiB,IAAHD,EAAG,CAAE1B,EAAG,GAAIC,EAAG,IAAIyB,EAAAsG,EAAAvH,EACjCwH,YAAAA,OAAc,IAAHD,GAAQA,EAAAE,EAAAzH,EACnB0H,cAAAA,OAAgB,IAAHD,GAAQA,EAAAtG,EAAAnB,EACrBoB,qBAAAA,OAAuB,IAAHD,GAAQA,EAE5B,GAF4BE,OAAA6F,GAEd,MAAV9H,EAAgB,MAAM,IAAIsC,MAAM,sBACpC,GAAY,MAARyF,EAAc,MAAM,IAAIzF,MAAM,2BAClC,IAAKiG,OAAOC,UAAU5G,IAAUA,GAAS,EAAG,MAAM,IAAIU,MAAM,2BAC5D,IAAKiG,OAAOC,UAAUN,IAAcA,GAAa,GAAKA,EAAY,GAAI,MAAM,IAAI5F,MAAM,+BACtF,IAAKiG,OAAOC,UAAU1G,EAAe3B,KAAOoI,OAAOC,UAAU1G,EAAe1B,GAAI,MAAM,IAAIkC,MAAM,oCAChG,GAAI4F,EAAY,GAAK,EAAG,MAAM,IAAI5F,MAAM,qCAExCJ,KAAKlC,OAASA,EACdkC,KAAKN,MAAQA,EACbM,KAAKgG,UAAYA,EACjBhG,KAAKJ,eAAiBA,EACtBI,KAAKkG,YAAcA,EACnBlG,KAAKoG,cAAgBA,EACrBpG,KAAKF,qBAAuBA,EAE5BE,KAAKC,sBAAwBL,EAAe3B,EAAI2B,EAAe1B,EAC/D8B,KAAKjC,gBAAkBM,KAAKG,KAAKkB,EAAQM,KAAKC,uBAC9CD,KAAKuG,6BAA+B,EAAIvG,KAAKjC,gBAE7CiC,KAAKwG,WAAwB,MAAVV,EAEnB9F,KAAKhC,aAAe,GACpBgC,KAAKyG,cAAgB,GACrBzG,KAAK2E,QAAU,GACf3E,KAAKM,UAAY,GACjBN,KAAK0G,QAAU,GAGf1G,KAAKhC,aAAeH,EAA2BmC,KAAKlC,OAAQkC,KAAKjC,iBAGjEiC,KAAK2G,wBAGL3G,KAAK4G,iBAAiBf,EAAMC,EAChC,GAAC,CAAA,CAAAnF,IAAA,wBAAAC,MAED,WAEI,IAAMiG,EAAgB,SAACC,GACnB,OAAOA,EAAOC,MAAM,MACNC,QAAO,SAAAC,GAAI,OAAKA,EAAKC,cAAcC,SAAS,SAAS,IACrDC,KAAK,OAGjBC,EAAiBrH,KAAKoG,cCiClC,mpHC5BA,o8FFHMpG,KAAKyG,cAAgB,CACjBa,SAAUtH,KAAKlC,OAAO0C,mBAAmB,CACrC7B,MAAO,uBACP8B,KAAMT,KAAKwG,WAAaa,EAAiBR,EAAcQ,KAE3DE,QAASvH,KAAKlC,OAAO0C,mBAAmB,CACpC7B,MAAO,qBACP8B,KAAMT,KAAKwG,WAAagB,EAAgBX,EAAcW,KAGlE,GAAC,CAAA7G,IAAA,mBAAAC,MAED,SAAiBiF,EAAMC,GAEnB,IAAA2B,EAAkDzH,KAAK0H,2BAA/CC,EAAeF,EAAfE,gBAAiBC,EAAoBH,EAApBG,qBAGnBC,EAAe7H,KAAK8H,yBAAyBH,GAGnD3H,KAAK+H,eAAelC,EAAMC,EAAQ8B,EAAsBC,GAGxD7H,KAAKgI,0BAA0BhI,KAAK2E,QAAQkB,KAAMgC,GAGlD,IAAK,IAAII,EAAM,EAAGA,EAAMjI,KAAKgG,UAAWiC,GAAO,EAAG,CAE9C,IAAMC,EAAaD,EAAM,GAAK,EACxBE,EAAYD,EAAOlI,KAAK2E,QAAQkB,KAAO7F,KAAK2E,QAAQyD,QACpDC,EAAYH,EAAOlI,KAAK2E,QAAQmB,OAAS9F,KAAK2E,QAAQ2D,UACtDC,EAAYL,EAAOlI,KAAK2E,QAAQyD,QAAUpI,KAAK2E,QAAQkB,KACvD2C,EAAYN,EAAOlI,KAAK2E,QAAQ2D,UAAYtI,KAAK2E,QAAQmB,OAGzDhD,EAAmB9C,KAAKyI,0BAA0BN,EAAQE,EAAUJ,GAGpES,EAAkB1I,KAAK2I,wBAAwBR,EAAQE,EAAUE,EAASC,EAAWP,GAE3FjI,KAAKM,UAAUsC,KAAK,CAAEE,iBAAAA,EAAkB4F,gBAAAA,GAC5C,CACJ,GAAC,CAAA/H,IAAA,2BAAAC,MAED,WAEI,IAAMgH,EAAuB5H,KAAKlC,OAAOkB,aAAa,CAClDL,MAAO,8BACPM,KAA0C,EAApCe,KAAKuG,6BACXzH,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAIvE0G,EAAkB,IAAInI,EAAgB,CACxC1B,OAAQkC,KAAKlC,OACbc,KAAMgJ,EACNlI,MAAOM,KAAKuG,6BACZ3G,eAAgBI,KAAKJ,eACrBE,qBAAsBE,KAAKF,uBAK/B,OAFAE,KAAK0G,QAAQkC,UAAYjB,EAElB,CAAEA,gBAAAA,EAAiBC,qBAAAA,EAC9B,GAAC,CAAAjH,IAAA,2BAAAC,MAED,SAAyB+G,GAErB,IAAMkB,EAAwBlB,EAAgBmB,qBAGxCC,EAAwB1K,KAAK2K,IAAIhJ,KAAKN,MAAoC,EAA7BM,KAAKC,uBAClDgJ,EAAwBjJ,KAAKN,MAAQqJ,EACrCG,EAAaH,EAAwB,EAGrCI,EAAoBhF,EAAgBiF,4BAA4BpJ,KAAKlC,OAAQiL,EAAuB/I,KAAKJ,gBACzGyJ,EAAoBlF,EAAgBiF,4BAA4BpJ,KAAKlC,OAAQmL,EAAuBjJ,KAAKJ,gBAGzG0J,EAAe,CACjBtJ,KAAKhC,aAAaC,EAAG+B,KAAKhC,aAAaE,EAAG,GAACmC,OAAA4E,EACxCkE,EAAkBI,MAAM,EAAG,IAAEtE,EAC7B4D,IAUP,OANA7I,KAAKwJ,gBAAkB,CACnBC,WAAY,EACZC,gBAAiB,GACjBC,WAAY,IAGT,CACHL,gBAAAA,EACAD,kBAAAA,EACAN,sBAAAA,EACAE,sBAAAA,EACAC,WAAAA,EAER,GAAC,CAAAvI,IAAA,iBAAAC,MAED,SAAeiF,EAAMC,EAAQ8B,EAAsBC,GAE/C,IAAM+B,EAAgB5J,KAAKlC,OAAOkB,aAAa,CAC3CL,MAAO,sBACPM,KAAmB,EAAbe,KAAKN,MACXZ,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAEvE4I,EAAmB7J,KAAKwG,WAAoBxG,KAAKlC,OAAOkB,aAAa,CACvEL,MAAO,wBACPM,KAAmB,EAAbe,KAAKN,MACXZ,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAHlC,KAOrC6I,EAAuB9J,KAAKlC,OAAOkB,aAAa,CAClDL,MAAO,8BACPM,KAAmB,EAAbe,KAAKN,MACXZ,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAIvE8I,EAAiBtL,EAAwB,CAC3CX,OAAQkC,KAAKlC,OACba,MAAO,2BACPC,KAAMiJ,EAAayB,gBACnBxK,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAekJ,WAEvEC,EAAyBxL,EAAwB,CACnDX,OAAQkC,KAAKlC,OACba,MAAO,oCACPC,KAAMiJ,EAAayB,gBACnBxK,MAAOgC,eAAeC,QAAUD,eAAeE,WAI7CkJ,EAA8BzL,EAAwB,CACxDE,MAAO,gCACPb,OAAQkC,KAAKlC,OACbc,KAAMiJ,EAAawB,kBACnBvK,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAekJ,WAEvEG,EAAsC1L,EAAwB,CAChEE,MAAO,yCACPb,OAAQkC,KAAKlC,OACbc,KAAMiJ,EAAawB,kBACnBvK,MAAOgC,eAAeC,QAAUD,eAAeE,WAI7CoJ,EAAiB3L,EAAwB,CAC3CE,MAAO,YACPb,OAAQkC,KAAKlC,OACbc,KAAM,IAAIQ,YAAY,CAAC,IACvBN,MAAOgC,eAAeC,QAAUD,eAAeE,SAAWF,eAAeG,WAG7EjB,KAAK2E,QAAU,CACXkB,KAAMA,EACNC,OAAQA,EACRsC,QAASwB,EACTtB,UAAWuB,EACXQ,eAAgBP,EAChBQ,eAAgB1C,EAEhB5J,aAAc+L,EACdQ,qBAAsBN,EACtBO,0BAA2BN,EAC3BO,kCAAmCN,EACnCO,SAAUN,EAElB,GAAC,CAAAzJ,IAAA,4BAAAC,MAED,SAA0BuH,EAAQwC,GAE9B,IAAK3K,KAAKkG,YACN,MAAO,CAAE,KAAM,MAGnB,IAAQ6C,EAA6D4B,EAA7D5B,sBAAuBE,EAAsC0B,EAAtC1B,sBAAuBC,EAAeyB,EAAfzB,WAGhD0B,EAAgB,IAAIzG,EAAgB,CACtCrG,OAAQkC,KAAKlC,OACbc,KAAMuJ,EACN/D,OAAQpE,KAAK2E,QAAQ3G,aACrBqG,SAAUrE,KAAK2E,QAAQ4F,qBACvBjG,UAAWtE,KAAK2E,QAAQ+F,SACxBhL,MAAOuJ,EACPzE,MAAO0E,EACPxE,YAAY,EACZ9E,eAAgBI,KAAKJ,iBAInBiL,EAAgB,IAAI1G,EAAgB,CACtCrG,OAAQkC,KAAKlC,OACbc,KAAMuJ,EACN/D,OAAQpE,KAAK2E,QAAQ6F,0BACrBnG,SAAUrE,KAAK2E,QAAQ8F,kCACvBnG,UAAWtE,KAAK2E,QAAQ+F,SACxBhL,MAAOqJ,EACPrE,YAAY,EACZ9E,eAAgBI,KAAKJ,iBAGzB,GAAIiL,EAAc5K,sBAAwB2K,EAActK,UAAUpB,OAG9D,OAFA4L,QAAQC,KAAI,wFACZ/K,KAAKkG,aAAc,EACZ,CAAE,KAAM,MAGnBlG,KAAK0G,QAAQmE,cAAgBA,EAC7B7K,KAAK0G,QAAQkE,cAAgBA,CACjC,GAAC,CAAAjK,IAAA,4BAAAC,MAED,SAA0BuH,EAAQE,EAAUJ,GACxC,IAAM/G,EAAkBlB,KAAKlC,OAAOqD,sBAAsB,CACtDxC,MAAO,uBACPyC,QACI,CAAA,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM1B,KAAKoG,cAAgB,UAAY,sBAErD,CACI/E,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,YAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aACnBrB,OAAA4E,EACGjF,KAAKoG,eAAiBpG,KAAKwG,WAAa,CAAC,CACzCnF,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aACf,OAIPC,EAAY3B,KAAKlC,OAAO8D,gBAAgB,CAC1CC,OAAQX,EACRE,QACI,CAAA,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQ0G,IAExB,CACI9G,QAAS,EACTS,SAAU,CAAEL,OAAQzB,KAAK2E,QAAQ0F,iBAErC,CACIhJ,QAAS,EACTS,SAAU,CAAEL,OAAQzB,KAAK2E,QAAQ2F,kBACpCjK,OAAA4E,EAEGjF,KAAKoG,eAAiBpG,KAAKwG,WAAa,CAAC,CACzCnF,QAAS,EACTS,SAAU,CAAEL,OAAQ4G,KACnB,OAIPtG,EAAiB/B,KAAKlC,OAAOkE,qBAAqB,CACpDC,iBAAkB,CAAEf,KAoBxB,MAAO,CACH2B,SAlBqB7C,KAAKlC,OAAOqE,sBAAsB,CACvDxD,MAAO,uBACPkD,OAAQE,EACRK,QAAS,CACLC,OAAQrC,KAAKyG,cAAca,SAC3BhF,WAAY,aACZC,UAAW,CACPC,iBAAoBxC,KAAKJ,eAAe3B,EACxCwE,iBAAoBzC,KAAKJ,eAAe1B,EACxC8M,gBAAmBhL,KAAKjC,gBACxB2E,sBAAyB1C,KAAKC,sBAC9BoF,cAAiBrF,KAAKN,MACtBuL,YAAehD,MAOvBtG,UAAAA,EAER,GAAC,CAAAhB,IAAA,0BAAAC,MAED,SAAwBuH,EAAQE,EAAUE,EAASC,EAAWP,GAC1D,IAAM/G,EAAkBlB,KAAKlC,OAAOqD,sBAAsB,CACtDxC,MAAO,qBACPyC,QACI,CAAA,CACIC,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,YAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,uBACnBrB,OAAA4E,EACGjF,KAAKwG,WAAa,CAClB,CACInF,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,sBAEpB,CACIL,QAAS,EACTC,WAAYC,eAAeC,QAC3BC,OAAQ,CAAEC,KAAM,aAEpB,OAINC,EAAY3B,KAAKlC,OAAO8D,gBAAgB,CAC1CC,OAAQX,EACRE,QACI,CAAA,CACIC,QAAS,EACTS,SAAU,CAAEL,OAAQ0G,IAExB,CACI9G,QAAS,EACTS,SAAU,CAAEL,OAAQ8G,IAExB,CACIlH,QAAS,EACTS,SAAU,CAAEL,OAAQzB,KAAK2E,QAAQ0F,iBAErC,CACIhJ,QAAS,EACTS,SAAU,CAAEL,OAAQzB,KAAK2E,QAAQ2F,kBACpCjK,OAAA4E,EACGjF,KAAKwG,WAAa,CAClB,CACInF,QAAS,EACTS,SAAU,CAAEL,OAAQ4G,IAExB,CACIhH,QAAS,EACTS,SAAU,CAAEL,OAAQ+G,KAExB,OAINzG,EAAiB/B,KAAKlC,OAAOkE,qBAAqB,CACpDC,iBAAkB,CAAEf,KAoBxB,MAAO,CACH2B,SAlBoB7C,KAAKlC,OAAOqE,sBAAsB,CACtDxD,MAAO,qBACPkD,OAAQE,EACRK,QAAS,CACLC,OAAQrC,KAAKyG,cAAcc,QAC3BjF,WAAY,qBACZC,UAAW,CACPC,iBAAoBxC,KAAKJ,eAAe3B,EACxCwE,iBAAoBzC,KAAKJ,eAAe1B,EACxC8M,gBAAmBhL,KAAKjC,gBACxB2E,sBAAyB1C,KAAKC,sBAC9BoF,cAAiBrF,KAAKN,MACtBuL,YAAehD,MAOvBtG,UAAAA,EAER,GAEA,CAAAhB,IAAA,WAAAC,MAKA,SAASqC,GACL,IAAK,IAAII,EAAI,EAAGA,EAAIrD,KAAKgG,UAAY,EAAG3C,IAAK,CACzC,IAAAC,EAA8CtD,KAAKM,UAAU+C,GAArDP,EAAgBQ,EAAhBR,iBAAkB4F,EAAepF,EAAfoF,gBAEtB1I,KAAKkG,aAAe7C,EAAI,GAAK,IAC7BrD,KAAK0G,QAAQmE,cAAcK,SAASjI,EAAMjD,KAAK2E,QAAQ3G,aAAcgC,KAAKwJ,gBAAgBE,iBAC1F1J,KAAK0G,QAAQkE,cAAcM,SAASjI,EAAMjD,KAAK2E,QAAQ6F,4BAG3DvH,EAAKM,YAAYT,EAAiBD,UAClCI,EAAKO,aAAa,EAAGV,EAAiBnB,WACtCsB,EAAKQ,2BAA2BzD,KAAK2E,QAAQ3G,aAAcgC,KAAKwJ,gBAAgBC,YAEhFzJ,KAAK0G,QAAQkC,UAAUsC,SAASjI,EAAMjD,KAAK2E,QAAQ3G,aAAcgC,KAAKwJ,gBAAgBG,YAEtF1G,EAAKM,YAAYmF,EAAgB7F,UACjCI,EAAKO,aAAa,EAAGkF,EAAgB/G,WACrCsB,EAAKQ,2BAA2BzD,KAAK2E,QAAQ3G,aAAcgC,KAAKwJ,gBAAgBC,WACpF,CACJ,IAAC,CA5egB"} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 4d13164..2461edd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "webgpu-radix-sort", - "version": "1.0.0", + "version": "1.0.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "webgpu-radix-sort", - "version": "1.0.0", + "version": "1.0.4", "license": "MIT", "devDependencies": { "@babel/core": "^7.24.6", diff --git a/package.json b/package.json index 77696c6..50fe2a7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "webgpu-radix-sort", - "version": "1.0.3", + "version": "1.0.4", "description": "Fast 4-way parallel radix sort for WebGPU", "main": "dist/cjs/radix-sort-cjs.js", "module": "dist/esm/radix-sort-esm.js",