diff --git a/cuda/CudaMatch.cu b/cuda/CudaMatch.cu index e827547f..c1fdbff7 100644 --- a/cuda/CudaMatch.cu +++ b/cuda/CudaMatch.cu @@ -564,17 +564,6 @@ cudaError CudaHarvestMatchK32( HarvestMatchK32Kernel<<>>( devOutPairs, devMatchCount, devYEntries, entryCount, matchOffset ); -// #if _DEBUG -// uint32 matchCount = 0; -// CudaErrCheck( cudaMemcpyAsync( &matchCount, devMatchCount, sizeof( uint32 ) , cudaMemcpyDeviceToHost, stream ) ); -// CudaErrCheck( cudaStreamSynchronize( stream ) ); -// CudaErrCheck( cudaStreamSynchronize( stream ) ); - -// Pair* matches = new Pair[matchCount]; -// CudaErrCheck( cudaMemcpyAsync( matches, devOutPairs, sizeof( Pair ) * matchCount , cudaMemcpyDeviceToHost, stream ) ); -// CudaErrCheck( cudaStreamSynchronize( stream ) ); -// CudaErrCheck( cudaStreamSynchronize( stream ) ); -// #endif return cudaSuccess; } @@ -603,8 +592,10 @@ void CudaMatchBucketizedK32( CudaSetFirstAndLastGroup<<<1,2,0,stream>>>( tmpGroupCounts, entryCount ); } - CudaErrCheck( cudaMemsetAsync( cx.devGroupCount, 0, sizeof( uint32 ), stream ) ); - CudaErrCheck( cudaMemsetAsync( cx.devMatchCount, 0, sizeof( uint32 ), stream ) ); + Log::Line( "Marker Set to %d", 1) +CudaErrCheck( cudaMemsetAsync( cx.devGroupCount, 0, sizeof( uint32 ), stream ) ); + Log::Line( "Marker Set to %d", 2) +CudaErrCheck( cudaMemsetAsync( cx.devMatchCount, 0, sizeof( uint32 ), stream ) ); ScanGroupsCudaK32Bucket<<>>( devY, tmpGroupCounts+2, cx.devGroupCount, entryCount, bucketMask ); byte* sortTmpAlloc = (byte*)( tmpGroupCounts + BBCU_MAX_GROUP_COUNT ); @@ -621,49 +612,3 @@ void CudaMatchBucketizedK32( MatchCudaK32Bucket<<>>( bucketMask, entryCount, cx.devGroupCount, devY, cx.devGroupBoundaries, cx.devMatchCount, cx.devMatches ); } -//----------------------------------------------------------- -// cudaError CudaHarvestMatchK32WithGroupScan( -// Pair* devOutPairs, -// uint32* devMatchCount, -// const uint32 maxMatches, -// uint32* devGroupIndices, -// uint32* devGroupIndicesTemp, -// const uint32 maxGroups, -// void* sortBuffer, -// const size_t sortBufferSize, -// const uint64* devYEntries, -// const uint32 entryCount, -// const uint32 matchOffset, -// cudaStream_t stream ) -// { -// // Scan for BC groups -// { -// const uint32 kblocks = 0; -// const uint32 kthreads = 0; - - -// // constexpr uint32 kscanblocks = CuCDiv( BBCU_BUCKET_ALLOC_ENTRY_COUNT, BBCU_SCAN_GROUP_THREADS ); -// // Initialize the entries to the max value so that they are not included in the sort -// CudaInitGroups<<>>( devGroupIndicesTemp, entryCount ); -// // CudaInitGroupsBucket<<>>( tmpGroupCounts ); - -// // Add first group and last ghost group -// CudaSetFirstAndLastGroup<<<1,2,0,stream>>>( tmpGroupCounts, entryCount ); -// } - -// CudaErrCheck( cudaMemsetAsync( cx.devGroupCount, 0, sizeof( uint32 ), stream ) ); -// CudaErrCheck( cudaMemsetAsync( cx.devMatchCount, 0, sizeof( uint32 ), stream ) ); -// ScanGroupsCudaK32Bucket<<>>( devY, tmpGroupCounts+2, cx.devGroupCount, entryCount, bucketMask ); - -// byte* sortTmpAlloc = (byte*)( tmpGroupCounts + BBCU_MAX_GROUP_COUNT ); -// size_t sortTmpSize = ( BBCU_BUCKET_ALLOC_ENTRY_COUNT - BBCU_MAX_GROUP_COUNT ) * sizeof( uint32 ); - -// #if _DEBUG -// size_t sortSize = 0; -// cub::DeviceRadixSort::SortKeys( nullptr, sortSize, nullptr, nullptr, BBCU_MAX_GROUP_COUNT, 0, 32 ); -// ASSERT( sortSize <= sortTmpSize ); -// #endif - -// cub::DeviceRadixSort::SortKeys( sortTmpAlloc, sortTmpSize, tmpGroupCounts, cx.devGroupBoundaries, BBCU_MAX_GROUP_COUNT, 0, 32, stream ); - -// } diff --git 
a/cuda/CudaParkSerializer.cu b/cuda/CudaParkSerializer.cu index f3e8b8d4..91516a7a 100644 --- a/cuda/CudaParkSerializer.cu +++ b/cuda/CudaParkSerializer.cu @@ -20,8 +20,10 @@ void InitFSEBitMask( CudaK32PlotContext& cx ) 0x3FFFFFFF, 0x7FFFFFFF }; - CudaErrCheck( cudaMemcpyToSymbolAsync( CUDA_FSE_BIT_mask, bitmask, sizeof( bitmask ), 0, cudaMemcpyHostToDevice, cx.computeStream ) ); - CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); + Log::Line( "Marker Set to %d", 3) +CudaErrCheck( cudaMemcpyToSymbolAsync( CUDA_FSE_BIT_mask, bitmask, sizeof( bitmask ), 0, cudaMemcpyHostToDevice, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 4) +CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); } diff --git a/cuda/CudaPlotContext.h b/cuda/CudaPlotContext.h index fc5884b3..c83b3a32 100644 --- a/cuda/CudaPlotContext.h +++ b/cuda/CudaPlotContext.h @@ -576,8 +576,10 @@ inline void DbgPrintDeviceHash( const char* msg, const void* ptr, const size_t s byte hash[32]; void* hostBuffer = bbvirtallocboundednuma( size ); - CudaErrCheck( cudaMemcpyAsync( hostBuffer, ptr, size, cudaMemcpyDeviceToHost, stream ) ); - CudaErrCheck( cudaStreamSynchronize( stream ) ); + Log::Line( "Marker Set to %d", 5) +CudaErrCheck( cudaMemcpyAsync( hostBuffer, ptr, size, cudaMemcpyDeviceToHost, stream ) ); + Log::Line( "Marker Set to %d", 6) +CudaErrCheck( cudaStreamSynchronize( stream ) ); blake3_hasher hasher; blake3_hasher_init( &hasher ); diff --git a/cuda/CudaPlotPhase2.cu b/cuda/CudaPlotPhase2.cu index 8d2d5094..d66ab8b6 100644 --- a/cuda/CudaPlotPhase2.cu +++ b/cuda/CudaPlotPhase2.cu @@ -119,7 +119,8 @@ static void BytefieldToBitfield( CudaK32PlotContext& cx, const byte* bytefield, #if DBG_BBCU_P2_COUNT_PRUNED_ENTRIES #define G_PRUNED_COUNTS ,cx.phase2->devPrunedCount - CudaErrCheck( cudaMemsetAsync( cx.phase2->devPrunedCount, 0, sizeof( uint32 ), stream ) ); + Log::Line( "Marker Set to %d", 7) +CudaErrCheck( cudaMemsetAsync( cx.phase2->devPrunedCount, 0, sizeof( uint32 ), stream ) ); #else #define G_PRUNED_COUNTS #endif @@ -176,7 +177,8 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 ) } // Zero-out marks - CudaErrCheck( cudaMemsetAsync( devLMarks, 0, GetMarkingTableByteSize(), cx.computeStream ) ); + Log::Line( "Marker Set to %d", 8) +CudaErrCheck( cudaMemsetAsync( devLMarks, 0, GetMarkingTableByteSize(), cx.computeStream ) ); // Load first bucket's worth of pairs LoadPairs( cx, p2, rTable, 0 ); @@ -230,9 +232,12 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 ) #if DBG_BBCU_P2_COUNT_PRUNED_ENTRIES { uint32 prunedEntryCount = 0; - CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); - CudaErrCheck( cudaMemcpyAsync( &prunedEntryCount, p2.devPrunedCount, sizeof( uint32 ), cudaMemcpyDeviceToHost, cx.computeStream ) ); - CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); + Log::Line( "Marker Set to %d", 9) +CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); + Log::Line( "Marker Set to %d", 10) +CudaErrCheck( cudaMemcpyAsync( &prunedEntryCount, p2.devPrunedCount, sizeof( uint32 ), cudaMemcpyDeviceToHost, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 11) +CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); const uint64 lEntryCount = cx.tableEntryCounts[(int)lTable]; Log::Line( "Table %u now has %u / %llu ( %.2lf%% ) entries.", (uint)lTable+1, @@ -245,22 +250,7 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 ) #if _DEBUG p2.outMarks.WaitForCompletion(); - // CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); - // CudaErrCheck( 
cudaStreamSynchronize( cx.gpuDownloadStream[0]->GetStream() ) ); - // CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[1]->GetStream() ) ); - // CudaErrCheck( cudaStreamSynchronize( cx.gpuDownloadStream[2]->GetStream() ) ); - // byte* hByteField = bbcvirtalloc( GetMarkingTableByteSize() ); - // uint64* hBitField = bbcvirtalloc( GetMarkingTableBitFieldSize() ); - // uint64* rBitField = bbcvirtalloc( GetMarkingTableBitFieldSize() ); - // CudaErrCheck( cudaMemcpyAsync( hByteField, devLMarks, GetMarkingTableByteSize(), cudaMemcpyDeviceToHost, cx.computeStream ) ); - // CudaErrCheck( cudaMemcpyAsync( hBitField, bitfield, GetMarkingTableBitFieldSize(), cudaMemcpyDeviceToHost, cx.computeStream ) ); - - // if( rTable < TableId::Table7 ) - // CudaErrCheck( cudaMemcpyAsync( rBitField, p2.devRMarks, GetMarkingTableBitFieldSize(), cudaMemcpyDeviceToHost, cx.computeStream ) ); - - // CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); - // // (void)p2.outMarks.GetDeviceBuffer(); uint64* hBitField = cx.hostMarkingTables[(int)lTable]; std::atomic bitfieldPrunedEntryCount = 0; @@ -276,23 +266,10 @@ void MarkTable( CudaK32PlotContext& cx, CudaK32Phase2& p2 ) uint64 localPrunedEntryCount = 0; uint64 rPrunedEntryCount = 0; - // BitField rMarks( rBitField, rEntryCount ); - // const byte* bytefield = hByteField; uint64 count, offset, end; - // // Count r entries again to make sure it's still valid - // if( rt < TableId::Table7 ) - // { - // GetThreadOffsets( self, rEntryCount, count, offset, end ); - // for( uint64 i = offset; i < end; i++ ) - // { - // if( rMarks.Get( i ) ) - // rPrunedEntryCount ++; - // } - - // rTablePrunedEntryCount += rPrunedEntryCount; - // } + GetThreadOffsets( self, lEntryCount, count, offset, end ); // for( uint64 i = offset; i < end; i++ ) diff --git a/cuda/CudaPlotPhase3.cu b/cuda/CudaPlotPhase3.cu index 8fcdfe2a..176f344c 100644 --- a/cuda/CudaPlotPhase3.cu +++ b/cuda/CudaPlotPhase3.cu @@ -374,12 +374,14 @@ void Step1( CudaK32PlotContext& cx ) const TableId rTable = cx.table; // Clear pruned table count - CudaErrCheck( cudaMemsetAsync( p3.devPrunedEntryCount, 0, sizeof( uint32 ), cx.computeStream ) ); + Log::Line( "Marker Set to %d", 12) +CudaErrCheck( cudaMemsetAsync( p3.devPrunedEntryCount, 0, sizeof( uint32 ), cx.computeStream ) ); // Load marking table (must be loaded before first bucket, on the same stream) if( cx.table < TableId::Table7 ) { - CudaErrCheck( cudaMemcpyAsync( s1.rTableMarks, cx.hostMarkingTables[(int)rTable], + Log::Line( "Marker Set to %d", 13) +CudaErrCheck( cudaMemcpyAsync( s1.rTableMarks, cx.hostMarkingTables[(int)rTable], GetMarkingTableBitFieldSize(), cudaMemcpyHostToDevice, s1.pairsLIn.GetQueue()->GetStream() ) ); } @@ -416,7 +418,8 @@ void Step1( CudaK32PlotContext& cx ) // Generate map #define KERN_RMAP_ARGS entryCount, rTableOffset, devSliceCounts, p3.devPrunedEntryCount, devRMap, devLPairs, devRPairs, s1.rTableMarks - CudaErrCheck( cudaMemsetAsync( devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 14) +CudaErrCheck( cudaMemsetAsync( devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT, cx.computeStream ) ); if( cx.table < TableId::Table7 ) PruneAndWriteRMap<<>>( KERN_RMAP_ARGS ); @@ -436,7 +439,8 @@ void Step1( CudaK32PlotContext& cx ) // Download slice counts cudaStream_t downloadStream = s1.rMapOut.GetQueue()->GetStream(); - CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, + Log::Line( 
"Marker Set to %d", 15) +CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cudaMemcpyDeviceToHost, downloadStream ) ); // Wait for completion @@ -446,7 +450,8 @@ void Step1( CudaK32PlotContext& cx ) s1.pairsLIn.Reset(); s1.pairsRIn.Reset(); - CudaErrCheck( cudaStreamSynchronize( downloadStream ) ); + Log::Line( "Marker Set to %d", 16) +CudaErrCheck( cudaStreamSynchronize( downloadStream ) ); // Add-up pruned bucket counts and tables counts memcpy( &s1.prunedBucketSlices[0][0], cx.hostBucketSlices, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT ); @@ -521,7 +526,8 @@ void CompressInlinedTable( CudaK32PlotContext& cx ) #endif // Load R Marking table (must be loaded before first bucket, on the same stream) - CudaErrCheck( cudaMemcpyAsync( (void*)tx.devRMarks, cx.hostMarkingTables[(int)rTable], + Log::Line( "Marker Set to %d", 17) +CudaErrCheck( cudaMemcpyAsync( (void*)tx.devRMarks, cx.hostMarkingTables[(int)rTable], GetMarkingTableBitFieldSize(), cudaMemcpyHostToDevice, p3.xTable.xIn.GetQueue()->GetStream() ) ); // Load initial bucket @@ -536,7 +542,8 @@ void CompressInlinedTable( CudaK32PlotContext& cx ) uint64 tablePrunedEntryCount = 0; uint32 rTableOffset = 0; - CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 18) +CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) ); for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ ) { @@ -559,7 +566,8 @@ void CompressInlinedTable( CudaK32PlotContext& cx ) uint32* devSliceCounts = cx.devSliceCounts + bucket * BBCU_BUCKET_COUNT; #if _DEBUG - CudaErrCheck( cudaMemsetAsync( outLps, 0, sizeof( uint64 ) * P3_PRUNED_BUCKET_MAX, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 19) +CudaErrCheck( cudaMemsetAsync( outLps, 0, sizeof( uint64 ) * P3_PRUNED_BUCKET_MAX, cx.computeStream ) ); #endif CudaConvertInlinedXsToLinePoints<<>>( diff --git a/cuda/CudaPlotPhase3Step2.cu b/cuda/CudaPlotPhase3Step2.cu index 3a7a6449..eaa03f15 100644 --- a/cuda/CudaPlotPhase3Step2.cu +++ b/cuda/CudaPlotPhase3Step2.cu @@ -148,7 +148,8 @@ static void ConvertRMapToLinePoints( CudaK32PlotContext& cx, const uint32 entryC const uint64 divisor = P3_CalculateMaxLPValue( prunedEntryCount ) / BBCU_BUCKET_COUNT; // #TODO: Use upload stream? 
- CudaErrCheck( cudaMemcpyToSymbolAsync( BucketDivisor, &divisor, sizeof( divisor ), 0, cudaMemcpyHostToDevice, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 20) +CudaErrCheck( cudaMemcpyToSymbolAsync( BucketDivisor, &divisor, sizeof( divisor ), 0, cudaMemcpyHostToDevice, cx.computeStream ) ); } CudaConvertRMapToLinePoints<<>>( Rmap2LPParams, 0 ); @@ -223,7 +224,8 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx ) if( isCompressed ) { // Copy from upload buffer to working buffer - CudaErrCheck( cudaMemcpyAsync( lTable, lMap, BBCU_BUCKET_ENTRY_COUNT * sizeof( uint32 ), cudaMemcpyDeviceToDevice, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 21) +CudaErrCheck( cudaMemcpyAsync( lTable, lMap, BBCU_BUCKET_ENTRY_COUNT * sizeof( uint32 ), cudaMemcpyDeviceToDevice, cx.computeStream ) ); } else { @@ -264,7 +266,8 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx ) LoadRBucket( cx, 0 ); // Clear pruned entry count - CudaErrCheck( cudaMemsetAsync( p3.devPrunedEntryCount, 0, sizeof( uint32 ), cx.computeStream ) ); + Log::Line( "Marker Set to %d", 22) +CudaErrCheck( cudaMemsetAsync( p3.devPrunedEntryCount, 0, sizeof( uint32 ), cx.computeStream ) ); // Unpack the first map beforehand UnpackLBucket( cx, 0 ); @@ -275,7 +278,8 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx ) /// uint32 rTableOffset = 0; // Track the global origin index of R entry/line point - CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 23) +CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) ); for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ ) { @@ -298,7 +302,8 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx ) static_assert( BBCU_BUCKET_ALLOC_ENTRY_COUNT - BBCU_BUCKET_ENTRY_COUNT == copyCount ); uint32* nextLTable = s2.devLTable[nextBucket & 1]; - CudaErrCheck( cudaMemcpyAsync( (uint32*)devLTable + BBCU_BUCKET_ENTRY_COUNT, nextLTable, copyCount * sizeof( uint32 ), cudaMemcpyDeviceToDevice, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 24) +CudaErrCheck( cudaMemcpyAsync( (uint32*)devLTable + BBCU_BUCKET_ENTRY_COUNT, nextLTable, copyCount * sizeof( uint32 ), cudaMemcpyDeviceToDevice, cx.computeStream ) ); } if( nextBucketL < BBCU_BUCKET_COUNT ) @@ -351,12 +356,14 @@ void CudaK32PlotPhase3Step2( CudaK32PlotContext& cx ) // Copy slice counts & bucket count cudaStream_t downloadStream = s2.lpOut.GetQueue()->GetStream(); - CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, + Log::Line( "Marker Set to %d", 25) +CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cudaMemcpyDeviceToHost, downloadStream ) ); memset( p3.prunedBucketCounts[(int)rTable], 0, BBCU_BUCKET_COUNT * sizeof( uint32 ) ); - CudaErrCheck( cudaStreamSynchronize( downloadStream ) ); + Log::Line( "Marker Set to %d", 26) +CudaErrCheck( cudaStreamSynchronize( downloadStream ) ); bbmemcpy_t( &s2.prunedBucketSlices[0][0], cx.hostBucketSlices, BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT ); for( uint32 bucket = 0; bucket < BBCU_BUCKET_COUNT; bucket++ ) { @@ -481,9 +488,11 @@ void WritePark7( CudaK32PlotContext& cx ) const size_t copySize = sizeof( uint32 ) * retainedEntryCount; if( !isLastBucket ) - CudaErrCheck( cudaMemcpyAsync( devIndexBuffer - retainedEntryCount, copySource, 
copySize, cudaMemcpyDeviceToDevice, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 27) +CudaErrCheck( cudaMemcpyAsync( devIndexBuffer - retainedEntryCount, copySource, copySize, cudaMemcpyDeviceToDevice, cx.computeStream ) ); else - CudaErrCheck( cudaMemcpyAsync( hostLastParkEntries, copySource, copySize, cudaMemcpyDeviceToHost, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 28) +CudaErrCheck( cudaMemcpyAsync( hostLastParkEntries, copySource, copySize, cudaMemcpyDeviceToHost, cx.computeStream ) ); } // Download parks & write to plot diff --git a/cuda/CudaPlotPhase3Step3.cu b/cuda/CudaPlotPhase3Step3.cu index c8f9337b..93b7e681 100644 --- a/cuda/CudaPlotPhase3Step3.cu +++ b/cuda/CudaPlotPhase3Step3.cu @@ -61,7 +61,8 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx ) const FSE_CTable* hostCTable = !isCompressed ? CTables[(int)lTable] : cx.gCfg->ctable; // (upload must be loaded before first bucket, on the same stream) - CudaErrCheck( cudaMemcpyAsync( s3.devCTable, hostCTable, cTableSize, cudaMemcpyHostToDevice, + Log::Line( "Marker Set to %d", 29) +CudaErrCheck( cudaMemcpyAsync( s3.devCTable, hostCTable, cTableSize, cudaMemcpyHostToDevice, s3.lpIn.GetQueue()->GetStream() ) ); // Load initial bucket @@ -116,11 +117,14 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx ) cudaStream_t lpStream = cx.computeStream;//B; cudaStream_t downloadStream = cx.gpuDownloadStream[0]->GetStream(); - CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, sortAndMapStream ) ); - CudaErrCheck( cudaMemsetAsync( s3.devParkOverrunCount, 0, sizeof( uint32 ), sortAndMapStream ) ); + Log::Line( "Marker Set to %d", 30) +CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, sortAndMapStream ) ); + Log::Line( "Marker Set to %d", 31) +CudaErrCheck( cudaMemsetAsync( s3.devParkOverrunCount, 0, sizeof( uint32 ), sortAndMapStream ) ); // Set initial event LP stream event as set. - CudaErrCheck( cudaEventRecord( cx.computeEventA, lpStream ) ); + Log::Line( "Marker Set to %d", 32) +CudaErrCheck( cudaEventRecord( cx.computeEventA, lpStream ) ); cx.parkFence->Reset( 0 ); s3.parkBucket = 0; @@ -145,23 +149,27 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx ) #if _DEBUG { size_t sortRequiredSize = 0; - CudaErrCheck( cub::DeviceRadixSort::SortPairs( nullptr, sortRequiredSize, nullptr, nullptr, nullptr, nullptr, bucketEntryCount, 0, 64 ) ); + Log::Line( "Marker Set to %d", 33) +CudaErrCheck( cub::DeviceRadixSort::SortPairs( nullptr, sortRequiredSize, nullptr, nullptr, nullptr, nullptr, bucketEntryCount, 0, 64 ) ); ASSERT( s3.sizeTmpSort >= sortRequiredSize ); } #endif // Wait for the previous bucket's LP work to finish, so we can re-use the device buffer - CudaErrCheck( cudaStreamWaitEvent( sortAndMapStream, cx.computeEventA ) ); + Log::Line( "Marker Set to %d", 34) +CudaErrCheck( cudaStreamWaitEvent( sortAndMapStream, cx.computeEventA ) ); // #TODO: We can use 63-7 (log2(128 buckets)), which might be faster // #NOTE: I did change it and the sort failed. Investigate. 
- CudaErrCheck( cub::DeviceRadixSort::SortPairs( + Log::Line( "Marker Set to %d", 35) +CudaErrCheck( cub::DeviceRadixSort::SortPairs( s3.devSortTmpData, s3.sizeTmpSort, unsortedLinePoints, sortedLinePoints, unsortedIndices, sortedIndices, bucketEntryCount, 0, 64, sortAndMapStream ) ); - CudaErrCheck( cudaEventRecord( cx.computeEventB, sortAndMapStream ) ); + Log::Line( "Marker Set to %d", 36) +CudaErrCheck( cudaEventRecord( cx.computeEventB, sortAndMapStream ) ); s3.lpIn .ReleaseDeviceBuffer( sortAndMapStream ); unsortedLinePoints = nullptr; s3.indexIn.ReleaseDeviceBuffer( sortAndMapStream ); unsortedIndices = nullptr; @@ -190,12 +198,14 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx ) ASSERT( parkCount <= P3_PRUNED_MAX_PARKS_PER_BUCKET ); // Wait for sort to finish - CudaErrCheck( cudaStreamWaitEvent( lpStream, cx.computeEventB ) ); + Log::Line( "Marker Set to %d", 37) +CudaErrCheck( cudaStreamWaitEvent( lpStream, cx.computeEventB ) ); // Deltafy line points DeltafyLinePoints( cx, entryCount, parkLinePoints, s3.devDeltaLinePoints, lpStream ); - CudaErrCheck( cudaEventRecord( cx.computeEventC, lpStream ) ); // Signal download stream can download remaining line points for last park + Log::Line( "Marker Set to %d", 38) +CudaErrCheck( cudaEventRecord( cx.computeEventC, lpStream ) ); // Signal download stream can download remaining line points for last park // Compress line point parks byte* devParks = (byte*)s3.parksOut.LockDeviceBuffer( lpStream ); @@ -214,20 +224,24 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx ) if( !isLastBucket ) { // Not the last bucket, so retain entries for the next GPU compression bucket - CudaErrCheck( cudaMemcpyAsync( sortedLinePoints - retainedLPCount, copySource, copySize, cudaMemcpyDeviceToDevice, lpStream ) ); + Log::Line( "Marker Set to %d", 39) +CudaErrCheck( cudaMemcpyAsync( sortedLinePoints - retainedLPCount, copySource, copySize, cudaMemcpyDeviceToDevice, lpStream ) ); } else { // No more buckets so we have to compress this last park on the CPU - CudaErrCheck( cudaStreamWaitEvent( downloadStream, cx.computeEventC ) ); + Log::Line( "Marker Set to %d", 40) +CudaErrCheck( cudaStreamWaitEvent( downloadStream, cx.computeEventC ) ); hostRetainedEntries = cx.useParkContext ? 
cx.parkContext->hostRetainedLinePoints : (uint64*)( hostParksWriter + hostParkSize * parkCount ); - CudaErrCheck( cudaMemcpyAsync( hostRetainedEntries, copySource, copySize, cudaMemcpyDeviceToHost, downloadStream ) ); + Log::Line( "Marker Set to %d", 41) +CudaErrCheck( cudaMemcpyAsync( hostRetainedEntries, copySource, copySize, cudaMemcpyDeviceToHost, downloadStream ) ); } } - CudaErrCheck( cudaEventRecord( cx.computeEventA, lpStream ) ); // Signal sortedLinePoints buffer ready for use again + Log::Line( "Marker Set to %d", 42) +CudaErrCheck( cudaEventRecord( cx.computeEventA, lpStream ) ); // Signal sortedLinePoints buffer ready for use again // Download parks @@ -270,17 +284,20 @@ void CudaK32PlotPhase3Step3( CudaK32PlotContext& cx ) } // Copy park overrun count - CudaErrCheck( cudaMemcpyAsync( s3.hostParkOverrunCount, s3.devParkOverrunCount, sizeof( uint32 ), cudaMemcpyDeviceToHost, downloadStream ) ); + Log::Line( "Marker Set to %d", 43) +CudaErrCheck( cudaMemcpyAsync( s3.hostParkOverrunCount, s3.devParkOverrunCount, sizeof( uint32 ), cudaMemcpyDeviceToHost, downloadStream ) ); // Wait for parks to complete downloading s3.parksOut.WaitForCompletion(); s3.parksOut.Reset(); // Copy map slice counts (for the next step 2) - CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, + Log::Line( "Marker Set to %d", 44) +CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cudaMemcpyDeviceToHost, downloadStream ) ); - CudaErrCheck( cudaStreamSynchronize( downloadStream ) ); + Log::Line( "Marker Set to %d", 45) +CudaErrCheck( cudaStreamSynchronize( downloadStream ) ); memcpy( &s3.prunedBucketSlices[0][0], cx.hostBucketSlices, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT ); FatalIf( *s3.hostParkOverrunCount > 0, "Park buffer overrun." 
);
diff --git a/cuda/CudaPlotUtil.cu b/cuda/CudaPlotUtil.cu
index 4f7f18b3..228be9f6 100644
--- a/cuda/CudaPlotUtil.cu
+++ b/cuda/CudaPlotUtil.cu
@@ -21,7 +21,8 @@ void CudaK32PlotGenSortKey( const uint32 entryCount, uint32* devKey, cudaStream_
     GenSortKey<<>>( entryCount, devKey );
+    Log::Line( "Marker Set to %d", 46 );
     if( synchronize )
         CudaErrCheck( cudaStreamSynchronize( stream ) );
 }
@@ -48,7 +49,8 @@ void CudaK32PlotSortByKey( const uint32 entryCount, const uint32* devKey, const
     SortByKey<<>>( entryCount, devKey, devInput, devOutput );
+    Log::Line( "Marker Set to %d", 47 );
     if( synchronize )
         CudaErrCheck( cudaStreamSynchronize( stream ) );
 }
 //-----------------------------------------------------------
diff --git a/cuda/CudaPlotter.cu b/cuda/CudaPlotter.cu
index 80ba8b0e..64e9e94d 100644
--- a/cuda/CudaPlotter.cu
+++ b/cuda/CudaPlotter.cu
@@ -201,10 +201,14 @@ void InitContext( CudaK32PlotConfig& cfg, CudaK32PlotContext*& outContext )
     CudaInit( cx );
-    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStream , cudaStreamNonBlocking ) );
-    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStreamB, cudaStreamNonBlocking ) );
-    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStreamC, cudaStreamNonBlocking ) );
-    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStreamD, cudaStreamNonBlocking ) );
+    Log::Line( "Marker Set to %d", 48 );
+    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStream , cudaStreamNonBlocking ) );
+    Log::Line( "Marker Set to %d", 49 );
+    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStreamB, cudaStreamNonBlocking ) );
+    Log::Line( "Marker Set to %d", 50 );
+    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStreamC, cudaStreamNonBlocking ) );
+    Log::Line( "Marker Set to %d", 51 );
+    CudaErrCheck( cudaStreamCreateWithFlags( &cx.computeStreamD, cudaStreamNonBlocking ) );
     cudaEventCreateWithFlags( &cx.computeEventA, cudaEventDisableTiming );
     cudaEventCreateWithFlags( &cx.computeEventB, cudaEventDisableTiming );
@@ -318,7 +322,8 @@ void CudaInit( CudaK32PlotContext& cx )
     cx.cudaDevice = (int32)cx.cfg.deviceIndex;
     cudaDeviceProp* cudaDevProps = new cudaDeviceProp{};
-    CudaErrCheck( cudaGetDeviceProperties( cudaDevProps, cx.cudaDevice ) );
+    Log::Line( "Marker Set to %d", 52 );
+    CudaErrCheck( cudaGetDeviceProperties( cudaDevProps, cx.cudaDevice ) );
     cx.cudaDevProps = cudaDevProps;
     Log::Line( "Selected cuda device %u : %s", cx.cudaDevice, cudaDevProps->name );
@@ -364,7 +369,8 @@ void CudaK32Plotter::Run( const PlotRequest& req )
     const auto& cfg = _cfg;
     // Only start profiling from here (don't profile allocations)
-    CudaErrCheck( cudaProfilerStart() );
+    Log::Line( "Marker Set to %d", 53 );
+    CudaErrCheck( cudaProfilerStart() );
     ASSERT( cx.plotWriter == nullptr );
     cx.plotWriter = new PlotWriter( !cfg.gCfg->disableOutputDirectIO );
@@ -522,7 +528,8 @@ void FpTable( CudaK32PlotContext& cx )
     }
     // Clear slice counts
-    CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) );
+    Log::Line( "Marker Set to %d", 54 );
+    CudaErrCheck( cudaMemsetAsync( cx.devSliceCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) );
     // Load initial buckets
     UploadBucketForTable( cx, 0 );
@@ -533,7 +540,8 @@ void FpTable( CudaK32PlotContext& cx )
         FpTableBucket( cx, bucket );
     }
-    CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) );
+    Log::Line( "Marker Set to %d", 55 );
+    CudaErrCheck(
cudaStreamSynchronize( cx.computeStream ) ); // Copy bucket slices to host cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, diff --git a/cuda/GpuDownloadStream.cu b/cuda/GpuDownloadStream.cu index 3d06973c..d903adc2 100644 --- a/cuda/GpuDownloadStream.cu +++ b/cuda/GpuDownloadStream.cu @@ -11,7 +11,8 @@ void* GpuDownloadBuffer::GetDeviceBuffer() { const uint32 index = self->outgoingSequence % self->bufferCount; - CudaErrCheck( cudaEventSynchronize( self->events[index] ) ); + Log::Line( "Marker Set to %d", 56) +CudaErrCheck( cudaEventSynchronize( self->events[index] ) ); return self->deviceBuffer[index]; } @@ -25,7 +26,8 @@ void* GpuDownloadBuffer::LockDeviceBuffer( cudaStream_t stream ) self->lockSequence++; // Wait for the device buffer to be free to be used by kernels - CudaErrCheck( cudaStreamWaitEvent( stream, self->events[index] ) ); + Log::Line( "Marker Set to %d", 57) +CudaErrCheck( cudaStreamWaitEvent( stream, self->events[index] ) ); return self->deviceBuffer[index]; } @@ -64,22 +66,21 @@ void GpuDownloadBuffer::DownloadAndCopy( void* hostBuffer, void* finalBuffer, co // const void* devBuffer = self->deviceBuffer[index]; // // Signal from the work stream when it has finished doing kernel work with the device buffer - // CudaErrCheck( cudaEventRecord( self->readyEvents[index], workStream ) ); // // Ensure the work stream has completed writing data to the device buffer // cudaStream_t stream = self->queue->_stream; - // CudaErrCheck( cudaStreamWaitEvent( stream, self->readyEvents[index] ) ); // // Copy - // CudaErrCheck( cudaMemcpyAsync( hostBuffer, devBuffer, size, cudaMemcpyDeviceToHost, stream ) ); - + // // Signal that the device buffer is free to be re-used - // CudaErrCheck( cudaEventRecord( self->events[index], stream ) ); + // Log::Line( "Marker Set to %d", 58) +CudaErrCheck( cudaEventRecord( self->events[index], stream ) ); // // Launch copy command - // CudaErrCheck( cudaLaunchHostFunc( stream, []( void* userData ){ + // Log::Line( "Marker Set to %d", 59) +CudaErrCheck( cudaLaunchHostFunc( stream, []( void* userData ){ // const CopyInfo& c = *reinterpret_cast( userData ); // IGpuBuffer* self = c.self; @@ -134,11 +135,13 @@ void GpuDownloadBuffer::PerformDownload2D( void* hostBuffer, size_t width, size_ // Signal from the work stream when it has finished doing kernel work with the device buffer - CudaErrCheck( cudaEventRecord( self->workEvent[index], workStream ) ); + Log::Line( "Marker Set to %d", 60) +CudaErrCheck( cudaEventRecord( self->workEvent[index], workStream ) ); // From the download stream, wait for the work stream to finish cudaStream_t downloadStream = self->queue->_stream; - CudaErrCheck( cudaStreamWaitEvent( downloadStream, self->workEvent[index] ) ); + Log::Line( "Marker Set to %d", 61) +CudaErrCheck( cudaStreamWaitEvent( downloadStream, self->workEvent[index] ) ); if( self->diskBuffer ) diff --git a/cuda/GpuQueue.cu b/cuda/GpuQueue.cu index 399a0fbf..110929a0 100644 --- a/cuda/GpuQueue.cu +++ b/cuda/GpuQueue.cu @@ -9,9 +9,12 @@ GpuQueue::GpuQueue( Kind kind ) : _kind( kind ) , _bufferReadySignal( BBCU_BUCKET_COUNT ) { - CudaErrCheck( cudaStreamCreateWithFlags( &_stream , cudaStreamNonBlocking ) ); - CudaErrCheck( cudaStreamCreateWithFlags( &_preloadStream , cudaStreamNonBlocking ) ); - CudaErrCheck( cudaStreamCreateWithFlags( &_callbackStream, cudaStreamNonBlocking ) ); + Log::Line( "Marker Set to %d", 62) +CudaErrCheck( cudaStreamCreateWithFlags( &_stream , cudaStreamNonBlocking ) ); + 
Log::Line( "Marker Set to %d", 63) +CudaErrCheck( cudaStreamCreateWithFlags( &_preloadStream , cudaStreamNonBlocking ) ); + Log::Line( "Marker Set to %d", 64) +CudaErrCheck( cudaStreamCreateWithFlags( &_callbackStream, cudaStreamNonBlocking ) ); _queueThread.Run( QueueThreadEntryPoint, this ); } @@ -168,14 +171,19 @@ struct IGpuBuffer* GpuQueue::CreateGpuBuffer( const GpuStreamDescriptor& desc, b for( int32 i = 0; i < desc.bufferCount; i++ ) { - CudaErrCheck( cudaEventCreateWithFlags( &buf->events[i] , cudaEventDisableTiming ) ); - CudaErrCheck( cudaEventCreateWithFlags( &buf->completedEvents[i], cudaEventDisableTiming ) ); - CudaErrCheck( cudaEventCreateWithFlags( &buf->readyEvents[i] , cudaEventDisableTiming ) ); - // CudaErrCheck( cudaEventCreateWithFlags( &buf->preloadEvents[i] , cudaEventDisableTiming ) ); - CudaErrCheck( cudaEventCreateWithFlags( &buf->pinnedEvent[i] , cudaEventDisableTiming ) ); - - CudaErrCheck( cudaEventCreateWithFlags( &buf->callbackLockEvent , cudaEventDisableTiming ) ); - CudaErrCheck( cudaEventCreateWithFlags( &buf->callbackCompletedEvent, cudaEventDisableTiming ) ); + Log::Line( "Marker Set to %d", 65) +CudaErrCheck( cudaEventCreateWithFlags( &buf->events[i] , cudaEventDisableTiming ) ); + Log::Line( "Marker Set to %d", 66) +CudaErrCheck( cudaEventCreateWithFlags( &buf->completedEvents[i], cudaEventDisableTiming ) ); + Log::Line( "Marker Set to %d", 67) +CudaErrCheck( cudaEventCreateWithFlags( &buf->readyEvents[i] , cudaEventDisableTiming ) ); + Log::Line( "Marker Set to %d", 68) +CudaErrCheck( cudaEventCreateWithFlags( &buf->pinnedEvent[i] , cudaEventDisableTiming ) ); + + Log::Line( "Marker Set to %d", 69) +CudaErrCheck( cudaEventCreateWithFlags( &buf->callbackLockEvent , cudaEventDisableTiming ) ); + Log::Line( "Marker Set to %d", 70) +CudaErrCheck( cudaEventCreateWithFlags( &buf->callbackCompletedEvent, cudaEventDisableTiming ) ); buf->deviceBuffer[i] = devBuffers[i]; buf->pinnedBuffer[i] = pinnedBuffers[i]; @@ -241,14 +249,17 @@ void GpuQueue::DispatchHostFunc( GpuCallbackDispath func, cudaStream_t stream, c // #MAYBE: Perhaps support having multiple callback streams, and multiple copy streams. // Signal from the work stream into the callback stream that we are ready for callback - CudaErrCheck( cudaEventRecord( lockEvent, stream ) ); + Log::Line( "Marker Set to %d", 71) +CudaErrCheck( cudaEventRecord( lockEvent, stream ) ); // Wait on the callback stream until it's ready to dsitpatch - CudaErrCheck( cudaStreamWaitEvent( _callbackStream, lockEvent ) ); + Log::Line( "Marker Set to %d", 72) +CudaErrCheck( cudaStreamWaitEvent( _callbackStream, lockEvent ) ); // #MAYBE: Use a bump allocator perhaps later to avoid locking here by new/delete if needed for performance. 
auto* fnCpy = new std::function( std::move( func ) ); - CudaErrCheck( cudaLaunchHostFunc( _callbackStream, []( void* userData ){ + Log::Line( "Marker Set to %d", 73) +CudaErrCheck( cudaLaunchHostFunc( _callbackStream, []( void* userData ){ auto& fn = *reinterpret_cast*>( userData ); fn(); @@ -257,10 +268,12 @@ void GpuQueue::DispatchHostFunc( GpuCallbackDispath func, cudaStream_t stream, c }, fnCpy ) ); // Signal from the callback stream that the callback finished - CudaErrCheck( cudaEventRecord( completedEvent, _callbackStream ) ); + Log::Line( "Marker Set to %d", 74) +CudaErrCheck( cudaEventRecord( completedEvent, _callbackStream ) ); // Wait on work stream for the callback to complete - CudaErrCheck( cudaStreamWaitEvent( stream, completedEvent ) ); + Log::Line( "Marker Set to %d", 75) +CudaErrCheck( cudaStreamWaitEvent( stream, completedEvent ) ); } size_t GpuQueue::CalculateSliceSizeFromDescriptor( const GpuStreamDescriptor& desc ) diff --git a/cuda/GpuStreams.cu b/cuda/GpuStreams.cu index 63700c9c..e075e306 100644 --- a/cuda/GpuStreams.cu +++ b/cuda/GpuStreams.cu @@ -60,26 +60,32 @@ void GpuUploadBuffer::Upload( const void* hostBuffer, size_t size, cudaStream_t { // Copy from unpinned to pinned first // #TODO: This should be done in a different backgrund host-to-host copy stream - CudaErrCheck( cudaStreamWaitEvent( uploadStream, self->pinnedEvent[index] ) ); - CudaErrCheck( cudaMemcpyAsync( self->pinnedBuffer[index], hostBuffer, size, cudaMemcpyHostToHost, uploadStream ) ); + Log::Line( "Marker Set to %d", 76) +CudaErrCheck( cudaStreamWaitEvent( uploadStream, self->pinnedEvent[index] ) ); + Log::Line( "Marker Set to %d", 77) +CudaErrCheck( cudaMemcpyAsync( self->pinnedBuffer[index], hostBuffer, size, cudaMemcpyHostToHost, uploadStream ) ); hostBuffer = self->pinnedBuffer[index]; } // Ensure the device buffer is ready for use - CudaErrCheck( cudaStreamWaitEvent( uploadStream, self->deviceEvents[index] ) ); + Log::Line( "Marker Set to %d", 78) +CudaErrCheck( cudaStreamWaitEvent( uploadStream, self->deviceEvents[index] ) ); // Upload to the device buffer - CudaErrCheck( cudaMemcpyAsync( self->deviceBuffer[index], hostBuffer, size, cudaMemcpyHostToDevice, uploadStream ) ); + Log::Line( "Marker Set to %d", 79) +CudaErrCheck( cudaMemcpyAsync( self->deviceBuffer[index], hostBuffer, size, cudaMemcpyHostToDevice, uploadStream ) ); if( !isDirect ) { // Signal that the pinned buffer is ready for re-use - CudaErrCheck( cudaEventRecord( self->pinnedEvent[index], uploadStream ) ); + Log::Line( "Marker Set to %d", 80) +CudaErrCheck( cudaEventRecord( self->pinnedEvent[index], uploadStream ) ); } // Signal work stream that the device buffer is ready to be used - CudaErrCheck( cudaEventRecord( self->readyEvents[index], uploadStream ) ); + Log::Line( "Marker Set to %d", 81) +CudaErrCheck( cudaEventRecord( self->readyEvents[index], uploadStream ) ); } void GpuUploadBuffer::UploadAndPreLoad( void* hostBuffer, const size_t size, const void* copyBufferSrc, const size_t copySize ) @@ -99,7 +105,6 @@ void GpuUploadBuffer::UploadAndPreLoad( void* hostBuffer, const size_t size, con // cpy.copy.size = copySize; // // Launch copy command - // CudaErrCheck( cudaLaunchHostFunc( self->queue->GetStream(), []( void* userData ){ // const CopyInfo& c = *reinterpret_cast( userData ); // IGpuBuffer* self = c.self; @@ -165,7 +170,8 @@ void GpuUploadBuffer::UploadArray( const void* hostBuffer, uint32 length, uint32 const auto copyMode = isDirect ? 
cudaMemcpyHostToDevice : cudaMemcpyHostToHost; // Wait on device or pinned buffer to be ready (depending if a direct copy or not) - CudaErrCheck( cudaStreamWaitEvent( uploadStream, waitEvent ) ); + Log::Line( "Marker Set to %d", 82) +CudaErrCheck( cudaStreamWaitEvent( uploadStream, waitEvent ) ); const byte* src = (byte*)hostBuffer; byte* dst = (byte*)( isDirect ? self->deviceBuffer[index] : self->pinnedBuffer[index] ); @@ -175,7 +181,8 @@ void GpuUploadBuffer::UploadArray( const void* hostBuffer, uint32 length, uint32 { const size_t size = *sizes * (size_t)elementSize; - CudaErrCheck( cudaMemcpyAsync( dst, src, size, copyMode, uploadStream ) ); + Log::Line( "Marker Set to %d", 83) +CudaErrCheck( cudaMemcpyAsync( dst, src, size, copyMode, uploadStream ) ); dst += size; src += srcStride; @@ -200,15 +207,19 @@ void GpuUploadBuffer::UploadArray( const void* hostBuffer, uint32 length, uint32 } // #TODO: This should be done in a copy stream to perform the copies in the background - CudaErrCheck( cudaStreamWaitEvent( uploadStream, self->deviceEvents[index] ) ); - CudaErrCheck( cudaMemcpyAsync( self->deviceBuffer[index], hostBuffer, totalBufferSize, cudaMemcpyHostToDevice, uploadStream ) ); + Log::Line( "Marker Set to %d", 84) +CudaErrCheck( cudaStreamWaitEvent( uploadStream, self->deviceEvents[index] ) ); + Log::Line( "Marker Set to %d", 84) +CudaErrCheck( cudaMemcpyAsync( self->deviceBuffer[index], hostBuffer, totalBufferSize, cudaMemcpyHostToDevice, uploadStream ) ); if( !self->diskBuffer ) - CudaErrCheck( cudaEventRecord( self->pinnedEvent[index], uploadStream ) ); + Log::Line( "Marker Set to %d", 85) +CudaErrCheck( cudaEventRecord( self->pinnedEvent[index], uploadStream ) ); } // Signal work stream that the device buffer is ready to be used - CudaErrCheck( cudaEventRecord( self->readyEvents[index], uploadStream ) ); + Log::Line( "Marker Set to %d", 86) +CudaErrCheck( cudaEventRecord( self->readyEvents[index], uploadStream ) ); } void GpuUploadBuffer::UploadArrayForIndex( const uint32 index, const void* hostBuffer, uint32 length, @@ -219,7 +230,8 @@ void GpuUploadBuffer::UploadArrayForIndex( const uint32 index, const void* hostB auto stream = self->queue->GetStream(); // Ensure the device buffer is ready for use - CudaErrCheck( cudaStreamWaitEvent( stream, self->events[index] ) ); + Log::Line( "Marker Set to %d", 87) +CudaErrCheck( cudaStreamWaitEvent( stream, self->events[index] ) ); // Perform uploads //size_t deviceCopySize = 0; @@ -230,7 +242,8 @@ void GpuUploadBuffer::UploadArrayForIndex( const uint32 index, const void* hostB { const size_t size = *counts * (size_t)elementSize; //memcpy( dst, src, size ); - CudaErrCheck( cudaMemcpyAsync( dst, src, size, cudaMemcpyHostToDevice, stream ) ); + Log::Line( "Marker Set to %d", 88) +CudaErrCheck( cudaMemcpyAsync( dst, src, size, cudaMemcpyHostToDevice, stream ) ); //deviceCopySize += size; @@ -240,10 +253,12 @@ void GpuUploadBuffer::UploadArrayForIndex( const uint32 index, const void* hostB } // Copy to device buffer - //CudaErrCheck( cudaMemcpyAsync( self->deviceBuffer[index], cpy.dstBuffer, deviceCopySize, cudaMemcpyHostToDevice, _stream ) ); + //Log::Line( "Marker Set to %d", 89) +CudaErrCheck( cudaMemcpyAsync( self->deviceBuffer[index], cpy.dstBuffer, deviceCopySize, cudaMemcpyHostToDevice, _stream ) ); // Signal work stream that the device buffer is ready to be used - CudaErrCheck( cudaEventRecord( self->readyEvents[index], stream ) ); + Log::Line( "Marker Set to %d", 90) +CudaErrCheck( cudaEventRecord( self->readyEvents[index], stream ) 
); } void GpuUploadBuffer::Upload( const void* hostBuffer, const size_t size ) @@ -270,7 +285,8 @@ void* GpuUploadBuffer::GetUploadedDeviceBuffer( cudaStream_t workStream ) const uint32 index = self->completedSequence % self->bufferCount; self->completedSequence++; - CudaErrCheck( cudaStreamWaitEvent( workStream, self->readyEvents[index] ) ); + Log::Line( "Marker Set to %d", 91) +CudaErrCheck( cudaStreamWaitEvent( workStream, self->readyEvents[index] ) ); return self->deviceBuffer[index]; } @@ -284,7 +300,8 @@ void GpuUploadBuffer::ReleaseDeviceBuffer( cudaStream_t workStream ) const uint32 index = self->lockSequence % self->bufferCount; self->lockSequence++; - CudaErrCheck( cudaEventRecord( self->deviceEvents[index], workStream ) ); + Log::Line( "Marker Set to %d", 92) +CudaErrCheck( cudaEventRecord( self->deviceEvents[index], workStream ) ); } void GpuUploadBuffer::WaitForPreloadsToComplete() @@ -342,7 +359,8 @@ DiskBufferBase* GpuUploadBuffer::GetDiskBuffer() const void GpuUploadBuffer::CallHostFunctionOnStream( cudaStream_t stream, std::function func ) { auto* fnCpy = new std::function( std::move( func ) ); - CudaErrCheck( cudaLaunchHostFunc( stream, []( void* userData ) { + Log::Line( "Marker Set to %d", 93) +CudaErrCheck( cudaLaunchHostFunc( stream, []( void* userData ) { auto& fn = *reinterpret_cast*>( userData ); fn(); diff --git a/cuda/chacha8.cu b/cuda/chacha8.cu index 7fb7c5d0..32f665f2 100644 --- a/cuda/chacha8.cu +++ b/cuda/chacha8.cu @@ -201,8 +201,10 @@ void GenF1Cuda( CudaK32PlotContext& cx ) chacha8_ctx chacha; chacha8_keysetup( &chacha, key, 256, nullptr ); - CudaErrCheck( cudaMemcpyAsync( devChaChaInput, chacha.input, 64, cudaMemcpyHostToDevice, cx.computeStream ) ); - CudaErrCheck( cudaMemsetAsync( devBucketCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 94) +CudaErrCheck( cudaMemcpyAsync( devChaChaInput, chacha.input, 64, cudaMemcpyHostToDevice, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 95) +CudaErrCheck( cudaMemsetAsync( devBucketCounts, 0, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cx.computeStream ) ); const uint32 outIndex = CudaK32PlotGetOutputIndex( cx ); @@ -218,7 +220,8 @@ void GenF1Cuda( CudaK32PlotContext& cx ) uint32* devMeta = (uint32*)cx.metaOut.LockDeviceBuffer( cx.computeStream ); #if _DEBUG - CudaErrCheck( cudaMemsetAsync( devY, 0, sizeof( uint32 ) * BBCU_BUCKET_ALLOC_ENTRY_COUNT, cx.computeStream ) ); + Log::Line( "Marker Set to %d", 96) +CudaErrCheck( cudaMemsetAsync( devY, 0, sizeof( uint32 ) * BBCU_BUCKET_ALLOC_ENTRY_COUNT, cx.computeStream ) ); #endif // Gen chacha blocks @@ -229,10 +232,12 @@ void GenF1Cuda( CudaK32PlotContext& cx ) } // Copy bucket slices to host - CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, + Log::Line( "Marker Set to %d", 97) +CudaErrCheck( cudaMemcpyAsync( cx.hostBucketSlices, cx.devSliceCounts, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT, cudaMemcpyDeviceToHost, cx.computeStream ) ); - CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); + Log::Line( "Marker Set to %d", 98) +CudaErrCheck( cudaStreamSynchronize( cx.computeStream ) ); memcpy( &cx.bucketSlices[0], cx.hostBucketSlices, sizeof( uint32 ) * BBCU_BUCKET_COUNT * BBCU_BUCKET_COUNT ); @@ -293,7 +298,6 @@ static void DbgValidateBucket( CudaK32PlotContext& cx, const uint32 bucket ) if( _dbgPool == nullptr ) _dbgPool = new ThreadPool( SysHost::GetLogicalCPUCount() ); - 
//CudaErrCheck( cudaStreamSynchronize( cx.downloadStream ) ); Log::Line( "Validating bucket %u", bucket ); AnonMTJob::Run( *_dbgPool, [&cx, bucket]( AnonMTJob* self ) { diff --git a/cuda/harvesting/CudaThresher.cu b/cuda/harvesting/CudaThresher.cu index b023290e..31214524 100644 --- a/cuda/harvesting/CudaThresher.cu +++ b/cuda/harvesting/CudaThresher.cu @@ -102,11 +102,11 @@ public: bool AllocateBuffers( const uint k, uint maxCompressionLevel ) override { // Only support C7 max for now - if( maxCompressionLevel > 7 ) + if( maxCompressionLevel > 16 ) return false; // #NOTE: For now we always preallocate for the maximum compression level - maxCompressionLevel = 7; + maxCompressionLevel = 16; if( _maxCompressionLevel >= maxCompressionLevel ) return true; diff --git a/src/commands/CmdSimulator.cpp b/src/commands/CmdSimulator.cpp index e1c48251..6e4c7b78 100644 --- a/src/commands/CmdSimulator.cpp +++ b/src/commands/CmdSimulator.cpp @@ -250,8 +250,8 @@ size_t CalculatePlotSizeBytes( const uint32 k, const uint32 compressionLevel ) const size_t parkSizes[] = { 0, // Table 1 is dropped - compressionLevel >= 9 ? 0 : info.tableParkSize, - compressionLevel >= 9 ? info.tableParkSize : CalculateParkSize( TableId::Table3 ), + compressionLevel >= 40 ? 0 : info.tableParkSize, + compressionLevel >= 40 ? info.tableParkSize : CalculateParkSize( TableId::Table3 ), CalculateParkSize( TableId::Table4 ), CalculateParkSize( TableId::Table5 ), CalculateParkSize( TableId::Table6 ), diff --git a/src/harvesting/GreenReaper.cpp b/src/harvesting/GreenReaper.cpp index 3aae4cdd..3036eee6 100644 --- a/src/harvesting/GreenReaper.cpp +++ b/src/harvesting/GreenReaper.cpp @@ -346,7 +346,7 @@ GRResult grGetCompressionInfo( GRCompressionInfo* outInfo, const size_t infoStru if( outInfo == nullptr || k != 32 || compressionLevel < 1 - || compressionLevel > 9 ) + || compressionLevel > 40 ) { return GRResult_InvalidArg; } @@ -385,7 +385,7 @@ GRResult grFetchProofForChallenge( GreenReaperContext* cx, GRCompressedProofRequ uint32 xGroups[GR_POST_PROOF_X_COUNT] = {}; // Unpack x groups first - if( req->compressionLevel < 9 ) + if( req->compressionLevel < 40 ) { for( uint32 i = 0, j = 0; i < numGroups; i++, j+=2 ) { @@ -514,7 +514,7 @@ GRResult grGetFetchQualitiesXPair( GreenReaperContext* cx, GRCompressedQualities proofMightBeDropped = (x1x2.x == 0 || x1x2.y == 0) || (x2x3.x == 0 || x2x3.y == 0); - if( req->compressionLevel < 9 ) + if( req->compressionLevel < 40 ) { numXGroups = 2; xGroups[0] = (uint32)x1x2.x; @@ -541,7 +541,7 @@ GRResult grGetFetchQualitiesXPair( GreenReaperContext* cx, GRCompressedQualities } } - if( req->compressionLevel >= 6 && req->compressionLevel < 9 ) + if( req->compressionLevel >= 6 && req->compressionLevel < 40 ) { const BackPtr p = LinePointToSquare( ((uint128)req->xLinePoints[1].hi) << 64 | (uint128)req->xLinePoints[1].lo ); @@ -550,7 +550,7 @@ GRResult grGetFetchQualitiesXPair( GreenReaperContext* cx, GRCompressedQualities proofMightBeDropped = proofMightBeDropped || (x1x2.x == 0 || x1x2.y == 0) || (x2x3.x == 0 || x2x3.y == 0); - if( req->compressionLevel < 9 ) + if( req->compressionLevel < 40 ) { numXGroups = 4; xGroups[4] = (uint32)x1x2.x; @@ -894,7 +894,7 @@ void BacktraceProof( GreenReaperContext& cx, const TableId tableStart, uint64 pr //----------------------------------------------------------- GRResult RequestSetup( GreenReaperContext* cx, const uint32 k, const uint32 compressionLevel ) { - if( compressionLevel < 1 || compressionLevel > 9 ) + if( compressionLevel < 1 || compressionLevel > 40 ) 
return GRResult_Failed;
     // Make sure we have our CUDA decompressor working in case it was deleted after a failure
diff --git a/src/harvesting/GreenReaperInternal.h b/src/harvesting/GreenReaperInternal.h
index 6ae101d0..ae4c1d02 100644
--- a/src/harvesting/GreenReaperInternal.h
+++ b/src/harvesting/GreenReaperInternal.h
@@ -26,6 +26,6 @@ inline uint64 GetEntriesPerBucketForCompressionLevel( const uint32 k, const uint
 inline uint64 GetMaxTablePairsForCompressionLevel( const uint32 k, const uint32 cLevel )
 {
-    const double factor = cLevel >= 9 ? GR_MAX_MATCHES_MULTIPLIER_2T_DROP : GR_MAX_MATCHES_MULTIPLIER;
+    const double factor = cLevel >= 40 ? GR_MAX_MATCHES_MULTIPLIER_2T_DROP : GR_MAX_MATCHES_MULTIPLIER;
     return (uint64)( GetEntriesPerBucketForCompressionLevel( k, cLevel ) * factor ) * (uint64)GR_MAX_BUCKETS;
 }
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index c510ed5c..1bda9d84 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -273,7 +273,7 @@ void ParseCommandLine( GlobalPlotConfig& cfg, IPlotter*& outPlotter, int argc, c
     }
     else if( cli.ArgConsume( "ramplot" ) )
     {
-        FatalIf( cfg.compressionLevel > 7, "ramplot currently does not support compression levels greater than 7" );
+        FatalIf( cfg.compressionLevel > 16, "ramplot currently does not support compression levels greater than 16" );
         plotter = new MemPlotter();
         break;
@@ -390,7 +390,7 @@ void ParseCommandLine( GlobalPlotConfig& cfg, IPlotter*& outPlotter, int argc, c
     // FatalIf( cfg.compressionLevel > 7, "Invalid compression level. Please specify a compression level between 0 and 7 (inclusive)." );
-    FatalIf( cfg.compressionLevel > 9, "Invalid compression level. Please specify a compression level between 0 and 9 (inclusive)." );
+    FatalIf( cfg.compressionLevel > 40, "Invalid compression level. Please specify a compression level between 0 and 40 (inclusive)." );
     // If making compressed plots, get thr compression CTable, etc.
     if( cfg.compressionLevel > 0 )
     {
@@ -402,7 +402,7 @@ void ParseCommandLine( GlobalPlotConfig& cfg, IPlotter*& outPlotter, int argc, c
         cfg.ctable = CreateCompressionCTable( cfg.compressionLevel, &cfg.cTableSize );
         cfg.compressionInfo = GetCompressionInfoForLevel( cfg.compressionLevel );
         cfg.compressedEntryBits = cfg.compressionInfo.entrySizeBits;
-        cfg.numDroppedTables = cfg.compressionLevel < 9 ? 1 : 2;
+        cfg.numDroppedTables = cfg.compressionLevel < 40 ?
1 : 2; cfg.ctable = CreateCompressionCTable( cfg.compressionLevel ); cfg.compressionInfo = GetCompressionInfoForLevel( cfg.compressionLevel ); diff --git a/src/plotting/Compression.cpp b/src/plotting/Compression.cpp index bde4313b..50f84446 100644 --- a/src/plotting/Compression.cpp +++ b/src/plotting/Compression.cpp @@ -83,7 +83,14 @@ CompressionInfo GetCompressionInfoForLevel( const uint32 compressionLevel ) case 7: GetCompressionInfoForLevel<7>( info ); break; case 8: GetCompressionInfoForLevel<8>( info ); break; case 9: GetCompressionInfoForLevel<9>( info ); break; - + case 10: GetCompressionInfoForLevel<10>( info ); break; + case 11: GetCompressionInfoForLevel<11>( info ); break; + case 12: GetCompressionInfoForLevel<12>( info ); break; + case 13: GetCompressionInfoForLevel<13>( info ); break; + case 14: GetCompressionInfoForLevel<14>( info ); break; + case 15: GetCompressionInfoForLevel<15>( info ); break; + case 16: GetCompressionInfoForLevel<16>( info ); break; + default: Fatal( "Invalid compression level %u.", compressionLevel ); break; @@ -96,6 +103,7 @@ void* CreateCompressionTable( const uint32 compressionLevel, size_t* outTableSiz { switch ( compressionLevel ) { + case 1: return CreateCompressionCTable<1>( outTableSize, compress ); case 2: return CreateCompressionCTable<2>( outTableSize, compress ); case 3: return CreateCompressionCTable<3>( outTableSize, compress ); @@ -105,6 +113,13 @@ void* CreateCompressionTable( const uint32 compressionLevel, size_t* outTableSiz case 7: return CreateCompressionCTable<7>( outTableSize, compress ); case 8: return CreateCompressionCTable<8>( outTableSize, compress ); case 9: return CreateCompressionCTable<9>( outTableSize, compress ); + case 10: return CreateCompressionCTable<10>( outTableSize, compress ); + case 11: return CreateCompressionCTable<11>( outTableSize, compress ); + case 12: return CreateCompressionCTable<12>( outTableSize, compress ); + case 13: return CreateCompressionCTable<13>( outTableSize, compress ); + case 14: return CreateCompressionCTable<14>( outTableSize, compress ); + case 15: return CreateCompressionCTable<15>( outTableSize, compress ); + case 16: return CreateCompressionCTable<16>( outTableSize, compress ); default: break; @@ -146,14 +161,22 @@ uint32 GetCompressedLPBitCount( const uint32 compressionLevel ) size_t GetLargestCompressedParkSize() { return std::max( { - GetCompressionInfoForLevel( 1 ).tableParkSize, - GetCompressionInfoForLevel( 2 ).tableParkSize, - GetCompressionInfoForLevel( 3 ).tableParkSize, - GetCompressionInfoForLevel( 4 ).tableParkSize, - GetCompressionInfoForLevel( 5 ).tableParkSize, - GetCompressionInfoForLevel( 6 ).tableParkSize, - GetCompressionInfoForLevel( 7 ).tableParkSize, - GetCompressionInfoForLevel( 8 ).tableParkSize, - GetCompressionInfoForLevel( 9 ).tableParkSize } + GetCompressionInfoForLevel( 1 ).tableParkSize, + GetCompressionInfoForLevel( 2 ).tableParkSize, + GetCompressionInfoForLevel( 3 ).tableParkSize, + GetCompressionInfoForLevel( 4 ).tableParkSize, + GetCompressionInfoForLevel( 5 ).tableParkSize, + GetCompressionInfoForLevel( 6 ).tableParkSize, + GetCompressionInfoForLevel( 7 ).tableParkSize, + GetCompressionInfoForLevel( 8 ).tableParkSize, + GetCompressionInfoForLevel( 9 ).tableParkSize, + GetCompressionInfoForLevel( 10 ).tableParkSize, + GetCompressionInfoForLevel( 11 ).tableParkSize, + GetCompressionInfoForLevel( 12 ).tableParkSize, + GetCompressionInfoForLevel( 13 ).tableParkSize, + GetCompressionInfoForLevel( 14 ).tableParkSize, + GetCompressionInfoForLevel( 15 
).tableParkSize, + GetCompressionInfoForLevel( 16 ).tableParkSize + } ); } \ No newline at end of file diff --git a/src/plotting/Compression.h b/src/plotting/Compression.h index dbb01228..7a926a70 100644 --- a/src/plotting/Compression.h +++ b/src/plotting/Compression.h @@ -106,7 +106,65 @@ struct CompressionLevelInfo<9> static constexpr double ANS_R_VALUE = 4.54; }; +template<> +struct CompressionLevelInfo<10> +{ + static constexpr uint32_t ENTRY_SIZE = 7; + static constexpr uint32_t STUB_BIT_SIZE = 26; + static constexpr size_t TABLE_PARK_SIZE = 7896; + static constexpr double ANS_R_VALUE = 4.54; +}; +template<> +struct CompressionLevelInfo<11> +{ + static constexpr uint32_t ENTRY_SIZE = 6; + static constexpr uint32_t STUB_BIT_SIZE = 22; + static constexpr size_t TABLE_PARK_SIZE = 6930; + static constexpr double ANS_R_VALUE = 4.54; +}; +template<> +struct CompressionLevelInfo<12> +{ + static constexpr uint32_t ENTRY_SIZE = 5; + static constexpr uint32_t STUB_BIT_SIZE = 18; + static constexpr size_t TABLE_PARK_SIZE = 5953; + static constexpr double ANS_R_VALUE = 4.54; +}; +template<> +struct CompressionLevelInfo<13> +{ + static constexpr uint32_t ENTRY_SIZE = 4; + static constexpr uint32_t STUB_BIT_SIZE = 14; + static constexpr size_t TABLE_PARK_SIZE = 4956; + static constexpr double ANS_R_VALUE = 4.54; +}; + +template<> +struct CompressionLevelInfo<14> +{ + static constexpr uint32_t ENTRY_SIZE = 3; + static constexpr uint32_t STUB_BIT_SIZE = 10; + static constexpr size_t TABLE_PARK_SIZE = 3944; + static constexpr double ANS_R_VALUE = 4.54; +}; + +template<> +struct CompressionLevelInfo<15> +{ + static constexpr uint32_t ENTRY_SIZE = 2; + static constexpr uint32_t STUB_BIT_SIZE = 6; + static constexpr size_t TABLE_PARK_SIZE = 2930; + static constexpr double ANS_R_VALUE = 4.54; +}; +template<> +struct CompressionLevelInfo<16> +{ + static constexpr uint32_t ENTRY_SIZE = 1; + static constexpr uint32_t STUB_BIT_SIZE = 2; + static constexpr size_t TABLE_PARK_SIZE = 1913; + static constexpr double ANS_R_VALUE = 4.54; +}; \ No newline at end of file diff --git a/src/plotting/PlotWriter.cpp b/src/plotting/PlotWriter.cpp index 3d3440c7..307b1a23 100644 --- a/src/plotting/PlotWriter.cpp +++ b/src/plotting/PlotWriter.cpp @@ -95,7 +95,7 @@ bool PlotWriter::BeginPlotInternal( PlotVersion version, if( !plotMemo || !plotMemoSize ) return false; - ASSERT( compressionLevel >= 0 && compressionLevel <= 9 ); + ASSERT( compressionLevel >= 0 && compressionLevel <= 40 ); if( compressionLevel > 0 && version < PlotVersion::v2_0 ) return false; diff --git a/src/tools/PlotComparer.cpp b/src/tools/PlotComparer.cpp index f275d980..18728835 100644 --- a/src/tools/PlotComparer.cpp +++ b/src/tools/PlotComparer.cpp @@ -376,14 +376,14 @@ void TestTable( FilePlot& ref, FilePlot& tgt, TableId table ) if( table == TableId::Table1 && tgt.CompressionLevel() > 0 ) return; - if( table == TableId::Table2 && tgt.CompressionLevel() >= 9 ) + if( table == TableId::Table2 && tgt.CompressionLevel() >= 40 ) return; // if( table == TableId::Table7 ) return; Log::Line( "Reading Table %u...", table+1 ); - const uint32 numTablesDropped = tgt.CompressionLevel() >= 9 ? 2 : + const uint32 numTablesDropped = tgt.CompressionLevel() >= 40 ? 2 : tgt.CompressionLevel() >= 1 ? 1 : 0; const size_t parkSize = table < TableId::Table7 ? 
diff --git a/src/tools/PlotReader.cpp b/src/tools/PlotReader.cpp index 0059f186..6aad1ec2 100644 --- a/src/tools/PlotReader.cpp +++ b/src/tools/PlotReader.cpp @@ -439,7 +439,7 @@ TableId PlotReader::GetLowestStoredTable() const const uint32 compressionLevel = _plot.CompressionLevel(); const uint32 numDroppedTables = compressionLevel == 0 ? 0 : - compressionLevel >= 9 ? 2 : 1; + compressionLevel >= 40 ? 2 : 1; return TableId::Table1 + numDroppedTables; } @@ -732,7 +732,7 @@ ProofFetchResult PlotReader::DecompressProof( const uint64 compressedProof[BB_PL req.plotId = _plot.PlotId(); req.compressionLevel = compressionLevel; - const uint32 compressedProofCount = compressionLevel < 9 ? PROOF_X_COUNT / 2 : PROOF_X_COUNT / 4; + const uint32 compressedProofCount = compressionLevel < 40 ? PROOF_X_COUNT / 2 : PROOF_X_COUNT / 4; for( uint32 i = 0; i < compressedProofCount; i++ ) req.compressedProof[i] = compressedProof[i]; diff --git a/src/tools/PlotValidator.cpp b/src/tools/PlotValidator.cpp index 16a7f14a..c1dad4fa 100644 --- a/src/tools/PlotValidator.cpp +++ b/src/tools/PlotValidator.cpp @@ -1137,7 +1137,7 @@ bool DecompressProof( const byte plotId[BB_PLOT_ID_LEN], const uint32 compressio req.compressionLevel = compressionLevel; req.plotId = plotId; - const uint32 compressedProofCount = compressionLevel < 9 ? PROOF_X_COUNT / 2 : PROOF_X_COUNT / 4; + const uint32 compressedProofCount = compressionLevel < 40 ? PROOF_X_COUNT / 2 : PROOF_X_COUNT / 4; for( uint32 i = 0; i < compressedProofCount; i++ ) req.compressedProof[i] = compressedProof[i]; @@ -1170,7 +1170,7 @@ bool FetchProof( PlotReader& plot, uint64 t6LPIndex, uint64 fullProofXs[PROOF_X_ const bool isCompressed = plot.PlotFile().CompressionLevel() > 0; const TableId endTable = !isCompressed ? TableId::Table1 : - plot.PlotFile().CompressionLevel() < 9 ? + plot.PlotFile().CompressionLevel() < 40 ? TableId::Table2 : TableId::Table3; for( TableId table = TableId::Table6; table >= endTable; table-- )
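The level-gating rule this patch applies consistently across GreenReaper.cpp, main.cpp, PlotReader.cpp, and PlotValidator.cpp is: compression levels 1-39 drop only Table 1 and store compressed proofs as x-pairs (half of PROOF_X_COUNT), while levels 40 and above drop Tables 1 and 2 and store x-quads (a quarter of PROOF_X_COUNT). The sketch below is not part of the patch; it only restates that rule as standalone helpers, and it assumes PROOF_X_COUNT is 64 as for standard k=32 proofs.

    #include <cstdint>
    #include <cstdio>

    // Assumed constant for illustration: a full k=32 proof carries 64 x-values.
    constexpr uint32_t PROOF_X_COUNT = 64;

    // Levels 1-39 drop one table (Table 1); level 40+ drops two (Tables 1 and 2).
    static uint32_t NumDroppedTables( const uint32_t compressionLevel )
    {
        if( compressionLevel == 0 ) return 0;
        return compressionLevel >= 40 ? 2 : 1;
    }

    // With one dropped table the plot stores x-pairs; with two it stores x-quads.
    static uint32_t CompressedProofCount( const uint32_t compressionLevel )
    {
        return compressionLevel < 40 ? PROOF_X_COUNT / 2 : PROOF_X_COUNT / 4;
    }

    int main()
    {
        for( const uint32_t level : { 0u, 7u, 16u, 40u } )
            printf( "C%u -> dropped tables: %u, compressed proof xs: %u\n",
                    level, NumDroppedTables( level ), CompressedProofCount( level ) );
    }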